vf_nlmeans.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /*
  2. * Copyright (c) 2016 Clément Bœsch <u pkh me>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @todo
  22. * - better automatic defaults? see "Parameters" @ http://www.ipol.im/pub/art/2011/bcm_nlm/
  23. * - temporal support (probably doesn't need any displacement according to
  24. * "Denoising image sequences does not require motion estimation")
  25. * - Bayer pixel format support for at least raw photos? (DNG support would be
  26. * handy here)
  27. * - FATE test (probably needs visual threshold test mechanism due to the use
  28. * of floats)
  29. */
  30. #include "libavutil/avassert.h"
  31. #include "libavutil/opt.h"
  32. #include "libavutil/pixdesc.h"
  33. #include "avfilter.h"
  34. #include "formats.h"
  35. #include "internal.h"
  36. #include "vf_nlmeans.h"
  37. #include "vf_nlmeans_init.h"
  38. #include "video.h"
  39. typedef struct NLMeansContext {
  40. const AVClass *class;
  41. int nb_planes;
  42. int chroma_w, chroma_h;
  43. double pdiff_scale; // invert of the filtering parameter (sigma*10) squared
  44. double sigma; // denoising strength
  45. int patch_size, patch_hsize; // patch size and half size
  46. int patch_size_uv, patch_hsize_uv; // patch size and half size for chroma planes
  47. int research_size, research_hsize; // research size and half size
  48. int research_size_uv, research_hsize_uv; // research size and half size for chroma planes
  49. uint32_t *ii_orig; // integral image
  50. uint32_t *ii; // integral image starting after the 0-line and 0-column
  51. int ii_w, ii_h; // width and height of the integral image
  52. ptrdiff_t ii_lz_32; // linesize in 32-bit units of the integral image
  53. float *total_weight; // total weight for every pixel
  54. float *sum; // weighted sum for every pixel
  55. int linesize; // sum and total_weight linesize
  56. float *weight_lut; // lookup table mapping (scaled) patch differences to their associated weights
  57. uint32_t max_meaningful_diff; // maximum difference considered (if the patch difference is too high we ignore the pixel)
  58. NLMeansDSPContext dsp;
  59. } NLMeansContext;
  60. #define OFFSET(x) offsetof(NLMeansContext, x)
  61. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  62. static const AVOption nlmeans_options[] = {
  63. { "s", "denoising strength", OFFSET(sigma), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 30.0, FLAGS },
  64. { "p", "patch size", OFFSET(patch_size), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
  65. { "pc", "patch size for chroma planes", OFFSET(patch_size_uv), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
  66. { "r", "research window", OFFSET(research_size), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
  67. { "rc", "research window for chroma planes", OFFSET(research_size_uv), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
  68. { NULL }
  69. };
  70. AVFILTER_DEFINE_CLASS(nlmeans);
  71. static const enum AVPixelFormat pix_fmts[] = {
  72. AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  73. AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  74. AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  75. AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
  76. AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
  77. AV_PIX_FMT_YUVJ411P,
  78. AV_PIX_FMT_GRAY8, AV_PIX_FMT_GBRP,
  79. AV_PIX_FMT_NONE
  80. };
  81. /**
  82. * Compute squared difference of an unsafe area (the zone nor s1 nor s2 could
  83. * be readable).
  84. *
  85. * On the other hand, the line above dst and the column to its left are always
  86. * readable.
  87. *
  88. * There is little point in having this function SIMDified as it is likely too
  89. * complex and only handle small portions of the image.
  90. *
  91. * @param dst integral image
  92. * @param dst_linesize_32 integral image linesize (in 32-bit integers unit)
  93. * @param startx integral starting x position
  94. * @param starty integral starting y position
  95. * @param src source plane buffer
  96. * @param linesize source plane linesize
  97. * @param offx source offsetting in x
  98. * @param offy source offsetting in y
  99. * @paran r absolute maximum source offsetting
  100. * @param sw source width
  101. * @param sh source height
  102. * @param w width to compute
  103. * @param h height to compute
  104. */
  105. static inline void compute_unsafe_ssd_integral_image(uint32_t *dst, ptrdiff_t dst_linesize_32,
  106. int startx, int starty,
  107. const uint8_t *src, ptrdiff_t linesize,
  108. int offx, int offy, int r, int sw, int sh,
  109. int w, int h)
  110. {
  111. for (int y = starty; y < starty + h; y++) {
  112. uint32_t acc = dst[y*dst_linesize_32 + startx - 1] - dst[(y-1)*dst_linesize_32 + startx - 1];
  113. const int s1y = av_clip(y - r, 0, sh - 1);
  114. const int s2y = av_clip(y - (r + offy), 0, sh - 1);
  115. for (int x = startx; x < startx + w; x++) {
  116. const int s1x = av_clip(x - r, 0, sw - 1);
  117. const int s2x = av_clip(x - (r + offx), 0, sw - 1);
  118. const uint8_t v1 = src[s1y*linesize + s1x];
  119. const uint8_t v2 = src[s2y*linesize + s2x];
  120. const int d = v1 - v2;
  121. acc += d * d;
  122. dst[y*dst_linesize_32 + x] = dst[(y-1)*dst_linesize_32 + x] + acc;
  123. }
  124. }
  125. }
  126. /*
  127. * Compute the sum of squared difference integral image
  128. * http://www.ipol.im/pub/art/2014/57/
  129. * Integral Images for Block Matching - Gabriele Facciolo, Nicolas Limare, Enric Meinhardt-Llopis
  130. *
  131. * @param ii integral image of dimension (w+e*2) x (h+e*2) with
  132. * an additional zeroed top line and column already
  133. * "applied" to the pointer value
  134. * @param ii_linesize_32 integral image linesize (in 32-bit integers unit)
  135. * @param src source plane buffer
  136. * @param linesize source plane linesize
  137. * @param offx x-offsetting ranging in [-e;e]
  138. * @param offy y-offsetting ranging in [-e;e]
  139. * @param w source width
  140. * @param h source height
  141. * @param e research padding edge
  142. */
  143. static void compute_ssd_integral_image(const NLMeansDSPContext *dsp,
  144. uint32_t *ii, ptrdiff_t ii_linesize_32,
  145. const uint8_t *src, ptrdiff_t linesize, int offx, int offy,
  146. int e, int w, int h)
  147. {
  148. // ii has a surrounding padding of thickness "e"
  149. const int ii_w = w + e*2;
  150. const int ii_h = h + e*2;
  151. // we center the first source
  152. const int s1x = e;
  153. const int s1y = e;
  154. // 2nd source is the frame with offsetting
  155. const int s2x = e + offx;
  156. const int s2y = e + offy;
  157. // get the dimension of the overlapping rectangle where it is always safe
  158. // to compare the 2 sources pixels
  159. const int startx_safe = FFMAX(s1x, s2x);
  160. const int starty_safe = FFMAX(s1y, s2y);
  161. const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
  162. const int endy_safe = FFMIN(s1y + h, s2y + h);
  163. // deduce the safe area width and height
  164. const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
  165. const int safe_ph = endy_safe - starty_safe;
  166. // adjusted end x position of the safe area after width of the safe area gets aligned
  167. const int endx_safe = startx_safe + safe_pw;
  168. // top part where only one of s1 and s2 is still readable, or none at all
  169. compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
  170. 0, 0,
  171. src, linesize,
  172. offx, offy, e, w, h,
  173. ii_w, starty_safe);
  174. // fill the left column integral required to compute the central
  175. // overlapping one
  176. compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
  177. 0, starty_safe,
  178. src, linesize,
  179. offx, offy, e, w, h,
  180. startx_safe, safe_ph);
  181. // main and safe part of the integral
  182. av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
  183. av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
  184. av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
  185. av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
  186. if (safe_pw && safe_ph)
  187. dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
  188. src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
  189. src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
  190. safe_pw, safe_ph);
  191. // right part of the integral
  192. compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
  193. endx_safe, starty_safe,
  194. src, linesize,
  195. offx, offy, e, w, h,
  196. ii_w - endx_safe, safe_ph);
  197. // bottom part where only one of s1 and s2 is still readable, or none at all
  198. compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
  199. 0, endy_safe,
  200. src, linesize,
  201. offx, offy, e, w, h,
  202. ii_w, ii_h - endy_safe);
  203. }
  204. static int config_input(AVFilterLink *inlink)
  205. {
  206. AVFilterContext *ctx = inlink->dst;
  207. NLMeansContext *s = ctx->priv;
  208. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
  209. const int e = FFMAX(s->research_hsize, s->research_hsize_uv)
  210. + FFMAX(s->patch_hsize, s->patch_hsize_uv);
  211. s->chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
  212. s->chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
  213. s->nb_planes = av_pix_fmt_count_planes(inlink->format);
  214. /* Allocate the integral image with extra edges of thickness "e"
  215. *
  216. * +_+-------------------------------+
  217. * |0|0000000000000000000000000000000|
  218. * +-x-------------------------------+
  219. * |0|\ ^ |
  220. * |0| ii | e |
  221. * |0| v |
  222. * |0| +-----------------------+ |
  223. * |0| | | |
  224. * |0|<->| | |
  225. * |0| e | | |
  226. * |0| | | |
  227. * |0| +-----------------------+ |
  228. * |0| |
  229. * |0| |
  230. * |0| |
  231. * +-+-------------------------------+
  232. */
  233. s->ii_w = inlink->w + e*2;
  234. s->ii_h = inlink->h + e*2;
  235. // align to 4 the linesize, "+1" is for the space of the left 0-column
  236. s->ii_lz_32 = FFALIGN(s->ii_w + 1, 4);
  237. // "+1" is for the space of the top 0-line
  238. s->ii_orig = av_calloc(s->ii_h + 1, s->ii_lz_32 * sizeof(*s->ii_orig));
  239. if (!s->ii_orig)
  240. return AVERROR(ENOMEM);
  241. // skip top 0-line and left 0-column
  242. s->ii = s->ii_orig + s->ii_lz_32 + 1;
  243. // allocate weighted average for every pixel
  244. s->linesize = inlink->w + 100;
  245. s->total_weight = av_malloc_array(s->linesize, inlink->h * sizeof(*s->total_weight));
  246. s->sum = av_malloc_array(s->linesize, inlink->h * sizeof(*s->sum));
  247. if (!s->total_weight || !s->sum)
  248. return AVERROR(ENOMEM);
  249. return 0;
  250. }
  251. struct thread_data {
  252. const uint8_t *src;
  253. ptrdiff_t src_linesize;
  254. int startx, starty;
  255. int endx, endy;
  256. const uint32_t *ii_start;
  257. int p;
  258. };
  259. static int nlmeans_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
  260. {
  261. NLMeansContext *s = ctx->priv;
  262. const uint32_t max_meaningful_diff = s->max_meaningful_diff;
  263. const struct thread_data *td = arg;
  264. const ptrdiff_t src_linesize = td->src_linesize;
  265. const int process_h = td->endy - td->starty;
  266. const int slice_start = (process_h * jobnr ) / nb_jobs;
  267. const int slice_end = (process_h * (jobnr+1)) / nb_jobs;
  268. const int starty = td->starty + slice_start;
  269. const int endy = td->starty + slice_end;
  270. const int p = td->p;
  271. const uint32_t *ii = td->ii_start + (starty - p - 1) * s->ii_lz_32 - p - 1;
  272. const int dist_b = 2*p + 1;
  273. const int dist_d = dist_b * s->ii_lz_32;
  274. const int dist_e = dist_d + dist_b;
  275. const float *const weight_lut = s->weight_lut;
  276. NLMeansDSPContext *dsp = &s->dsp;
  277. for (int y = starty; y < endy; y++) {
  278. const uint8_t *const src = td->src + y*src_linesize;
  279. float *total_weight = s->total_weight + y*s->linesize;
  280. float *sum = s->sum + y*s->linesize;
  281. const uint32_t *const iia = ii;
  282. const uint32_t *const iib = ii + dist_b;
  283. const uint32_t *const iid = ii + dist_d;
  284. const uint32_t *const iie = ii + dist_e;
  285. dsp->compute_weights_line(iia, iib, iid, iie, src, total_weight, sum,
  286. weight_lut, max_meaningful_diff,
  287. td->startx, td->endx);
  288. ii += s->ii_lz_32;
  289. }
  290. return 0;
  291. }
  292. static void weight_averages(uint8_t *dst, ptrdiff_t dst_linesize,
  293. const uint8_t *src, ptrdiff_t src_linesize,
  294. float *total_weight, float *sum, ptrdiff_t linesize,
  295. int w, int h)
  296. {
  297. for (int y = 0; y < h; y++) {
  298. for (int x = 0; x < w; x++) {
  299. // Also weight the centered pixel
  300. total_weight[x] += 1.f;
  301. sum[x] += 1.f * src[x];
  302. dst[x] = av_clip_uint8(sum[x] / total_weight[x] + 0.5f);
  303. }
  304. dst += dst_linesize;
  305. src += src_linesize;
  306. total_weight += linesize;
  307. sum += linesize;
  308. }
  309. }
  310. static int nlmeans_plane(AVFilterContext *ctx, int w, int h, int p, int r,
  311. uint8_t *dst, ptrdiff_t dst_linesize,
  312. const uint8_t *src, ptrdiff_t src_linesize)
  313. {
  314. NLMeansContext *s = ctx->priv;
  315. /* patches center points cover the whole research window so the patches
  316. * themselves overflow the research window */
  317. const int e = r + p;
  318. /* focus an integral pointer on the centered image (s1) */
  319. const uint32_t *centered_ii = s->ii + e*s->ii_lz_32 + e;
  320. memset(s->total_weight, 0, s->linesize * h * sizeof(*s->total_weight));
  321. memset(s->sum, 0, s->linesize * h * sizeof(*s->sum));
  322. for (int offy = -r; offy <= r; offy++) {
  323. for (int offx = -r; offx <= r; offx++) {
  324. if (offx || offy) {
  325. struct thread_data td = {
  326. .src = src + offy*src_linesize + offx,
  327. .src_linesize = src_linesize,
  328. .startx = FFMAX(0, -offx),
  329. .starty = FFMAX(0, -offy),
  330. .endx = FFMIN(w, w - offx),
  331. .endy = FFMIN(h, h - offy),
  332. .ii_start = centered_ii + offy*s->ii_lz_32 + offx,
  333. .p = p,
  334. };
  335. compute_ssd_integral_image(&s->dsp, s->ii, s->ii_lz_32,
  336. src, src_linesize,
  337. offx, offy, e, w, h);
  338. ff_filter_execute(ctx, nlmeans_slice, &td, NULL,
  339. FFMIN(td.endy - td.starty, ff_filter_get_nb_threads(ctx)));
  340. }
  341. }
  342. }
  343. weight_averages(dst, dst_linesize, src, src_linesize,
  344. s->total_weight, s->sum, s->linesize, w, h);
  345. return 0;
  346. }
  347. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  348. {
  349. AVFilterContext *ctx = inlink->dst;
  350. NLMeansContext *s = ctx->priv;
  351. AVFilterLink *outlink = ctx->outputs[0];
  352. AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  353. if (!out) {
  354. av_frame_free(&in);
  355. return AVERROR(ENOMEM);
  356. }
  357. av_frame_copy_props(out, in);
  358. for (int i = 0; i < s->nb_planes; i++) {
  359. const int w = i ? s->chroma_w : inlink->w;
  360. const int h = i ? s->chroma_h : inlink->h;
  361. const int p = i ? s->patch_hsize_uv : s->patch_hsize;
  362. const int r = i ? s->research_hsize_uv : s->research_hsize;
  363. nlmeans_plane(ctx, w, h, p, r,
  364. out->data[i], out->linesize[i],
  365. in->data[i], in->linesize[i]);
  366. }
  367. av_frame_free(&in);
  368. return ff_filter_frame(outlink, out);
  369. }
  370. #define CHECK_ODD_FIELD(field, name) do { \
  371. if (!(s->field & 1)) { \
  372. s->field |= 1; \
  373. av_log(ctx, AV_LOG_WARNING, name " size must be odd, " \
  374. "setting it to %d\n", s->field); \
  375. } \
  376. } while (0)
  377. static av_cold int init(AVFilterContext *ctx)
  378. {
  379. NLMeansContext *s = ctx->priv;
  380. const double h = s->sigma * 10.;
  381. s->pdiff_scale = 1. / (h * h);
  382. s->max_meaningful_diff = log(255.) / s->pdiff_scale;
  383. s->weight_lut = av_calloc(s->max_meaningful_diff + 1, sizeof(*s->weight_lut));
  384. if (!s->weight_lut)
  385. return AVERROR(ENOMEM);
  386. for (int i = 0; i < s->max_meaningful_diff; i++)
  387. s->weight_lut[i] = exp(-i * s->pdiff_scale);
  388. CHECK_ODD_FIELD(research_size, "Luma research window");
  389. CHECK_ODD_FIELD(patch_size, "Luma patch");
  390. if (!s->research_size_uv) s->research_size_uv = s->research_size;
  391. if (!s->patch_size_uv) s->patch_size_uv = s->patch_size;
  392. CHECK_ODD_FIELD(research_size_uv, "Chroma research window");
  393. CHECK_ODD_FIELD(patch_size_uv, "Chroma patch");
  394. s->research_hsize = s->research_size / 2;
  395. s->research_hsize_uv = s->research_size_uv / 2;
  396. s->patch_hsize = s->patch_size / 2;
  397. s->patch_hsize_uv = s->patch_size_uv / 2;
  398. av_log(ctx, AV_LOG_DEBUG, "Research window: %dx%d / %dx%d, patch size: %dx%d / %dx%d\n",
  399. s->research_size, s->research_size, s->research_size_uv, s->research_size_uv,
  400. s->patch_size, s->patch_size, s->patch_size_uv, s->patch_size_uv);
  401. ff_nlmeans_init(&s->dsp);
  402. return 0;
  403. }
  404. static av_cold void uninit(AVFilterContext *ctx)
  405. {
  406. NLMeansContext *s = ctx->priv;
  407. av_freep(&s->weight_lut);
  408. av_freep(&s->ii_orig);
  409. av_freep(&s->total_weight);
  410. av_freep(&s->sum);
  411. }
  412. static const AVFilterPad nlmeans_inputs[] = {
  413. {
  414. .name = "default",
  415. .type = AVMEDIA_TYPE_VIDEO,
  416. .config_props = config_input,
  417. .filter_frame = filter_frame,
  418. },
  419. };
  420. static const AVFilterPad nlmeans_outputs[] = {
  421. {
  422. .name = "default",
  423. .type = AVMEDIA_TYPE_VIDEO,
  424. },
  425. };
  426. const AVFilter ff_vf_nlmeans = {
  427. .name = "nlmeans",
  428. .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser."),
  429. .priv_size = sizeof(NLMeansContext),
  430. .init = init,
  431. .uninit = uninit,
  432. FILTER_INPUTS(nlmeans_inputs),
  433. FILTER_OUTPUTS(nlmeans_outputs),
  434. FILTER_PIXFMTS_ARRAY(pix_fmts),
  435. .priv_class = &nlmeans_class,
  436. .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
  437. };