avf_ahistogram.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
/*
 * Copyright (c) 2015 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  20. #include "libavutil/avassert.h"
  21. #include "libavutil/opt.h"
  22. #include "libavutil/parseutils.h"
  23. #include "avfilter.h"
  24. #include "formats.h"
  25. #include "audio.h"
  26. #include "video.h"
  27. #include "internal.h"
  28. enum DisplayScale { LINEAR, SQRT, CBRT, LOG, RLOG, NB_SCALES };
  29. enum AmplitudeScale { ALINEAR, ALOG, NB_ASCALES };
  30. enum SlideMode { REPLACE, SCROLL, NB_SLIDES };
  31. enum DisplayMode { SINGLE, SEPARATE, NB_DMODES };
  32. enum HistogramMode { ACCUMULATE, CURRENT, NB_HMODES };
  33. typedef struct AudioHistogramContext {
  34. const AVClass *class;
  35. AVFrame *out;
  36. int w, h;
  37. AVRational frame_rate;
  38. uint64_t *achistogram;
  39. uint64_t *shistogram;
  40. int ascale;
  41. int scale;
  42. float phisto;
  43. int histogram_h;
  44. int apos;
  45. int ypos;
  46. int slide;
  47. int dmode;
  48. int dchannels;
  49. int count;
  50. int frame_count;
  51. float *combine_buffer;
  52. AVFrame *in[101];
  53. int first;
  54. } AudioHistogramContext;
  55. #define OFFSET(x) offsetof(AudioHistogramContext, x)
  56. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  57. static const AVOption ahistogram_options[] = {
  58. { "dmode", "set method to display channels", OFFSET(dmode), AV_OPT_TYPE_INT, {.i64=SINGLE}, 0, NB_DMODES-1, FLAGS, "dmode" },
  59. { "single", "all channels use single histogram", 0, AV_OPT_TYPE_CONST, {.i64=SINGLE}, 0, 0, FLAGS, "dmode" },
  60. { "separate", "each channel have own histogram", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "dmode" },
  61. { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  62. { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  63. { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  64. { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  65. { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
  66. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, FLAGS, "scale" },
  67. { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT}, 0, 0, FLAGS, "scale" },
  68. { "cbrt", "cubic root", 0, AV_OPT_TYPE_CONST, {.i64=CBRT}, 0, 0, FLAGS, "scale" },
  69. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
  70. { "rlog", "reverse logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=RLOG}, 0, 0, FLAGS, "scale" },
  71. { "ascale", "set amplitude scale", OFFSET(ascale), AV_OPT_TYPE_INT, {.i64=ALOG}, LINEAR, NB_ASCALES-1, FLAGS, "ascale" },
  72. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=ALOG}, 0, 0, FLAGS, "ascale" },
  73. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=ALINEAR}, 0, 0, FLAGS, "ascale" },
  74. { "acount", "how much frames to accumulate", OFFSET(count), AV_OPT_TYPE_INT, {.i64=1}, -1, 100, FLAGS },
  75. { "rheight", "set histogram ratio of window height", OFFSET(phisto), AV_OPT_TYPE_FLOAT, {.dbl=0.10}, 0, 1, FLAGS },
  76. { "slide", "set sonogram sliding", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=REPLACE}, 0, NB_SLIDES-1, FLAGS, "slide" },
  77. { "replace", "replace old rows with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, FLAGS, "slide" },
  78. { "scroll", "scroll from top to bottom", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, FLAGS, "slide" },
  79. { NULL }
  80. };
  81. AVFILTER_DEFINE_CLASS(ahistogram);
  82. static int query_formats(AVFilterContext *ctx)
  83. {
  84. AVFilterFormats *formats = NULL;
  85. AVFilterChannelLayouts *layouts = NULL;
  86. AVFilterLink *inlink = ctx->inputs[0];
  87. AVFilterLink *outlink = ctx->outputs[0];
  88. static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
  89. static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
  90. int ret = AVERROR(EINVAL);
  91. formats = ff_make_format_list(sample_fmts);
  92. if ((ret = ff_formats_ref (formats, &inlink->out_formats )) < 0 ||
  93. (layouts = ff_all_channel_counts()) == NULL ||
  94. (ret = ff_channel_layouts_ref (layouts, &inlink->out_channel_layouts)) < 0)
  95. return ret;
  96. formats = ff_all_samplerates();
  97. if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
  98. return ret;
  99. formats = ff_make_format_list(pix_fmts);
  100. if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
  101. return ret;
  102. return 0;
  103. }
  104. static int config_input(AVFilterLink *inlink)
  105. {
  106. AVFilterContext *ctx = inlink->dst;
  107. AudioHistogramContext *s = ctx->priv;
  108. int nb_samples;
  109. nb_samples = FFMAX(1024, ((double)inlink->sample_rate / av_q2d(s->frame_rate)) + 0.5);
  110. inlink->partial_buf_size =
  111. inlink->min_samples =
  112. inlink->max_samples = nb_samples;
  113. s->dchannels = s->dmode == SINGLE ? 1 : inlink->channels;
  114. s->shistogram = av_calloc(s->w, s->dchannels * sizeof(*s->shistogram));
  115. if (!s->shistogram)
  116. return AVERROR(ENOMEM);
  117. s->achistogram = av_calloc(s->w, s->dchannels * sizeof(*s->achistogram));
  118. if (!s->achistogram)
  119. return AVERROR(ENOMEM);
  120. return 0;
  121. }
  122. static int config_output(AVFilterLink *outlink)
  123. {
  124. AudioHistogramContext *s = outlink->src->priv;
  125. outlink->w = s->w;
  126. outlink->h = s->h;
  127. outlink->sample_aspect_ratio = (AVRational){1,1};
  128. outlink->frame_rate = s->frame_rate;
  129. s->histogram_h = s->h * s->phisto;
  130. s->ypos = s->h * s->phisto;
  131. if (s->dmode == SEPARATE) {
  132. s->combine_buffer = av_malloc_array(outlink->w * 3, sizeof(*s->combine_buffer));
  133. if (!s->combine_buffer)
  134. return AVERROR(ENOMEM);
  135. }
  136. return 0;
  137. }
  138. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  139. {
  140. AVFilterContext *ctx = inlink->dst;
  141. AVFilterLink *outlink = ctx->outputs[0];
  142. AudioHistogramContext *s = ctx->priv;
  143. const int H = s->histogram_h;
  144. const int w = s->w;
  145. int c, y, n, p, bin;
  146. uint64_t acmax = 1;
  147. if (!s->out || s->out->width != outlink->w ||
  148. s->out->height != outlink->h) {
  149. av_frame_free(&s->out);
  150. s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  151. if (!s->out) {
  152. av_frame_free(&in);
  153. return AVERROR(ENOMEM);
  154. }
  155. for (n = H; n < s->h; n++) {
  156. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  157. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  158. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  159. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  160. }
  161. }
  162. if (s->dmode == SEPARATE) {
  163. for (y = 0; y < w; y++) {
  164. s->combine_buffer[3 * y ] = 0;
  165. s->combine_buffer[3 * y + 1] = 127.5;
  166. s->combine_buffer[3 * y + 2] = 127.5;
  167. }
  168. }
  169. for (n = 0; n < H; n++) {
  170. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  171. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  172. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  173. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  174. }
  175. s->out->pts = in->pts;
  176. s->first = s->frame_count;
  177. switch (s->ascale) {
  178. case ALINEAR:
  179. for (c = 0; c < inlink->channels; c++) {
  180. const float *src = (const float *)in->extended_data[c];
  181. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  182. for (n = 0; n < in->nb_samples; n++) {
  183. bin = lrint(av_clipf(fabsf(src[n]), 0, 1) * (w - 1));
  184. achistogram[bin]++;
  185. }
  186. if (s->in[s->first] && s->count >= 0) {
  187. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  188. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  189. for (n = 0; n < in->nb_samples; n++) {
  190. bin = lrint(av_clipf(fabsf(src2[n]), 0, 1) * (w - 1));
  191. shistogram[bin]++;
  192. }
  193. }
  194. }
  195. break;
  196. case ALOG:
  197. for (c = 0; c < inlink->channels; c++) {
  198. const float *src = (const float *)in->extended_data[c];
  199. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  200. for (n = 0; n < in->nb_samples; n++) {
  201. bin = lrint(av_clipf(1 + log10(fabsf(src[n])) / 6, 0, 1) * (w - 1));
  202. achistogram[bin]++;
  203. }
  204. if (s->in[s->first] && s->count >= 0) {
  205. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  206. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  207. for (n = 0; n < in->nb_samples; n++) {
  208. bin = lrint(av_clipf(1 + log10(fabsf(src2[n])) / 6, 0, 1) * (w - 1));
  209. shistogram[bin]++;
  210. }
  211. }
  212. }
  213. break;
  214. }
  215. av_frame_free(&s->in[s->frame_count]);
  216. s->in[s->frame_count] = in;
  217. s->frame_count++;
  218. if (s->frame_count > s->count)
  219. s->frame_count = 0;
  220. for (n = 0; n < w * s->dchannels; n++) {
  221. acmax = FFMAX(s->achistogram[n] - s->shistogram[n], acmax);
  222. }
  223. for (c = 0; c < s->dchannels; c++) {
  224. uint64_t *shistogram = &s->shistogram[c * w];
  225. uint64_t *achistogram = &s->achistogram[c * w];
  226. float yf, uf, vf;
  227. if (s->dmode == SEPARATE) {
  228. yf = 256.0f / s->dchannels;
  229. uf = yf * M_PI;
  230. vf = yf * M_PI;
  231. uf *= 0.5 * sin((2 * M_PI * c) / s->dchannels);
  232. vf *= 0.5 * cos((2 * M_PI * c) / s->dchannels);
  233. }
  234. for (n = 0; n < w; n++) {
  235. double a, aa;
  236. int h;
  237. a = achistogram[n] - shistogram[n];
  238. switch (s->scale) {
  239. case LINEAR:
  240. aa = a / (double)acmax;
  241. break;
  242. case SQRT:
  243. aa = sqrt(a) / sqrt(acmax);
  244. break;
  245. case CBRT:
  246. aa = cbrt(a) / cbrt(acmax);
  247. break;
  248. case LOG:
  249. aa = log2(a + 1) / log2(acmax + 1);
  250. break;
  251. case RLOG:
  252. aa = 1. - log2(a + 1) / log2(acmax + 1);
  253. if (aa == 1.)
  254. aa = 0;
  255. break;
  256. default:
  257. av_assert0(0);
  258. }
  259. h = aa * (H - 1);
  260. if (s->dmode == SINGLE) {
  261. for (y = H - h; y < H; y++) {
  262. s->out->data[0][y * s->out->linesize[0] + n] = 255;
  263. s->out->data[3][y * s->out->linesize[0] + n] = 255;
  264. }
  265. if (s->h - H > 0) {
  266. h = aa * 255;
  267. s->out->data[0][s->ypos * s->out->linesize[0] + n] = h;
  268. s->out->data[1][s->ypos * s->out->linesize[1] + n] = 127;
  269. s->out->data[2][s->ypos * s->out->linesize[2] + n] = 127;
  270. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  271. }
  272. } else if (s->dmode == SEPARATE) {
  273. float *out = &s->combine_buffer[3 * n];
  274. int old;
  275. old = s->out->data[0][(H - h) * s->out->linesize[0] + n];
  276. for (y = H - h; y < H; y++) {
  277. if (s->out->data[0][y * s->out->linesize[0] + n] != old)
  278. break;
  279. old = s->out->data[0][y * s->out->linesize[0] + n];
  280. s->out->data[0][y * s->out->linesize[0] + n] = yf;
  281. s->out->data[1][y * s->out->linesize[1] + n] = 128+uf;
  282. s->out->data[2][y * s->out->linesize[2] + n] = 128+vf;
  283. s->out->data[3][y * s->out->linesize[3] + n] = 255;
  284. }
  285. out[0] += aa * yf;
  286. out[1] += aa * uf;
  287. out[2] += aa * vf;
  288. }
  289. }
  290. }
  291. if (s->h - H > 0) {
  292. if (s->dmode == SEPARATE) {
  293. for (n = 0; n < w; n++) {
  294. float *cb = &s->combine_buffer[3 * n];
  295. s->out->data[0][s->ypos * s->out->linesize[0] + n] = cb[0];
  296. s->out->data[1][s->ypos * s->out->linesize[1] + n] = cb[1];
  297. s->out->data[2][s->ypos * s->out->linesize[2] + n] = cb[2];
  298. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  299. }
  300. }
  301. if (s->slide == SCROLL) {
  302. for (p = 0; p < 4; p++) {
  303. for (y = s->h; y >= H + 1; y--) {
  304. memmove(s->out->data[p] + (y ) * s->out->linesize[p],
  305. s->out->data[p] + (y-1) * s->out->linesize[p], w);
  306. }
  307. }
  308. }
  309. s->ypos++;
  310. if (s->slide == SCROLL || s->ypos >= s->h)
  311. s->ypos = H;
  312. }
  313. return ff_filter_frame(outlink, av_frame_clone(s->out));
  314. }
  315. static av_cold void uninit(AVFilterContext *ctx)
  316. {
  317. AudioHistogramContext *s = ctx->priv;
  318. int i;
  319. av_frame_free(&s->out);
  320. av_freep(&s->shistogram);
  321. av_freep(&s->achistogram);
  322. av_freep(&s->combine_buffer);
  323. for (i = 0; i < 101; i++)
  324. av_frame_free(&s->in[i]);
  325. }
  326. static const AVFilterPad audiovectorscope_inputs[] = {
  327. {
  328. .name = "default",
  329. .type = AVMEDIA_TYPE_AUDIO,
  330. .config_props = config_input,
  331. .filter_frame = filter_frame,
  332. },
  333. { NULL }
  334. };
  335. static const AVFilterPad audiovectorscope_outputs[] = {
  336. {
  337. .name = "default",
  338. .type = AVMEDIA_TYPE_VIDEO,
  339. .config_props = config_output,
  340. },
  341. { NULL }
  342. };
  343. AVFilter ff_avf_ahistogram = {
  344. .name = "ahistogram",
  345. .description = NULL_IF_CONFIG_SMALL("Convert input audio to histogram video output."),
  346. .uninit = uninit,
  347. .query_formats = query_formats,
  348. .priv_size = sizeof(AudioHistogramContext),
  349. .inputs = audiovectorscope_inputs,
  350. .outputs = audiovectorscope_outputs,
  351. .priv_class = &ahistogram_class,
  352. };