/* avf_showcqt.c */
  1. /*
  2. * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavcodec/avfft.h"
  22. #include "libavutil/avassert.h"
  23. #include "libavutil/channel_layout.h"
  24. #include "libavutil/opt.h"
  25. #include "libavutil/xga_font_data.h"
  26. #include "libavutil/qsort.h"
  27. #include "libavutil/time.h"
  28. #include "libavutil/eval.h"
  29. #include "avfilter.h"
  30. #include "internal.h"
  31. #include <math.h>
  32. #include <stdlib.h>
  33. #if CONFIG_LIBFREETYPE
  34. #include <ft2build.h>
  35. #include FT_FREETYPE_H
  36. #endif
  37. /* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm
  38. * start from E0 to D#10 (10 octaves)
  39. * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins
  40. * match with full HD resolution */
  41. #define VIDEO_WIDTH 1920
  42. #define VIDEO_HEIGHT 1080
  43. #define FONT_HEIGHT 32
  44. #define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
  45. #define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
  46. #define BASE_FREQ 20.051392800492
  47. #define COEFF_CLAMP 1.0e-4
  48. #define TLENGTH_MIN 0.001
  49. #define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
  50. #define VOLUME_MIN 1e-10
  51. #define VOLUME_MAX 100.0
  52. #define FONTCOLOR_DEFAULT "st(0, (midi(f)-59.5)/12);" \
  53. "st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));" \
  54. "r(1-ld(1)) + b(ld(1))"
  55. typedef struct {
  56. FFTSample value;
  57. int index;
  58. } SparseCoeff;
  59. typedef struct {
  60. const AVClass *class;
  61. AVFrame *outpicref;
  62. FFTContext *fft_context;
  63. FFTComplex *fft_data;
  64. FFTComplex *fft_result_left;
  65. FFTComplex *fft_result_right;
  66. uint8_t *spectogram;
  67. SparseCoeff *coeff_sort;
  68. SparseCoeff *coeffs[VIDEO_WIDTH];
  69. uint8_t *font_alpha;
  70. char *fontfile; /* using freetype */
  71. int coeffs_len[VIDEO_WIDTH];
  72. uint8_t fontcolor_value[VIDEO_WIDTH*3]; /* result of fontcolor option */
  73. int64_t frame_count;
  74. int spectogram_count;
  75. int spectogram_index;
  76. int fft_bits;
  77. int req_fullfilled;
  78. int remaining_fill;
  79. char *tlength;
  80. char *volume;
  81. char *fontcolor;
  82. double timeclamp; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
  83. float coeffclamp; /* lower coeffclamp, more precise, higher coeffclamp, faster */
  84. int fullhd; /* if true, output video is at full HD resolution, otherwise it will be halved */
  85. float gamma; /* lower gamma, more contrast, higher gamma, more range */
  86. int fps; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
  87. int count; /* fps * count = transform rate */
  88. } ShowCQTContext;
  89. #define OFFSET(x) offsetof(ShowCQTContext, x)
  90. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  91. static const AVOption showcqt_options[] = {
  92. { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS },
  93. { "tlength", "set transform length", OFFSET(tlength), AV_OPT_TYPE_STRING, { .str = TLENGTH_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
  94. { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS },
  95. { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS },
  96. { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS },
  97. { "fullhd", "set full HD resolution", OFFSET(fullhd), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS },
  98. { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS },
  99. { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS },
  100. { "fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS },
  101. { "fontcolor", "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
  102. { NULL }
  103. };
  104. AVFILTER_DEFINE_CLASS(showcqt);
  105. static av_cold void uninit(AVFilterContext *ctx)
  106. {
  107. int k;
  108. ShowCQTContext *s = ctx->priv;
  109. av_fft_end(s->fft_context);
  110. s->fft_context = NULL;
  111. for (k = 0; k < VIDEO_WIDTH; k++)
  112. av_freep(&s->coeffs[k]);
  113. av_freep(&s->fft_data);
  114. av_freep(&s->fft_result_left);
  115. av_freep(&s->fft_result_right);
  116. av_freep(&s->coeff_sort);
  117. av_freep(&s->spectogram);
  118. av_freep(&s->font_alpha);
  119. av_frame_free(&s->outpicref);
  120. }
  121. static int query_formats(AVFilterContext *ctx)
  122. {
  123. AVFilterFormats *formats = NULL;
  124. AVFilterChannelLayouts *layouts = NULL;
  125. AVFilterLink *inlink = ctx->inputs[0];
  126. AVFilterLink *outlink = ctx->outputs[0];
  127. static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
  128. static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
  129. static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 };
  130. static const int samplerates[] = { 44100, 48000, -1 };
  131. /* set input audio formats */
  132. formats = ff_make_format_list(sample_fmts);
  133. if (!formats)
  134. return AVERROR(ENOMEM);
  135. ff_formats_ref(formats, &inlink->out_formats);
  136. layouts = avfilter_make_format64_list(channel_layouts);
  137. if (!layouts)
  138. return AVERROR(ENOMEM);
  139. ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
  140. formats = ff_make_format_list(samplerates);
  141. if (!formats)
  142. return AVERROR(ENOMEM);
  143. ff_formats_ref(formats, &inlink->out_samplerates);
  144. /* set output video format */
  145. formats = ff_make_format_list(pix_fmts);
  146. if (!formats)
  147. return AVERROR(ENOMEM);
  148. ff_formats_ref(formats, &outlink->in_formats);
  149. return 0;
  150. }
  151. #if CONFIG_LIBFREETYPE
  152. static void load_freetype_font(AVFilterContext *ctx)
  153. {
  154. static const char str[] = "EF G A BC D ";
  155. ShowCQTContext *s = ctx->priv;
  156. FT_Library lib = NULL;
  157. FT_Face face = NULL;
  158. int video_scale = s->fullhd ? 2 : 1;
  159. int video_width = (VIDEO_WIDTH/2) * video_scale;
  160. int font_height = (FONT_HEIGHT/2) * video_scale;
  161. int font_width = 8 * video_scale;
  162. int font_repeat = font_width * 12;
  163. int linear_hori_advance = font_width * 65536;
  164. int non_monospace_warning = 0;
  165. int x;
  166. s->font_alpha = NULL;
  167. if (!s->fontfile)
  168. return;
  169. if (FT_Init_FreeType(&lib))
  170. goto fail;
  171. if (FT_New_Face(lib, s->fontfile, 0, &face))
  172. goto fail;
  173. if (FT_Set_Char_Size(face, 16*64, 0, 0, 0))
  174. goto fail;
  175. if (FT_Load_Char(face, 'A', FT_LOAD_RENDER))
  176. goto fail;
  177. if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0))
  178. goto fail;
  179. s->font_alpha = av_malloc(font_height * video_width);
  180. if (!s->font_alpha)
  181. goto fail;
  182. memset(s->font_alpha, 0, font_height * video_width);
  183. for (x = 0; x < 12; x++) {
  184. int sx, sy, rx, bx, by, dx, dy;
  185. if (str[x] == ' ')
  186. continue;
  187. if (FT_Load_Char(face, str[x], FT_LOAD_RENDER))
  188. goto fail;
  189. if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) {
  190. av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n");
  191. non_monospace_warning = 1;
  192. }
  193. sy = font_height - 4*video_scale - face->glyph->bitmap_top;
  194. for (rx = 0; rx < 10; rx++) {
  195. sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left;
  196. for (by = 0; by < face->glyph->bitmap.rows; by++) {
  197. dy = by + sy;
  198. if (dy < 0)
  199. continue;
  200. if (dy >= font_height)
  201. break;
  202. for (bx = 0; bx < face->glyph->bitmap.width; bx++) {
  203. dx = bx + sx;
  204. if (dx < 0)
  205. continue;
  206. if (dx >= video_width)
  207. break;
  208. s->font_alpha[dy*video_width+dx] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx];
  209. }
  210. }
  211. }
  212. }
  213. FT_Done_Face(face);
  214. FT_Done_FreeType(lib);
  215. return;
  216. fail:
  217. av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n");
  218. FT_Done_Face(face);
  219. FT_Done_FreeType(lib);
  220. av_freep(&s->font_alpha);
  221. return;
  222. }
  223. #endif
  224. static double a_weighting(void *p, double f)
  225. {
  226. double ret = 12200.0*12200.0 * (f*f*f*f);
  227. ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) *
  228. sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9));
  229. return ret;
  230. }
  231. static double b_weighting(void *p, double f)
  232. {
  233. double ret = 12200.0*12200.0 * (f*f*f);
  234. ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5);
  235. return ret;
  236. }
  237. static double c_weighting(void *p, double f)
  238. {
  239. double ret = 12200.0*12200.0 * (f*f);
  240. ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0);
  241. return ret;
  242. }
  243. static double midi(void *p, double f)
  244. {
  245. return log2(f/440.0) * 12.0 + 69.0;
  246. }
  247. static double r_func(void *p, double x)
  248. {
  249. x = av_clipd(x, 0.0, 1.0);
  250. return (int)(x*255.0+0.5) << 16;
  251. }
  252. static double g_func(void *p, double x)
  253. {
  254. x = av_clipd(x, 0.0, 1.0);
  255. return (int)(x*255.0+0.5) << 8;
  256. }
  257. static double b_func(void *p, double x)
  258. {
  259. x = av_clipd(x, 0.0, 1.0);
  260. return (int)(x*255.0+0.5);
  261. }
  262. static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b)
  263. {
  264. if (fabsf(a->value) >= fabsf(b->value))
  265. return 1;
  266. else
  267. return -1;
  268. }
  269. static int config_output(AVFilterLink *outlink)
  270. {
  271. AVFilterContext *ctx = outlink->src;
  272. AVFilterLink *inlink = ctx->inputs[0];
  273. ShowCQTContext *s = ctx->priv;
  274. AVExpr *tlength_expr = NULL, *volume_expr = NULL, *fontcolor_expr = NULL;
  275. uint8_t *fontcolor_value = s->fontcolor_value;
  276. static const char * const expr_vars[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL };
  277. static const char * const expr_func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL };
  278. static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL };
  279. static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL };
  280. static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL };
  281. int fft_len, k, x, y, ret;
  282. int num_coeffs = 0;
  283. int rate = inlink->sample_rate;
  284. double max_len = rate * (double) s->timeclamp;
  285. int64_t start_time, end_time;
  286. int video_scale = s->fullhd ? 2 : 1;
  287. int video_width = (VIDEO_WIDTH/2) * video_scale;
  288. int video_height = (VIDEO_HEIGHT/2) * video_scale;
  289. int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale;
  290. s->fft_bits = ceil(log2(max_len));
  291. fft_len = 1 << s->fft_bits;
  292. if (rate % (s->fps * s->count)) {
  293. av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count);
  294. return AVERROR(EINVAL);
  295. }
  296. s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data));
  297. s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
  298. s->fft_result_left = av_malloc_array(fft_len, sizeof(*s->fft_result_left));
  299. s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right));
  300. s->fft_context = av_fft_init(s->fft_bits, 0);
  301. if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context)
  302. return AVERROR(ENOMEM);
  303. #if CONFIG_LIBFREETYPE
  304. load_freetype_font(ctx);
  305. #else
  306. if (s->fontfile)
  307. av_log(ctx, AV_LOG_WARNING, "Freetype is not available, ignoring fontfile option\n");
  308. s->font_alpha = NULL;
  309. #endif
  310. av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
  311. start_time = av_gettime_relative();
  312. ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx);
  313. if (ret < 0)
  314. goto eval_error;
  315. ret = av_expr_parse(&volume_expr, s->volume, expr_vars, expr_func_names,
  316. expr_funcs, NULL, NULL, 0, ctx);
  317. if (ret < 0)
  318. goto eval_error;
  319. ret = av_expr_parse(&fontcolor_expr, s->fontcolor, expr_vars, expr_fontcolor_func_names,
  320. expr_fontcolor_funcs, NULL, NULL, 0, ctx);
  321. if (ret < 0)
  322. goto eval_error;
  323. for (k = 0; k < VIDEO_WIDTH; k++) {
  324. int hlen = fft_len >> 1;
  325. float total = 0;
  326. float partial = 0;
  327. double freq = BASE_FREQ * exp2(k * (1.0/192.0));
  328. double tlen, tlength, volume;
  329. double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 };
  330. /* a window function from Albert H. Nuttall,
  331. * "Some Windows with Very Good Sidelobe Behavior"
  332. * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
  333. * coefficient normalized to a0 = 1 */
  334. double a0 = 0.355768;
  335. double a1 = 0.487396/a0;
  336. double a2 = 0.144232/a0;
  337. double a3 = 0.012604/a0;
  338. double sv_step, cv_step, sv, cv;
  339. double sw_step, cw_step, sw, cw, w;
  340. tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL);
  341. if (isnan(tlength)) {
  342. av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is nan, setting it to %g\n", freq, s->timeclamp);
  343. tlength = s->timeclamp;
  344. } else if (tlength < TLENGTH_MIN) {
  345. av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, TLENGTH_MIN);
  346. tlength = TLENGTH_MIN;
  347. } else if (tlength > s->timeclamp) {
  348. av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, s->timeclamp);
  349. tlength = s->timeclamp;
  350. }
  351. volume = FFABS(av_expr_eval(volume_expr, expr_vars_val, NULL));
  352. if (isnan(volume)) {
  353. av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq);
  354. volume = VOLUME_MIN;
  355. } else if (volume < VOLUME_MIN) {
  356. volume = VOLUME_MIN;
  357. } else if (volume > VOLUME_MAX) {
  358. av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is %g, setting it to %g\n", freq, volume, VOLUME_MAX);
  359. volume = VOLUME_MAX;
  360. }
  361. if (s->fullhd || !(k & 1)) {
  362. int fontcolor = av_expr_eval(fontcolor_expr, expr_vars_val, NULL);
  363. fontcolor_value[0] = (fontcolor >> 16) & 0xFF;
  364. fontcolor_value[1] = (fontcolor >> 8) & 0xFF;
  365. fontcolor_value[2] = fontcolor & 0xFF;
  366. fontcolor_value += 3;
  367. }
  368. tlen = tlength * rate;
  369. s->fft_data[0].re = 0;
  370. s->fft_data[0].im = 0;
  371. s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * volume * (1.0/fft_len);
  372. s->fft_data[hlen].im = 0;
  373. sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate));
  374. cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate));
  375. /* also optimizing window func */
  376. sw_step = sw = sin(2.0*M_PI*(1.0/tlen));
  377. cw_step = cw = cos(2.0*M_PI*(1.0/tlen));
  378. for (x = 1; x < 0.5 * tlen; x++) {
  379. double cv_tmp, cw_tmp;
  380. double cw2, cw3, sw2;
  381. cw2 = cw * cw - sw * sw;
  382. sw2 = cw * sw + sw * cw;
  383. cw3 = cw * cw2 - sw * sw2;
  384. w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * volume * (1.0/fft_len);
  385. s->fft_data[hlen + x].re = w * cv;
  386. s->fft_data[hlen + x].im = w * sv;
  387. s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
  388. s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;
  389. cv_tmp = cv * cv_step - sv * sv_step;
  390. sv = sv * cv_step + cv * sv_step;
  391. cv = cv_tmp;
  392. cw_tmp = cw * cw_step - sw * sw_step;
  393. sw = sw * cw_step + cw * sw_step;
  394. cw = cw_tmp;
  395. }
  396. for (; x < hlen; x++) {
  397. s->fft_data[hlen + x].re = 0;
  398. s->fft_data[hlen + x].im = 0;
  399. s->fft_data[hlen - x].re = 0;
  400. s->fft_data[hlen - x].im = 0;
  401. }
  402. av_fft_permute(s->fft_context, s->fft_data);
  403. av_fft_calc(s->fft_context, s->fft_data);
  404. for (x = 0; x < fft_len; x++) {
  405. s->coeff_sort[x].index = x;
  406. s->coeff_sort[x].value = s->fft_data[x].re;
  407. }
  408. AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff);
  409. for (x = 0; x < fft_len; x++)
  410. total += fabsf(s->coeff_sort[x].value);
  411. for (x = 0; x < fft_len; x++) {
  412. partial += fabsf(s->coeff_sort[x].value);
  413. if (partial > total * s->coeffclamp * COEFF_CLAMP) {
  414. s->coeffs_len[k] = fft_len - x;
  415. num_coeffs += s->coeffs_len[k];
  416. s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k]));
  417. if (!s->coeffs[k]) {
  418. ret = AVERROR(ENOMEM);
  419. goto eval_error;
  420. }
  421. for (y = 0; y < s->coeffs_len[k]; y++)
  422. s->coeffs[k][y] = s->coeff_sort[x+y];
  423. break;
  424. }
  425. }
  426. }
  427. av_expr_free(fontcolor_expr);
  428. av_expr_free(volume_expr);
  429. av_expr_free(tlength_expr);
  430. end_time = av_gettime_relative();
  431. av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs);
  432. outlink->w = video_width;
  433. outlink->h = video_height;
  434. s->req_fullfilled = 0;
  435. s->spectogram_index = 0;
  436. s->frame_count = 0;
  437. s->spectogram_count = 0;
  438. s->remaining_fill = fft_len >> 1;
  439. memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data));
  440. s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  441. if (!s->outpicref)
  442. return AVERROR(ENOMEM);
  443. s->spectogram = av_calloc(spectogram_height, s->outpicref->linesize[0]);
  444. if (!s->spectogram)
  445. return AVERROR(ENOMEM);
  446. outlink->sample_aspect_ratio = av_make_q(1, 1);
  447. outlink->time_base = av_make_q(1, s->fps);
  448. outlink->frame_rate = av_make_q(s->fps, 1);
  449. return 0;
  450. eval_error:
  451. av_expr_free(fontcolor_expr);
  452. av_expr_free(volume_expr);
  453. av_expr_free(tlength_expr);
  454. return ret;
  455. }
  456. static int plot_cqt(AVFilterLink *inlink)
  457. {
  458. AVFilterContext *ctx = inlink->dst;
  459. ShowCQTContext *s = ctx->priv;
  460. AVFilterLink *outlink = ctx->outputs[0];
  461. int fft_len = 1 << s->fft_bits;
  462. FFTSample result[VIDEO_WIDTH][4];
  463. int x, y, ret = 0;
  464. int linesize = s->outpicref->linesize[0];
  465. int video_scale = s->fullhd ? 2 : 1;
  466. int video_width = (VIDEO_WIDTH/2) * video_scale;
  467. int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale;
  468. int spectogram_start = (SPECTOGRAM_START/2) * video_scale;
  469. int font_height = (FONT_HEIGHT/2) * video_scale;
  470. /* real part contains left samples, imaginary part contains right samples */
  471. memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data));
  472. av_fft_permute(s->fft_context, s->fft_result_left);
  473. av_fft_calc(s->fft_context, s->fft_result_left);
  474. /* separate left and right, (and multiply by 2.0) */
  475. s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im;
  476. s->fft_result_right[0].im = 0;
  477. s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re;
  478. s->fft_result_left[0].im = 0;
  479. for (x = 1; x <= fft_len >> 1; x++) {
  480. FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im;
  481. s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im;
  482. s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re;
  483. s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re;
  484. s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im;
  485. s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re;
  486. s->fft_result_left[x].im = tmpy;
  487. s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re;
  488. s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im;
  489. }
  490. /* calculating cqt */
  491. for (x = 0; x < VIDEO_WIDTH; x++) {
  492. int u;
  493. float g = 1.0f / s->gamma;
  494. FFTComplex l = {0,0};
  495. FFTComplex r = {0,0};
  496. for (u = 0; u < s->coeffs_len[x]; u++) {
  497. FFTSample value = s->coeffs[x][u].value;
  498. int index = s->coeffs[x][u].index;
  499. l.re += value * s->fft_result_left[index].re;
  500. l.im += value * s->fft_result_left[index].im;
  501. r.re += value * s->fft_result_right[index].re;
  502. r.im += value * s->fft_result_right[index].im;
  503. }
  504. /* result is power, not amplitude */
  505. result[x][0] = l.re * l.re + l.im * l.im;
  506. result[x][2] = r.re * r.re + r.im * r.im;
  507. result[x][1] = 0.5f * (result[x][0] + result[x][2]);
  508. result[x][3] = result[x][1];
  509. result[x][0] = 255.0f * powf(FFMIN(1.0f,result[x][0]), g);
  510. result[x][1] = 255.0f * powf(FFMIN(1.0f,result[x][1]), g);
  511. result[x][2] = 255.0f * powf(FFMIN(1.0f,result[x][2]), g);
  512. }
  513. if (!s->fullhd) {
  514. for (x = 0; x < video_width; x++) {
  515. result[x][0] = 0.5f * (result[2*x][0] + result[2*x+1][0]);
  516. result[x][1] = 0.5f * (result[2*x][1] + result[2*x+1][1]);
  517. result[x][2] = 0.5f * (result[2*x][2] + result[2*x+1][2]);
  518. result[x][3] = 0.5f * (result[2*x][3] + result[2*x+1][3]);
  519. }
  520. }
  521. for (x = 0; x < video_width; x++) {
  522. s->spectogram[s->spectogram_index*linesize + 3*x] = result[x][0] + 0.5f;
  523. s->spectogram[s->spectogram_index*linesize + 3*x + 1] = result[x][1] + 0.5f;
  524. s->spectogram[s->spectogram_index*linesize + 3*x + 2] = result[x][2] + 0.5f;
  525. }
  526. /* drawing */
  527. if (!s->spectogram_count) {
  528. uint8_t *data = (uint8_t*) s->outpicref->data[0];
  529. float rcp_result[VIDEO_WIDTH];
  530. int total_length = linesize * spectogram_height;
  531. int back_length = linesize * s->spectogram_index;
  532. for (x = 0; x < video_width; x++)
  533. rcp_result[x] = 1.0f / (result[x][3]+0.0001f);
  534. /* drawing bar */
  535. for (y = 0; y < spectogram_height; y++) {
  536. float height = (spectogram_height - y) * (1.0f/spectogram_height);
  537. uint8_t *lineptr = data + y * linesize;
  538. for (x = 0; x < video_width; x++) {
  539. float mul;
  540. if (result[x][3] <= height) {
  541. *lineptr++ = 0;
  542. *lineptr++ = 0;
  543. *lineptr++ = 0;
  544. } else {
  545. mul = (result[x][3] - height) * rcp_result[x];
  546. *lineptr++ = mul * result[x][0] + 0.5f;
  547. *lineptr++ = mul * result[x][1] + 0.5f;
  548. *lineptr++ = mul * result[x][2] + 0.5f;
  549. }
  550. }
  551. }
  552. /* drawing font */
  553. if (s->font_alpha) {
  554. for (y = 0; y < font_height; y++) {
  555. uint8_t *lineptr = data + (spectogram_height + y) * linesize;
  556. uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize;
  557. uint8_t *fontcolor_value = s->fontcolor_value;
  558. for (x = 0; x < video_width; x++) {
  559. uint8_t alpha = s->font_alpha[y*video_width+x];
  560. lineptr[3*x] = (spectogram_src[3*x] * (255-alpha) + fontcolor_value[0] * alpha + 255) >> 8;
  561. lineptr[3*x+1] = (spectogram_src[3*x+1] * (255-alpha) + fontcolor_value[1] * alpha + 255) >> 8;
  562. lineptr[3*x+2] = (spectogram_src[3*x+2] * (255-alpha) + fontcolor_value[2] * alpha + 255) >> 8;
  563. fontcolor_value += 3;
  564. }
  565. }
  566. } else {
  567. for (y = 0; y < font_height; y++) {
  568. uint8_t *lineptr = data + (spectogram_height + y) * linesize;
  569. memcpy(lineptr, s->spectogram + s->spectogram_index * linesize, video_width*3);
  570. }
  571. for (x = 0; x < video_width; x += video_width/10) {
  572. int u;
  573. static const char str[] = "EF G A BC D ";
  574. uint8_t *startptr = data + spectogram_height * linesize + x * 3;
  575. for (u = 0; str[u]; u++) {
  576. int v;
  577. for (v = 0; v < 16; v++) {
  578. uint8_t *p = startptr + v * linesize * video_scale + 8 * 3 * u * video_scale;
  579. int ux = x + 8 * u * video_scale;
  580. int mask;
  581. for (mask = 0x80; mask; mask >>= 1) {
  582. if (mask & avpriv_vga16_font[str[u] * 16 + v]) {
  583. p[0] = s->fontcolor_value[3*ux];
  584. p[1] = s->fontcolor_value[3*ux+1];
  585. p[2] = s->fontcolor_value[3*ux+2];
  586. if (video_scale == 2) {
  587. p[linesize] = p[0];
  588. p[linesize+1] = p[1];
  589. p[linesize+2] = p[2];
  590. p[3] = p[linesize+3] = s->fontcolor_value[3*ux+3];
  591. p[4] = p[linesize+4] = s->fontcolor_value[3*ux+4];
  592. p[5] = p[linesize+5] = s->fontcolor_value[3*ux+5];
  593. }
  594. }
  595. p += 3 * video_scale;
  596. ux += video_scale;
  597. }
  598. }
  599. }
  600. }
  601. }
  602. /* drawing spectogram/sonogram */
  603. data += spectogram_start * linesize;
  604. memcpy(data, s->spectogram + s->spectogram_index*linesize, total_length - back_length);
  605. data += total_length - back_length;
  606. if (back_length)
  607. memcpy(data, s->spectogram, back_length);
  608. s->outpicref->pts = s->frame_count;
  609. ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
  610. s->req_fullfilled = 1;
  611. s->frame_count++;
  612. }
  613. s->spectogram_count = (s->spectogram_count + 1) % s->count;
  614. s->spectogram_index = (s->spectogram_index + spectogram_height - 1) % spectogram_height;
  615. return ret;
  616. }
  617. static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
  618. {
  619. AVFilterContext *ctx = inlink->dst;
  620. ShowCQTContext *s = ctx->priv;
  621. int step = inlink->sample_rate / (s->fps * s->count);
  622. int fft_len = 1 << s->fft_bits;
  623. int remaining;
  624. float *audio_data;
  625. if (!insamples) {
  626. while (s->remaining_fill < (fft_len >> 1)) {
  627. int ret, x;
  628. memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
  629. ret = plot_cqt(inlink);
  630. if (ret < 0)
  631. return ret;
  632. for (x = 0; x < (fft_len-step); x++)
  633. s->fft_data[x] = s->fft_data[x+step];
  634. s->remaining_fill += step;
  635. }
  636. return AVERROR(EOF);
  637. }
  638. remaining = insamples->nb_samples;
  639. audio_data = (float*) insamples->data[0];
  640. while (remaining) {
  641. if (remaining >= s->remaining_fill) {
  642. int i = insamples->nb_samples - remaining;
  643. int j = fft_len - s->remaining_fill;
  644. int m, ret;
  645. for (m = 0; m < s->remaining_fill; m++) {
  646. s->fft_data[j+m].re = audio_data[2*(i+m)];
  647. s->fft_data[j+m].im = audio_data[2*(i+m)+1];
  648. }
  649. ret = plot_cqt(inlink);
  650. if (ret < 0) {
  651. av_frame_free(&insamples);
  652. return ret;
  653. }
  654. remaining -= s->remaining_fill;
  655. for (m = 0; m < fft_len-step; m++)
  656. s->fft_data[m] = s->fft_data[m+step];
  657. s->remaining_fill = step;
  658. } else {
  659. int i = insamples->nb_samples - remaining;
  660. int j = fft_len - s->remaining_fill;
  661. int m;
  662. for (m = 0; m < remaining; m++) {
  663. s->fft_data[m+j].re = audio_data[2*(i+m)];
  664. s->fft_data[m+j].im = audio_data[2*(i+m)+1];
  665. }
  666. s->remaining_fill -= remaining;
  667. remaining = 0;
  668. }
  669. }
  670. av_frame_free(&insamples);
  671. return 0;
  672. }
  673. static int request_frame(AVFilterLink *outlink)
  674. {
  675. ShowCQTContext *s = outlink->src->priv;
  676. AVFilterLink *inlink = outlink->src->inputs[0];
  677. int ret;
  678. s->req_fullfilled = 0;
  679. do {
  680. ret = ff_request_frame(inlink);
  681. } while (!s->req_fullfilled && ret >= 0);
  682. if (ret == AVERROR_EOF && s->outpicref)
  683. filter_frame(inlink, NULL);
  684. return ret;
  685. }
  686. static const AVFilterPad showcqt_inputs[] = {
  687. {
  688. .name = "default",
  689. .type = AVMEDIA_TYPE_AUDIO,
  690. .filter_frame = filter_frame,
  691. },
  692. { NULL }
  693. };
  694. static const AVFilterPad showcqt_outputs[] = {
  695. {
  696. .name = "default",
  697. .type = AVMEDIA_TYPE_VIDEO,
  698. .config_props = config_output,
  699. .request_frame = request_frame,
  700. },
  701. { NULL }
  702. };
  703. AVFilter ff_avf_showcqt = {
  704. .name = "showcqt",
  705. .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
  706. .uninit = uninit,
  707. .query_formats = query_formats,
  708. .priv_size = sizeof(ShowCQTContext),
  709. .inputs = showcqt_inputs,
  710. .outputs = showcqt_outputs,
  711. .priv_class = &showcqt_class,
  712. };