/* af_loudnorm.c */
/*
 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* http://k.ylo.ph/2016/04/04/loudnorm.html */

#include "libavutil/opt.h"
#include "avfilter.h"
#include "internal.h"
#include "audio.h"
#include <ebur128.h>
/* Processing phase of the filter; selected in config_input/filter_frame. */
enum FrameType {
    FIRST_FRAME,  /* the initial 3 s analysis frame */
    INNER_FRAME,  /* steady-state 100 ms frames */
    FINAL_FRAME,  /* flush frame built in request_frame at EOF */
    LINEAR_MODE,  /* fixed linear gain; no dynamic processing */
    FRAME_NB
};
/* State of the true-peak limiter envelope (see true_peak_limiter()). */
enum LimiterState {
    OUT,      /* no gain reduction active */
    ATTACK,   /* ramping gain down toward gain_reduction[1] */
    SUSTAIN,  /* holding gain reduction while peaks persist */
    RELEASE,  /* ramping gain back up toward unity */
    STATE_NB
};
/* Output format for the measurement summary printed in uninit(). */
enum PrintFormat {
    NONE,     /* print nothing */
    JSON,     /* machine-readable JSON block */
    SUMMARY,  /* human-readable summary */
    PF_NB
};
typedef struct LoudNormContext {
    const AVClass *class;

    /* user options (see loudnorm_options[]) */
    double target_i;          /* integrated loudness target, LUFS */
    double target_lra;        /* loudness range target, LU */
    double target_tp;         /* max true peak (dBTP as option; converted to linear in config_input) */
    double measured_i;        /* measured IL of input from a first pass */
    double measured_lra;      /* measured LRA of input from a first pass */
    double measured_tp;       /* measured true peak of input from a first pass */
    double measured_thresh;   /* measured threshold of input from a first pass */
    double offset;            /* offset gain (dB as option; converted to linear in config_input) */
    int linear;               /* prefer linear normalization when measurements allow it */
    int dual_mono;            /* treat mono input as dual-mono for R128 measurement */
    enum PrintFormat print_format;

    /* 3000 ms ring buffer of input samples (interleaved doubles) */
    double *buf;
    int buf_size;             /* total elements (samples * channels) */
    int buf_index;            /* read position */
    int prev_buf_index;       /* write position for incoming samples */

    /* dynamic gain smoothing */
    double delta[30];         /* ring of per-frame gain factors, smoothed by gaussian_filter() */
    double weights[21];       /* Gaussian window coefficients (init_gaussian_filter()) */
    double prev_delta;        /* last gain factor, reused below threshold */
    int index;                /* current write slot in delta[] */

    /* true-peak limiter state */
    double gain_reduction[2]; /* envelope endpoints: [0] = start, [1] = target */
    double *limiter_buf;      /* 210 ms lookahead ring buffer (interleaved) */
    double *prev_smp;         /* per-channel previous |sample| for peak detection */
    int limiter_buf_index;
    int limiter_buf_size;     /* logical size in elements; see allocation note in config_input() */
    enum LimiterState limiter_state;
    int peak_index;           /* buffer index of the last detected peak */
    int env_index;            /* current envelope position in limiter_buf */
    int env_cnt;              /* progress within the attack/release ramp */
    int attack_length;        /* ramp lengths in sample frames */
    int release_length;

    int64_t pts;              /* pts for the next output frame */
    enum FrameType frame_type;
    int above_threshold;      /* set once short-term loudness exceeds measured_thresh */
    int prev_nb_samples;      /* nb_samples of the previous inner frame (used at flush) */
    int channels;

    /* libebur128 measurement state for input and output signals */
    ebur128_state *r128_in;
    ebur128_state *r128_out;
} LoudNormContext;
#define OFFSET(x) offsetof(LoudNormContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/* Option table; upper/lower-case aliases map to the same fields. */
static const AVOption loudnorm_options[] = {
    { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
    { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
    { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
    { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
    { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
    { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
    { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
    { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
    { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
    { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
    { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
    { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
    { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
    { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
    { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
    { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
    { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
    { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
    { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
    { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(loudnorm);
  112. static inline int frame_size(int sample_rate, int frame_len_msec)
  113. {
  114. const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
  115. return frame_size + (frame_size % 2);
  116. }
  117. static void init_gaussian_filter(LoudNormContext *s)
  118. {
  119. double total_weight = 0.0;
  120. const double sigma = 3.5;
  121. double adjust;
  122. int i;
  123. const int offset = 21 / 2;
  124. const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
  125. const double c2 = 2.0 * pow(sigma, 2.0);
  126. for (i = 0; i < 21; i++) {
  127. const int x = i - offset;
  128. s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
  129. total_weight += s->weights[i];
  130. }
  131. adjust = 1.0 / total_weight;
  132. for (i = 0; i < 21; i++)
  133. s->weights[i] *= adjust;
  134. }
  135. static double gaussian_filter(LoudNormContext *s, int index)
  136. {
  137. double result = 0.;
  138. int i;
  139. index = index - 10 > 0 ? index - 10 : index + 20;
  140. for (i = 0; i < 21; i++)
  141. result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
  142. return result;
  143. }
/*
 * Scan the limiter lookahead ring for the next local peak above the
 * true-peak ceiling.
 *
 * The scan starts `offset` sample frames past the current limiter read
 * position plus a fixed 1920-sample lookahead, and covers `nb_samples`
 * frames.  On success, *peak_delta receives the peak's distance (in
 * sample frames) from the scan start, *peak_value the largest absolute
 * sample across channels at that position, and s->peak_index the ring
 * index of the peak.  If no peak is found, *peak_delta stays -1.
 */
static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
{
    int n, c, i, index;
    double ceiling;
    double *buf;

    *peak_delta = -1;
    buf = s->limiter_buf;
    ceiling = s->target_tp;

    index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
    if (index >= s->limiter_buf_size)
        index -= s->limiter_buf_size;

    if (s->frame_type == FIRST_FRAME) {
        /* no history yet: seed prev_smp[] from the frame just before the scan start */
        for (c = 0; c < channels; c++)
            s->prev_smp[c] = fabs(buf[index + c - channels]);
    }

    for (n = 0; n < nb_samples; n++) {
        for (c = 0; c < channels; c++) {
            double this, next, max_peak;

            this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
            next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);

            /* candidate: a local maximum above the ceiling (skip the very first frame) */
            if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
                int detected;

                /* reject if any of the following 10 samples on this channel rises above it */
                detected = 1;
                for (i = 2; i < 12; i++) {
                    next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
                    if (next > this) {
                        detected = 0;
                        break;
                    }
                }

                if (!detected)
                    continue;

                /* NOTE(review): this loop deliberately reuses `c`, restarting at
                 * channel 0 and ending the outer channel loop; max_peak is first
                 * assigned on the c == 0 iteration, so it is never read
                 * uninitialized.  buf[index + c] here is read without the ring
                 * wrap applied above — presumably safe only because the buffer
                 * is over-allocated (see config_input); verify upstream. */
                for (c = 0; c < channels; c++) {
                    if (c == 0 || fabs(buf[index + c]) > max_peak)
                        max_peak = fabs(buf[index + c]);

                    s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
                }

                *peak_delta = n;
                s->peak_index = index;
                *peak_value = max_peak;
                return;
            }

            s->prev_smp[c] = this;
        }

        index += channels;
        if (index >= s->limiter_buf_size)
            index -= s->limiter_buf_size;
    }
}
/*
 * Look-ahead true-peak limiter.
 *
 * Runs a four-state envelope (OUT -> ATTACK -> SUSTAIN -> RELEASE) over
 * the limiter ring buffer, scaling samples in place so no output sample
 * exceeds the linear ceiling s->target_tp, then copies nb_samples
 * processed frames from the ring (starting at s->limiter_buf_index)
 * into `out`, hard-clipping anything that still exceeds the ceiling.
 */
static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
{
    int n, c, index, peak_delta, smp_cnt;
    double ceiling, peak_value;
    double *buf;

    buf = s->limiter_buf;
    ceiling = s->target_tp;
    index = s->limiter_buf_index;
    smp_cnt = 0;

    if (s->frame_type == FIRST_FRAME) {
        double max;

        /* on the very first frame, pre-scale the 1920-sample lookahead
         * region if its maximum already exceeds the ceiling */
        max = 0.;
        for (n = 0; n < 1920; n++) {
            for (c = 0; c < channels; c++) {
                max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
            }
            buf += channels;
        }

        if (max > ceiling) {
            s->gain_reduction[1] = ceiling / max;
            s->limiter_state = SUSTAIN;
            buf = s->limiter_buf;

            for (n = 0; n < 1920; n++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[1];
                    buf[c] *= env;
                }
                buf += channels;
            }
        }

        buf = s->limiter_buf;
    }

    do {
        switch(s->limiter_state) {
        case OUT:
            /* idle: look for the next peak; if found, set up an attack
             * ramp ending s->attack_length frames before the peak */
            detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
            if (peak_delta != -1) {
                s->env_cnt = 0;
                smp_cnt += (peak_delta - s->attack_length);
                s->gain_reduction[0] = 1.;
                s->gain_reduction[1] = ceiling / peak_value;
                s->limiter_state = ATTACK;

                s->env_index = s->peak_index - (s->attack_length * channels);
                if (s->env_index < 0)
                    s->env_index += s->limiter_buf_size;

                s->env_index += (s->env_cnt * channels);
                if (s->env_index > s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

            } else {
                smp_cnt = nb_samples;
            }
            break;

        case ATTACK:
            /* linear ramp from gain_reduction[0] down to gain_reduction[1] */
            for (; s->env_cnt < s->attack_length; s->env_cnt++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
                    buf[s->env_index + c] *= env;
                }
                s->env_index += channels;
                if (s->env_index >= s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

                smp_cnt++;
                if (smp_cnt >= nb_samples) {
                    s->env_cnt++;
                    break;
                }
            }

            if (smp_cnt < nb_samples) {
                s->env_cnt = 0;
                s->attack_length = 1920;
                s->limiter_state = SUSTAIN;
            }
            break;

        case SUSTAIN:
            /* hold the current reduction; re-attack if a louder peak
             * appears, release once no peak remains in view */
            detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
            if (peak_delta == -1) {
                s->limiter_state = RELEASE;
                s->gain_reduction[0] = s->gain_reduction[1];
                s->gain_reduction[1] = 1.;
                s->env_cnt = 0;
                break;
            } else {
                double gain_reduction;
                gain_reduction = ceiling / peak_value;

                if (gain_reduction < s->gain_reduction[1]) {
                    s->limiter_state = ATTACK;

                    s->attack_length = peak_delta;
                    if (s->attack_length <= 1)
                        s->attack_length = 2;

                    s->gain_reduction[0] = s->gain_reduction[1];
                    s->gain_reduction[1] = gain_reduction;
                    s->env_cnt = 0;
                    break;
                }

                for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
                    for (c = 0; c < channels; c++) {
                        double env;
                        env = s->gain_reduction[1];
                        buf[s->env_index + c] *= env;
                    }
                    s->env_index += channels;
                    if (s->env_index >= s->limiter_buf_size)
                        s->env_index -= s->limiter_buf_size;

                    smp_cnt++;
                    if (smp_cnt >= nb_samples) {
                        s->env_cnt++;
                        break;
                    }
                }
            }
            break;

        case RELEASE:
            /* linear ramp from gain_reduction[0] back up to gain_reduction[1] (unity) */
            for (; s->env_cnt < s->release_length; s->env_cnt++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
                    buf[s->env_index + c] *= env;
                }
                s->env_index += channels;
                if (s->env_index >= s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

                smp_cnt++;
                if (smp_cnt >= nb_samples) {
                    s->env_cnt++;
                    break;
                }
            }

            if (smp_cnt < nb_samples) {
                s->env_cnt = 0;
                s->limiter_state = OUT;
            }
            break;
        }
    } while (smp_cnt < nb_samples);

    /* copy processed samples out of the ring, hard-clipping as a safety net */
    for (n = 0; n < nb_samples; n++) {
        for (c = 0; c < channels; c++) {
            out[c] = buf[index + c];
            if (fabs(out[c]) > ceiling) {
                out[c] = ceiling * (out[c] < 0 ? -1 : 1);
            }
        }
        out += channels;
        index += channels;
        if (index >= s->limiter_buf_size)
            index -= s->limiter_buf_size;
    }
}
/*
 * Per-frame processing entry point.
 *
 * Feeds the input into the R128 input analyzer, then dispatches on
 * s->frame_type: the first 3 s frame seeds the gain history and the
 * limiter ring; inner frames apply the Gaussian-smoothed dynamic gain
 * plus limiting; the final frame flushes the lookahead; LINEAR_MODE
 * applies a constant gain.  Output is measured by r128_out.
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out;
    const double *src;
    double *dst;
    double *buf;
    double *limiter_buf;
    int i, n, c, subframe_length, src_index;
    double gain, gain_next, env_global, env_shortterm,
           global, shortterm, lra, relative_threshold;

    /* process in place when possible, otherwise get a writable copy */
    if (av_frame_is_writable(in)) {
        out = in;
    } else {
        out = ff_get_audio_buffer(inlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
    }

    out->pts = s->pts;
    src = (const double *)in->data[0];
    dst = (double *)out->data[0];
    buf = s->buf;
    limiter_buf = s->limiter_buf;

    ebur128_add_frames_double(s->r128_in, src, in->nb_samples);

    /* input shorter than the 3 s analysis window: measure it whole and
     * fall back to a one-shot linear gain */
    if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
        double offset, offset_tp, true_peak;

        ebur128_loudness_global(s->r128_in, &global);
        for (c = 0; c < inlink->channels; c++) {
            double tmp;
            ebur128_sample_peak(s->r128_in, c, &tmp);
            if (c == 0 || tmp > true_peak)
                true_peak = tmp;
        }

        /* cap the gain so the true peak stays at or below target_tp */
        offset = s->target_i - global;
        offset_tp = true_peak + offset;
        s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
        s->offset = pow(10., s->offset / 20.);
        s->frame_type = LINEAR_MODE;
    }

    switch (s->frame_type) {
    case FIRST_FRAME:
        /* stash the whole 3 s frame in the ring buffer */
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->buf_index + c] = src[c];
            }
            src += inlink->channels;
            s->buf_index += inlink->channels;
        }

        ebur128_loudness_shortterm(s->r128_in, &shortterm);

        /* seed the gain history from the short-term loudness; -70 LUFS
         * (silence gate) gets unity gain */
        if (shortterm < s->measured_thresh) {
            s->above_threshold = 0;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
        } else {
            s->above_threshold = 1;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
        }

        for (n = 0; n < 30; n++)
            s->delta[n] = pow(10., env_shortterm / 20.);
        s->prev_delta = s->delta[s->index];

        s->buf_index =
        s->limiter_buf_index = 0;

        /* prime the limiter lookahead ring with gain-applied samples */
        for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
            for (c = 0; c < inlink->channels; c++) {
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
            }
            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->buf_index += inlink->channels;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        true_peak_limiter(s, dst, subframe_length, inlink->channels);
        ebur128_add_frames_double(s->r128_out, dst, subframe_length);

        /* from now on consume 100 ms frames */
        s->pts +=
        out->nb_samples =
        inlink->min_samples =
        inlink->max_samples =
        inlink->partial_buf_size = subframe_length;

        s->frame_type = INNER_FRAME;
        break;

    case INNER_FRAME:
        /* interpolate between this frame's smoothed gain and the next's */
        gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);

        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->prev_buf_index + c] = src[c];
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
            }
            src += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->prev_buf_index += inlink->channels;
            if (s->prev_buf_index >= s->buf_size)
                s->prev_buf_index -= s->buf_size;

            s->buf_index += inlink->channels;
            if (s->buf_index >= s->buf_size)
                s->buf_index -= s->buf_size;
        }

        /* re-align the limiter read index for frames shorter than 100 ms */
        subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
        s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;

        true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);

        ebur128_loudness_range(s->r128_in, &lra);
        ebur128_loudness_global(s->r128_in, &global);
        ebur128_loudness_shortterm(s->r128_in, &shortterm);
        ebur128_relative_threshold(s->r128_in, &relative_threshold);

        /* below-threshold audio: slowly raise the held gain until the
         * output reaches the target loudness */
        if (s->above_threshold == 0) {
            double shortterm_out;

            if (shortterm > s->measured_thresh)
                s->prev_delta *= 1.0058;

            ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
            if (shortterm_out >= s->target_i)
                s->above_threshold = 1;
        }

        if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
            s->delta[s->index] = s->prev_delta;
        } else {
            /* clamp the global correction to half the LRA target */
            env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
            env_shortterm = s->target_i - shortterm;
            s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
        }

        s->prev_delta = s->delta[s->index];
        s->index++;
        if (s->index >= 30)
            s->index -= 30;
        s->prev_nb_samples = in->nb_samples;
        s->pts += in->nb_samples;
        break;

    case FINAL_FRAME:
        /* flush: apply a fixed gain to the remaining buffered audio and
         * run it through the limiter in 100 ms subframes, zero-padding
         * the lookahead past the end of input */
        gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        s->limiter_buf_index = 0;
        src_index = 0;

        for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
            for (c = 0; c < inlink->channels; c++) {
                s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
            }
            src_index += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        for (i = 0; i < in->nb_samples / subframe_length; i++) {
            true_peak_limiter(s, dst, subframe_length, inlink->channels);

            for (n = 0; n < subframe_length; n++) {
                for (c = 0; c < inlink->channels; c++) {
                    if (src_index < (in->nb_samples * inlink->channels)) {
                        limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
                    } else {
                        limiter_buf[s->limiter_buf_index + c] = 0.;
                    }
                }

                if (src_index < (in->nb_samples * inlink->channels))
                    src_index += inlink->channels;

                s->limiter_buf_index += inlink->channels;
                if (s->limiter_buf_index >= s->limiter_buf_size)
                    s->limiter_buf_index -= s->limiter_buf_size;
            }

            dst += (subframe_length * inlink->channels);
        }

        dst = (double *)out->data[0];
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        break;

    case LINEAR_MODE:
        /* constant linear gain, no limiting */
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                dst[c] = src[c] * s->offset;
            }
            src += inlink->channels;
            dst += inlink->channels;
        }

        dst = (double *)out->data[0];
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        s->pts += in->nb_samples;
        break;
    }

    if (in != out)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}
/*
 * Output-side frame request.
 *
 * Forwards the request upstream; on EOF while in INNER_FRAME mode,
 * builds one last frame from the audio still sitting in the 3 s ring
 * buffer and pushes it through filter_frame() as FINAL_FRAME so the
 * limiter lookahead is flushed.
 */
static int request_frame(AVFilterLink *outlink)
{
    int ret;
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    LoudNormContext *s = ctx->priv;

    ret = ff_request_frame(inlink);
    if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) {
        double *src;
        double *buf;
        int nb_samples, n, c, offset;
        AVFrame *frame;

        /* samples still buffered but not yet emitted */
        nb_samples = (s->buf_size / inlink->channels) - s->prev_nb_samples;
        nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);

        frame = ff_get_audio_buffer(outlink, nb_samples);
        if (!frame)
            return AVERROR(ENOMEM);
        frame->nb_samples = nb_samples;

        buf = s->buf;
        src = (double *)frame->data[0];

        /* rewind the ring read index to the first unconsumed sample */
        offset = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
        offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
        s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;

        for (n = 0; n < nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                src[c] = buf[s->buf_index + c];
            }
            src += inlink->channels;
            s->buf_index += inlink->channels;
            if (s->buf_index >= s->buf_size)
                s->buf_index -= s->buf_size;
        }

        s->frame_type = FINAL_FRAME;
        ret = filter_frame(inlink, frame);
    }
    return ret;
}
  565. static int query_formats(AVFilterContext *ctx)
  566. {
  567. AVFilterFormats *formats;
  568. AVFilterChannelLayouts *layouts;
  569. AVFilterLink *inlink = ctx->inputs[0];
  570. AVFilterLink *outlink = ctx->outputs[0];
  571. static const int input_srate[] = {192000, -1};
  572. static const enum AVSampleFormat sample_fmts[] = {
  573. AV_SAMPLE_FMT_DBL,
  574. AV_SAMPLE_FMT_NONE
  575. };
  576. int ret;
  577. layouts = ff_all_channel_counts();
  578. if (!layouts)
  579. return AVERROR(ENOMEM);
  580. ret = ff_set_common_channel_layouts(ctx, layouts);
  581. if (ret < 0)
  582. return ret;
  583. formats = ff_make_format_list(sample_fmts);
  584. if (!formats)
  585. return AVERROR(ENOMEM);
  586. ret = ff_set_common_formats(ctx, formats);
  587. if (ret < 0)
  588. return ret;
  589. formats = ff_make_format_list(input_srate);
  590. if (!formats)
  591. return AVERROR(ENOMEM);
  592. ret = ff_formats_ref(formats, &inlink->out_samplerates);
  593. if (ret < 0)
  594. return ret;
  595. ret = ff_formats_ref(formats, &outlink->in_samplerates);
  596. if (ret < 0)
  597. return ret;
  598. return 0;
  599. }
/*
 * Input link configuration: allocate analyzers and buffers, decide
 * between linear and dynamic normalization, and convert dB options to
 * linear factors.  Allocations freed in uninit() on any failure path.
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s = ctx->priv;

    s->r128_in = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
    if (!s->r128_in)
        return AVERROR(ENOMEM);

    s->r128_out = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
    if (!s->r128_out)
        return AVERROR(ENOMEM);

    if (inlink->channels == 1 && s->dual_mono) {
        ebur128_set_channel(s->r128_in, 0, EBUR128_DUAL_MONO);
        ebur128_set_channel(s->r128_out, 0, EBUR128_DUAL_MONO);
    }

    /* 3000 ms analysis ring buffer */
    s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
    s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
    if (!s->buf)
        return AVERROR(ENOMEM);

    /* NOTE(review): limiter_buf is sized logically at 210 ms but
     * allocated with buf_size (3000 ms) elements — presumably deliberate
     * headroom for the limiter's unwrapped lookahead reads in
     * detect_peak(); confirm before shrinking the allocation. */
    s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
    s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
    if (!s->limiter_buf)
        return AVERROR(ENOMEM);

    s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
    if (!s->prev_smp)
        return AVERROR(ENOMEM);

    init_gaussian_filter(s);

    s->frame_type = FIRST_FRAME;

    /* use linear normalization when a full set of measured stats was
     * supplied and the targets are reachable without limiting */
    if (s->linear) {
        double offset, offset_tp;
        offset = s->target_i - s->measured_i;
        offset_tp = s->measured_tp + offset;

        if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
            if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
                s->frame_type = LINEAR_MODE;
                s->offset = offset;
            }
        }
    }

    /* dynamic mode starts by buffering a full 3 s frame */
    if (s->frame_type != LINEAR_MODE) {
        inlink->min_samples =
        inlink->max_samples =
        inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
    }

    s->pts =
    s->buf_index =
    s->prev_buf_index =
    s->limiter_buf_index = 0;
    s->channels = inlink->channels;
    s->index = 1;
    s->limiter_state = OUT;
    /* convert dB options to linear gain factors */
    s->offset = pow(10., s->offset / 20.);
    s->target_tp = pow(10., s->target_tp / 20.);
    s->attack_length = frame_size(inlink->sample_rate, 10);
    s->release_length = frame_size(inlink->sample_rate, 100);

    return 0;
}
/*
 * Teardown: optionally print the input/output loudness statistics in
 * the requested format, then free the R128 states and buffers.
 * Skips the stats when either analyzer was never created (init failed).
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    LoudNormContext *s = ctx->priv;
    double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
    int c;

    if (!s->r128_in || !s->r128_out)
        goto end;

    /* gather input-side measurements; tp_* take the max sample peak
     * across channels (first assigned on c == 0) */
    ebur128_loudness_range(s->r128_in, &lra_in);
    ebur128_loudness_global(s->r128_in, &i_in);
    ebur128_relative_threshold(s->r128_in, &thresh_in);
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ebur128_sample_peak(s->r128_in, c, &tmp);
        if ((c == 0) || (tmp > tp_in))
            tp_in = tmp;
    }

    ebur128_loudness_range(s->r128_out, &lra_out);
    ebur128_loudness_global(s->r128_out, &i_out);
    ebur128_relative_threshold(s->r128_out, &thresh_out);
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ebur128_sample_peak(s->r128_out, c, &tmp);
        if ((c == 0) || (tmp > tp_out))
            tp_out = tmp;
    }

    switch(s->print_format) {
    case NONE:
        break;

    case JSON:
        av_log(ctx, AV_LOG_INFO,
               "\n{\n"
               "\t\"input_i\" : \"%.2f\",\n"
               "\t\"input_tp\" : \"%.2f\",\n"
               "\t\"input_lra\" : \"%.2f\",\n"
               "\t\"input_thresh\" : \"%.2f\",\n"
               "\t\"output_i\" : \"%.2f\",\n"
               "\t\"output_tp\" : \"%+.2f\",\n"
               "\t\"output_lra\" : \"%.2f\",\n"
               "\t\"output_thresh\" : \"%.2f\",\n"
               "\t\"normalization_type\" : \"%s\",\n"
               "\t\"target_offset\" : \"%.2f\"\n"
               "}\n",
               i_in,
               20. * log10(tp_in),
               lra_in,
               thresh_in,
               i_out,
               20. * log10(tp_out),
               lra_out,
               thresh_out,
               s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
               s->target_i - i_out
        );
        break;

    case SUMMARY:
        av_log(ctx, AV_LOG_INFO,
               "\n"
               "Input Integrated: %+6.1f LUFS\n"
               "Input True Peak: %+6.1f dBTP\n"
               "Input LRA: %6.1f LU\n"
               "Input Threshold: %+6.1f LUFS\n"
               "\n"
               "Output Integrated: %+6.1f LUFS\n"
               "Output True Peak: %+6.1f dBTP\n"
               "Output LRA: %6.1f LU\n"
               "Output Threshold: %+6.1f LUFS\n"
               "\n"
               "Normalization Type: %s\n"
               "Target Offset: %+6.1f LU\n",
               i_in,
               20. * log10(tp_in),
               lra_in,
               thresh_in,
               i_out,
               20. * log10(tp_out),
               lra_out,
               thresh_out,
               s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
               s->target_i - i_out
        );
        break;
    }

end:
    if (s->r128_in)
        ebur128_destroy(&s->r128_in);
    if (s->r128_out)
        ebur128_destroy(&s->r128_out);
    av_freep(&s->limiter_buf);
    av_freep(&s->prev_smp);
    av_freep(&s->buf);
}
/* Single audio input: configured by config_input, fed via filter_frame. */
static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};
/* Single audio output: request_frame handles the EOF flush. */
static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
/* Filter registration entry for the loudnorm audio filter. */
AVFilter ff_af_loudnorm = {
    .name          = "loudnorm",
    .description   = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
    .priv_size     = sizeof(LoudNormContext),
    .priv_class    = &loudnorm_class,
    .query_formats = query_formats,
    .uninit        = uninit,
    .inputs        = avfilter_af_loudnorm_inputs,
    .outputs       = avfilter_af_loudnorm_outputs,
};