af_amix.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. /*
  2. * Audio Mix Filter
  3. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * Audio Mix Filter
  24. *
  25. * Mixes audio from multiple sources into a single output. The channel layout,
  26. * sample rate, and sample format will be the same for all inputs and the
  27. * output.
  28. */
  29. #include "libavutil/attributes.h"
  30. #include "libavutil/audio_fifo.h"
  31. #include "libavutil/avassert.h"
  32. #include "libavutil/avstring.h"
  33. #include "libavutil/channel_layout.h"
  34. #include "libavutil/common.h"
  35. #include "libavutil/float_dsp.h"
  36. #include "libavutil/mathematics.h"
  37. #include "libavutil/opt.h"
  38. #include "libavutil/samplefmt.h"
  39. #include "audio.h"
  40. #include "avfilter.h"
  41. #include "formats.h"
  42. #include "internal.h"
  43. #define INPUT_ON 1 /**< input is active */
  44. #define INPUT_EOF 2 /**< input has reached EOF (may still be active) */
  45. #define DURATION_LONGEST 0
  46. #define DURATION_SHORTEST 1
  47. #define DURATION_FIRST 2
  48. typedef struct FrameInfo {
  49. int nb_samples;
  50. int64_t pts;
  51. struct FrameInfo *next;
  52. } FrameInfo;
  53. /**
  54. * Linked list used to store timestamps and frame sizes of all frames in the
  55. * FIFO for the first input.
  56. *
  57. * This is needed to keep timestamps synchronized for the case where multiple
  58. * input frames are pushed to the filter for processing before a frame is
  59. * requested by the output link.
  60. */
  61. typedef struct FrameList {
  62. int nb_frames;
  63. int nb_samples;
  64. FrameInfo *list;
  65. FrameInfo *end;
  66. } FrameList;
  67. static void frame_list_clear(FrameList *frame_list)
  68. {
  69. if (frame_list) {
  70. while (frame_list->list) {
  71. FrameInfo *info = frame_list->list;
  72. frame_list->list = info->next;
  73. av_free(info);
  74. }
  75. frame_list->nb_frames = 0;
  76. frame_list->nb_samples = 0;
  77. frame_list->end = NULL;
  78. }
  79. }
  80. static int frame_list_next_frame_size(FrameList *frame_list)
  81. {
  82. if (!frame_list->list)
  83. return 0;
  84. return frame_list->list->nb_samples;
  85. }
  86. static int64_t frame_list_next_pts(FrameList *frame_list)
  87. {
  88. if (!frame_list->list)
  89. return AV_NOPTS_VALUE;
  90. return frame_list->list->pts;
  91. }
  92. static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
  93. {
  94. if (nb_samples >= frame_list->nb_samples) {
  95. frame_list_clear(frame_list);
  96. } else {
  97. int samples = nb_samples;
  98. while (samples > 0) {
  99. FrameInfo *info = frame_list->list;
  100. av_assert0(info);
  101. if (info->nb_samples <= samples) {
  102. samples -= info->nb_samples;
  103. frame_list->list = info->next;
  104. if (!frame_list->list)
  105. frame_list->end = NULL;
  106. frame_list->nb_frames--;
  107. frame_list->nb_samples -= info->nb_samples;
  108. av_free(info);
  109. } else {
  110. info->nb_samples -= samples;
  111. info->pts += samples;
  112. frame_list->nb_samples -= samples;
  113. samples = 0;
  114. }
  115. }
  116. }
  117. }
  118. static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
  119. {
  120. FrameInfo *info = av_malloc(sizeof(*info));
  121. if (!info)
  122. return AVERROR(ENOMEM);
  123. info->nb_samples = nb_samples;
  124. info->pts = pts;
  125. info->next = NULL;
  126. if (!frame_list->list) {
  127. frame_list->list = info;
  128. frame_list->end = info;
  129. } else {
  130. av_assert0(frame_list->end);
  131. frame_list->end->next = info;
  132. frame_list->end = info;
  133. }
  134. frame_list->nb_frames++;
  135. frame_list->nb_samples += nb_samples;
  136. return 0;
  137. }
  138. typedef struct MixContext {
  139. const AVClass *class; /**< class for AVOptions */
  140. AVFloatDSPContext *fdsp;
  141. int nb_inputs; /**< number of inputs */
  142. int active_inputs; /**< number of input currently active */
  143. int duration_mode; /**< mode for determining duration */
  144. float dropout_transition; /**< transition time when an input drops out */
  145. int nb_channels; /**< number of channels */
  146. int sample_rate; /**< sample rate */
  147. int planar;
  148. AVAudioFifo **fifos; /**< audio fifo for each input */
  149. uint8_t *input_state; /**< current state of each input */
  150. float *input_scale; /**< mixing scale factor for each input */
  151. float scale_norm; /**< normalization factor for all inputs */
  152. int64_t next_pts; /**< calculated pts for next output frame */
  153. FrameList *frame_list; /**< list of frame info for the first input */
  154. } MixContext;
  155. #define OFFSET(x) offsetof(MixContext, x)
  156. #define A AV_OPT_FLAG_AUDIO_PARAM
  157. #define F AV_OPT_FLAG_FILTERING_PARAM
  158. static const AVOption amix_options[] = {
  159. { "inputs", "Number of inputs.",
  160. OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
  161. { "duration", "How to determine the end-of-stream.",
  162. OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A|F, "duration" },
  163. { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A|F, "duration" },
  164. { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
  165. { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A|F, "duration" },
  166. { "dropout_transition", "Transition time, in seconds, for volume "
  167. "renormalization when an input stream ends.",
  168. OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
  169. { NULL }
  170. };
  171. AVFILTER_DEFINE_CLASS(amix);
  172. /**
  173. * Update the scaling factors to apply to each input during mixing.
  174. *
  175. * This balances the full volume range between active inputs and handles
  176. * volume transitions when EOF is encountered on an input but mixing continues
  177. * with the remaining inputs.
  178. */
  179. static void calculate_scales(MixContext *s, int nb_samples)
  180. {
  181. int i;
  182. if (s->scale_norm > s->active_inputs) {
  183. s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
  184. s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
  185. }
  186. for (i = 0; i < s->nb_inputs; i++) {
  187. if (s->input_state[i] & INPUT_ON)
  188. s->input_scale[i] = 1.0f / s->scale_norm;
  189. else
  190. s->input_scale[i] = 0.0f;
  191. }
  192. }
  193. static int config_output(AVFilterLink *outlink)
  194. {
  195. AVFilterContext *ctx = outlink->src;
  196. MixContext *s = ctx->priv;
  197. int i;
  198. char buf[64];
  199. s->planar = av_sample_fmt_is_planar(outlink->format);
  200. s->sample_rate = outlink->sample_rate;
  201. outlink->time_base = (AVRational){ 1, outlink->sample_rate };
  202. s->next_pts = AV_NOPTS_VALUE;
  203. s->frame_list = av_mallocz(sizeof(*s->frame_list));
  204. if (!s->frame_list)
  205. return AVERROR(ENOMEM);
  206. s->fifos = av_mallocz_array(s->nb_inputs, sizeof(*s->fifos));
  207. if (!s->fifos)
  208. return AVERROR(ENOMEM);
  209. s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
  210. for (i = 0; i < s->nb_inputs; i++) {
  211. s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
  212. if (!s->fifos[i])
  213. return AVERROR(ENOMEM);
  214. }
  215. s->input_state = av_malloc(s->nb_inputs);
  216. if (!s->input_state)
  217. return AVERROR(ENOMEM);
  218. memset(s->input_state, INPUT_ON, s->nb_inputs);
  219. s->active_inputs = s->nb_inputs;
  220. s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
  221. if (!s->input_scale)
  222. return AVERROR(ENOMEM);
  223. s->scale_norm = s->active_inputs;
  224. calculate_scales(s, 0);
  225. av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
  226. av_log(ctx, AV_LOG_VERBOSE,
  227. "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
  228. av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
  229. return 0;
  230. }
  231. static int calc_active_inputs(MixContext *s);
  232. /**
  233. * Read samples from the input FIFOs, mix, and write to the output link.
  234. */
  235. static int output_frame(AVFilterLink *outlink)
  236. {
  237. AVFilterContext *ctx = outlink->src;
  238. MixContext *s = ctx->priv;
  239. AVFrame *out_buf, *in_buf;
  240. int nb_samples, ns, ret, i;
  241. ret = calc_active_inputs(s);
  242. if (ret < 0)
  243. return ret;
  244. if (s->input_state[0] & INPUT_ON) {
  245. /* first input live: use the corresponding frame size */
  246. nb_samples = frame_list_next_frame_size(s->frame_list);
  247. for (i = 1; i < s->nb_inputs; i++) {
  248. if (s->input_state[i] & INPUT_ON) {
  249. ns = av_audio_fifo_size(s->fifos[i]);
  250. if (ns < nb_samples) {
  251. if (!(s->input_state[i] & INPUT_EOF))
  252. /* unclosed input with not enough samples */
  253. return 0;
  254. /* closed input to drain */
  255. nb_samples = ns;
  256. }
  257. }
  258. }
  259. } else {
  260. /* first input closed: use the available samples */
  261. nb_samples = INT_MAX;
  262. for (i = 1; i < s->nb_inputs; i++) {
  263. if (s->input_state[i] & INPUT_ON) {
  264. ns = av_audio_fifo_size(s->fifos[i]);
  265. nb_samples = FFMIN(nb_samples, ns);
  266. }
  267. }
  268. if (nb_samples == INT_MAX)
  269. return AVERROR_EOF;
  270. }
  271. s->next_pts = frame_list_next_pts(s->frame_list);
  272. frame_list_remove_samples(s->frame_list, nb_samples);
  273. calculate_scales(s, nb_samples);
  274. if (nb_samples == 0)
  275. return 0;
  276. out_buf = ff_get_audio_buffer(outlink, nb_samples);
  277. if (!out_buf)
  278. return AVERROR(ENOMEM);
  279. in_buf = ff_get_audio_buffer(outlink, nb_samples);
  280. if (!in_buf) {
  281. av_frame_free(&out_buf);
  282. return AVERROR(ENOMEM);
  283. }
  284. for (i = 0; i < s->nb_inputs; i++) {
  285. if (s->input_state[i] & INPUT_ON) {
  286. int planes, plane_size, p;
  287. av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
  288. nb_samples);
  289. planes = s->planar ? s->nb_channels : 1;
  290. plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
  291. plane_size = FFALIGN(plane_size, 16);
  292. for (p = 0; p < planes; p++) {
  293. s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
  294. (float *) in_buf->extended_data[p],
  295. s->input_scale[i], plane_size);
  296. }
  297. }
  298. }
  299. av_frame_free(&in_buf);
  300. out_buf->pts = s->next_pts;
  301. if (s->next_pts != AV_NOPTS_VALUE)
  302. s->next_pts += nb_samples;
  303. return ff_filter_frame(outlink, out_buf);
  304. }
  305. /**
  306. * Requests a frame, if needed, from each input link other than the first.
  307. */
  308. static int request_samples(AVFilterContext *ctx, int min_samples)
  309. {
  310. MixContext *s = ctx->priv;
  311. int i, ret;
  312. av_assert0(s->nb_inputs > 1);
  313. for (i = 1; i < s->nb_inputs; i++) {
  314. ret = 0;
  315. if (!(s->input_state[i] & INPUT_ON))
  316. continue;
  317. if (av_audio_fifo_size(s->fifos[i]) >= min_samples)
  318. continue;
  319. ret = ff_request_frame(ctx->inputs[i]);
  320. if (ret == AVERROR_EOF) {
  321. s->input_state[i] |= INPUT_EOF;
  322. if (av_audio_fifo_size(s->fifos[i]) == 0) {
  323. s->input_state[i] = 0;
  324. continue;
  325. }
  326. } else if (ret < 0)
  327. return ret;
  328. }
  329. return output_frame(ctx->outputs[0]);
  330. }
  331. /**
  332. * Calculates the number of active inputs and determines EOF based on the
  333. * duration option.
  334. *
  335. * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
  336. */
  337. static int calc_active_inputs(MixContext *s)
  338. {
  339. int i;
  340. int active_inputs = 0;
  341. for (i = 0; i < s->nb_inputs; i++)
  342. active_inputs += !!(s->input_state[i] & INPUT_ON);
  343. s->active_inputs = active_inputs;
  344. if (!active_inputs ||
  345. (s->duration_mode == DURATION_FIRST && !(s->input_state[0] & INPUT_ON)) ||
  346. (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
  347. return AVERROR_EOF;
  348. return 0;
  349. }
  350. static int request_frame(AVFilterLink *outlink)
  351. {
  352. AVFilterContext *ctx = outlink->src;
  353. MixContext *s = ctx->priv;
  354. int ret;
  355. int wanted_samples;
  356. ret = calc_active_inputs(s);
  357. if (ret < 0)
  358. return ret;
  359. if (!(s->input_state[0] & INPUT_ON))
  360. return request_samples(ctx, 1);
  361. if (s->frame_list->nb_frames == 0) {
  362. ret = ff_request_frame(ctx->inputs[0]);
  363. if (ret == AVERROR_EOF) {
  364. s->input_state[0] = 0;
  365. if (s->nb_inputs == 1)
  366. return AVERROR_EOF;
  367. return output_frame(ctx->outputs[0]);
  368. }
  369. return ret;
  370. }
  371. av_assert0(s->frame_list->nb_frames > 0);
  372. wanted_samples = frame_list_next_frame_size(s->frame_list);
  373. return request_samples(ctx, wanted_samples);
  374. }
  375. static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
  376. {
  377. AVFilterContext *ctx = inlink->dst;
  378. MixContext *s = ctx->priv;
  379. AVFilterLink *outlink = ctx->outputs[0];
  380. int i, ret = 0;
  381. for (i = 0; i < ctx->nb_inputs; i++)
  382. if (ctx->inputs[i] == inlink)
  383. break;
  384. if (i >= ctx->nb_inputs) {
  385. av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
  386. ret = AVERROR(EINVAL);
  387. goto fail;
  388. }
  389. if (i == 0) {
  390. int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
  391. outlink->time_base);
  392. ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
  393. if (ret < 0)
  394. goto fail;
  395. }
  396. ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
  397. buf->nb_samples);
  398. av_frame_free(&buf);
  399. return output_frame(outlink);
  400. fail:
  401. av_frame_free(&buf);
  402. return ret;
  403. }
  404. static av_cold int init(AVFilterContext *ctx)
  405. {
  406. MixContext *s = ctx->priv;
  407. int i;
  408. for (i = 0; i < s->nb_inputs; i++) {
  409. char name[32];
  410. AVFilterPad pad = { 0 };
  411. snprintf(name, sizeof(name), "input%d", i);
  412. pad.type = AVMEDIA_TYPE_AUDIO;
  413. pad.name = av_strdup(name);
  414. if (!pad.name)
  415. return AVERROR(ENOMEM);
  416. pad.filter_frame = filter_frame;
  417. ff_insert_inpad(ctx, i, &pad);
  418. }
  419. s->fdsp = avpriv_float_dsp_alloc(0);
  420. if (!s->fdsp)
  421. return AVERROR(ENOMEM);
  422. return 0;
  423. }
  424. static av_cold void uninit(AVFilterContext *ctx)
  425. {
  426. int i;
  427. MixContext *s = ctx->priv;
  428. if (s->fifos) {
  429. for (i = 0; i < s->nb_inputs; i++)
  430. av_audio_fifo_free(s->fifos[i]);
  431. av_freep(&s->fifos);
  432. }
  433. frame_list_clear(s->frame_list);
  434. av_freep(&s->frame_list);
  435. av_freep(&s->input_state);
  436. av_freep(&s->input_scale);
  437. av_freep(&s->fdsp);
  438. for (i = 0; i < ctx->nb_inputs; i++)
  439. av_freep(&ctx->input_pads[i].name);
  440. }
  441. static int query_formats(AVFilterContext *ctx)
  442. {
  443. AVFilterFormats *formats = NULL;
  444. AVFilterChannelLayouts *layouts;
  445. int ret;
  446. layouts = ff_all_channel_layouts();
  447. if (!layouts) {
  448. ret = AVERROR(ENOMEM);
  449. goto fail;
  450. }
  451. if ((ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT )) < 0 ||
  452. (ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLTP)) < 0 ||
  453. (ret = ff_set_common_formats (ctx, formats)) < 0 ||
  454. (ret = ff_set_common_channel_layouts(ctx, layouts)) < 0 ||
  455. (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
  456. goto fail;
  457. return 0;
  458. fail:
  459. if (layouts)
  460. av_freep(&layouts->channel_layouts);
  461. av_freep(&layouts);
  462. return ret;
  463. }
  464. static const AVFilterPad avfilter_af_amix_outputs[] = {
  465. {
  466. .name = "default",
  467. .type = AVMEDIA_TYPE_AUDIO,
  468. .config_props = config_output,
  469. .request_frame = request_frame
  470. },
  471. { NULL }
  472. };
  473. AVFilter ff_af_amix = {
  474. .name = "amix",
  475. .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
  476. .priv_size = sizeof(MixContext),
  477. .priv_class = &amix_class,
  478. .init = init,
  479. .uninit = uninit,
  480. .query_formats = query_formats,
  481. .inputs = NULL,
  482. .outputs = avfilter_af_amix_outputs,
  483. .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
  484. };