af_amix.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. /*
  2. * Audio Mix Filter
  3. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * Audio Mix Filter
  24. *
  25. * Mixes audio from multiple sources into a single output. The channel layout,
  26. * sample rate, and sample format will be the same for all inputs and the
  27. * output.
  28. */
  29. #include "libavutil/audioconvert.h"
  30. #include "libavutil/audio_fifo.h"
  31. #include "libavutil/avassert.h"
  32. #include "libavutil/avstring.h"
  33. #include "libavutil/float_dsp.h"
  34. #include "libavutil/mathematics.h"
  35. #include "libavutil/opt.h"
  36. #include "libavutil/samplefmt.h"
  37. #include "audio.h"
  38. #include "avfilter.h"
  39. #include "formats.h"
  40. #include "internal.h"
  41. #define INPUT_OFF 0 /**< input has reached EOF */
  42. #define INPUT_ON 1 /**< input is active */
  43. #define INPUT_INACTIVE 2 /**< input is on, but is currently inactive */
  44. #define DURATION_LONGEST 0
  45. #define DURATION_SHORTEST 1
  46. #define DURATION_FIRST 2
  47. typedef struct FrameInfo {
  48. int nb_samples;
  49. int64_t pts;
  50. struct FrameInfo *next;
  51. } FrameInfo;
  52. /**
  53. * Linked list used to store timestamps and frame sizes of all frames in the
  54. * FIFO for the first input.
  55. *
  56. * This is needed to keep timestamps synchronized for the case where multiple
  57. * input frames are pushed to the filter for processing before a frame is
  58. * requested by the output link.
  59. */
  60. typedef struct FrameList {
  61. int nb_frames;
  62. int nb_samples;
  63. FrameInfo *list;
  64. FrameInfo *end;
  65. } FrameList;
  66. static void frame_list_clear(FrameList *frame_list)
  67. {
  68. if (frame_list) {
  69. while (frame_list->list) {
  70. FrameInfo *info = frame_list->list;
  71. frame_list->list = info->next;
  72. av_free(info);
  73. }
  74. frame_list->nb_frames = 0;
  75. frame_list->nb_samples = 0;
  76. frame_list->end = NULL;
  77. }
  78. }
  79. static int frame_list_next_frame_size(FrameList *frame_list)
  80. {
  81. if (!frame_list->list)
  82. return 0;
  83. return frame_list->list->nb_samples;
  84. }
  85. static int64_t frame_list_next_pts(FrameList *frame_list)
  86. {
  87. if (!frame_list->list)
  88. return AV_NOPTS_VALUE;
  89. return frame_list->list->pts;
  90. }
  91. static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
  92. {
  93. if (nb_samples >= frame_list->nb_samples) {
  94. frame_list_clear(frame_list);
  95. } else {
  96. int samples = nb_samples;
  97. while (samples > 0) {
  98. FrameInfo *info = frame_list->list;
  99. av_assert0(info != NULL);
  100. if (info->nb_samples <= samples) {
  101. samples -= info->nb_samples;
  102. frame_list->list = info->next;
  103. if (!frame_list->list)
  104. frame_list->end = NULL;
  105. frame_list->nb_frames--;
  106. frame_list->nb_samples -= info->nb_samples;
  107. av_free(info);
  108. } else {
  109. info->nb_samples -= samples;
  110. info->pts += samples;
  111. frame_list->nb_samples -= samples;
  112. samples = 0;
  113. }
  114. }
  115. }
  116. }
  117. static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
  118. {
  119. FrameInfo *info = av_malloc(sizeof(*info));
  120. if (!info)
  121. return AVERROR(ENOMEM);
  122. info->nb_samples = nb_samples;
  123. info->pts = pts;
  124. info->next = NULL;
  125. if (!frame_list->list) {
  126. frame_list->list = info;
  127. frame_list->end = info;
  128. } else {
  129. av_assert0(frame_list->end != NULL);
  130. frame_list->end->next = info;
  131. frame_list->end = info;
  132. }
  133. frame_list->nb_frames++;
  134. frame_list->nb_samples += nb_samples;
  135. return 0;
  136. }
  137. typedef struct MixContext {
  138. const AVClass *class; /**< class for AVOptions */
  139. AVFloatDSPContext fdsp;
  140. int nb_inputs; /**< number of inputs */
  141. int active_inputs; /**< number of input currently active */
  142. int duration_mode; /**< mode for determining duration */
  143. float dropout_transition; /**< transition time when an input drops out */
  144. int nb_channels; /**< number of channels */
  145. int sample_rate; /**< sample rate */
  146. int planar;
  147. AVAudioFifo **fifos; /**< audio fifo for each input */
  148. uint8_t *input_state; /**< current state of each input */
  149. float *input_scale; /**< mixing scale factor for each input */
  150. float scale_norm; /**< normalization factor for all inputs */
  151. int64_t next_pts; /**< calculated pts for next output frame */
  152. FrameList *frame_list; /**< list of frame info for the first input */
  153. } MixContext;
  154. #define OFFSET(x) offsetof(MixContext, x)
  155. #define A AV_OPT_FLAG_AUDIO_PARAM
  156. static const AVOption amix_options[] = {
  157. { "inputs", "Number of inputs.",
  158. OFFSET(nb_inputs), AV_OPT_TYPE_INT, { 2 }, 1, 32, A },
  159. { "duration", "How to determine the end-of-stream.",
  160. OFFSET(duration_mode), AV_OPT_TYPE_INT, { DURATION_LONGEST }, 0, 2, A, "duration" },
  161. { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { DURATION_LONGEST }, INT_MIN, INT_MAX, A, "duration" },
  162. { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { DURATION_SHORTEST }, INT_MIN, INT_MAX, A, "duration" },
  163. { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { DURATION_FIRST }, INT_MIN, INT_MAX, A, "duration" },
  164. { "dropout_transition", "Transition time, in seconds, for volume "
  165. "renormalization when an input stream ends.",
  166. OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { 2.0 }, 0, INT_MAX, A },
  167. { NULL },
  168. };
  169. AVFILTER_DEFINE_CLASS(amix);
  170. /**
  171. * Update the scaling factors to apply to each input during mixing.
  172. *
  173. * This balances the full volume range between active inputs and handles
  174. * volume transitions when EOF is encountered on an input but mixing continues
  175. * with the remaining inputs.
  176. */
  177. static void calculate_scales(MixContext *s, int nb_samples)
  178. {
  179. int i;
  180. if (s->scale_norm > s->active_inputs) {
  181. s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
  182. s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
  183. }
  184. for (i = 0; i < s->nb_inputs; i++) {
  185. if (s->input_state[i] == INPUT_ON)
  186. s->input_scale[i] = 1.0f / s->scale_norm;
  187. else
  188. s->input_scale[i] = 0.0f;
  189. }
  190. }
  191. static int config_output(AVFilterLink *outlink)
  192. {
  193. AVFilterContext *ctx = outlink->src;
  194. MixContext *s = ctx->priv;
  195. int i;
  196. char buf[64];
  197. s->planar = av_sample_fmt_is_planar(outlink->format);
  198. s->sample_rate = outlink->sample_rate;
  199. outlink->time_base = (AVRational){ 1, outlink->sample_rate };
  200. s->next_pts = AV_NOPTS_VALUE;
  201. s->frame_list = av_mallocz(sizeof(*s->frame_list));
  202. if (!s->frame_list)
  203. return AVERROR(ENOMEM);
  204. s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
  205. if (!s->fifos)
  206. return AVERROR(ENOMEM);
  207. s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
  208. for (i = 0; i < s->nb_inputs; i++) {
  209. s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
  210. if (!s->fifos[i])
  211. return AVERROR(ENOMEM);
  212. }
  213. s->input_state = av_malloc(s->nb_inputs);
  214. if (!s->input_state)
  215. return AVERROR(ENOMEM);
  216. memset(s->input_state, INPUT_ON, s->nb_inputs);
  217. s->active_inputs = s->nb_inputs;
  218. s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
  219. if (!s->input_scale)
  220. return AVERROR(ENOMEM);
  221. s->scale_norm = s->active_inputs;
  222. calculate_scales(s, 0);
  223. av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
  224. av_log(ctx, AV_LOG_VERBOSE,
  225. "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
  226. av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
  227. return 0;
  228. }
  229. /**
  230. * Read samples from the input FIFOs, mix, and write to the output link.
  231. */
  232. static int output_frame(AVFilterLink *outlink, int nb_samples)
  233. {
  234. AVFilterContext *ctx = outlink->src;
  235. MixContext *s = ctx->priv;
  236. AVFilterBufferRef *out_buf, *in_buf;
  237. int i;
  238. calculate_scales(s, nb_samples);
  239. out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
  240. if (!out_buf)
  241. return AVERROR(ENOMEM);
  242. in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
  243. if (!in_buf)
  244. return AVERROR(ENOMEM);
  245. for (i = 0; i < s->nb_inputs; i++) {
  246. if (s->input_state[i] == INPUT_ON) {
  247. int planes, plane_size, p;
  248. av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
  249. nb_samples);
  250. planes = s->planar ? s->nb_channels : 1;
  251. plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
  252. plane_size = FFALIGN(plane_size, 16);
  253. for (p = 0; p < planes; p++) {
  254. s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
  255. (float *) in_buf->extended_data[p],
  256. s->input_scale[i], plane_size);
  257. }
  258. }
  259. }
  260. avfilter_unref_buffer(in_buf);
  261. out_buf->pts = s->next_pts;
  262. if (s->next_pts != AV_NOPTS_VALUE)
  263. s->next_pts += nb_samples;
  264. ff_filter_samples(outlink, out_buf);
  265. return 0;
  266. }
  267. /**
  268. * Returns the smallest number of samples available in the input FIFOs other
  269. * than that of the first input.
  270. */
  271. static int get_available_samples(MixContext *s)
  272. {
  273. int i;
  274. int available_samples = INT_MAX;
  275. av_assert0(s->nb_inputs > 1);
  276. for (i = 1; i < s->nb_inputs; i++) {
  277. int nb_samples;
  278. if (s->input_state[i] == INPUT_OFF)
  279. continue;
  280. nb_samples = av_audio_fifo_size(s->fifos[i]);
  281. available_samples = FFMIN(available_samples, nb_samples);
  282. }
  283. if (available_samples == INT_MAX)
  284. return 0;
  285. return available_samples;
  286. }
  287. /**
  288. * Requests a frame, if needed, from each input link other than the first.
  289. */
  290. static int request_samples(AVFilterContext *ctx, int min_samples)
  291. {
  292. MixContext *s = ctx->priv;
  293. int i, ret;
  294. av_assert0(s->nb_inputs > 1);
  295. for (i = 1; i < s->nb_inputs; i++) {
  296. ret = 0;
  297. if (s->input_state[i] == INPUT_OFF)
  298. continue;
  299. while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
  300. ret = ff_request_frame(ctx->inputs[i]);
  301. if (ret == AVERROR_EOF) {
  302. if (av_audio_fifo_size(s->fifos[i]) == 0) {
  303. s->input_state[i] = INPUT_OFF;
  304. continue;
  305. }
  306. } else if (ret)
  307. return ret;
  308. }
  309. return 0;
  310. }
  311. /**
  312. * Calculates the number of active inputs and determines EOF based on the
  313. * duration option.
  314. *
  315. * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
  316. */
  317. static int calc_active_inputs(MixContext *s)
  318. {
  319. int i;
  320. int active_inputs = 0;
  321. for (i = 0; i < s->nb_inputs; i++)
  322. active_inputs += !!(s->input_state[i] != INPUT_OFF);
  323. s->active_inputs = active_inputs;
  324. if (!active_inputs ||
  325. (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
  326. (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
  327. return AVERROR_EOF;
  328. return 0;
  329. }
  330. static int request_frame(AVFilterLink *outlink)
  331. {
  332. AVFilterContext *ctx = outlink->src;
  333. MixContext *s = ctx->priv;
  334. int ret;
  335. int wanted_samples, available_samples;
  336. ret = calc_active_inputs(s);
  337. if (ret < 0)
  338. return ret;
  339. if (s->input_state[0] == INPUT_OFF) {
  340. ret = request_samples(ctx, 1);
  341. if (ret < 0)
  342. return ret;
  343. ret = calc_active_inputs(s);
  344. if (ret < 0)
  345. return ret;
  346. available_samples = get_available_samples(s);
  347. if (!available_samples)
  348. return 0;
  349. return output_frame(outlink, available_samples);
  350. }
  351. if (s->frame_list->nb_frames == 0) {
  352. ret = ff_request_frame(ctx->inputs[0]);
  353. if (ret == AVERROR_EOF) {
  354. s->input_state[0] = INPUT_OFF;
  355. if (s->nb_inputs == 1)
  356. return AVERROR_EOF;
  357. else
  358. return AVERROR(EAGAIN);
  359. } else if (ret)
  360. return ret;
  361. }
  362. av_assert0(s->frame_list->nb_frames > 0);
  363. wanted_samples = frame_list_next_frame_size(s->frame_list);
  364. if (s->active_inputs > 1) {
  365. ret = request_samples(ctx, wanted_samples);
  366. if (ret < 0)
  367. return ret;
  368. ret = calc_active_inputs(s);
  369. if (ret < 0)
  370. return ret;
  371. available_samples = get_available_samples(s);
  372. if (!available_samples)
  373. return 0;
  374. available_samples = FFMIN(available_samples, wanted_samples);
  375. } else {
  376. available_samples = wanted_samples;
  377. }
  378. s->next_pts = frame_list_next_pts(s->frame_list);
  379. frame_list_remove_samples(s->frame_list, available_samples);
  380. return output_frame(outlink, available_samples);
  381. }
  382. static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *buf)
  383. {
  384. AVFilterContext *ctx = inlink->dst;
  385. MixContext *s = ctx->priv;
  386. AVFilterLink *outlink = ctx->outputs[0];
  387. int i;
  388. for (i = 0; i < ctx->nb_inputs; i++)
  389. if (ctx->inputs[i] == inlink)
  390. break;
  391. if (i >= ctx->nb_inputs) {
  392. av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
  393. return;
  394. }
  395. if (i == 0) {
  396. int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
  397. outlink->time_base);
  398. frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
  399. }
  400. av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
  401. buf->audio->nb_samples);
  402. avfilter_unref_buffer(buf);
  403. }
  404. static int init(AVFilterContext *ctx, const char *args)
  405. {
  406. MixContext *s = ctx->priv;
  407. int i, ret;
  408. s->class = &amix_class;
  409. av_opt_set_defaults(s);
  410. if ((ret = av_set_options_string(s, args, "=", ":")) < 0) {
  411. av_log(ctx, AV_LOG_ERROR, "Error parsing options string '%s'.\n", args);
  412. return ret;
  413. }
  414. av_opt_free(s);
  415. for (i = 0; i < s->nb_inputs; i++) {
  416. char name[32];
  417. AVFilterPad pad = { 0 };
  418. snprintf(name, sizeof(name), "input%d", i);
  419. pad.type = AVMEDIA_TYPE_AUDIO;
  420. pad.name = av_strdup(name);
  421. pad.filter_samples = filter_samples;
  422. ff_insert_inpad(ctx, i, &pad);
  423. }
  424. avpriv_float_dsp_init(&s->fdsp, 0);
  425. return 0;
  426. }
  427. static void uninit(AVFilterContext *ctx)
  428. {
  429. int i;
  430. MixContext *s = ctx->priv;
  431. if (s->fifos) {
  432. for (i = 0; i < s->nb_inputs; i++)
  433. av_audio_fifo_free(s->fifos[i]);
  434. av_freep(&s->fifos);
  435. }
  436. frame_list_clear(s->frame_list);
  437. av_freep(&s->frame_list);
  438. av_freep(&s->input_state);
  439. av_freep(&s->input_scale);
  440. for (i = 0; i < ctx->nb_inputs; i++)
  441. av_freep(&ctx->input_pads[i].name);
  442. }
  443. static int query_formats(AVFilterContext *ctx)
  444. {
  445. AVFilterFormats *formats = NULL;
  446. ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
  447. ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);
  448. ff_set_common_formats(ctx, formats);
  449. ff_set_common_channel_layouts(ctx, ff_all_channel_layouts());
  450. ff_set_common_samplerates(ctx, ff_all_samplerates());
  451. return 0;
  452. }
  453. AVFilter avfilter_af_amix = {
  454. .name = "amix",
  455. .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
  456. .priv_size = sizeof(MixContext),
  457. .init = init,
  458. .uninit = uninit,
  459. .query_formats = query_formats,
  460. .inputs = (const AVFilterPad[]) {{ .name = NULL}},
  461. .outputs = (const AVFilterPad[]) {{ .name = "default",
  462. .type = AVMEDIA_TYPE_AUDIO,
  463. .config_props = config_output,
  464. .request_frame = request_frame },
  465. { .name = NULL}},
  466. };