123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119 |
- /*
- * Immersive Audio Model and Formats parsing
- * Copyright (c) 2023 James Almer <jamrial@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
- #include "libavutil/avassert.h"
- #include "libavutil/iamf.h"
- #include "libavutil/intreadwrite.h"
- #include "libavutil/log.h"
- #include "libavutil/mem.h"
- #include "libavcodec/get_bits.h"
- #include "libavcodec/flac.h"
- #include "libavcodec/leb.h"
- #include "libavcodec/mpeg4audio.h"
- #include "libavcodec/put_bits.h"
- #include "avio_internal.h"
- #include "iamf_parse.h"
- #include "isom.h"
- static int opus_decoder_config(IAMFCodecConfig *codec_config,
- AVIOContext *pb, int len)
- {
- int ret, left = len - avio_tell(pb);
- if (left < 11 || codec_config->audio_roll_distance >= 0)
- return AVERROR_INVALIDDATA;
- codec_config->extradata = av_malloc(left + 8);
- if (!codec_config->extradata)
- return AVERROR(ENOMEM);
- AV_WB32A(codec_config->extradata, MKBETAG('O','p','u','s'));
- AV_WB32A(codec_config->extradata + 4, MKBETAG('H','e','a','d'));
- ret = ffio_read_size(pb, codec_config->extradata + 8, left);
- if (ret < 0)
- return ret;
- codec_config->extradata_size = left + 8;
- codec_config->sample_rate = 48000;
- return 0;
- }
- static int aac_decoder_config(IAMFCodecConfig *codec_config,
- AVIOContext *pb, int len, void *logctx)
- {
- MPEG4AudioConfig cfg = { 0 };
- int object_type_id, codec_id, stream_type;
- int ret, tag, left;
- if (codec_config->audio_roll_distance >= 0)
- return AVERROR_INVALIDDATA;
- ff_mp4_read_descr(logctx, pb, &tag);
- if (tag != MP4DecConfigDescrTag)
- return AVERROR_INVALIDDATA;
- object_type_id = avio_r8(pb);
- if (object_type_id != 0x40)
- return AVERROR_INVALIDDATA;
- stream_type = avio_r8(pb);
- if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
- return AVERROR_INVALIDDATA;
- avio_skip(pb, 3); // buffer size db
- avio_skip(pb, 4); // rc_max_rate
- avio_skip(pb, 4); // avg bitrate
- codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
- if (codec_id && codec_id != codec_config->codec_id)
- return AVERROR_INVALIDDATA;
- left = ff_mp4_read_descr(logctx, pb, &tag);
- if (tag != MP4DecSpecificDescrTag ||
- !left || left > (len - avio_tell(pb)))
- return AVERROR_INVALIDDATA;
- // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
- codec_config->extradata = av_malloc((size_t)left + AV_INPUT_BUFFER_PADDING_SIZE);
- if (!codec_config->extradata)
- return AVERROR(ENOMEM);
- ret = ffio_read_size(pb, codec_config->extradata, left);
- if (ret < 0)
- return ret;
- codec_config->extradata_size = left;
- memset(codec_config->extradata + codec_config->extradata_size, 0,
- AV_INPUT_BUFFER_PADDING_SIZE);
- ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
- codec_config->extradata_size, 1, logctx);
- if (ret < 0)
- return ret;
- codec_config->sample_rate = cfg.sample_rate;
- return 0;
- }
- static int flac_decoder_config(IAMFCodecConfig *codec_config,
- AVIOContext *pb, int len)
- {
- int ret, left;
- if (codec_config->audio_roll_distance)
- return AVERROR_INVALIDDATA;
- avio_skip(pb, 4); // METADATA_BLOCK_HEADER
- left = len - avio_tell(pb);
- if (left < FLAC_STREAMINFO_SIZE)
- return AVERROR_INVALIDDATA;
- codec_config->extradata = av_malloc(left);
- if (!codec_config->extradata)
- return AVERROR(ENOMEM);
- ret = ffio_read_size(pb, codec_config->extradata, left);
- if (ret < 0)
- return ret;
- codec_config->extradata_size = left;
- codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;
- return 0;
- }
- static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
- AVIOContext *pb, int len)
- {
- static const enum AVCodecID sample_fmt[2][3] = {
- { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
- { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
- };
- int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
- int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32
- if (sample_format > 1 || sample_size > 2U || codec_config->audio_roll_distance)
- return AVERROR_INVALIDDATA;
- codec_config->codec_id = sample_fmt[sample_format][sample_size];
- codec_config->sample_rate = avio_rb32(pb);
- if (len - avio_tell(pb))
- return AVERROR_INVALIDDATA;
- return 0;
- }
- static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
- {
- IAMFCodecConfig **tmp, *codec_config = NULL;
- FFIOContext b;
- AVIOContext *pbc;
- uint8_t *buf;
- enum AVCodecID avcodec_id;
- unsigned codec_config_id, nb_samples, codec_id;
- int16_t audio_roll_distance;
- int ret;
- buf = av_malloc(len);
- if (!buf)
- return AVERROR(ENOMEM);
- ret = ffio_read_size(pb, buf, len);
- if (ret < 0)
- goto fail;
- ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
- pbc = &b.pub;
- codec_config_id = ffio_read_leb(pbc);
- codec_id = avio_rb32(pbc);
- nb_samples = ffio_read_leb(pbc);
- audio_roll_distance = avio_rb16(pbc);
- switch(codec_id) {
- case MKBETAG('O','p','u','s'):
- avcodec_id = AV_CODEC_ID_OPUS;
- break;
- case MKBETAG('m','p','4','a'):
- avcodec_id = AV_CODEC_ID_AAC;
- break;
- case MKBETAG('f','L','a','C'):
- avcodec_id = AV_CODEC_ID_FLAC;
- break;
- default:
- avcodec_id = AV_CODEC_ID_NONE;
- break;
- }
- for (int i = 0; i < c->nb_codec_configs; i++)
- if (c->codec_configs[i]->codec_config_id == codec_config_id) {
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs));
- if (!tmp) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- c->codec_configs = tmp;
- codec_config = av_mallocz(sizeof(*codec_config));
- if (!codec_config) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- codec_config->codec_config_id = codec_config_id;
- codec_config->codec_id = avcodec_id;
- codec_config->nb_samples = nb_samples;
- codec_config->audio_roll_distance = audio_roll_distance;
- switch(codec_id) {
- case MKBETAG('O','p','u','s'):
- ret = opus_decoder_config(codec_config, pbc, len);
- break;
- case MKBETAG('m','p','4','a'):
- ret = aac_decoder_config(codec_config, pbc, len, s);
- break;
- case MKBETAG('f','L','a','C'):
- ret = flac_decoder_config(codec_config, pbc, len);
- break;
- case MKBETAG('i','p','c','m'):
- ret = ipcm_decoder_config(codec_config, pbc, len);
- break;
- default:
- break;
- }
- if (ret < 0)
- goto fail;
- if ((codec_config->nb_samples > INT_MAX) || codec_config->nb_samples <= 0 ||
- (-codec_config->audio_roll_distance > INT_MAX / codec_config->nb_samples)) {
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- c->codec_configs[c->nb_codec_configs++] = codec_config;
- len -= avio_tell(pbc);
- if (len)
- av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);
- ret = 0;
- fail:
- av_free(buf);
- if (ret < 0) {
- if (codec_config)
- av_free(codec_config->extradata);
- av_free(codec_config);
- }
- return ret;
- }
- static int update_extradata(AVCodecParameters *codecpar)
- {
- GetBitContext gb;
- PutBitContext pb;
- int ret;
- switch(codecpar->codec_id) {
- case AV_CODEC_ID_OPUS:
- AV_WB8(codecpar->extradata + 9, codecpar->ch_layout.nb_channels);
- AV_WL16A(codecpar->extradata + 10, AV_RB16A(codecpar->extradata + 10)); // Byte swap pre-skip
- AV_WL32A(codecpar->extradata + 12, AV_RB32A(codecpar->extradata + 12)); // Byte swap sample rate
- AV_WL16A(codecpar->extradata + 16, AV_RB16A(codecpar->extradata + 16)); // Byte swap Output Gain
- break;
- case AV_CODEC_ID_AAC: {
- uint8_t buf[5];
- init_put_bits(&pb, buf, sizeof(buf));
- ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
- if (ret < 0)
- return ret;
- ret = get_bits(&gb, 5);
- put_bits(&pb, 5, ret);
- if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
- put_bits(&pb, 6, get_bits(&gb, 6));
- ret = get_bits(&gb, 4);
- put_bits(&pb, 4, ret);
- if (ret == 0x0f)
- put_bits(&pb, 24, get_bits(&gb, 24));
- skip_bits(&gb, 4);
- put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
- ret = put_bits_left(&pb);
- put_bits(&pb, ret, get_bits_long(&gb, ret));
- flush_put_bits(&pb);
- memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
- break;
- }
- case AV_CODEC_ID_FLAC: {
- uint8_t buf[13];
- init_put_bits(&pb, buf, sizeof(buf));
- ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
- if (ret < 0)
- return ret;
- put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
- put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
- put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
- skip_bits(&gb, 3);
- put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
- ret = put_bits_left(&pb);
- put_bits(&pb, ret, get_bits(&gb, ret));
- flush_put_bits(&pb);
- memcpy(codecpar->extradata, buf, sizeof(buf));
- break;
- }
- }
- return 0;
- }
- static int scalable_channel_layout_config(void *s, AVIOContext *pb,
- IAMFAudioElement *audio_element,
- const IAMFCodecConfig *codec_config)
- {
- int nb_layers, k = 0;
- nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
- // skip_bits(&gb, 5); //reserved
- if (nb_layers > 6 || nb_layers == 0)
- return AVERROR_INVALIDDATA;
- audio_element->layers = av_calloc(nb_layers, sizeof(*audio_element->layers));
- if (!audio_element->layers)
- return AVERROR(ENOMEM);
- audio_element->nb_layers = nb_layers;
- for (int i = 0; i < nb_layers; i++) {
- AVIAMFLayer *layer;
- int loudspeaker_layout, output_gain_is_present_flag;
- int substream_count, coupled_substream_count;
- int ret, byte = avio_r8(pb);
- layer = av_iamf_audio_element_add_layer(audio_element->element);
- if (!layer)
- return AVERROR(ENOMEM);
- loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
- output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
- if ((byte >> 2) & 1)
- layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;
- substream_count = avio_r8(pb);
- coupled_substream_count = avio_r8(pb);
- if (substream_count + k > audio_element->nb_substreams)
- return AVERROR_INVALIDDATA;
- audio_element->layers[i].substream_count = substream_count;
- audio_element->layers[i].coupled_substream_count = coupled_substream_count;
- if (output_gain_is_present_flag) {
- layer->output_gain_flags = avio_r8(pb) >> 2; // get_bits(&gb, 6);
- layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
- }
- if (loudspeaker_layout < 10)
- av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
- else
- layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
- .nb_channels = substream_count +
- coupled_substream_count };
- for (int j = 0; j < substream_count; j++) {
- IAMFSubStream *substream = &audio_element->substreams[k++];
- substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
- (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
- ret = update_extradata(substream->codecpar);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
- }
- static int ambisonics_config(void *s, AVIOContext *pb,
- IAMFAudioElement *audio_element,
- const IAMFCodecConfig *codec_config)
- {
- AVIAMFLayer *layer;
- unsigned ambisonics_mode;
- int output_channel_count, substream_count, order;
- int ret;
- ambisonics_mode = ffio_read_leb(pb);
- if (ambisonics_mode > 1)
- return AVERROR_INVALIDDATA;
- output_channel_count = avio_r8(pb); // C
- substream_count = avio_r8(pb); // N
- if (audio_element->nb_substreams != substream_count)
- return AVERROR_INVALIDDATA;
- order = floor(sqrt(output_channel_count - 1));
- /* incomplete order - some harmonics are missing */
- if ((order + 1) * (order + 1) != output_channel_count)
- return AVERROR_INVALIDDATA;
- audio_element->layers = av_mallocz(sizeof(*audio_element->layers));
- if (!audio_element->layers)
- return AVERROR(ENOMEM);
- audio_element->nb_layers = 1;
- audio_element->layers->substream_count = substream_count;
- layer = av_iamf_audio_element_add_layer(audio_element->element);
- if (!layer)
- return AVERROR(ENOMEM);
- layer->ambisonics_mode = ambisonics_mode;
- if (ambisonics_mode == 0) {
- for (int i = 0; i < substream_count; i++) {
- IAMFSubStream *substream = &audio_element->substreams[i];
- substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
- ret = update_extradata(substream->codecpar);
- if (ret < 0)
- return ret;
- }
- layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
- layer->ch_layout.nb_channels = output_channel_count;
- layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
- if (!layer->ch_layout.u.map)
- return AVERROR(ENOMEM);
- for (int i = 0; i < output_channel_count; i++)
- layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
- } else {
- int coupled_substream_count = avio_r8(pb); // M
- int nb_demixing_matrix = substream_count + coupled_substream_count;
- int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
- audio_element->layers->coupled_substream_count = coupled_substream_count;
- layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
- layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
- if (!layer->demixing_matrix)
- return AVERROR(ENOMEM);
- for (int i = 0; i < demixing_matrix_size; i++)
- layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
- for (int i = 0; i < substream_count; i++) {
- IAMFSubStream *substream = &audio_element->substreams[i];
- substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
- (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
- ret = update_extradata(substream->codecpar);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
- }
- static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
- unsigned int type,
- const IAMFAudioElement *audio_element,
- AVIAMFParamDefinition **out_param_definition)
- {
- IAMFParamDefinition *param_definition = NULL;
- AVIAMFParamDefinition *param;
- unsigned int parameter_id, parameter_rate, mode;
- unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
- size_t param_size;
- parameter_id = ffio_read_leb(pb);
- for (int i = 0; i < c->nb_param_definitions; i++)
- if (c->param_definitions[i]->param->parameter_id == parameter_id) {
- param_definition = c->param_definitions[i];
- break;
- }
- parameter_rate = ffio_read_leb(pb);
- mode = avio_r8(pb) >> 7;
- if (mode == 0) {
- duration = ffio_read_leb(pb);
- if (!duration)
- return AVERROR_INVALIDDATA;
- constant_subblock_duration = ffio_read_leb(pb);
- if (constant_subblock_duration == 0)
- nb_subblocks = ffio_read_leb(pb);
- else
- nb_subblocks = duration / constant_subblock_duration;
- }
- param = av_iamf_param_definition_alloc(type, nb_subblocks, ¶m_size);
- if (!param)
- return AVERROR(ENOMEM);
- for (int i = 0; i < nb_subblocks; i++) {
- void *subblock = av_iamf_param_definition_get_subblock(param, i);
- unsigned int subblock_duration = constant_subblock_duration;
- if (constant_subblock_duration == 0)
- subblock_duration = ffio_read_leb(pb);
- switch (type) {
- case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
- AVIAMFMixGain *mix = subblock;
- mix->subblock_duration = subblock_duration;
- break;
- }
- case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
- AVIAMFDemixingInfo *demix = subblock;
- demix->subblock_duration = subblock_duration;
- // DefaultDemixingInfoParameterData
- av_assert0(audio_element);
- demix->dmixp_mode = avio_r8(pb) >> 5;
- audio_element->element->default_w = avio_r8(pb) >> 4;
- break;
- }
- case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
- AVIAMFReconGain *recon = subblock;
- recon->subblock_duration = subblock_duration;
- break;
- }
- default:
- av_free(param);
- return AVERROR_INVALIDDATA;
- }
- }
- param->parameter_id = parameter_id;
- param->parameter_rate = parameter_rate;
- param->duration = duration;
- param->constant_subblock_duration = constant_subblock_duration;
- param->nb_subblocks = nb_subblocks;
- if (param_definition) {
- if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
- av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
- av_free(param);
- return AVERROR_INVALIDDATA;
- }
- } else {
- IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
- sizeof(*c->param_definitions));
- if (!tmp) {
- av_free(param);
- return AVERROR(ENOMEM);
- }
- c->param_definitions = tmp;
- param_definition = av_mallocz(sizeof(*param_definition));
- if (!param_definition) {
- av_free(param);
- return AVERROR(ENOMEM);
- }
- param_definition->param = param;
- param_definition->mode = !mode;
- param_definition->param_size = param_size;
- param_definition->audio_element = audio_element;
- c->param_definitions[c->nb_param_definitions++] = param_definition;
- }
- av_assert0(out_param_definition);
- *out_param_definition = param;
- return 0;
- }
- static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
- {
- const IAMFCodecConfig *codec_config;
- AVIAMFAudioElement *element;
- IAMFAudioElement **tmp, *audio_element = NULL;
- FFIOContext b;
- AVIOContext *pbc;
- uint8_t *buf;
- unsigned audio_element_id, nb_substreams, codec_config_id, num_parameters;
- int audio_element_type, ret;
- buf = av_malloc(len);
- if (!buf)
- return AVERROR(ENOMEM);
- ret = ffio_read_size(pb, buf, len);
- if (ret < 0)
- goto fail;
- ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
- pbc = &b.pub;
- audio_element_id = ffio_read_leb(pbc);
- for (int i = 0; i < c->nb_audio_elements; i++)
- if (c->audio_elements[i]->audio_element_id == audio_element_id) {
- av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- audio_element_type = avio_r8(pbc) >> 5;
- if (audio_element_type > AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
- av_log(s, AV_LOG_DEBUG, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
- ret = 0;
- goto fail;
- }
- codec_config_id = ffio_read_leb(pbc);
- codec_config = ff_iamf_get_codec_config(c, codec_config_id);
- if (!codec_config) {
- av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- if (codec_config->codec_id == AV_CODEC_ID_NONE) {
- av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
- ret = 0;
- goto fail;
- }
- tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
- if (!tmp) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- c->audio_elements = tmp;
- audio_element = av_mallocz(sizeof(*audio_element));
- if (!audio_element) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- nb_substreams = ffio_read_leb(pbc);
- audio_element->codec_config_id = codec_config_id;
- audio_element->audio_element_id = audio_element_id;
- audio_element->substreams = av_calloc(nb_substreams, sizeof(*audio_element->substreams));
- if (!audio_element->substreams) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- audio_element->nb_substreams = nb_substreams;
- element = audio_element->element = av_iamf_audio_element_alloc();
- if (!element) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- audio_element->celement = element;
- element->audio_element_type = audio_element_type;
- for (int i = 0; i < audio_element->nb_substreams; i++) {
- IAMFSubStream *substream = &audio_element->substreams[i];
- substream->codecpar = avcodec_parameters_alloc();
- if (!substream->codecpar) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- substream->audio_substream_id = ffio_read_leb(pbc);
- substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
- substream->codecpar->codec_id = codec_config->codec_id;
- substream->codecpar->frame_size = codec_config->nb_samples;
- substream->codecpar->sample_rate = codec_config->sample_rate;
- substream->codecpar->seek_preroll = -codec_config->audio_roll_distance * codec_config->nb_samples;
- switch(substream->codecpar->codec_id) {
- case AV_CODEC_ID_AAC:
- case AV_CODEC_ID_FLAC:
- case AV_CODEC_ID_OPUS:
- substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
- if (!substream->codecpar->extradata) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
- memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- substream->codecpar->extradata_size = codec_config->extradata_size;
- break;
- }
- }
- num_parameters = ffio_read_leb(pbc);
- if (num_parameters && audio_element_type != 0) {
- av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
- " for Scene representations\n", num_parameters);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- for (int i = 0; i < num_parameters; i++) {
- unsigned type;
- type = ffio_read_leb(pbc);
- if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
- ret = AVERROR_INVALIDDATA;
- else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING)
- ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
- else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN)
- ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
- else {
- unsigned param_definition_size = ffio_read_leb(pbc);
- avio_skip(pbc, param_definition_size);
- }
- if (ret < 0)
- goto fail;
- }
- if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
- ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
- if (ret < 0)
- goto fail;
- } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
- ret = ambisonics_config(s, pbc, audio_element, codec_config);
- if (ret < 0)
- goto fail;
- } else {
- av_assert0(0);
- }
- c->audio_elements[c->nb_audio_elements++] = audio_element;
- len -= avio_tell(pbc);
- if (len)
- av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);
- ret = 0;
- fail:
- av_free(buf);
- if (ret < 0)
- ff_iamf_free_audio_element(&audio_element);
- return ret;
- }
- static int label_string(AVIOContext *pb, char **label)
- {
- uint8_t buf[128];
- avio_get_str(pb, sizeof(buf), buf, sizeof(buf));
- if (pb->error)
- return pb->error;
- if (pb->eof_reached)
- return AVERROR_INVALIDDATA;
- *label = av_strdup(buf);
- if (!*label)
- return AVERROR(ENOMEM);
- return 0;
- }
- static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
- {
- AVIAMFMixPresentation *mix;
- IAMFMixPresentation **tmp, *mix_presentation = NULL;
- FFIOContext b;
- AVIOContext *pbc;
- uint8_t *buf;
- unsigned nb_submixes, mix_presentation_id;
- int ret;
- buf = av_malloc(len);
- if (!buf)
- return AVERROR(ENOMEM);
- ret = ffio_read_size(pb, buf, len);
- if (ret < 0)
- goto fail;
- ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
- pbc = &b.pub;
- mix_presentation_id = ffio_read_leb(pbc);
- for (int i = 0; i < c->nb_mix_presentations; i++)
- if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
- av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
- if (!tmp) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- c->mix_presentations = tmp;
- mix_presentation = av_mallocz(sizeof(*mix_presentation));
- if (!mix_presentation) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- mix_presentation->mix_presentation_id = mix_presentation_id;
- mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
- if (!mix) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- mix_presentation->cmix = mix;
- mix_presentation->count_label = ffio_read_leb(pbc);
- mix_presentation->language_label = av_calloc(mix_presentation->count_label,
- sizeof(*mix_presentation->language_label));
- if (!mix_presentation->language_label) {
- mix_presentation->count_label = 0;
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- for (int i = 0; i < mix_presentation->count_label; i++) {
- ret = label_string(pbc, &mix_presentation->language_label[i]);
- if (ret < 0)
- goto fail;
- }
- for (int i = 0; i < mix_presentation->count_label; i++) {
- char *annotation = NULL;
- ret = label_string(pbc, &annotation);
- if (ret < 0)
- goto fail;
- ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
- AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
- if (ret < 0)
- goto fail;
- }
- nb_submixes = ffio_read_leb(pbc);
- for (int i = 0; i < nb_submixes; i++) {
- AVIAMFSubmix *sub_mix;
- unsigned nb_elements, nb_layouts;
- sub_mix = av_iamf_mix_presentation_add_submix(mix);
- if (!sub_mix) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- nb_elements = ffio_read_leb(pbc);
- for (int j = 0; j < nb_elements; j++) {
- AVIAMFSubmixElement *submix_element;
- IAMFAudioElement *audio_element = NULL;
- unsigned int rendering_config_extension_size;
- submix_element = av_iamf_submix_add_element(sub_mix);
- if (!submix_element) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- submix_element->audio_element_id = ffio_read_leb(pbc);
- for (int k = 0; k < c->nb_audio_elements; k++)
- if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
- audio_element = c->audio_elements[k];
- break;
- }
- if (!audio_element) {
- av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
- submix_element->audio_element_id, mix_presentation_id);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- for (int k = 0; k < mix_presentation->count_label; k++) {
- char *annotation = NULL;
- ret = label_string(pbc, &annotation);
- if (ret < 0)
- goto fail;
- ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
- AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
- if (ret < 0)
- goto fail;
- }
- submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;
- rendering_config_extension_size = ffio_read_leb(pbc);
- avio_skip(pbc, rendering_config_extension_size);
- ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
- NULL,
- &submix_element->element_mix_config);
- if (ret < 0)
- goto fail;
- submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- }
- ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
- if (ret < 0)
- goto fail;
- sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- nb_layouts = ffio_read_leb(pbc);
- for (int j = 0; j < nb_layouts; j++) {
- AVIAMFSubmixLayout *submix_layout;
- int info_type;
- int byte = avio_r8(pbc);
- submix_layout = av_iamf_submix_add_layout(sub_mix);
- if (!submix_layout) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- submix_layout->layout_type = byte >> 6;
- if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
- submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
- av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
- submix_layout->layout_type, mix_presentation_id);
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- if (submix_layout->layout_type == 2) {
- int sound_system;
- sound_system = (byte >> 2) & 0xF;
- if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
- } else
- submix_layout->sound_system = (AVChannelLayout)AV_CHANNEL_LAYOUT_BINAURAL;
- info_type = avio_r8(pbc);
- submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- if (info_type & 1)
- submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- if (info_type & 2) {
- unsigned int num_anchored_loudness = avio_r8(pbc);
- for (int k = 0; k < num_anchored_loudness; k++) {
- unsigned int anchor_element = avio_r8(pbc);
- AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
- if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
- submix_layout->dialogue_anchored_loudness = anchored_loudness;
- else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
- submix_layout->album_anchored_loudness = anchored_loudness;
- else
- av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
- }
- }
- if (info_type & 0xFC) {
- unsigned int info_type_size = ffio_read_leb(pbc);
- avio_skip(pbc, info_type_size);
- }
- }
- }
- c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;
- len -= avio_tell(pbc);
- if (len)
- av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);
- ret = 0;
- fail:
- av_free(buf);
- if (ret < 0)
- ff_iamf_free_mix_presentation(&mix_presentation);
- return ret;
- }
- int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
- unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
- unsigned *skip_samples, unsigned *discard_padding)
- {
- GetBitContext gb;
- int ret, extension_flag, trimming, start;
- unsigned skip = 0, discard = 0;
- unsigned size;
- ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
- if (ret < 0)
- return ret;
- *type = get_bits(&gb, 5);
- /*redundant =*/ get_bits1(&gb);
- trimming = get_bits1(&gb);
- extension_flag = get_bits1(&gb);
- *obu_size = get_leb(&gb);
- if (*obu_size > INT_MAX)
- return AVERROR_INVALIDDATA;
- start = get_bits_count(&gb) / 8;
- if (trimming) {
- discard = get_leb(&gb); // num_samples_to_trim_at_end
- skip = get_leb(&gb); // num_samples_to_trim_at_start
- }
- if (skip_samples)
- *skip_samples = skip;
- if (discard_padding)
- *discard_padding = discard;
- if (extension_flag) {
- unsigned int extension_bytes;
- extension_bytes = get_leb(&gb);
- if (extension_bytes > INT_MAX / 8)
- return AVERROR_INVALIDDATA;
- skip_bits_long(&gb, extension_bytes * 8);
- }
- if (get_bits_left(&gb) < 0)
- return AVERROR_INVALIDDATA;
- size = *obu_size + start;
- if (size > INT_MAX)
- return AVERROR_INVALIDDATA;
- *obu_size -= get_bits_count(&gb) / 8 - start;
- *start_pos = size - *obu_size;
- return size;
- }
- int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
- int max_size, void *log_ctx)
- {
- uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
- int ret;
- while (1) {
- unsigned obu_size;
- enum IAMF_OBU_Type type;
- int start_pos, len, size;
- if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
- return ret;
- size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
- if (size < 0)
- return size;
- memset(header + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
- if (len < 0 || obu_size > max_size) {
- av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
- avio_seek(pb, -size, SEEK_CUR);
- return len;
- }
- if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
- avio_seek(pb, -size, SEEK_CUR);
- break;
- }
- avio_seek(pb, -(size - start_pos), SEEK_CUR);
- switch (type) {
- case IAMF_OBU_IA_CODEC_CONFIG:
- ret = codec_config_obu(log_ctx, c, pb, obu_size);
- break;
- case IAMF_OBU_IA_AUDIO_ELEMENT:
- ret = audio_element_obu(log_ctx, c, pb, obu_size);
- break;
- case IAMF_OBU_IA_MIX_PRESENTATION:
- ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
- break;
- default: {
- int64_t offset = avio_skip(pb, obu_size);
- if (offset < 0)
- ret = offset;
- break;
- }
- }
- if (ret < 0) {
- av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
- return ret;
- }
- max_size -= obu_size + start_pos;
- if (max_size < 0)
- return AVERROR_INVALIDDATA;
- if (!max_size)
- break;
- }
- return 0;
- }
|