iamf_parse.c 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119
  1. /*
  2. * Immersive Audio Model and Formats parsing
  3. * Copyright (c) 2023 James Almer <jamrial@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/avassert.h"
  22. #include "libavutil/iamf.h"
  23. #include "libavutil/intreadwrite.h"
  24. #include "libavutil/log.h"
  25. #include "libavutil/mem.h"
  26. #include "libavcodec/get_bits.h"
  27. #include "libavcodec/flac.h"
  28. #include "libavcodec/leb.h"
  29. #include "libavcodec/mpeg4audio.h"
  30. #include "libavcodec/put_bits.h"
  31. #include "avio_internal.h"
  32. #include "iamf_parse.h"
  33. #include "isom.h"
  34. static int opus_decoder_config(IAMFCodecConfig *codec_config,
  35. AVIOContext *pb, int len)
  36. {
  37. int ret, left = len - avio_tell(pb);
  38. if (left < 11 || codec_config->audio_roll_distance >= 0)
  39. return AVERROR_INVALIDDATA;
  40. codec_config->extradata = av_malloc(left + 8);
  41. if (!codec_config->extradata)
  42. return AVERROR(ENOMEM);
  43. AV_WB32A(codec_config->extradata, MKBETAG('O','p','u','s'));
  44. AV_WB32A(codec_config->extradata + 4, MKBETAG('H','e','a','d'));
  45. ret = ffio_read_size(pb, codec_config->extradata + 8, left);
  46. if (ret < 0)
  47. return ret;
  48. codec_config->extradata_size = left + 8;
  49. codec_config->sample_rate = 48000;
  50. return 0;
  51. }
  52. static int aac_decoder_config(IAMFCodecConfig *codec_config,
  53. AVIOContext *pb, int len, void *logctx)
  54. {
  55. MPEG4AudioConfig cfg = { 0 };
  56. int object_type_id, codec_id, stream_type;
  57. int ret, tag, left;
  58. if (codec_config->audio_roll_distance >= 0)
  59. return AVERROR_INVALIDDATA;
  60. ff_mp4_read_descr(logctx, pb, &tag);
  61. if (tag != MP4DecConfigDescrTag)
  62. return AVERROR_INVALIDDATA;
  63. object_type_id = avio_r8(pb);
  64. if (object_type_id != 0x40)
  65. return AVERROR_INVALIDDATA;
  66. stream_type = avio_r8(pb);
  67. if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
  68. return AVERROR_INVALIDDATA;
  69. avio_skip(pb, 3); // buffer size db
  70. avio_skip(pb, 4); // rc_max_rate
  71. avio_skip(pb, 4); // avg bitrate
  72. codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
  73. if (codec_id && codec_id != codec_config->codec_id)
  74. return AVERROR_INVALIDDATA;
  75. left = ff_mp4_read_descr(logctx, pb, &tag);
  76. if (tag != MP4DecSpecificDescrTag ||
  77. !left || left > (len - avio_tell(pb)))
  78. return AVERROR_INVALIDDATA;
  79. // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
  80. codec_config->extradata = av_malloc((size_t)left + AV_INPUT_BUFFER_PADDING_SIZE);
  81. if (!codec_config->extradata)
  82. return AVERROR(ENOMEM);
  83. ret = ffio_read_size(pb, codec_config->extradata, left);
  84. if (ret < 0)
  85. return ret;
  86. codec_config->extradata_size = left;
  87. memset(codec_config->extradata + codec_config->extradata_size, 0,
  88. AV_INPUT_BUFFER_PADDING_SIZE);
  89. ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
  90. codec_config->extradata_size, 1, logctx);
  91. if (ret < 0)
  92. return ret;
  93. codec_config->sample_rate = cfg.sample_rate;
  94. return 0;
  95. }
  96. static int flac_decoder_config(IAMFCodecConfig *codec_config,
  97. AVIOContext *pb, int len)
  98. {
  99. int ret, left;
  100. if (codec_config->audio_roll_distance)
  101. return AVERROR_INVALIDDATA;
  102. avio_skip(pb, 4); // METADATA_BLOCK_HEADER
  103. left = len - avio_tell(pb);
  104. if (left < FLAC_STREAMINFO_SIZE)
  105. return AVERROR_INVALIDDATA;
  106. codec_config->extradata = av_malloc(left);
  107. if (!codec_config->extradata)
  108. return AVERROR(ENOMEM);
  109. ret = ffio_read_size(pb, codec_config->extradata, left);
  110. if (ret < 0)
  111. return ret;
  112. codec_config->extradata_size = left;
  113. codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;
  114. return 0;
  115. }
  116. static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
  117. AVIOContext *pb, int len)
  118. {
  119. static const enum AVCodecID sample_fmt[2][3] = {
  120. { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
  121. { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
  122. };
  123. int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
  124. int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32
  125. if (sample_format > 1 || sample_size > 2U || codec_config->audio_roll_distance)
  126. return AVERROR_INVALIDDATA;
  127. codec_config->codec_id = sample_fmt[sample_format][sample_size];
  128. codec_config->sample_rate = avio_rb32(pb);
  129. if (len - avio_tell(pb))
  130. return AVERROR_INVALIDDATA;
  131. return 0;
  132. }
  133. static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
  134. {
  135. IAMFCodecConfig **tmp, *codec_config = NULL;
  136. FFIOContext b;
  137. AVIOContext *pbc;
  138. uint8_t *buf;
  139. enum AVCodecID avcodec_id;
  140. unsigned codec_config_id, nb_samples, codec_id;
  141. int16_t audio_roll_distance;
  142. int ret;
  143. buf = av_malloc(len);
  144. if (!buf)
  145. return AVERROR(ENOMEM);
  146. ret = ffio_read_size(pb, buf, len);
  147. if (ret < 0)
  148. goto fail;
  149. ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
  150. pbc = &b.pub;
  151. codec_config_id = ffio_read_leb(pbc);
  152. codec_id = avio_rb32(pbc);
  153. nb_samples = ffio_read_leb(pbc);
  154. audio_roll_distance = avio_rb16(pbc);
  155. switch(codec_id) {
  156. case MKBETAG('O','p','u','s'):
  157. avcodec_id = AV_CODEC_ID_OPUS;
  158. break;
  159. case MKBETAG('m','p','4','a'):
  160. avcodec_id = AV_CODEC_ID_AAC;
  161. break;
  162. case MKBETAG('f','L','a','C'):
  163. avcodec_id = AV_CODEC_ID_FLAC;
  164. break;
  165. default:
  166. avcodec_id = AV_CODEC_ID_NONE;
  167. break;
  168. }
  169. for (int i = 0; i < c->nb_codec_configs; i++)
  170. if (c->codec_configs[i]->codec_config_id == codec_config_id) {
  171. ret = AVERROR_INVALIDDATA;
  172. goto fail;
  173. }
  174. tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs));
  175. if (!tmp) {
  176. ret = AVERROR(ENOMEM);
  177. goto fail;
  178. }
  179. c->codec_configs = tmp;
  180. codec_config = av_mallocz(sizeof(*codec_config));
  181. if (!codec_config) {
  182. ret = AVERROR(ENOMEM);
  183. goto fail;
  184. }
  185. codec_config->codec_config_id = codec_config_id;
  186. codec_config->codec_id = avcodec_id;
  187. codec_config->nb_samples = nb_samples;
  188. codec_config->audio_roll_distance = audio_roll_distance;
  189. switch(codec_id) {
  190. case MKBETAG('O','p','u','s'):
  191. ret = opus_decoder_config(codec_config, pbc, len);
  192. break;
  193. case MKBETAG('m','p','4','a'):
  194. ret = aac_decoder_config(codec_config, pbc, len, s);
  195. break;
  196. case MKBETAG('f','L','a','C'):
  197. ret = flac_decoder_config(codec_config, pbc, len);
  198. break;
  199. case MKBETAG('i','p','c','m'):
  200. ret = ipcm_decoder_config(codec_config, pbc, len);
  201. break;
  202. default:
  203. break;
  204. }
  205. if (ret < 0)
  206. goto fail;
  207. if ((codec_config->nb_samples > INT_MAX) || codec_config->nb_samples <= 0 ||
  208. (-codec_config->audio_roll_distance > INT_MAX / codec_config->nb_samples)) {
  209. ret = AVERROR_INVALIDDATA;
  210. goto fail;
  211. }
  212. c->codec_configs[c->nb_codec_configs++] = codec_config;
  213. len -= avio_tell(pbc);
  214. if (len)
  215. av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);
  216. ret = 0;
  217. fail:
  218. av_free(buf);
  219. if (ret < 0) {
  220. if (codec_config)
  221. av_free(codec_config->extradata);
  222. av_free(codec_config);
  223. }
  224. return ret;
  225. }
  226. static int update_extradata(AVCodecParameters *codecpar)
  227. {
  228. GetBitContext gb;
  229. PutBitContext pb;
  230. int ret;
  231. switch(codecpar->codec_id) {
  232. case AV_CODEC_ID_OPUS:
  233. AV_WB8(codecpar->extradata + 9, codecpar->ch_layout.nb_channels);
  234. AV_WL16A(codecpar->extradata + 10, AV_RB16A(codecpar->extradata + 10)); // Byte swap pre-skip
  235. AV_WL32A(codecpar->extradata + 12, AV_RB32A(codecpar->extradata + 12)); // Byte swap sample rate
  236. AV_WL16A(codecpar->extradata + 16, AV_RB16A(codecpar->extradata + 16)); // Byte swap Output Gain
  237. break;
  238. case AV_CODEC_ID_AAC: {
  239. uint8_t buf[5];
  240. init_put_bits(&pb, buf, sizeof(buf));
  241. ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
  242. if (ret < 0)
  243. return ret;
  244. ret = get_bits(&gb, 5);
  245. put_bits(&pb, 5, ret);
  246. if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
  247. put_bits(&pb, 6, get_bits(&gb, 6));
  248. ret = get_bits(&gb, 4);
  249. put_bits(&pb, 4, ret);
  250. if (ret == 0x0f)
  251. put_bits(&pb, 24, get_bits(&gb, 24));
  252. skip_bits(&gb, 4);
  253. put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
  254. ret = put_bits_left(&pb);
  255. put_bits(&pb, ret, get_bits_long(&gb, ret));
  256. flush_put_bits(&pb);
  257. memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
  258. break;
  259. }
  260. case AV_CODEC_ID_FLAC: {
  261. uint8_t buf[13];
  262. init_put_bits(&pb, buf, sizeof(buf));
  263. ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
  264. if (ret < 0)
  265. return ret;
  266. put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
  267. put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
  268. put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
  269. skip_bits(&gb, 3);
  270. put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
  271. ret = put_bits_left(&pb);
  272. put_bits(&pb, ret, get_bits(&gb, ret));
  273. flush_put_bits(&pb);
  274. memcpy(codecpar->extradata, buf, sizeof(buf));
  275. break;
  276. }
  277. }
  278. return 0;
  279. }
  280. static int scalable_channel_layout_config(void *s, AVIOContext *pb,
  281. IAMFAudioElement *audio_element,
  282. const IAMFCodecConfig *codec_config)
  283. {
  284. int nb_layers, k = 0;
  285. nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
  286. // skip_bits(&gb, 5); //reserved
  287. if (nb_layers > 6 || nb_layers == 0)
  288. return AVERROR_INVALIDDATA;
  289. audio_element->layers = av_calloc(nb_layers, sizeof(*audio_element->layers));
  290. if (!audio_element->layers)
  291. return AVERROR(ENOMEM);
  292. audio_element->nb_layers = nb_layers;
  293. for (int i = 0; i < nb_layers; i++) {
  294. AVIAMFLayer *layer;
  295. int loudspeaker_layout, output_gain_is_present_flag;
  296. int substream_count, coupled_substream_count;
  297. int ret, byte = avio_r8(pb);
  298. layer = av_iamf_audio_element_add_layer(audio_element->element);
  299. if (!layer)
  300. return AVERROR(ENOMEM);
  301. loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
  302. output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
  303. if ((byte >> 2) & 1)
  304. layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;
  305. substream_count = avio_r8(pb);
  306. coupled_substream_count = avio_r8(pb);
  307. if (substream_count + k > audio_element->nb_substreams)
  308. return AVERROR_INVALIDDATA;
  309. audio_element->layers[i].substream_count = substream_count;
  310. audio_element->layers[i].coupled_substream_count = coupled_substream_count;
  311. if (output_gain_is_present_flag) {
  312. layer->output_gain_flags = avio_r8(pb) >> 2; // get_bits(&gb, 6);
  313. layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
  314. }
  315. if (loudspeaker_layout < 10)
  316. av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
  317. else
  318. layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
  319. .nb_channels = substream_count +
  320. coupled_substream_count };
  321. for (int j = 0; j < substream_count; j++) {
  322. IAMFSubStream *substream = &audio_element->substreams[k++];
  323. substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
  324. (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
  325. ret = update_extradata(substream->codecpar);
  326. if (ret < 0)
  327. return ret;
  328. }
  329. }
  330. return 0;
  331. }
  332. static int ambisonics_config(void *s, AVIOContext *pb,
  333. IAMFAudioElement *audio_element,
  334. const IAMFCodecConfig *codec_config)
  335. {
  336. AVIAMFLayer *layer;
  337. unsigned ambisonics_mode;
  338. int output_channel_count, substream_count, order;
  339. int ret;
  340. ambisonics_mode = ffio_read_leb(pb);
  341. if (ambisonics_mode > 1)
  342. return AVERROR_INVALIDDATA;
  343. output_channel_count = avio_r8(pb); // C
  344. substream_count = avio_r8(pb); // N
  345. if (audio_element->nb_substreams != substream_count)
  346. return AVERROR_INVALIDDATA;
  347. order = floor(sqrt(output_channel_count - 1));
  348. /* incomplete order - some harmonics are missing */
  349. if ((order + 1) * (order + 1) != output_channel_count)
  350. return AVERROR_INVALIDDATA;
  351. audio_element->layers = av_mallocz(sizeof(*audio_element->layers));
  352. if (!audio_element->layers)
  353. return AVERROR(ENOMEM);
  354. audio_element->nb_layers = 1;
  355. audio_element->layers->substream_count = substream_count;
  356. layer = av_iamf_audio_element_add_layer(audio_element->element);
  357. if (!layer)
  358. return AVERROR(ENOMEM);
  359. layer->ambisonics_mode = ambisonics_mode;
  360. if (ambisonics_mode == 0) {
  361. for (int i = 0; i < substream_count; i++) {
  362. IAMFSubStream *substream = &audio_element->substreams[i];
  363. substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
  364. ret = update_extradata(substream->codecpar);
  365. if (ret < 0)
  366. return ret;
  367. }
  368. layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
  369. layer->ch_layout.nb_channels = output_channel_count;
  370. layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
  371. if (!layer->ch_layout.u.map)
  372. return AVERROR(ENOMEM);
  373. for (int i = 0; i < output_channel_count; i++)
  374. layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
  375. } else {
  376. int coupled_substream_count = avio_r8(pb); // M
  377. int nb_demixing_matrix = substream_count + coupled_substream_count;
  378. int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
  379. audio_element->layers->coupled_substream_count = coupled_substream_count;
  380. layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
  381. layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
  382. if (!layer->demixing_matrix)
  383. return AVERROR(ENOMEM);
  384. for (int i = 0; i < demixing_matrix_size; i++)
  385. layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
  386. for (int i = 0; i < substream_count; i++) {
  387. IAMFSubStream *substream = &audio_element->substreams[i];
  388. substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
  389. (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
  390. ret = update_extradata(substream->codecpar);
  391. if (ret < 0)
  392. return ret;
  393. }
  394. }
  395. return 0;
  396. }
  397. static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
  398. unsigned int type,
  399. const IAMFAudioElement *audio_element,
  400. AVIAMFParamDefinition **out_param_definition)
  401. {
  402. IAMFParamDefinition *param_definition = NULL;
  403. AVIAMFParamDefinition *param;
  404. unsigned int parameter_id, parameter_rate, mode;
  405. unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
  406. size_t param_size;
  407. parameter_id = ffio_read_leb(pb);
  408. for (int i = 0; i < c->nb_param_definitions; i++)
  409. if (c->param_definitions[i]->param->parameter_id == parameter_id) {
  410. param_definition = c->param_definitions[i];
  411. break;
  412. }
  413. parameter_rate = ffio_read_leb(pb);
  414. mode = avio_r8(pb) >> 7;
  415. if (mode == 0) {
  416. duration = ffio_read_leb(pb);
  417. if (!duration)
  418. return AVERROR_INVALIDDATA;
  419. constant_subblock_duration = ffio_read_leb(pb);
  420. if (constant_subblock_duration == 0)
  421. nb_subblocks = ffio_read_leb(pb);
  422. else
  423. nb_subblocks = duration / constant_subblock_duration;
  424. }
  425. param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
  426. if (!param)
  427. return AVERROR(ENOMEM);
  428. for (int i = 0; i < nb_subblocks; i++) {
  429. void *subblock = av_iamf_param_definition_get_subblock(param, i);
  430. unsigned int subblock_duration = constant_subblock_duration;
  431. if (constant_subblock_duration == 0)
  432. subblock_duration = ffio_read_leb(pb);
  433. switch (type) {
  434. case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
  435. AVIAMFMixGain *mix = subblock;
  436. mix->subblock_duration = subblock_duration;
  437. break;
  438. }
  439. case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
  440. AVIAMFDemixingInfo *demix = subblock;
  441. demix->subblock_duration = subblock_duration;
  442. // DefaultDemixingInfoParameterData
  443. av_assert0(audio_element);
  444. demix->dmixp_mode = avio_r8(pb) >> 5;
  445. audio_element->element->default_w = avio_r8(pb) >> 4;
  446. break;
  447. }
  448. case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
  449. AVIAMFReconGain *recon = subblock;
  450. recon->subblock_duration = subblock_duration;
  451. break;
  452. }
  453. default:
  454. av_free(param);
  455. return AVERROR_INVALIDDATA;
  456. }
  457. }
  458. param->parameter_id = parameter_id;
  459. param->parameter_rate = parameter_rate;
  460. param->duration = duration;
  461. param->constant_subblock_duration = constant_subblock_duration;
  462. param->nb_subblocks = nb_subblocks;
  463. if (param_definition) {
  464. if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
  465. av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
  466. av_free(param);
  467. return AVERROR_INVALIDDATA;
  468. }
  469. } else {
  470. IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
  471. sizeof(*c->param_definitions));
  472. if (!tmp) {
  473. av_free(param);
  474. return AVERROR(ENOMEM);
  475. }
  476. c->param_definitions = tmp;
  477. param_definition = av_mallocz(sizeof(*param_definition));
  478. if (!param_definition) {
  479. av_free(param);
  480. return AVERROR(ENOMEM);
  481. }
  482. param_definition->param = param;
  483. param_definition->mode = !mode;
  484. param_definition->param_size = param_size;
  485. param_definition->audio_element = audio_element;
  486. c->param_definitions[c->nb_param_definitions++] = param_definition;
  487. }
  488. av_assert0(out_param_definition);
  489. *out_param_definition = param;
  490. return 0;
  491. }
  492. static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
  493. {
  494. const IAMFCodecConfig *codec_config;
  495. AVIAMFAudioElement *element;
  496. IAMFAudioElement **tmp, *audio_element = NULL;
  497. FFIOContext b;
  498. AVIOContext *pbc;
  499. uint8_t *buf;
  500. unsigned audio_element_id, nb_substreams, codec_config_id, num_parameters;
  501. int audio_element_type, ret;
  502. buf = av_malloc(len);
  503. if (!buf)
  504. return AVERROR(ENOMEM);
  505. ret = ffio_read_size(pb, buf, len);
  506. if (ret < 0)
  507. goto fail;
  508. ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
  509. pbc = &b.pub;
  510. audio_element_id = ffio_read_leb(pbc);
  511. for (int i = 0; i < c->nb_audio_elements; i++)
  512. if (c->audio_elements[i]->audio_element_id == audio_element_id) {
  513. av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
  514. ret = AVERROR_INVALIDDATA;
  515. goto fail;
  516. }
  517. audio_element_type = avio_r8(pbc) >> 5;
  518. if (audio_element_type > AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
  519. av_log(s, AV_LOG_DEBUG, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
  520. ret = 0;
  521. goto fail;
  522. }
  523. codec_config_id = ffio_read_leb(pbc);
  524. codec_config = ff_iamf_get_codec_config(c, codec_config_id);
  525. if (!codec_config) {
  526. av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
  527. ret = AVERROR_INVALIDDATA;
  528. goto fail;
  529. }
  530. if (codec_config->codec_id == AV_CODEC_ID_NONE) {
  531. av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
  532. ret = 0;
  533. goto fail;
  534. }
  535. tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
  536. if (!tmp) {
  537. ret = AVERROR(ENOMEM);
  538. goto fail;
  539. }
  540. c->audio_elements = tmp;
  541. audio_element = av_mallocz(sizeof(*audio_element));
  542. if (!audio_element) {
  543. ret = AVERROR(ENOMEM);
  544. goto fail;
  545. }
  546. nb_substreams = ffio_read_leb(pbc);
  547. audio_element->codec_config_id = codec_config_id;
  548. audio_element->audio_element_id = audio_element_id;
  549. audio_element->substreams = av_calloc(nb_substreams, sizeof(*audio_element->substreams));
  550. if (!audio_element->substreams) {
  551. ret = AVERROR(ENOMEM);
  552. goto fail;
  553. }
  554. audio_element->nb_substreams = nb_substreams;
  555. element = audio_element->element = av_iamf_audio_element_alloc();
  556. if (!element) {
  557. ret = AVERROR(ENOMEM);
  558. goto fail;
  559. }
  560. audio_element->celement = element;
  561. element->audio_element_type = audio_element_type;
  562. for (int i = 0; i < audio_element->nb_substreams; i++) {
  563. IAMFSubStream *substream = &audio_element->substreams[i];
  564. substream->codecpar = avcodec_parameters_alloc();
  565. if (!substream->codecpar) {
  566. ret = AVERROR(ENOMEM);
  567. goto fail;
  568. }
  569. substream->audio_substream_id = ffio_read_leb(pbc);
  570. substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
  571. substream->codecpar->codec_id = codec_config->codec_id;
  572. substream->codecpar->frame_size = codec_config->nb_samples;
  573. substream->codecpar->sample_rate = codec_config->sample_rate;
  574. substream->codecpar->seek_preroll = -codec_config->audio_roll_distance * codec_config->nb_samples;
  575. switch(substream->codecpar->codec_id) {
  576. case AV_CODEC_ID_AAC:
  577. case AV_CODEC_ID_FLAC:
  578. case AV_CODEC_ID_OPUS:
  579. substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  580. if (!substream->codecpar->extradata) {
  581. ret = AVERROR(ENOMEM);
  582. goto fail;
  583. }
  584. memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
  585. memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
  586. substream->codecpar->extradata_size = codec_config->extradata_size;
  587. break;
  588. }
  589. }
  590. num_parameters = ffio_read_leb(pbc);
  591. if (num_parameters && audio_element_type != 0) {
  592. av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
  593. " for Scene representations\n", num_parameters);
  594. ret = AVERROR_INVALIDDATA;
  595. goto fail;
  596. }
  597. for (int i = 0; i < num_parameters; i++) {
  598. unsigned type;
  599. type = ffio_read_leb(pbc);
  600. if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
  601. ret = AVERROR_INVALIDDATA;
  602. else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING)
  603. ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
  604. else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN)
  605. ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
  606. else {
  607. unsigned param_definition_size = ffio_read_leb(pbc);
  608. avio_skip(pbc, param_definition_size);
  609. }
  610. if (ret < 0)
  611. goto fail;
  612. }
  613. if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
  614. ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
  615. if (ret < 0)
  616. goto fail;
  617. } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
  618. ret = ambisonics_config(s, pbc, audio_element, codec_config);
  619. if (ret < 0)
  620. goto fail;
  621. } else {
  622. av_assert0(0);
  623. }
  624. c->audio_elements[c->nb_audio_elements++] = audio_element;
  625. len -= avio_tell(pbc);
  626. if (len)
  627. av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);
  628. ret = 0;
  629. fail:
  630. av_free(buf);
  631. if (ret < 0)
  632. ff_iamf_free_audio_element(&audio_element);
  633. return ret;
  634. }
  635. static int label_string(AVIOContext *pb, char **label)
  636. {
  637. uint8_t buf[128];
  638. avio_get_str(pb, sizeof(buf), buf, sizeof(buf));
  639. if (pb->error)
  640. return pb->error;
  641. if (pb->eof_reached)
  642. return AVERROR_INVALIDDATA;
  643. *label = av_strdup(buf);
  644. if (!*label)
  645. return AVERROR(ENOMEM);
  646. return 0;
  647. }
  648. static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
  649. {
  650. AVIAMFMixPresentation *mix;
  651. IAMFMixPresentation **tmp, *mix_presentation = NULL;
  652. FFIOContext b;
  653. AVIOContext *pbc;
  654. uint8_t *buf;
  655. unsigned nb_submixes, mix_presentation_id;
  656. int ret;
  657. buf = av_malloc(len);
  658. if (!buf)
  659. return AVERROR(ENOMEM);
  660. ret = ffio_read_size(pb, buf, len);
  661. if (ret < 0)
  662. goto fail;
  663. ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
  664. pbc = &b.pub;
  665. mix_presentation_id = ffio_read_leb(pbc);
  666. for (int i = 0; i < c->nb_mix_presentations; i++)
  667. if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
  668. av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
  669. ret = AVERROR_INVALIDDATA;
  670. goto fail;
  671. }
  672. tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
  673. if (!tmp) {
  674. ret = AVERROR(ENOMEM);
  675. goto fail;
  676. }
  677. c->mix_presentations = tmp;
  678. mix_presentation = av_mallocz(sizeof(*mix_presentation));
  679. if (!mix_presentation) {
  680. ret = AVERROR(ENOMEM);
  681. goto fail;
  682. }
  683. mix_presentation->mix_presentation_id = mix_presentation_id;
  684. mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
  685. if (!mix) {
  686. ret = AVERROR(ENOMEM);
  687. goto fail;
  688. }
  689. mix_presentation->cmix = mix;
  690. mix_presentation->count_label = ffio_read_leb(pbc);
  691. mix_presentation->language_label = av_calloc(mix_presentation->count_label,
  692. sizeof(*mix_presentation->language_label));
  693. if (!mix_presentation->language_label) {
  694. mix_presentation->count_label = 0;
  695. ret = AVERROR(ENOMEM);
  696. goto fail;
  697. }
  698. for (int i = 0; i < mix_presentation->count_label; i++) {
  699. ret = label_string(pbc, &mix_presentation->language_label[i]);
  700. if (ret < 0)
  701. goto fail;
  702. }
  703. for (int i = 0; i < mix_presentation->count_label; i++) {
  704. char *annotation = NULL;
  705. ret = label_string(pbc, &annotation);
  706. if (ret < 0)
  707. goto fail;
  708. ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
  709. AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
  710. if (ret < 0)
  711. goto fail;
  712. }
  713. nb_submixes = ffio_read_leb(pbc);
  714. for (int i = 0; i < nb_submixes; i++) {
  715. AVIAMFSubmix *sub_mix;
  716. unsigned nb_elements, nb_layouts;
  717. sub_mix = av_iamf_mix_presentation_add_submix(mix);
  718. if (!sub_mix) {
  719. ret = AVERROR(ENOMEM);
  720. goto fail;
  721. }
  722. nb_elements = ffio_read_leb(pbc);
  723. for (int j = 0; j < nb_elements; j++) {
  724. AVIAMFSubmixElement *submix_element;
  725. IAMFAudioElement *audio_element = NULL;
  726. unsigned int rendering_config_extension_size;
  727. submix_element = av_iamf_submix_add_element(sub_mix);
  728. if (!submix_element) {
  729. ret = AVERROR(ENOMEM);
  730. goto fail;
  731. }
  732. submix_element->audio_element_id = ffio_read_leb(pbc);
  733. for (int k = 0; k < c->nb_audio_elements; k++)
  734. if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
  735. audio_element = c->audio_elements[k];
  736. break;
  737. }
  738. if (!audio_element) {
  739. av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
  740. submix_element->audio_element_id, mix_presentation_id);
  741. ret = AVERROR_INVALIDDATA;
  742. goto fail;
  743. }
  744. for (int k = 0; k < mix_presentation->count_label; k++) {
  745. char *annotation = NULL;
  746. ret = label_string(pbc, &annotation);
  747. if (ret < 0)
  748. goto fail;
  749. ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
  750. AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
  751. if (ret < 0)
  752. goto fail;
  753. }
  754. submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;
  755. rendering_config_extension_size = ffio_read_leb(pbc);
  756. avio_skip(pbc, rendering_config_extension_size);
  757. ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
  758. NULL,
  759. &submix_element->element_mix_config);
  760. if (ret < 0)
  761. goto fail;
  762. submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  763. }
  764. ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
  765. if (ret < 0)
  766. goto fail;
  767. sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  768. nb_layouts = ffio_read_leb(pbc);
  769. for (int j = 0; j < nb_layouts; j++) {
  770. AVIAMFSubmixLayout *submix_layout;
  771. int info_type;
  772. int byte = avio_r8(pbc);
  773. submix_layout = av_iamf_submix_add_layout(sub_mix);
  774. if (!submix_layout) {
  775. ret = AVERROR(ENOMEM);
  776. goto fail;
  777. }
  778. submix_layout->layout_type = byte >> 6;
  779. if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
  780. submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
  781. av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
  782. submix_layout->layout_type, mix_presentation_id);
  783. ret = AVERROR_INVALIDDATA;
  784. goto fail;
  785. }
  786. if (submix_layout->layout_type == 2) {
  787. int sound_system;
  788. sound_system = (byte >> 2) & 0xF;
  789. if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
  790. ret = AVERROR_INVALIDDATA;
  791. goto fail;
  792. }
  793. av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
  794. } else
  795. submix_layout->sound_system = (AVChannelLayout)AV_CHANNEL_LAYOUT_BINAURAL;
  796. info_type = avio_r8(pbc);
  797. submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  798. submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  799. if (info_type & 1)
  800. submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  801. if (info_type & 2) {
  802. unsigned int num_anchored_loudness = avio_r8(pbc);
  803. for (int k = 0; k < num_anchored_loudness; k++) {
  804. unsigned int anchor_element = avio_r8(pbc);
  805. AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
  806. if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
  807. submix_layout->dialogue_anchored_loudness = anchored_loudness;
  808. else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
  809. submix_layout->album_anchored_loudness = anchored_loudness;
  810. else
  811. av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
  812. }
  813. }
  814. if (info_type & 0xFC) {
  815. unsigned int info_type_size = ffio_read_leb(pbc);
  816. avio_skip(pbc, info_type_size);
  817. }
  818. }
  819. }
  820. c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;
  821. len -= avio_tell(pbc);
  822. if (len)
  823. av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);
  824. ret = 0;
  825. fail:
  826. av_free(buf);
  827. if (ret < 0)
  828. ff_iamf_free_mix_presentation(&mix_presentation);
  829. return ret;
  830. }
  831. int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
  832. unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
  833. unsigned *skip_samples, unsigned *discard_padding)
  834. {
  835. GetBitContext gb;
  836. int ret, extension_flag, trimming, start;
  837. unsigned skip = 0, discard = 0;
  838. unsigned size;
  839. ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
  840. if (ret < 0)
  841. return ret;
  842. *type = get_bits(&gb, 5);
  843. /*redundant =*/ get_bits1(&gb);
  844. trimming = get_bits1(&gb);
  845. extension_flag = get_bits1(&gb);
  846. *obu_size = get_leb(&gb);
  847. if (*obu_size > INT_MAX)
  848. return AVERROR_INVALIDDATA;
  849. start = get_bits_count(&gb) / 8;
  850. if (trimming) {
  851. discard = get_leb(&gb); // num_samples_to_trim_at_end
  852. skip = get_leb(&gb); // num_samples_to_trim_at_start
  853. }
  854. if (skip_samples)
  855. *skip_samples = skip;
  856. if (discard_padding)
  857. *discard_padding = discard;
  858. if (extension_flag) {
  859. unsigned int extension_bytes;
  860. extension_bytes = get_leb(&gb);
  861. if (extension_bytes > INT_MAX / 8)
  862. return AVERROR_INVALIDDATA;
  863. skip_bits_long(&gb, extension_bytes * 8);
  864. }
  865. if (get_bits_left(&gb) < 0)
  866. return AVERROR_INVALIDDATA;
  867. size = *obu_size + start;
  868. if (size > INT_MAX)
  869. return AVERROR_INVALIDDATA;
  870. *obu_size -= get_bits_count(&gb) / 8 - start;
  871. *start_pos = size - *obu_size;
  872. return size;
  873. }
  874. int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
  875. int max_size, void *log_ctx)
  876. {
  877. uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
  878. int ret;
  879. while (1) {
  880. unsigned obu_size;
  881. enum IAMF_OBU_Type type;
  882. int start_pos, len, size;
  883. if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
  884. return ret;
  885. size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
  886. if (size < 0)
  887. return size;
  888. memset(header + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
  889. len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
  890. if (len < 0 || obu_size > max_size) {
  891. av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
  892. avio_seek(pb, -size, SEEK_CUR);
  893. return len;
  894. }
  895. if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
  896. avio_seek(pb, -size, SEEK_CUR);
  897. break;
  898. }
  899. avio_seek(pb, -(size - start_pos), SEEK_CUR);
  900. switch (type) {
  901. case IAMF_OBU_IA_CODEC_CONFIG:
  902. ret = codec_config_obu(log_ctx, c, pb, obu_size);
  903. break;
  904. case IAMF_OBU_IA_AUDIO_ELEMENT:
  905. ret = audio_element_obu(log_ctx, c, pb, obu_size);
  906. break;
  907. case IAMF_OBU_IA_MIX_PRESENTATION:
  908. ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
  909. break;
  910. default: {
  911. int64_t offset = avio_skip(pb, obu_size);
  912. if (offset < 0)
  913. ret = offset;
  914. break;
  915. }
  916. }
  917. if (ret < 0) {
  918. av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
  919. return ret;
  920. }
  921. max_size -= obu_size + start_pos;
  922. if (max_size < 0)
  923. return AVERROR_INVALIDDATA;
  924. if (!max_size)
  925. break;
  926. }
  927. return 0;
  928. }