123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- /*
- * TED Talks captions format decoder
- * Copyright (c) 2012 Nicolas George
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
- #include "libavutil/bprint.h"
- #include "libavutil/log.h"
- #include "libavutil/opt.h"
- #include "avformat.h"
- #include "internal.h"
- #include "subtitles.h"
- typedef struct {
- AVClass *class;
- int64_t start_time;
- FFDemuxSubtitlesQueue subs;
- } TEDCaptionsDemuxer;
- static const AVOption tedcaptions_options[] = {
- { "start_time", "set the start time (offset) of the subtitles, in ms",
- offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
- { .i64 = 15000 }, INT64_MIN, INT64_MAX,
- AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
- { NULL },
- };
- static const AVClass tedcaptions_demuxer_class = {
- .class_name = "tedcaptions_demuxer",
- .item_name = av_default_item_name,
- .option = tedcaptions_options,
- .version = LIBAVUTIL_VERSION_INT,
- };
/*
 * Character-class helpers for the JSON parser.
 *
 * The "current byte" handled by the parser is an int that is either a byte
 * value (0..255) or a negative error code, so every macro below must give a
 * sane answer for negative arguments as well.
 */

/* True if amin <= a <= amax; the unsigned cast folds both bounds into a
 * single comparison (values below amin wrap around to huge numbers). */
#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))

/* True if c is one of 0-9, a-f or A-F ("| 32" folds ASCII upper case onto
 * lower case).  The letter range must stop at 'f': anything beyond it is
 * not a hexadecimal digit and must be rejected inside \uXXXX escapes. */
#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))

/* Numeric value of a character already accepted by HEX_DIGIT_TEST(). */
#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)

/* Error code to report when byte c was not the expected one: a negative c is
 * itself an error code and is passed through, any real byte is a syntax
 * error. */
#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
- static void av_bprint_utf8(AVBPrint *bp, unsigned c)
- {
- int bytes, i;
- if (c <= 0x7F) {
- av_bprint_chars(bp, c, 1);
- return;
- }
- bytes = (av_log2(c) - 2) / 5;
- av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
- for (i = bytes - 1; i >= 0; i--)
- av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
- }
- static void next_byte(AVIOContext *pb, int *cur_byte)
- {
- uint8_t b;
- int ret = avio_read(pb, &b, 1);
- *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret;
- }
- static void skip_spaces(AVIOContext *pb, int *cur_byte)
- {
- while (*cur_byte == ' ' || *cur_byte == '\t' ||
- *cur_byte == '\n' || *cur_byte == '\r')
- next_byte(pb, cur_byte);
- }
- static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
- {
- skip_spaces(pb, cur_byte);
- if (*cur_byte != c)
- return ERR_CODE(*cur_byte);
- next_byte(pb, cur_byte);
- return 0;
- }
- static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
- {
- int ret;
- av_bprint_init(bp, 0, full ? -1 : 1);
- ret = expect_byte(pb, cur_byte, '"');
- if (ret < 0)
- goto fail;
- while (*cur_byte > 0 && *cur_byte != '"') {
- if (*cur_byte == '\\') {
- next_byte(pb, cur_byte);
- if (*cur_byte < 0) {
- ret = AVERROR_INVALIDDATA;
- goto fail;
- }
- if ((*cur_byte | 32) == 'u') {
- unsigned chr = 0, i;
- for (i = 0; i < 4; i++) {
- next_byte(pb, cur_byte);
- if (!HEX_DIGIT_TEST(*cur_byte)) {
- ret = ERR_CODE(*cur_byte);
- goto fail;
- }
- chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
- }
- av_bprint_utf8(bp, chr);
- } else {
- av_bprint_chars(bp, *cur_byte, 1);
- }
- } else {
- av_bprint_chars(bp, *cur_byte, 1);
- }
- next_byte(pb, cur_byte);
- }
- ret = expect_byte(pb, cur_byte, '"');
- if (ret < 0)
- goto fail;
- if (full && !av_bprint_is_complete(bp)) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
- return 0;
- fail:
- av_bprint_finalize(bp, NULL);
- return ret;
- }
- static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
- {
- int ret;
- ret = parse_string(pb, cur_byte, bp, 0);
- if (ret < 0)
- return ret;
- ret = expect_byte(pb, cur_byte, ':');
- if (ret < 0)
- return ret;
- return 0;
- }
- static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
- {
- const char *text[] = { "false", "true" }, *p;
- int i;
- skip_spaces(pb, cur_byte);
- for (i = 0; i < 2; i++) {
- p = text[i];
- if (*cur_byte != *p)
- continue;
- for (; *p; p++, next_byte(pb, cur_byte))
- if (*cur_byte != *p)
- return AVERROR_INVALIDDATA;
- if (BETWEEN(*cur_byte | 32, 'a', 'z'))
- return AVERROR_INVALIDDATA;
- *result = i;
- return 0;
- }
- return AVERROR_INVALIDDATA;
- }
- static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
- {
- int64_t val = 0;
- skip_spaces(pb, cur_byte);
- if ((unsigned)*cur_byte - '0' > 9)
- return AVERROR_INVALIDDATA;
- while (BETWEEN(*cur_byte, '0', '9')) {
- val = val * 10 + (*cur_byte - '0');
- next_byte(pb, cur_byte);
- }
- *result = val;
- return 0;
- }
- static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
- {
- int ret, cur_byte, start_of_par;
- AVBPrint label, content;
- int64_t pos, start, duration;
- AVPacket *pkt;
- next_byte(pb, &cur_byte);
- ret = expect_byte(pb, &cur_byte, '{');
- if (ret < 0)
- return AVERROR_INVALIDDATA;
- ret = parse_label(pb, &cur_byte, &label);
- if (ret < 0 || strcmp(label.str, "captions"))
- return AVERROR_INVALIDDATA;
- ret = expect_byte(pb, &cur_byte, '[');
- if (ret < 0)
- return AVERROR_INVALIDDATA;
- while (1) {
- content.size = 0;
- start = duration = AV_NOPTS_VALUE;
- ret = expect_byte(pb, &cur_byte, '{');
- if (ret < 0)
- return ret;
- pos = avio_tell(pb) - 1;
- while (1) {
- ret = parse_label(pb, &cur_byte, &label);
- if (ret < 0)
- return ret;
- if (!strcmp(label.str, "startOfParagraph")) {
- ret = parse_boolean(pb, &cur_byte, &start_of_par);
- if (ret < 0)
- return ret;
- } else if (!strcmp(label.str, "content")) {
- ret = parse_string(pb, &cur_byte, &content, 1);
- if (ret < 0)
- return ret;
- } else if (!strcmp(label.str, "startTime")) {
- ret = parse_int(pb, &cur_byte, &start);
- if (ret < 0)
- return ret;
- } else if (!strcmp(label.str, "duration")) {
- ret = parse_int(pb, &cur_byte, &duration);
- if (ret < 0)
- return ret;
- } else {
- return AVERROR_INVALIDDATA;
- }
- skip_spaces(pb, &cur_byte);
- if (cur_byte != ',')
- break;
- next_byte(pb, &cur_byte);
- }
- ret = expect_byte(pb, &cur_byte, '}');
- if (ret < 0)
- return ret;
- if (!content.size || start == AV_NOPTS_VALUE ||
- duration == AV_NOPTS_VALUE)
- return AVERROR_INVALIDDATA;
- pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
- if (!pkt)
- return AVERROR(ENOMEM);
- pkt->pos = pos;
- pkt->pts = start;
- pkt->duration = duration;
- av_bprint_finalize(&content, NULL);
- skip_spaces(pb, &cur_byte);
- if (cur_byte != ',')
- break;
- next_byte(pb, &cur_byte);
- }
- ret = expect_byte(pb, &cur_byte, ']');
- if (ret < 0)
- return ret;
- ret = expect_byte(pb, &cur_byte, '}');
- if (ret < 0)
- return ret;
- skip_spaces(pb, &cur_byte);
- if (cur_byte != AVERROR_EOF)
- return ERR_CODE(cur_byte);
- return 0;
- }
- static av_cold int tedcaptions_read_header(AVFormatContext *avf)
- {
- TEDCaptionsDemuxer *tc = avf->priv_data;
- AVStream *st;
- int ret, i;
- AVPacket *last;
- ret = parse_file(avf->pb, &tc->subs);
- if (ret < 0) {
- if (ret == AVERROR_INVALIDDATA)
- av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
- avio_tell(avf->pb));
- ff_subtitles_queue_clean(&tc->subs);
- return ret;
- }
- ff_subtitles_queue_finalize(&tc->subs);
- for (i = 0; i < tc->subs.nb_subs; i++)
- tc->subs.subs[i].pts += tc->start_time;
- last = &tc->subs.subs[tc->subs.nb_subs - 1];
- st = avformat_new_stream(avf, NULL);
- if (!st)
- return AVERROR(ENOMEM);
- st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
- st->codec->codec_id = CODEC_ID_TEXT;
- avpriv_set_pts_info(st, 64, 1, 1000);
- st->probe_packets = 0;
- st->start_time = 0;
- st->duration = last->pts + last->duration;
- st->cur_dts = 0;
- return 0;
- }
- static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
- {
- TEDCaptionsDemuxer *tc = avf->priv_data;
- return ff_subtitles_queue_read_packet(&tc->subs, packet);
- }
- static int tedcaptions_read_close(AVFormatContext *avf)
- {
- TEDCaptionsDemuxer *tc = avf->priv_data;
- ff_subtitles_queue_clean(&tc->subs);
- return 0;
- }
- static av_cold int tedcaptions_read_probe(AVProbeData *p)
- {
- static const char *const tags[] = {
- "\"captions\"", "\"duration\"", "\"content\"",
- "\"startOfParagraph\"", "\"startTime\"",
- };
- unsigned i, count = 0;
- const char *t;
- if (p->buf[strspn(p->buf, " \t\r\n")] != '{')
- return 0;
- for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) {
- if (!(t = strstr(p->buf, tags[i])))
- continue;
- t += strlen(tags[i]);
- t += strspn(t, " \t\r\n");
- if (*t == ':')
- count++;
- }
- return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX :
- count ? AVPROBE_SCORE_MAX / 2 : 0;
- }
- static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index,
- int64_t min_ts, int64_t ts, int64_t max_ts,
- int flags)
- {
- TEDCaptionsDemuxer *tc = avf->priv_data;
- return ff_subtitles_queue_seek(&tc->subs, avf, stream_index,
- min_ts, ts, max_ts, flags);
- }
- AVInputFormat ff_tedcaptions_demuxer = {
- .name = "tedcaptions",
- .long_name = NULL_IF_CONFIG_SMALL("TED Talks captions"),
- .priv_data_size = sizeof(TEDCaptionsDemuxer),
- .priv_class = &tedcaptions_demuxer_class,
- .read_header = tedcaptions_read_header,
- .read_packet = tedcaptions_read_packet,
- .read_close = tedcaptions_read_close,
- .read_probe = tedcaptions_read_probe,
- .read_seek2 = tedcaptions_read_seek,
- };
|