123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301 |
- import json
- import re
- from .common import InfoExtractor
- from ..utils import (
- ExtractorError,
- clean_html,
- determine_ext,
- dict_get,
- int_or_none,
- merge_dicts,
- parse_age_limit,
- parse_iso8601,
- parse_qs,
- str_or_none,
- try_get,
- url_or_none,
- variadic,
- )
class ERTFlixBaseIE(InfoExtractor):
    """Common base for the ERTFLIX extractors; wraps calls to api.app.ertflix.gr."""

    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        """Call an ERTFLIX API method and return the decoded JSON response.

        @param video_id       ID passed to _download_json for reporting
        @param method         API method path appended to the versioned base URL
        @param api_version    API version used in the URL (`/v<api_version>/`)
        @param param_headers  extra entries for the JSON-encoded `$headers`
                              query parameter
        @param data           optional dict; when truthy it is merged with the
                              platform codename and sent as a JSON request body
        @param headers        optional real HTTP headers (never modified)
        @param params         additional query parameters
        @returns              the response dict if its `Result.Success` is True,
                              otherwise None
        """
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        # Work on a copy: the Content-Type assignment below must not leak
        # into a dict that is owned by the caller
        headers = dict(headers or {})
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            data = json.dumps(merge_dicts(platform_codename, data)).encode()
        # The platform codename goes into the body when there is one,
        # otherwise into the query string
        query = merge_dicts(
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            f'https://api.app.ertflix.gr/v{api_version!s}/{method}',
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response
        return None

    def _call_api_get_tiles(self, video_id, *tile_ids):
        """Fetch tile metadata through the `Tile/GetTiles` API method.

        With only `video_id`, return the single tile whose `Id` equals it.
        With extra `tile_ids`, return the list of all returned tiles, provided
        the returned Ids are exactly the requested ones.

        Raises ExtractorError when the expected tile(s) are not in the response.
        """
        requested_tile_ids = [video_id, *tile_ids]
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
        if tile_ids:
            # A tile without a usable Id can never satisfy the request, so
            # report it as "not found" instead of failing with a KeyError
            found_ids = [try_get(tile, lambda x: x['Id']) for tile in tiles]
            if None in found_ids or sorted(found_ids) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        matching_tile = next(
            (tile for tile in tiles if try_get(tile, lambda x: x['Id']) == video_id),
            None)
        if matching_tile is None:
            raise ExtractorError('No matching tile found', video_id=video_id)
        return matching_tile
class ERTFlixCodenameIE(ERTFlixBaseIE):
    """Extractor for the internal `ertflix:<codename>` pseudo-URLs."""

    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id):
        """Return `(formats, subtitles)` collected from every media file of a codename.

        HLS and DASH manifest URLs are expanded (non-fatally); any other URL
        is added as a single direct format. Entries without a valid URL are
        skipped.
        """
        api_response = self._call_api(video_id, codename=video_id)
        all_formats = []
        all_subtitles = {}
        media_files = try_get(api_response, lambda x: x['MediaFiles'], list) or []
        for media_file in media_files:
            media_formats = try_get(media_file, lambda x: x['Formats'], list) or []
            for media in media_formats:
                fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
                if not fmt_url:
                    continue
                ext = determine_ext(fmt_url)
                if ext not in ('m3u8', 'mpd'):
                    # Neither an HLS nor a DASH manifest: treat as a direct link
                    all_formats.append({
                        'url': fmt_url,
                        'format_id': str_or_none(media.get('Id')),
                    })
                    continue
                if ext == 'm3u8':
                    manifest_formats, manifest_subs = self._extract_m3u8_formats_and_subtitles(
                        fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                else:
                    manifest_formats, manifest_subs = self._extract_mpd_formats_and_subtitles(
                        fmt_url, video_id, mpd_id='dash', fatal=False)
                all_formats.extend(manifest_formats)
                self._merge_subtitles(manifest_subs, target=all_subtitles)
        return all_formats, all_subtitles

    def _real_extract(self, url):
        """Build the info dict for a codename; returns None when no formats were found."""
        video_id = self._match_id(url)
        formats, subs = self._extract_formats_and_subs(video_id)
        if not formats:
            return None
        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            'title': self._generic_title(url),
        }
        return info
class ERTFlixIE(ERTFlixBaseIE):
    """Extractor for ertflix.gr `vod` (single video) and `series` (playlist) pages."""

    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
        'only_matching': True,
    }]

    def _extract_episode(self, episode):
        """Convert an API episode/tile dict into a `url_transparent` result.

        The result points at `ertflix:<Codename>`. Returns None when the
        codename or title is missing, or when `HasPlayableStream` is present
        and falsy.
        """
        codename = try_get(episode, lambda x: x['Codename'], str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription')))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        # Use the URL of the first image flagged as the main one, if any
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': f'ertflix:{codename}',
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        """Derive an age limit from `AgeRating`, else 18 for adult content, else 0 for kids content."""
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        """Return a playlist of the episodes of a series.

        @param video_id        series ID passed to `Tile/GetSeriesDetails`
        @param season_titles   optional list of episode-group titles to keep
                               (never modified)
        @param season_numbers  optional list of season numbers; each one is
                               translated to the title of the matching season
        """
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription')),
        }
        if season_numbers:
            # Copy first so that the list supplied by the caller is not extended in place
            season_titles = list(season_titles or [])
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    # Sort on the number only: comparing whole (number, dict)
                    # tuples raises TypeError as soon as two episodes share a
                    # number, because dicts are not orderable
                    episodes = sorted(
                        ((int(ep['EpisodeNumber']), ep) for ep in episodes),
                        key=lambda numbered: numbered[0])
                except (KeyError, TypeError, ValueError):
                    # Missing, null or non-numeric episode numbers:
                    # fall back to the order given by the API
                    episodes = enumerate(episodes, 1)
                for n, episode in episodes:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        """Dispatch `ser.*` IDs to series extraction and everything else to a single tile."""
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            # Each `season` query value selects a season: integer values by
            # season number, anything else by season title
            season_titles, season_numbers = [], []
            for value in parse_qs(url).get('season', []):
                number = int_or_none(value)
                if number is not None:
                    season_numbers.append(number)
                else:
                    season_titles.append(str_or_none(value))
            return self._extract_series(
                video_id,
                season_titles=season_titles or None,
                season_numbers=season_numbers or None)

        return self._extract_episode(self._call_api_get_tiles(video_id))
class ERTWebtvEmbedIE(InfoExtractor):
    """Extractor for the ert.gr webtv VOD player page, also detected as an embedded iframe."""

    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the `f` (media path) and `bgimg` (thumbnail) query values."""
        video_id = self._match_id(url)
        playlist_url = f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist_url, video_id, 'mp4')

        # `bgimg` is prefixed with the program.ert.gr host unless it already
        # starts with "http"
        bgimg = parse_qs(url).get('bgimg', [None])[0]
        needs_host = bool(bgimg) and not bgimg.startswith('http')
        thumbnail = f'https://program.ert.gr{bgimg}' if needs_host else bgimg

        info = {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }
        return info
|