drtv.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. import json
  2. import uuid
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. int_or_none,
  7. mimetype2ext,
  8. parse_iso8601,
  9. try_call,
  10. update_url_query,
  11. url_or_none,
  12. )
  13. from ..utils.traversal import traverse_obj
  14. SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
  15. class DRTVIE(InfoExtractor):
  16. _VALID_URL = r'''(?x)
  17. https?://
  18. (?:
  19. (?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/?#]+/)*|
  20. (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
  21. )
  22. (?P<id>[\da-z_-]+)
  23. '''
  24. _GEO_BYPASS = False
  25. _GEO_COUNTRIES = ['DK']
  26. IE_NAME = 'drtv'
  27. _TESTS = [{
  28. 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
  29. 'md5': '25e659cccc9a2ed956110a299fdf5983',
  30. 'info_dict': {
  31. 'id': 'klassen-darlig-taber-10',
  32. 'ext': 'mp4',
  33. 'title': 'Klassen - Dårlig taber (10)',
  34. 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
  35. 'timestamp': 1539085800,
  36. 'upload_date': '20181009',
  37. 'duration': 606.84,
  38. 'series': 'Klassen',
  39. 'season': 'Klassen I',
  40. 'season_number': 1,
  41. 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
  42. 'episode': 'Episode 10',
  43. 'episode_number': 10,
  44. 'release_year': 2016,
  45. },
  46. 'expected_warnings': ['Unable to download f4m manifest'],
  47. 'skip': 'this video has been removed',
  48. }, {
  49. # with SignLanguage formats
  50. 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
  51. 'info_dict': {
  52. 'id': '00831690010',
  53. 'ext': 'mp4',
  54. 'title': 'Historien om Danmark: Stenalder',
  55. 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
  56. 'timestamp': 1546628400,
  57. 'upload_date': '20190104',
  58. 'duration': 3504.619,
  59. 'formats': 'mincount:20',
  60. 'release_year': 2017,
  61. 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
  62. 'season_number': 1,
  63. 'season': 'Historien om Danmark',
  64. 'series': 'Historien om Danmark',
  65. },
  66. 'skip': 'this video has been removed',
  67. }, {
  68. 'url': 'https://www.dr.dk/drtv/se/frank-and-kastaniegaarden_71769',
  69. 'info_dict': {
  70. 'id': '00951930010',
  71. 'ext': 'mp4',
  72. 'title': 'Frank & Kastaniegaarden',
  73. 'description': 'md5:974e1780934cf3275ef10280204bccb0',
  74. 'release_timestamp': 1546545600,
  75. 'release_date': '20190103',
  76. 'duration': 2576,
  77. 'season': 'Frank & Kastaniegaarden',
  78. 'season_id': '67125',
  79. 'release_year': 2019,
  80. 'season_number': 2019,
  81. 'series': 'Frank & Kastaniegaarden',
  82. 'episode_number': 1,
  83. 'episode': 'Frank & Kastaniegaarden',
  84. 'thumbnail': r're:https?://.+',
  85. },
  86. 'params': {
  87. 'skip_download': True,
  88. },
  89. }, {
  90. # Foreign and Regular subtitle track
  91. 'url': 'https://www.dr.dk/drtv/se/spise-med-price_-pasta-selv_397445',
  92. 'info_dict': {
  93. 'id': '00212301010',
  94. 'ext': 'mp4',
  95. 'episode_number': 1,
  96. 'title': 'Spise med Price: Pasta Selv',
  97. 'alt_title': '1. Pasta Selv',
  98. 'release_date': '20230807',
  99. 'description': 'md5:2da9060524fed707810d71080b3d0cd8',
  100. 'duration': 1750,
  101. 'season': 'Spise med Price',
  102. 'release_timestamp': 1691438400,
  103. 'season_id': '397440',
  104. 'episode': 'Spise med Price: Pasta Selv',
  105. 'thumbnail': r're:https?://.+',
  106. 'season_number': 15,
  107. 'series': 'Spise med Price',
  108. 'release_year': 2022,
  109. 'subtitles': 'mincount:2',
  110. },
  111. 'params': {
  112. 'skip_download': 'm3u8',
  113. },
  114. }, {
  115. 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
  116. 'only_matching': True,
  117. }, {
  118. 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
  119. 'only_matching': True,
  120. }, {
  121. 'url': 'https://www.dr.dk/drtv/program/jagten_220924',
  122. 'only_matching': True,
  123. }]
  124. SUBTITLE_LANGS = {
  125. 'DanishLanguageSubtitles': 'da',
  126. 'ForeignLanguageSubtitles': 'da_foreign',
  127. 'CombinedLanguageSubtitles': 'da_combined',
  128. }
  129. _TOKEN = None
  130. def _real_initialize(self):
  131. if self._TOKEN:
  132. return
  133. token_response = self._download_json(
  134. 'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
  135. note='Downloading anonymous token', headers={
  136. 'content-type': 'application/json',
  137. }, query={
  138. 'device': 'web_browser',
  139. 'ff': 'idp,ldp,rpt',
  140. 'lang': 'da',
  141. 'supportFallbackToken': 'true',
  142. }, data=json.dumps({
  143. 'deviceId': str(uuid.uuid4()),
  144. 'scopes': ['Catalog'],
  145. 'optout': True,
  146. }).encode())
  147. self._TOKEN = traverse_obj(
  148. token_response, (lambda _, x: x['type'] == 'UserAccount', 'value', {str}), get_all=False)
  149. if not self._TOKEN:
  150. raise ExtractorError('Unable to get anonymous token')
  151. def _real_extract(self, url):
  152. url_slug = self._match_id(url)
  153. webpage = self._download_webpage(url, url_slug)
  154. json_data = self._search_json(
  155. r'window\.__data\s*=', webpage, 'data', url_slug, fatal=False) or {}
  156. item = traverse_obj(
  157. json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item', {dict}), get_all=False)
  158. if item:
  159. item_id = item.get('id')
  160. else:
  161. item_id = url_slug.rsplit('_', 1)[-1]
  162. item = self._download_json(
  163. f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
  164. note='Attempting to download backup item data', query={
  165. 'device': 'web_browser',
  166. 'expand': 'all',
  167. 'ff': 'idp,ldp,rpt',
  168. 'geoLocation': 'dk',
  169. 'isDeviceAbroad': 'false',
  170. 'lang': 'da',
  171. 'segments': 'drtv,optedout',
  172. 'sub': 'Anonymous',
  173. })
  174. video_id = try_call(lambda: item['customId'].rsplit(':', 1)[-1]) or item_id
  175. stream_data = self._download_json(
  176. f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id,
  177. note='Downloading stream data', query={
  178. 'delivery': 'stream',
  179. 'device': 'web_browser',
  180. 'ff': 'idp,ldp,rpt',
  181. 'lang': 'da',
  182. 'resolution': 'HD-1080',
  183. 'sub': 'Anonymous',
  184. }, headers={'authorization': f'Bearer {self._TOKEN}'})
  185. formats = []
  186. subtitles = {}
  187. for stream in traverse_obj(stream_data, (lambda _, x: x['url'])):
  188. format_id = stream.get('format', 'na')
  189. access_service = stream.get('accessService')
  190. preference = None
  191. subtitle_suffix = ''
  192. if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
  193. preference = -1
  194. format_id += f'-{access_service}'
  195. subtitle_suffix = f'-{access_service}'
  196. elif access_service == 'StandardVideo':
  197. preference = 1
  198. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  199. stream.get('url'), video_id, ext='mp4', preference=preference, m3u8_id=format_id, fatal=False)
  200. formats.extend(fmts)
  201. api_subtitles = traverse_obj(stream, ('subtitles', lambda _, v: url_or_none(v['link']), {dict}))
  202. if not api_subtitles:
  203. self._merge_subtitles(subs, target=subtitles)
  204. for sub_track in api_subtitles:
  205. lang = sub_track.get('language') or 'da'
  206. subtitles.setdefault(self.SUBTITLE_LANGS.get(lang, lang) + subtitle_suffix, []).append({
  207. 'url': sub_track['link'],
  208. 'ext': mimetype2ext(sub_track.get('format')) or 'vtt',
  209. })
  210. if not formats and traverse_obj(item, ('season', 'customFields', 'IsGeoRestricted')):
  211. self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
  212. return {
  213. 'id': video_id,
  214. 'formats': formats,
  215. 'subtitles': subtitles,
  216. **traverse_obj(item, {
  217. 'title': 'title',
  218. 'alt_title': 'contextualTitle',
  219. 'description': 'description',
  220. 'thumbnail': ('images', 'wallpaper'),
  221. 'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
  222. 'duration': ('duration', {int_or_none}),
  223. 'series': ('season', 'show', 'title'),
  224. 'season': ('season', 'title'),
  225. 'season_number': ('season', 'seasonNumber', {int_or_none}),
  226. 'season_id': 'seasonId',
  227. 'episode': 'episodeName',
  228. 'episode_number': ('episodeNumber', {int_or_none}),
  229. 'release_year': ('releaseYear', {int_or_none}),
  230. }),
  231. }
  232. class DRTVLiveIE(InfoExtractor):
  233. IE_NAME = 'drtv:live'
  234. _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
  235. _GEO_COUNTRIES = ['DK']
  236. _TEST = {
  237. 'url': 'https://www.dr.dk/tv/live/dr1',
  238. 'info_dict': {
  239. 'id': 'dr1',
  240. 'ext': 'mp4',
  241. 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  242. },
  243. 'params': {
  244. # m3u8 download
  245. 'skip_download': True,
  246. },
  247. }
  248. def _real_extract(self, url):
  249. channel_id = self._match_id(url)
  250. channel_data = self._download_json(
  251. 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
  252. channel_id)
  253. title = channel_data['Title']
  254. formats = []
  255. for streaming_server in channel_data.get('StreamingServers', []):
  256. server = streaming_server.get('Server')
  257. if not server:
  258. continue
  259. link_type = streaming_server.get('LinkType')
  260. for quality in streaming_server.get('Qualities', []):
  261. for stream in quality.get('Streams', []):
  262. stream_path = stream.get('Stream')
  263. if not stream_path:
  264. continue
  265. stream_url = update_url_query(
  266. f'{server}/{stream_path}', {'b': ''})
  267. if link_type == 'HLS':
  268. formats.extend(self._extract_m3u8_formats(
  269. stream_url, channel_id, 'mp4',
  270. m3u8_id=link_type, fatal=False, live=True))
  271. elif link_type == 'HDS':
  272. formats.extend(self._extract_f4m_formats(update_url_query(
  273. f'{server}/{stream_path}', {'hdcore': '3.7.0'}),
  274. channel_id, f4m_id=link_type, fatal=False))
  275. return {
  276. 'id': channel_id,
  277. 'title': title,
  278. 'thumbnail': channel_data.get('PrimaryImageUri'),
  279. 'formats': formats,
  280. 'is_live': True,
  281. }
  282. class DRTVSeasonIE(InfoExtractor):
  283. IE_NAME = 'drtv:season'
  284. _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)'
  285. _GEO_COUNTRIES = ['DK']
  286. _TESTS = [{
  287. 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008',
  288. 'info_dict': {
  289. 'id': '9008',
  290. 'display_id': 'frank-and-kastaniegaarden',
  291. 'title': 'Frank & Kastaniegaarden',
  292. 'series': 'Frank & Kastaniegaarden',
  293. 'season_number': 2008,
  294. 'alt_title': 'Season 2008',
  295. },
  296. 'playlist_mincount': 8,
  297. }, {
  298. 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761',
  299. 'info_dict': {
  300. 'id': '8761',
  301. 'display_id': 'frank-and-kastaniegaarden',
  302. 'title': 'Frank & Kastaniegaarden',
  303. 'series': 'Frank & Kastaniegaarden',
  304. 'season_number': 2009,
  305. 'alt_title': 'Season 2009',
  306. },
  307. 'playlist_mincount': 19,
  308. }]
  309. def _real_extract(self, url):
  310. display_id, season_id = self._match_valid_url(url).group('display_id', 'id')
  311. data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id)
  312. entries = [{
  313. '_type': 'url',
  314. 'url': f'https://www.dr.dk/drtv{episode["path"]}',
  315. 'ie_key': DRTVIE.ie_key(),
  316. 'title': episode.get('title'),
  317. 'alt_title': episode.get('contextualTitle'),
  318. 'episode': episode.get('episodeName'),
  319. 'description': episode.get('shortDescription'),
  320. 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
  321. 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')),
  322. 'episode_number': episode.get('episodeNumber'),
  323. } for episode in traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items'))]
  324. return {
  325. '_type': 'playlist',
  326. 'id': season_id,
  327. 'display_id': display_id,
  328. 'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
  329. 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
  330. 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
  331. 'entries': entries,
  332. 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')),
  333. }
  334. class DRTVSeriesIE(InfoExtractor):
  335. IE_NAME = 'drtv:series'
  336. _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)'
  337. _GEO_COUNTRIES = ['DK']
  338. _TESTS = [{
  339. 'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954',
  340. 'info_dict': {
  341. 'id': '6954',
  342. 'display_id': 'frank-and-kastaniegaarden',
  343. 'title': 'Frank & Kastaniegaarden',
  344. 'series': 'Frank & Kastaniegaarden',
  345. 'alt_title': '',
  346. },
  347. 'playlist_mincount': 15,
  348. }]
  349. def _real_extract(self, url):
  350. display_id, series_id = self._match_valid_url(url).group('display_id', 'id')
  351. data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id)
  352. entries = [{
  353. '_type': 'url',
  354. 'url': f'https://www.dr.dk/drtv{season.get("path")}',
  355. 'ie_key': DRTVSeasonIE.ie_key(),
  356. 'title': season.get('title'),
  357. 'alt_title': season.get('contextualTitle'),
  358. 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
  359. 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')),
  360. } for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))]
  361. return {
  362. '_type': 'playlist',
  363. 'id': series_id,
  364. 'display_id': display_id,
  365. 'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
  366. 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
  367. 'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
  368. 'entries': entries,
  369. }