redbee.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. import json
  2. import re
  3. import time
  4. import urllib.parse
  5. import uuid
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. ExtractorError,
  9. float_or_none,
  10. int_or_none,
  11. strip_or_none,
  12. traverse_obj,
  13. try_call,
  14. unified_timestamp,
  15. )
  16. class RedBeeBaseIE(InfoExtractor):
  17. _DEVICE_ID = str(uuid.uuid4())
  18. @property
  19. def _API_URL(self):
  20. """
  21. Ref: https://apidocs.emp.ebsd.ericsson.net
  22. Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
  23. """
  24. return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'
  25. def _get_bearer_token(self, asset_id, jwt=None):
  26. request = {
  27. 'deviceId': self._DEVICE_ID,
  28. 'device': {
  29. 'deviceId': self._DEVICE_ID,
  30. 'name': 'Mozilla Firefox 102',
  31. 'type': 'WEB',
  32. },
  33. }
  34. if jwt:
  35. request['jwt'] = jwt
  36. return self._download_json(
  37. f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
  38. asset_id, data=json.dumps(request).encode(), headers={
  39. 'Content-Type': 'application/json;charset=utf-8',
  40. })['sessionToken']
  41. def _get_formats_and_subtitles(self, asset_id, **kwargs):
  42. bearer_token = self._get_bearer_token(asset_id, **kwargs)
  43. api_response = self._download_json(
  44. f'{self._API_URL}/entitlement/{asset_id}/play',
  45. asset_id, headers={
  46. 'Authorization': f'Bearer {bearer_token}',
  47. 'Accept': 'application/json, text/plain, */*',
  48. })
  49. formats, subtitles = [], {}
  50. for format_data in api_response['formats']:
  51. if not format_data.get('mediaLocator'):
  52. continue
  53. fmts, subs = [], {}
  54. if format_data.get('format') == 'DASH':
  55. fmts, subs = self._extract_mpd_formats_and_subtitles(
  56. format_data['mediaLocator'], asset_id, fatal=False)
  57. elif format_data.get('format') == 'SMOOTHSTREAMING':
  58. fmts, subs = self._extract_ism_formats_and_subtitles(
  59. format_data['mediaLocator'], asset_id, fatal=False)
  60. elif format_data.get('format') == 'HLS':
  61. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  62. format_data['mediaLocator'], asset_id, fatal=False)
  63. if format_data.get('drm'):
  64. for f in fmts:
  65. f['has_drm'] = True
  66. formats.extend(fmts)
  67. self._merge_subtitles(subs, target=subtitles)
  68. return formats, subtitles
  69. class ParliamentLiveUKIE(RedBeeBaseIE):
  70. IE_NAME = 'parliamentlive.tv'
  71. IE_DESC = 'UK parliament videos'
  72. _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
  73. _REDBEE_CUSTOMER = 'UKParliament'
  74. _REDBEE_BUSINESS_UNIT = 'ParliamentLive'
  75. _TESTS = [{
  76. 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
  77. 'info_dict': {
  78. 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
  79. 'ext': 'mp4',
  80. 'title': 'Home Affairs Committee',
  81. 'timestamp': 1395153872,
  82. 'upload_date': '20140318',
  83. 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
  84. },
  85. }, {
  86. 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
  87. 'only_matching': True,
  88. }, {
  89. 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
  90. 'info_dict': {
  91. 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
  92. 'ext': 'mp4',
  93. 'title': 'House of Commons',
  94. 'timestamp': 1658392447,
  95. 'upload_date': '20220721',
  96. 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
  97. },
  98. }]
  99. def _real_extract(self, url):
  100. video_id = self._match_id(url)
  101. formats, subtitles = self._get_formats_and_subtitles(video_id)
  102. video_info = self._download_json(
  103. f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
  104. return {
  105. 'id': video_id,
  106. 'formats': formats,
  107. 'subtitles': subtitles,
  108. 'title': traverse_obj(video_info, ('event', 'title')),
  109. 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
  110. 'timestamp': traverse_obj(
  111. video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
  112. '_format_sort_fields': ('res', 'proto'),
  113. }
  114. class RTBFIE(RedBeeBaseIE):
  115. _WORKING = False
  116. _VALID_URL = r'''(?x)
  117. https?://(?:www\.)?rtbf\.be/
  118. (?:
  119. video/[^?]+\?.*\bid=|
  120. ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
  121. auvio/[^/]+\?.*\b(?P<live>l)?id=
  122. )(?P<id>\d+)'''
  123. _NETRC_MACHINE = 'rtbf'
  124. _REDBEE_CUSTOMER = 'RTBF'
  125. _REDBEE_BUSINESS_UNIT = 'Auvio'
  126. _TESTS = [{
  127. 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
  128. 'md5': '8c876a1cceeb6cf31b476461ade72384',
  129. 'info_dict': {
  130. 'id': '1921274',
  131. 'ext': 'mp4',
  132. 'title': 'Les Diables au coeur (épisode 2)',
  133. 'description': '(du 25/04/2014)',
  134. 'duration': 3099.54,
  135. 'upload_date': '20140425',
  136. 'timestamp': 1398456300,
  137. },
  138. 'skip': 'No longer available',
  139. }, {
  140. # geo restricted
  141. 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
  142. 'only_matching': True,
  143. }, {
  144. 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
  145. 'only_matching': True,
  146. }, {
  147. 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
  148. 'only_matching': True,
  149. }, {
  150. # Live
  151. 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
  152. 'only_matching': True,
  153. }, {
  154. # Audio
  155. 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
  156. 'only_matching': True,
  157. }, {
  158. # With Subtitle
  159. 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
  160. 'only_matching': True,
  161. }, {
  162. 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
  163. 'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
  164. 'info_dict': {
  165. 'id': '2921926',
  166. 'ext': 'mp4',
  167. 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
  168. 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
  169. 'duration': 5258.8,
  170. 'upload_date': '20220727',
  171. 'timestamp': 1658934000,
  172. 'series': '#Investigation',
  173. 'thumbnail': r're:^https?://[^?&]+\.jpg$',
  174. },
  175. }, {
  176. 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
  177. 'md5': '054f9f143bc79c89647c35e5a7d35fa8',
  178. 'info_dict': {
  179. 'id': '2920492',
  180. 'ext': 'mp4',
  181. 'title': '04 - Le crime de la rue Royale',
  182. 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
  183. 'duration': 1574.6,
  184. 'upload_date': '20220723',
  185. 'timestamp': 1658596887,
  186. 'series': 'La Belgique criminelle - TV',
  187. 'thumbnail': r're:^https?://[^?&]+\.jpg$',
  188. },
  189. }]
  190. _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
  191. _PROVIDERS = {
  192. 'YOUTUBE': 'Youtube',
  193. 'DAILYMOTION': 'Dailymotion',
  194. 'VIMEO': 'Vimeo',
  195. }
  196. _QUALITIES = [
  197. ('mobile', 'SD'),
  198. ('web', 'MD'),
  199. ('high', 'HD'),
  200. ]
  201. _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
  202. _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
  203. _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'
  204. def _perform_login(self, username, password):
  205. if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
  206. return
  207. self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)
  208. login_response = self._download_json(
  209. self._LOGIN_URL, None, data=urllib.parse.urlencode({
  210. 'loginID': username,
  211. 'password': password,
  212. 'APIKey': self._GIGYA_API_KEY,
  213. 'targetEnv': 'jssdk',
  214. 'sessionExpiration': '-2',
  215. }).encode(), headers={
  216. 'Content-Type': 'application/x-www-form-urlencoded',
  217. })
  218. if login_response['statusCode'] != 200:
  219. raise ExtractorError('Login failed. Server message: {}'.format(login_response['errorMessage']), expected=True)
  220. self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'],
  221. secure=True, expire_time=time.time() + 3600)
  222. def _get_formats_and_subtitles(self, url, media_id):
  223. login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
  224. if not login_token:
  225. self.raise_login_required()
  226. session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json(
  227. 'https://login.rtbf.be/accounts.getJWT', media_id, query={
  228. 'login_token': login_token.value,
  229. 'APIKey': self._GIGYA_API_KEY,
  230. 'sdk': 'js_latest',
  231. 'authMode': 'cookie',
  232. 'pageURL': url,
  233. 'sdkBuild': '13273',
  234. 'format': 'json',
  235. })['id_token']
  236. return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)
  237. def _real_extract(self, url):
  238. live, media_id = self._match_valid_url(url).groups()
  239. embed_page = self._download_webpage(
  240. 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
  241. media_id, query={'id': media_id})
  242. media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
  243. if not media_data:
  244. if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
  245. raise ExtractorError('Livestream has ended.', expected=True)
  246. if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
  247. self.raise_login_required()
  248. raise ExtractorError('Could not find media data')
  249. data = self._parse_json(media_data, media_id)
  250. error = data.get('error')
  251. if error:
  252. raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True)
  253. provider = data.get('provider')
  254. if provider in self._PROVIDERS:
  255. return self.url_result(data['url'], self._PROVIDERS[provider])
  256. title = traverse_obj(data, 'subtitle', 'title')
  257. is_live = data.get('isLive')
  258. height_re = r'-(\d+)p\.'
  259. formats, subtitles = [], {}
  260. # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
  261. # since all they contain is a 20s video that is completely unrelated.
  262. # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
  263. m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls')
  264. if m3u8_url:
  265. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  266. m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
  267. formats.extend(fmts)
  268. self._merge_subtitles(subs, target=subtitles)
  269. fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
  270. http_url = data.get('url')
  271. if formats and http_url and re.search(height_re, http_url):
  272. http_url = fix_url(http_url)
  273. for m3u8_f in formats[:]:
  274. height = m3u8_f.get('height')
  275. if not height:
  276. continue
  277. f = m3u8_f.copy()
  278. del f['protocol']
  279. f.update({
  280. 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
  281. 'url': re.sub(height_re, '-%dp.' % height, http_url),
  282. })
  283. formats.append(f)
  284. else:
  285. sources = data.get('sources') or {}
  286. for key, format_id in self._QUALITIES:
  287. format_url = sources.get(key)
  288. if not format_url:
  289. continue
  290. height = int_or_none(self._search_regex(
  291. height_re, format_url, 'height', default=None))
  292. formats.append({
  293. 'format_id': format_id,
  294. 'url': fix_url(format_url),
  295. 'height': height,
  296. })
  297. mpd_url = None if data.get('isLive') else data.get('urlDash')
  298. if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
  299. fmts, subs = self._extract_mpd_formats_and_subtitles(
  300. mpd_url, media_id, mpd_id='dash', fatal=False)
  301. formats.extend(fmts)
  302. self._merge_subtitles(subs, target=subtitles)
  303. audio_url = data.get('urlAudio')
  304. if audio_url:
  305. formats.append({
  306. 'format_id': 'audio',
  307. 'url': audio_url,
  308. 'vcodec': 'none',
  309. })
  310. for track in (data.get('tracks') or {}).values():
  311. sub_url = track.get('url')
  312. if not sub_url:
  313. continue
  314. subtitles.setdefault(track.get('lang') or 'fr', []).append({
  315. 'url': sub_url,
  316. })
  317. if not formats:
  318. fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
  319. formats.extend(fmts)
  320. self._merge_subtitles(subs, target=subtitles)
  321. return {
  322. 'id': media_id,
  323. 'formats': formats,
  324. 'title': title,
  325. 'description': strip_or_none(data.get('description')),
  326. 'thumbnail': data.get('thumbnail'),
  327. 'duration': float_or_none(data.get('realDuration')),
  328. 'timestamp': int_or_none(data.get('liveFrom')),
  329. 'series': data.get('programLabel'),
  330. 'subtitles': subtitles,
  331. 'is_live': is_live,
  332. '_format_sort_fields': ('res', 'proto'),
  333. }