# radiocanada.py (6.1 KB)
  1. from .common import InfoExtractor
  2. from ..networking.exceptions import HTTPError
  3. from ..utils import (
  4. ExtractorError,
  5. determine_ext,
  6. int_or_none,
  7. unified_strdate,
  8. )
  9. class RadioCanadaIE(InfoExtractor):
  10. IE_NAME = 'radiocanada'
  11. _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
  12. _TESTS = [
  13. {
  14. 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
  15. 'info_dict': {
  16. 'id': '7184272',
  17. 'ext': 'mp4',
  18. 'title': 'Le parcours du tireur capté sur vidéo',
  19. 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
  20. 'upload_date': '20141023',
  21. },
  22. 'params': {
  23. # m3u8 download
  24. 'skip_download': True,
  25. },
  26. },
  27. {
  28. # empty Title
  29. 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
  30. 'info_dict': {
  31. 'id': '7754998',
  32. 'ext': 'mp4',
  33. 'title': 'letelejournal22h',
  34. 'description': 'INTEGRALE WEB 22H-TJ',
  35. 'upload_date': '20170720',
  36. },
  37. 'params': {
  38. # m3u8 download
  39. 'skip_download': True,
  40. },
  41. },
  42. {
  43. # with protectionType but not actually DRM protected
  44. 'url': 'radiocanada:toutv:140872',
  45. 'info_dict': {
  46. 'id': '140872',
  47. 'title': 'Épisode 1',
  48. 'series': 'District 31',
  49. },
  50. 'only_matching': True,
  51. },
  52. ]
  53. _GEO_COUNTRIES = ['CA']
  54. _access_token = None
  55. _claims = None
  56. def _call_api(self, path, video_id=None, app_code=None, query=None):
  57. if not query:
  58. query = {}
  59. query.update({
  60. 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
  61. 'output': 'json',
  62. })
  63. if video_id:
  64. query.update({
  65. 'appCode': app_code,
  66. 'idMedia': video_id,
  67. })
  68. if self._access_token:
  69. query['access_token'] = self._access_token
  70. try:
  71. return self._download_json(
  72. 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
  73. except ExtractorError as e:
  74. if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422):
  75. data = self._parse_json(e.cause.response.read().decode(), None)
  76. error = data.get('error_description') or data['errorMessage']['text']
  77. raise ExtractorError(error, expected=True)
  78. raise
  79. def _extract_info(self, app_code, video_id):
  80. metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
  81. def get_meta(name):
  82. for meta in metas:
  83. if meta.get('name') == name:
  84. text = meta.get('text')
  85. if text:
  86. return text
  87. # protectionType does not necessarily mean the video is DRM protected (see
  88. # https://github.com/ytdl-org/youtube-dl/pull/18609).
  89. if get_meta('protectionType'):
  90. self.report_warning('This video is probably DRM protected.')
  91. query = {
  92. 'connectionType': 'hd',
  93. 'deviceType': 'ipad',
  94. 'multibitrate': 'true',
  95. }
  96. if self._claims:
  97. query['claims'] = self._claims
  98. v_data = self._call_api('validation/v2/', video_id, app_code, query)
  99. v_url = v_data.get('url')
  100. if not v_url:
  101. error = v_data['message']
  102. if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
  103. raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
  104. if error == 'Le contenu sélectionné est disponible seulement en premium':
  105. self.raise_login_required(error)
  106. raise ExtractorError(
  107. f'{self.IE_NAME} said: {error}', expected=True)
  108. formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
  109. subtitles = {}
  110. closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
  111. if closed_caption_url:
  112. subtitles['fr'] = [{
  113. 'url': closed_caption_url,
  114. 'ext': determine_ext(closed_caption_url, 'vtt'),
  115. }]
  116. return {
  117. 'id': video_id,
  118. 'title': get_meta('Title') or get_meta('AV-nomEmission'),
  119. 'description': get_meta('Description') or get_meta('ShortDescription'),
  120. 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
  121. 'duration': int_or_none(get_meta('length')),
  122. 'series': get_meta('Emission'),
  123. 'season_number': int_or_none('SrcSaison'),
  124. 'episode_number': int_or_none('SrcEpisode'),
  125. 'upload_date': unified_strdate(get_meta('Date')),
  126. 'subtitles': subtitles,
  127. 'formats': formats,
  128. }
  129. def _real_extract(self, url):
  130. return self._extract_info(*self._match_valid_url(url).groups())
  131. class RadioCanadaAudioVideoIE(InfoExtractor):
  132. IE_NAME = 'radiocanada:audiovideo'
  133. _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
  134. _TESTS = [{
  135. 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
  136. 'info_dict': {
  137. 'id': '7527184',
  138. 'ext': 'mp4',
  139. 'title': 'Barack Obama au Vietnam',
  140. 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
  141. 'upload_date': '20160523',
  142. },
  143. 'params': {
  144. # m3u8 download
  145. 'skip_download': True,
  146. },
  147. }, {
  148. 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
  149. 'only_matching': True,
  150. }]
  151. def _real_extract(self, url):
  152. return self.url_result(f'radiocanada:medianet:{self._match_id(url)}')