amcnetworks.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import re
  2. from .theplatform import ThePlatformIE
  3. from ..utils import (
  4. int_or_none,
  5. parse_age_limit,
  6. try_get,
  7. update_url_query,
  8. )
  9. class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
  10. _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
  11. _TESTS = [{
  12. 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
  13. 'info_dict': {
  14. 'id': '4Lq1dzOnZGt0',
  15. 'ext': 'mp4',
  16. 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
  17. 'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
  18. 'upload_date': '20201120',
  19. 'timestamp': 1605904350,
  20. 'uploader': 'AMCN',
  21. },
  22. 'params': {
  23. # m3u8 download
  24. 'skip_download': True,
  25. },
  26. 'skip': '404 Not Found',
  27. }, {
  28. 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
  29. 'only_matching': True,
  30. }, {
  31. 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
  32. 'only_matching': True,
  33. }, {
  34. 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
  35. 'only_matching': True,
  36. }, {
  37. 'url': 'http://www.ifc.com/movies/chaos',
  38. 'only_matching': True,
  39. }, {
  40. 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
  41. 'only_matching': True,
  42. }, {
  43. 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
  44. 'only_matching': True,
  45. }, {
  46. 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
  47. 'only_matching': True,
  48. }, {
  49. 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
  50. 'only_matching': True,
  51. }]
  52. _REQUESTOR_ID_MAP = {
  53. 'amc': 'AMC',
  54. 'bbcamerica': 'BBCA',
  55. 'ifc': 'IFC',
  56. 'sundancetv': 'SUNDANCE',
  57. 'wetv': 'WETV',
  58. }
  59. def _real_extract(self, url):
  60. site, display_id = self._match_valid_url(url).groups()
  61. requestor_id = self._REQUESTOR_ID_MAP[site]
  62. page_data = self._download_json(
  63. f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
  64. display_id)['data']
  65. properties = page_data.get('properties') or {}
  66. query = {
  67. 'mbr': 'true',
  68. 'manifest': 'm3u',
  69. }
  70. video_player_count = 0
  71. try:
  72. for v in page_data['children']:
  73. if v.get('type') == 'video-player':
  74. release_pid = v['properties']['currentVideo']['meta']['releasePid']
  75. tp_path = 'M_UwQC/' + release_pid
  76. media_url = 'https://link.theplatform.com/s/' + tp_path
  77. video_player_count += 1
  78. except KeyError:
  79. pass
  80. if video_player_count > 1:
  81. self.report_warning(
  82. f'The JSON data has {video_player_count} video players. Only one will be extracted')
  83. # Fall back to videoPid if releasePid not found.
  84. # TODO: Fall back to videoPid if releasePid manifest uses DRM.
  85. if not video_player_count:
  86. tp_path = 'M_UwQC/media/' + properties['videoPid']
  87. media_url = 'https://link.theplatform.com/s/' + tp_path
  88. theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
  89. info = self._parse_theplatform_metadata(theplatform_metadata)
  90. video_id = theplatform_metadata['pid']
  91. title = theplatform_metadata['title']
  92. rating = try_get(
  93. theplatform_metadata, lambda x: x['ratings'][0]['rating'])
  94. video_category = properties.get('videoCategory')
  95. if video_category and video_category.endswith('-Auth'):
  96. resource = self._get_mvpd_resource(
  97. requestor_id, title, video_id, rating)
  98. query['auth'] = self._extract_mvpd_auth(
  99. url, video_id, requestor_id, resource)
  100. media_url = update_url_query(media_url, query)
  101. formats, subtitles = self._extract_theplatform_smil(
  102. media_url, video_id)
  103. thumbnails = []
  104. thumbnail_urls = [properties.get('imageDesktop')]
  105. if 'thumbnail' in info:
  106. thumbnail_urls.append(info.pop('thumbnail'))
  107. for thumbnail_url in thumbnail_urls:
  108. if not thumbnail_url:
  109. continue
  110. mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
  111. thumbnails.append({
  112. 'url': thumbnail_url,
  113. 'width': int(mobj.group(1)) if mobj else None,
  114. 'height': int(mobj.group(2)) if mobj else None,
  115. })
  116. info.update({
  117. 'age_limit': parse_age_limit(rating),
  118. 'formats': formats,
  119. 'id': video_id,
  120. 'subtitles': subtitles,
  121. 'thumbnails': thumbnails,
  122. })
  123. ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
  124. if ns_keys:
  125. ns = next(iter(ns_keys))
  126. episode = theplatform_metadata.get(ns + '$episodeTitle') or None
  127. episode_number = int_or_none(
  128. theplatform_metadata.get(ns + '$episode'))
  129. season_number = int_or_none(
  130. theplatform_metadata.get(ns + '$season'))
  131. series = theplatform_metadata.get(ns + '$show') or None
  132. info.update({
  133. 'episode': episode,
  134. 'episode_number': episode_number,
  135. 'season_number': season_number,
  136. 'series': series,
  137. })
  138. return info