curiositystream.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. import re
  2. import urllib.parse
  3. from .common import InfoExtractor
  4. from ..utils import ExtractorError, int_or_none, urlencode_postdata
  5. class CuriosityStreamBaseIE(InfoExtractor):
  6. _NETRC_MACHINE = 'curiositystream'
  7. _auth_token = None
  8. def _handle_errors(self, result):
  9. error = result.get('error', {}).get('message')
  10. if error:
  11. if isinstance(error, dict):
  12. error = ', '.join(error.values())
  13. raise ExtractorError(
  14. f'{self.IE_NAME} said: {error}', expected=True)
  15. def _call_api(self, path, video_id, query=None):
  16. headers = {}
  17. if not self._auth_token:
  18. auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
  19. if auth_cookie:
  20. self.write_debug('Obtained auth_token cookie')
  21. self._auth_token = urllib.parse.unquote(auth_cookie.value)
  22. if self._auth_token:
  23. headers['X-Auth-Token'] = self._auth_token
  24. result = self._download_json(
  25. self._API_BASE_URL + path, video_id, headers=headers, query=query)
  26. self._handle_errors(result)
  27. return result['data']
  28. def _perform_login(self, username, password):
  29. result = self._download_json(
  30. 'https://api.curiositystream.com/v1/login', None,
  31. note='Logging in', data=urlencode_postdata({
  32. 'email': username,
  33. 'password': password,
  34. }))
  35. self._handle_errors(result)
  36. CuriosityStreamBaseIE._auth_token = result['message']['auth_token']
  37. class CuriosityStreamIE(CuriosityStreamBaseIE):
  38. IE_NAME = 'curiositystream'
  39. _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
  40. _TESTS = [{
  41. 'url': 'http://app.curiositystream.com/video/2',
  42. 'info_dict': {
  43. 'id': '2',
  44. 'ext': 'mp4',
  45. 'title': 'How Did You Develop The Internet?',
  46. 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
  47. 'channel': 'Curiosity Stream',
  48. 'categories': ['Technology', 'Interview'],
  49. 'average_rating': float,
  50. 'series_id': '2',
  51. 'thumbnail': r're:https://img.curiositystream.com/.+\.jpg',
  52. 'tags': [],
  53. 'duration': 158,
  54. },
  55. 'params': {
  56. # m3u8 download
  57. 'skip_download': True,
  58. },
  59. }]
  60. _API_BASE_URL = 'https://api.curiositystream.com/v1/media/'
  61. def _real_extract(self, url):
  62. video_id = self._match_id(url)
  63. formats = []
  64. for encoding_format in ('m3u8', 'mpd'):
  65. media = self._call_api(video_id, video_id, query={
  66. 'encodingsNew': 'true',
  67. 'encodingsFormat': encoding_format,
  68. })
  69. for encoding in media.get('encodings', []):
  70. playlist_url = encoding.get('master_playlist_url')
  71. if encoding_format == 'm3u8':
  72. # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
  73. formats.extend(self._extract_m3u8_formats(
  74. playlist_url, video_id, 'mp4',
  75. m3u8_id='hls', fatal=False))
  76. elif encoding_format == 'mpd':
  77. formats.extend(self._extract_mpd_formats(
  78. playlist_url, video_id, mpd_id='dash', fatal=False))
  79. encoding_url = encoding.get('url')
  80. file_url = encoding.get('file_url')
  81. if not encoding_url and not file_url:
  82. continue
  83. f = {
  84. 'width': int_or_none(encoding.get('width')),
  85. 'height': int_or_none(encoding.get('height')),
  86. 'vbr': int_or_none(encoding.get('video_bitrate')),
  87. 'abr': int_or_none(encoding.get('audio_bitrate')),
  88. 'filesize': int_or_none(encoding.get('size_in_bytes')),
  89. 'vcodec': encoding.get('video_codec'),
  90. 'acodec': encoding.get('audio_codec'),
  91. 'container': encoding.get('container_type'),
  92. }
  93. for f_url in (encoding_url, file_url):
  94. if not f_url:
  95. continue
  96. fmt = f.copy()
  97. rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
  98. if rtmp:
  99. fmt.update({
  100. 'url': rtmp.group('url'),
  101. 'play_path': rtmp.group('playpath'),
  102. 'app': rtmp.group('app'),
  103. 'ext': 'flv',
  104. 'format_id': 'rtmp',
  105. })
  106. else:
  107. fmt.update({
  108. 'url': f_url,
  109. 'format_id': 'http',
  110. })
  111. formats.append(fmt)
  112. title = media['title']
  113. subtitles = {}
  114. for closed_caption in media.get('closed_captions', []):
  115. sub_url = closed_caption.get('file')
  116. if not sub_url:
  117. continue
  118. lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
  119. subtitles.setdefault(lang, []).append({
  120. 'url': sub_url,
  121. })
  122. return {
  123. 'id': video_id,
  124. 'formats': formats,
  125. 'title': title,
  126. 'description': media.get('description'),
  127. 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
  128. 'duration': int_or_none(media.get('duration')),
  129. 'tags': media.get('tags'),
  130. 'subtitles': subtitles,
  131. 'channel': media.get('producer'),
  132. 'categories': [media.get('primary_category'), media.get('type')],
  133. 'average_rating': media.get('rating_percentage'),
  134. 'series_id': str(media.get('collection_id') or '') or None,
  135. }
  136. class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE):
  137. def _real_extract(self, url):
  138. collection_id = self._match_id(url)
  139. collection = self._call_api(collection_id, collection_id)
  140. entries = []
  141. for media in collection.get('media', []):
  142. media_id = str(media.get('id'))
  143. media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
  144. entries.append(self.url_result(
  145. f'https://curiositystream.com/{media_type}/{media_id}',
  146. ie=ie.ie_key(), video_id=media_id))
  147. return self.playlist_result(
  148. entries, collection_id,
  149. collection.get('title'), collection.get('description'))
  150. class CuriosityStreamCollectionsIE(CuriosityStreamCollectionBaseIE):
  151. IE_NAME = 'curiositystream:collections'
  152. _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)'
  153. _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
  154. _TESTS = [{
  155. 'url': 'https://curiositystream.com/collections/86',
  156. 'info_dict': {
  157. 'id': '86',
  158. 'title': 'Staff Picks',
  159. 'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
  160. },
  161. 'playlist_mincount': 7,
  162. }, {
  163. 'url': 'https://curiositystream.com/collections/36',
  164. 'only_matching': True,
  165. }]
  166. class CuriosityStreamSeriesIE(CuriosityStreamCollectionBaseIE):
  167. IE_NAME = 'curiositystream:series'
  168. _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:series|collection)/(?P<id>\d+)'
  169. _API_BASE_URL = 'https://api.curiositystream.com/v2/series/'
  170. _TESTS = [{
  171. 'url': 'https://curiositystream.com/series/2',
  172. 'info_dict': {
  173. 'id': '2',
  174. 'title': 'Curious Minds: The Internet',
  175. 'description': 'How is the internet shaping our lives in the 21st Century?',
  176. },
  177. 'playlist_mincount': 16,
  178. }, {
  179. 'url': 'https://curiositystream.com/collection/2',
  180. 'only_matching': True,
  181. }]