roosterteeth.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. from .common import InfoExtractor
  2. from ..networking.exceptions import HTTPError
  3. from ..utils import (
  4. ExtractorError,
  5. LazyList,
  6. int_or_none,
  7. join_nonempty,
  8. parse_iso8601,
  9. parse_qs,
  10. smuggle_url,
  11. str_or_none,
  12. url_or_none,
  13. urlencode_postdata,
  14. urljoin,
  15. )
  16. from ..utils.traversal import traverse_obj
  17. class RoosterTeethBaseIE(InfoExtractor):
  18. _NETRC_MACHINE = 'roosterteeth'
  19. _API_BASE = 'https://svod-be.roosterteeth.com'
  20. _API_BASE_URL = f'{_API_BASE}/api/v1'
  21. def _perform_login(self, username, password):
  22. if self._get_cookies(self._API_BASE_URL).get('rt_access_token'):
  23. return
  24. try:
  25. self._download_json(
  26. 'https://auth.roosterteeth.com/oauth/token',
  27. None, 'Logging in', data=urlencode_postdata({
  28. 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
  29. 'grant_type': 'password',
  30. 'username': username,
  31. 'password': password,
  32. }))
  33. except ExtractorError as e:
  34. msg = 'Unable to login'
  35. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  36. resp = self._parse_json(e.cause.response.read().decode(), None, fatal=False)
  37. if resp:
  38. error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
  39. if error:
  40. msg += ': ' + error
  41. self.report_warning(msg)
  42. def _extract_video_info(self, data):
  43. thumbnails = []
  44. for image in traverse_obj(data, ('included', 'images')):
  45. if image.get('type') not in ('episode_image', 'bonus_feature_image'):
  46. continue
  47. thumbnails.extend([{
  48. 'id': name,
  49. 'url': url,
  50. } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)])
  51. attributes = data.get('attributes') or {}
  52. title = traverse_obj(attributes, 'title', 'display_title')
  53. sub_only = attributes.get('is_sponsors_only')
  54. episode_id = str_or_none(data.get('uuid'))
  55. video_id = str_or_none(data.get('id'))
  56. if video_id and 'parent_content_id' in attributes: # parent_content_id is a bonus-only key
  57. video_id += '-bonus' # there are collisions with bonus ids and regular ids
  58. elif not video_id:
  59. video_id = episode_id
  60. return {
  61. 'id': video_id,
  62. 'display_id': attributes.get('slug'),
  63. 'title': title,
  64. 'description': traverse_obj(attributes, 'description', 'caption'),
  65. 'series': traverse_obj(attributes, 'show_title', 'parent_content_title'),
  66. 'season_number': int_or_none(attributes.get('season_number')),
  67. 'season_id': str_or_none(attributes.get('season_id')),
  68. 'episode': title,
  69. 'episode_number': int_or_none(attributes.get('number')),
  70. 'episode_id': episode_id,
  71. 'channel_id': attributes.get('channel_id'),
  72. 'duration': int_or_none(attributes.get('length')),
  73. 'release_timestamp': parse_iso8601(attributes.get('original_air_date')),
  74. 'thumbnails': thumbnails,
  75. 'availability': self._availability(
  76. needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
  77. is_private=False, is_unlisted=False),
  78. 'tags': attributes.get('genres'),
  79. }
  80. class RoosterTeethIE(RoosterTeethBaseIE):
  81. _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:bonus-feature|episode|watch)/(?P<id>[^/?#&]+)'
  82. _TESTS = [{
  83. 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
  84. 'info_dict': {
  85. 'id': '9156',
  86. 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
  87. 'ext': 'mp4',
  88. 'title': 'Million Dollars, But... The Game Announcement',
  89. 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
  90. 'thumbnail': r're:^https?://.*\.png$',
  91. 'series': 'Million Dollars, But...',
  92. 'episode': 'Million Dollars, But... The Game Announcement',
  93. 'tags': ['Game Show', 'Sketch'],
  94. 'season_number': 2,
  95. 'availability': 'public',
  96. 'episode_number': 10,
  97. 'episode_id': '00374575-464e-11e7-a302-065410f210c4',
  98. 'season': 'Season 2',
  99. 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4',
  100. 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939',
  101. 'duration': 145,
  102. 'release_timestamp': 1462982400,
  103. 'release_date': '20160511',
  104. },
  105. 'params': {'skip_download': True},
  106. }, {
  107. 'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
  108. 'info_dict': {
  109. 'id': '40432',
  110. 'display_id': 'rwby-bonus-25',
  111. 'title': 'Grimm',
  112. 'description': 'md5:f30ff570741213418a8d2c19868b93ab',
  113. 'episode': 'Grimm',
  114. 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
  115. 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
  116. 'ext': 'mp4',
  117. 'availability': 'public',
  118. 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c',
  119. 'episode_number': 3,
  120. 'tags': ['Animation'],
  121. 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8',
  122. 'season': 'Season 1',
  123. 'series': 'RWBY: World of Remnant',
  124. 'season_number': 1,
  125. 'duration': 216,
  126. 'release_timestamp': 1413489600,
  127. 'release_date': '20141016',
  128. },
  129. 'params': {'skip_download': True},
  130. }, {
  131. # bonus feature with /watch/ url
  132. 'url': 'https://roosterteeth.com/watch/rwby-bonus-21',
  133. 'info_dict': {
  134. 'id': '33-bonus',
  135. 'display_id': 'rwby-bonus-21',
  136. 'title': 'Volume 5 Yang Character Short',
  137. 'description': 'md5:8c2440bc763ea90c52cfe0a68093e1f7',
  138. 'episode': 'Volume 5 Yang Character Short',
  139. 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
  140. 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
  141. 'ext': 'mp4',
  142. 'availability': 'public',
  143. 'episode_id': 'f2a9f132-1fe2-44ad-8956-63d7c0267720',
  144. 'episode_number': 55,
  145. 'series': 'RWBY',
  146. 'duration': 255,
  147. 'release_timestamp': 1507993200,
  148. 'release_date': '20171014',
  149. },
  150. 'params': {'skip_download': True},
  151. }, {
  152. # only works with video_data['attributes']['url'] m3u8 url
  153. 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman',
  154. 'info_dict': {
  155. 'id': '25394',
  156. 'ext': 'mp4',
  157. 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
  158. 'description': 'md5:91bb934698344fb9647b1c7351f16964',
  159. 'availability': 'public',
  160. 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
  161. 'episode': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
  162. 'episode_number': 71,
  163. 'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4',
  164. 'season': 'Season 2008',
  165. 'tags': ['Gaming'],
  166. 'series': 'Achievement Hunter',
  167. 'display_id': 'md5:4465ce4f001735f9d7a2ae529a543d31',
  168. 'season_id': 'ffa13340-464d-11e7-a302-065410f210c4',
  169. 'season_number': 2008,
  170. 'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7',
  171. 'duration': 189,
  172. 'release_timestamp': 1228317300,
  173. 'release_date': '20081203',
  174. },
  175. 'params': {'skip_download': True},
  176. }, {
  177. # brightcove fallback extraction needed
  178. 'url': 'https://roosterteeth.com/watch/lets-play-2013-126',
  179. 'info_dict': {
  180. 'id': '17845',
  181. 'ext': 'mp4',
  182. 'title': 'WWE \'13',
  183. 'availability': 'public',
  184. 'series': 'Let\'s Play',
  185. 'episode_number': 10,
  186. 'season_id': 'ffa23d9c-464d-11e7-a302-065410f210c4',
  187. 'channel_id': '75ba87e8-06fd-4482-bad9-52a4da2c6181',
  188. 'episode': 'WWE \'13',
  189. 'episode_id': 'ffdbe55e-464d-11e7-a302-065410f210c4',
  190. 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
  191. 'tags': ['Gaming', 'Our Favorites'],
  192. 'description': 'md5:b4a5226d2bbcf0dafbde11a2ba27262d',
  193. 'display_id': 'lets-play-2013-126',
  194. 'season_number': 3,
  195. 'season': 'Season 3',
  196. 'release_timestamp': 1359999840,
  197. 'release_date': '20130204',
  198. },
  199. 'expected_warnings': ['Direct m3u8 URL returned HTTP Error 403'],
  200. 'params': {'skip_download': True},
  201. }, {
  202. 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
  203. 'only_matching': True,
  204. }, {
  205. 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
  206. 'only_matching': True,
  207. }, {
  208. 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
  209. 'only_matching': True,
  210. }, {
  211. 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
  212. 'only_matching': True,
  213. }, {
  214. # only available for FIRST members
  215. 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
  216. 'only_matching': True,
  217. }, {
  218. 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
  219. 'only_matching': True,
  220. }, {
  221. 'url': 'https://roosterteeth.com/bonus-feature/camp-camp-soundtrack-another-rap-song-about-foreign-cars-richie-branson',
  222. 'only_matching': True,
  223. }]
  224. _BRIGHTCOVE_ACCOUNT_ID = '6203312018001'
  225. def _extract_brightcove_formats_and_subtitles(self, bc_id, url, m3u8_url):
  226. account_id = self._search_regex(
  227. r'/accounts/(\d+)/videos/', m3u8_url, 'account id', default=self._BRIGHTCOVE_ACCOUNT_ID)
  228. info = self._downloader.get_info_extractor('BrightcoveNew').extract(smuggle_url(
  229. f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={bc_id}',
  230. {'referrer': url}))
  231. return info['formats'], info['subtitles']
  232. def _real_extract(self, url):
  233. display_id = self._match_id(url)
  234. api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'
  235. try:
  236. video_data = self._download_json(
  237. api_episode_url + '/videos', display_id, 'Downloading video JSON metadata',
  238. headers={'Client-Type': 'web'})['data'][0] # web client-type yields ad-free streams
  239. except ExtractorError as e:
  240. if isinstance(e.cause, HTTPError) and e.cause.status == 403:
  241. if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False:
  242. self.raise_login_required(
  243. f'{display_id} is only available for FIRST members')
  244. raise
  245. # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors
  246. m3u8_url = video_data['attributes']['url']
  247. is_brightcove = traverse_obj(video_data, ('attributes', 'encoding_pipeline')) == 'brightcove'
  248. bc_id = traverse_obj(video_data, ('attributes', 'uid', {str}))
  249. try:
  250. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  251. m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
  252. except ExtractorError as e:
  253. if is_brightcove and bc_id and isinstance(e.cause, HTTPError) and e.cause.status == 403:
  254. self.report_warning(
  255. 'Direct m3u8 URL returned HTTP Error 403; retrying with Brightcove extraction')
  256. formats, subtitles = self._extract_brightcove_formats_and_subtitles(bc_id, url, m3u8_url)
  257. else:
  258. raise
  259. episode = self._download_json(
  260. api_episode_url, display_id,
  261. 'Downloading episode JSON metadata')['data'][0]
  262. return {
  263. 'display_id': display_id,
  264. 'formats': formats,
  265. 'subtitles': subtitles,
  266. **self._extract_video_info(episode),
  267. }
  268. class RoosterTeethSeriesIE(RoosterTeethBaseIE):
  269. _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)'
  270. _TESTS = [{
  271. 'url': 'https://roosterteeth.com/series/rwby?season=7',
  272. 'playlist_count': 13,
  273. 'info_dict': {
  274. 'id': 'rwby-7',
  275. 'title': 'RWBY - Season 7',
  276. },
  277. }, {
  278. 'url': 'https://roosterteeth.com/series/the-weird-place',
  279. 'playlist_count': 7,
  280. 'info_dict': {
  281. 'id': 'the-weird-place',
  282. 'title': 'The Weird Place',
  283. },
  284. }, {
  285. 'url': 'https://roosterteeth.com/series/role-initiative',
  286. 'playlist_mincount': 16,
  287. 'info_dict': {
  288. 'id': 'role-initiative',
  289. 'title': 'Role Initiative',
  290. },
  291. }, {
  292. 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
  293. 'playlist_mincount': 50,
  294. 'info_dict': {
  295. 'id': 'let-s-play-minecraft-9',
  296. 'title': 'Let\'s Play Minecraft - Season 9',
  297. },
  298. }]
  299. def _entries(self, series_id, season_number):
  300. display_id = join_nonempty(series_id, season_number)
  301. def yield_episodes(data):
  302. for episode in traverse_obj(data, ('data', lambda _, v: v['canonical_links']['self'])):
  303. yield self.url_result(
  304. urljoin('https://www.roosterteeth.com', episode['canonical_links']['self']),
  305. RoosterTeethIE, **self._extract_video_info(episode))
  306. series_data = self._download_json(
  307. f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)
  308. for season_data in traverse_obj(series_data, ('data', lambda _, v: v['links']['episodes'])):
  309. idx = traverse_obj(season_data, ('attributes', 'number'))
  310. if season_number is not None and idx != season_number:
  311. continue
  312. yield from yield_episodes(self._download_json(
  313. urljoin(self._API_BASE, season_data['links']['episodes']), display_id,
  314. f'Downloading season {idx} JSON metadata', query={'per_page': 1000}))
  315. if season_number is None: # extract series-level bonus features
  316. yield from yield_episodes(self._download_json(
  317. f'{self._API_BASE_URL}/shows/{series_id}/bonus_features?order=asc&order_by&per_page=1000',
  318. display_id, 'Downloading bonus features JSON metadata', fatal=False))
  319. def _real_extract(self, url):
  320. series_id = self._match_id(url)
  321. season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)
  322. entries = LazyList(self._entries(series_id, season_number))
  323. return self.playlist_result(
  324. entries,
  325. join_nonempty(series_id, season_number),
  326. join_nonempty(entries[0].get('series'), season_number, delim=' - Season '))