teamcoco.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. import json
  2. import re
  3. from .turner import TurnerBaseIE
  4. from ..utils import (
  5. ExtractorError,
  6. clean_html,
  7. determine_ext,
  8. make_archive_id,
  9. merge_dicts,
  10. mimetype2ext,
  11. parse_duration,
  12. parse_qs,
  13. traverse_obj,
  14. unified_timestamp,
  15. url_or_none,
  16. urljoin,
  17. )
  18. class TeamcocoBaseIE(TurnerBaseIE):
  19. _QUALITIES = {
  20. 'low': (480, 272),
  21. 'sd': (640, 360),
  22. 'hd': (1280, 720),
  23. 'uhd': (1920, 1080),
  24. }
  25. def _get_formats_and_subtitles(self, info, video_id):
  26. formats, subtitles = [], {}
  27. for src in traverse_obj(info, ('src', ..., {dict})):
  28. format_id = src.get('label')
  29. src_url = src.get('src')
  30. if re.match(r'https?:/[^/]', src_url):
  31. src_url = src_url.replace(':/', '://', 1)
  32. ext = determine_ext(src_url, mimetype2ext(src.get('type')))
  33. if not format_id or not src_url:
  34. continue
  35. elif format_id == 'hls' or ext == 'm3u8':
  36. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  37. src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
  38. formats.extend(fmts)
  39. self._merge_subtitles(subs, target=subtitles)
  40. elif format_id in self._QUALITIES:
  41. if src_url.startswith('/mp4:protected/'):
  42. # TODO: Correct extraction for these files
  43. continue
  44. formats.append({
  45. 'url': src_url,
  46. 'ext': ext,
  47. 'format_id': format_id,
  48. 'width': self._QUALITIES[format_id][0],
  49. 'height': self._QUALITIES[format_id][1],
  50. })
  51. return formats, subtitles
  52. class TeamcocoIE(TeamcocoBaseIE):
  53. _VALID_URL = r'https?://(?:www\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
  54. _TESTS = [
  55. {
  56. 'url': 'http://teamcoco.com/video/mary-kay-remote',
  57. 'info_dict': {
  58. 'id': '80187',
  59. 'display_id': 'video_mary-kay-remote',
  60. 'ext': 'mp4',
  61. 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
  62. 'description': 'md5:9fb64e45b5aef6b2af1b67612b36c162',
  63. 'thumbnail': 'https://teamcoco.com/image/thumb?id=80187',
  64. 'upload_date': '20140402',
  65. 'timestamp': 1396440000,
  66. },
  67. 'params': {
  68. 'skip_download': 'm3u8',
  69. },
  70. }, {
  71. 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
  72. 'info_dict': {
  73. 'id': '19705',
  74. 'display_id': 'video_louis-ck-interview-george-w-bush',
  75. 'ext': 'mp4',
  76. 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
  77. 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
  78. 'thumbnail': 'https://teamcoco.com/image/thumb?id=19705',
  79. 'upload_date': '20111104',
  80. 'timestamp': 1320408000,
  81. },
  82. 'params': {
  83. 'skip_download': 'm3u8',
  84. },
  85. }, {
  86. 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
  87. 'info_dict': {
  88. 'id': '88748',
  89. 'display_id': 'video_timothy-olyphant-drinking-whiskey',
  90. 'ext': 'mp4',
  91. 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
  92. 'description': 'md5:15501f23f020e793aeca761205e42c24',
  93. 'upload_date': '20150415',
  94. 'timestamp': 1429099200,
  95. 'thumbnail': 'https://teamcoco.com/image/thumb?id=88748',
  96. },
  97. }, {
  98. 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
  99. 'info_dict': {
  100. 'id': '89341',
  101. 'ext': 'mp4',
  102. 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  103. 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  104. },
  105. 'skip': 'This video is no longer available.',
  106. }, {
  107. 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
  108. 'only_matching': True,
  109. }, {
  110. 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
  111. 'only_matching': True,
  112. }, {
  113. 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
  114. 'only_matching': True,
  115. }, {
  116. 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
  117. 'only_matching': True,
  118. },
  119. ]
  120. def _real_extract(self, url):
  121. display_id = self._match_id(url).replace('/', '_')
  122. webpage = self._download_webpage(url, display_id)
  123. data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
  124. info = merge_dicts(*traverse_obj(data, (
  125. 'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
  126. thumbnail = traverse_obj(
  127. info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
  128. video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
  129. formats, subtitles = self._get_formats_and_subtitles(info, video_id)
  130. return {
  131. 'id': video_id,
  132. 'display_id': display_id,
  133. 'formats': formats,
  134. 'subtitles': subtitles,
  135. 'thumbnail': thumbnail,
  136. **traverse_obj(info, {
  137. 'title': 'title',
  138. 'description': (('descriptionHtml', 'description'), {clean_html}),
  139. 'timestamp': ('publishedOn', {lambda x: f'{x} 12:00AM'}, {unified_timestamp}),
  140. }, get_all=False),
  141. }
  142. class ConanClassicIE(TeamcocoBaseIE):
  143. _VALID_URL = r'https?://(?:(?:www\.)?conanclassic|conan25\.teamcoco)\.com/(?P<id>([^/]+/)*[^/?#]+)'
  144. _TESTS = [{
  145. 'url': 'https://conanclassic.com/video/ice-cube-kevin-hart-conan-share-lyft',
  146. 'info_dict': {
  147. 'id': '74709',
  148. 'ext': 'mp4',
  149. 'title': 'Ice Cube, Kevin Hart, & Conan Share A Lyft Car',
  150. 'display_id': 'video/ice-cube-kevin-hart-conan-share-lyft',
  151. 'description': 'The stars of "Ride Along" teach Conan how to roll around Hollywood.',
  152. 'thumbnail': 'http://cdn.teamcococdn.com/image/640x360/lyft-5bd75f82b616c.png',
  153. 'duration': 570.0,
  154. 'upload_date': '20131211',
  155. 'timestamp': 1386721620,
  156. '_old_archive_ids': ['teamcoco 74709'],
  157. },
  158. 'params': {'skip_download': 'm3u8'},
  159. }, {
  160. 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
  161. 'only_matching': True,
  162. }]
  163. _GRAPHQL_QUERY = '''query find($id: ID!) {
  164. findRecord(id: $id) {
  165. ... on MetaInterface {
  166. id
  167. title
  168. teaser
  169. publishOn
  170. slug
  171. thumb {
  172. ... on FileInterface {
  173. id
  174. path
  175. preview
  176. mime
  177. }
  178. }
  179. }
  180. ... on Video {
  181. videoType
  182. duration
  183. isLive
  184. youtubeId
  185. turnerMediaId
  186. turnerMediaAuthToken
  187. airDate
  188. }
  189. ... on Episode {
  190. airDate
  191. seasonNumber
  192. episodeNumber
  193. guestNames
  194. }
  195. }
  196. findRecordVideoMetadata(id: $id) {
  197. turnerMediaId
  198. turnerMediaAuthToken
  199. duration
  200. src
  201. }
  202. }'''
  203. def _real_extract(self, url):
  204. display_id = self._match_id(url)
  205. webpage = self._download_webpage(url, display_id)
  206. data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
  207. video_id = traverse_obj(
  208. data, ('blocks', ..., 'props', 'fieldDefs', lambda _, v: v['name'] == 'incomingVideoId', 'value'),
  209. ('blocks', ..., 'props', 'fields', 'incomingVideoRecord', 'id'), get_all=False)
  210. if not video_id:
  211. self.raise_no_formats('Unable to extract video ID from webpage', expected=True)
  212. response = self._download_json(
  213. 'https://conanclassic.com/api/legacy/graphql', video_id, data=json.dumps({
  214. 'query': self._GRAPHQL_QUERY,
  215. 'variables': {'id': video_id},
  216. }, separators=(',', ':')).encode(), headers={
  217. 'Content-Type': 'application/json',
  218. })
  219. info = traverse_obj(response, ('data', 'findRecord', {
  220. 'title': 'title',
  221. 'description': 'teaser',
  222. 'thumbnail': ('thumb', 'preview', {url_or_none}),
  223. 'duration': ('duration', {parse_duration}),
  224. 'timestamp': ('publishOn', {unified_timestamp}),
  225. }))
  226. media_id = traverse_obj(
  227. response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaId'), get_all=False)
  228. if media_id:
  229. token = traverse_obj(
  230. response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaAuthToken'), get_all=False)
  231. if not token:
  232. raise ExtractorError('No Turner Media auth token found in API response')
  233. self._initialize_geo_bypass({
  234. 'countries': ['US'],
  235. })
  236. info.update(self._extract_ngtv_info(media_id, {
  237. 'accessToken': token,
  238. 'accessTokenType': 'jws',
  239. }))
  240. else:
  241. formats, subtitles = self._get_formats_and_subtitles(
  242. traverse_obj(response, ('data', 'findRecordVideoMetadata')), video_id)
  243. info.update({
  244. 'formats': formats,
  245. 'subtitles': subtitles,
  246. })
  247. return {
  248. 'id': video_id,
  249. 'display_id': display_id,
  250. '_old_archive_ids': [make_archive_id('Teamcoco', video_id)],
  251. **info,
  252. }