nba.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. import functools
  2. import re
  3. import urllib.parse
  4. from .turner import TurnerBaseIE
  5. from ..utils import (
  6. OnDemandPagedList,
  7. int_or_none,
  8. merge_dicts,
  9. parse_duration,
  10. parse_iso8601,
  11. parse_qs,
  12. try_get,
  13. update_url_query,
  14. urljoin,
  15. )
  class NBACVPBaseIE(TurnerBaseIE):
      """Base class for NBA extractors that pull data through the CVP helper."""

      def _extract_nba_cvp_info(self, path, video_id, fatal=False):
          """Run ``_extract_cvp_info`` on ``path`` relative to secure.nba.com.

          ``path`` is appended to ``http://secure.nba.com/``; ``fatal`` is
          forwarded unchanged to ``_extract_cvp_info``, whose result is returned.
          """
          cvp_url = f'http://secure.nba.com/{path}'
          # Media source base URL for each path-data key handed to the helper.
          media_sources = {
              'default': {
                  'media_src': 'http://nba.cdn.turner.com/nba/big',
              },
              'm3u8': {
                  'media_src': 'http://nbavod-f.akamaihd.net',
              },
          }
          return self._extract_cvp_info(
              cvp_url, video_id, media_sources, fatal=fatal)
  class NBAWatchBaseIE(NBACVPBaseIE):
      """Shared logic for the nba.com/watch and watch.nba.com extractors."""

      _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'

      def _extract_video(self, filter_key, filter_value):
          """Look up one program by ``filter_key:filter_value`` and build its info dict.

          The first document returned by the search endpoint is used. Formats
          come from the publishpoint service (HLS plus derived HTTP variants)
          and, when the SEO name contains a date path, from the CVP descriptor.
          """
          search_result = self._download_json(
              'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
              filter_value, query={
                  'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
                  'q': ':'.join((filter_key, filter_value)),
                  'wt': 'json',
              })
          video = search_result['response']['docs'][0]
          video_id = str(video['pid'])
          title = video['name']

          # The publishpoint request is non-fatal: a failed download yields {}.
          publish_point = self._download_json(
              'https://watch.nba.com/service/publishpoint', video_id, query={
                  'type': 'video',
                  'format': 'json',
                  'id': video_id,
              }, headers={
                  'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
              }, fatal=False) or {}
          m3u8_url = publish_point.get('path')

          formats = []
          if m3u8_url:
              # Strip the "_pc"/"_iphone" marker that precedes a dot in the URL.
              hls_formats = self._extract_m3u8_formats(
                  re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
                  'm3u8_native', m3u8_id='hls', fatal=False)
              formats.extend(hls_formats)
              # Every HLS format also gets a plain-HTTP twin whose URL simply
              # drops the ".m3u8" suffix.
              formats.extend({
                  **hls_format,
                  'format_id': hls_format['format_id'].replace('hls-', 'http-'),
                  'protocol': 'http',
                  'url': hls_format['url'].replace('.m3u8', ''),
              } for hls_format in hls_formats)

          info = {
              'id': video_id,
              'title': title,
              'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
              'description': video.get('description'),
              'duration': int_or_none(video.get('runtime')),
              'timestamp': parse_iso8601(video.get('releaseDate')),
              'tags': video.get('tags'),
          }

          seo_name = video.get('seoName')
          # Only SEO names containing a YYYY/MM/DD/ segment are tried against CVP.
          if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
              team_prefix = (
                  seo_name.split('/')[1] + '/'
                  if seo_name.startswith('teams/') else '')
              cvp_info = self._extract_nba_cvp_info(
                  f'{team_prefix}video/{seo_name}.xml', video_id, False)
              if cvp_info:
                  formats.extend(cvp_info['formats'])
                  info = merge_dicts(info, cvp_info)

          info['formats'] = formats
          return info
  class NBAWatchEmbedIE(NBAWatchBaseIE):
      """Extractor for ``embed?id=<pid>`` URLs on the NBA watch sites."""

      IE_NAME = 'nba:watch:embed'
      _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
      _TESTS = [{
          'url': 'http://watch.nba.com/embed?id=659395',
          'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
          'info_dict': {
              'id': '659395',
              'ext': 'mp4',
              'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
              'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
              'timestamp': 1492228800,
              'upload_date': '20170415',
          },
      }]

      def _real_extract(self, url):
          """Resolve the numeric id in ``url`` through a ``pid`` lookup."""
          return self._extract_video('pid', self._match_id(url))
  class NBAWatchIE(NBAWatchBaseIE):
      """Extractor for ``video/<seoName>`` pages on the NBA watch sites."""

      IE_NAME = 'nba:watch'
      _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
      _TESTS = [{
          'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
          'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
          'info_dict': {
              'id': '70946',
              'ext': 'mp4',
              'title': 'Thunder vs. Nets',
              'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
              'duration': 181,
              'timestamp': 1354597200,
              'upload_date': '20121204',
          },
      }, {
          'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
          'only_matching': True,
      }, {
          'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
          'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
          'info_dict': {
              'id': '330865',
              'ext': 'mp4',
              'title': 'Hawks vs. Cavaliers Game 1',
              'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
              'duration': 228,
              'timestamp': 1432094400,
              'upload_date': '20150521',
          },
      }, {
          'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
          'only_matching': True,
      }, {
          # only CVP mp4 format available
          'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
          'only_matching': True,
      }, {
          'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Return the single video, or defer to the collection extractor.

          The ``collection`` query parameter, when present, names a playlist;
          ``_yes_playlist`` decides which of the two is wanted.
          """
          display_id = self._match_id(url)
          collection_id = parse_qs(url).get('collection', [None])[0]

          if not self._yes_playlist(collection_id, display_id):
              return self._extract_video('seoName', display_id)

          return self.url_result(
              f'https://www.nba.com/watch/list/collection/{collection_id}',
              NBAWatchCollectionIE.ie_key(), collection_id)
  class NBAWatchCollectionIE(NBAWatchBaseIE):
      """Playlist extractor for ``list/collection/<id>`` pages."""

      IE_NAME = 'nba:watch:collection'
      _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
      _TESTS = [{
          'url': 'https://watch.nba.com/list/collection/season-preview-2020',
          'info_dict': {
              'id': 'season-preview-2020',
          },
          'playlist_mincount': 43,
      }]
      _PAGE_SIZE = 100

      def _fetch_page(self, collection_id, page):
          """Yield URL entries for one page of the collection.

          ``page`` is passed in zero-based and sent to the API as ``page + 1``.
          Videos without a ``seoName`` or ``slug`` are skipped.
          """
          page_number = page + 1
          listing = self._download_json(
              'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
              collection_id, f'Downloading page {page_number} JSON metadata', query={
                  'count': self._PAGE_SIZE,
                  'page': page_number,
              })
          for video in listing['results']['videos']:
              program = video.get('program') or {}
              seo_name = program.get('seoName') or program.get('slug')
              if seo_name:
                  yield {
                      '_type': 'url',
                      'id': program.get('id'),
                      'title': program.get('title') or video.get('title'),
                      'url': 'https://www.nba.com/watch/video/' + seo_name,
                      'thumbnail': video.get('image'),
                      'description': program.get('description') or video.get('description'),
                      'duration': parse_duration(program.get('runtimeHours')),
                      'timestamp': parse_iso8601(video.get('releaseDate')),
                  }

      def _real_extract(self, url):
          """Return a playlist whose pages are fetched through ``_fetch_page``."""
          collection_id = self._match_id(url)
          fetch_page = functools.partial(self._fetch_page, collection_id)
          return self.playlist_result(
              OnDemandPagedList(fetch_page, self._PAGE_SIZE), collection_id)
  class NBABaseIE(NBACVPBaseIE):
      """Common base for the team-site (``nba.com/<team>/...``) extractors.

      The inherited ``_real_extract`` expects the concrete class to provide:

      * ``_VALID_URL`` with exactly two groups, team first and id second;
      * ``_CONTENT_ID_REGEX``, the page key whose quoted value is the content id;
      * ``_extract_url_results(team, content_id)``, which builds the result.
      """

      # Verbose-mode pattern: whitespace is insignificant, "#" must be escaped.
      _VALID_URL_BASE = r'''(?x)
          https?://(?:www\.)?nba\.com/
              (?P<team>
                  blazers|
                  bucks|
                  bulls|
                  cavaliers|
                  celtics|
                  clippers|
                  grizzlies|
                  hawks|
                  heat|
                  hornets|
                  jazz|
                  kings|
                  knicks|
                  lakers|
                  magic|
                  mavericks|
                  nets|
                  nuggets|
                  pacers|
                  pelicans|
                  pistons|
                  raptors|
                  rockets|
                  sixers|
                  spurs|
                  suns|
                  thunder|
                  timberwolves|
                  warriors|
                  wizards
              )
          (?:/play\#)?/'''
      _CHANNEL_PATH_REGEX = r'video/channel|series'

      def _embed_url_result(self, team, content_id):
          """Return a URL result pointing at the embed iframe for ``content_id``."""
          return self.url_result(update_url_query(
              'https://secure.nba.com/assets/amp/include/video/iframe.html', {
                  'contentId': content_id,
                  'team': team,
              }), NBAEmbedIE.ie_key())

      def _call_api(self, team, content_id, query, resource):
          """Query the api.nba.net video endpoint for ``team``.

          ``resource`` only labels the download note. Returns the
          ``response.result`` value of the JSON reply.
          """
          return self._download_json(
              f'https://api.nba.net/2/{team}/video,imported_video,wsc/',
              content_id, f'Download {resource} JSON metadata',
              query=query, headers={
                  'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
              })['response']['result']

      def _extract_video(self, video, team, extract_all=True):
          """Build an info dict from one API ``video`` record.

          ``team`` is the caller's team slug; it is used only when the record
          has no usable ``brand`` value. With ``extract_all`` true, the source,
          HLS and CVP formats are probed as well. Otherwise only the direct
          ``mp4`` URL is collected and the info dict is merged with a URL
          result for the embed page.

          Raises ``KeyError`` when ``nid`` is missing, when no title field is
          present, or (with ``extract_all`` false) when ``videoId`` is missing.
          """
          video_id = str(video['nid'])
          # Prefer the brand reported by the API, but do not discard the
          # caller-supplied team (or crash) when the record omits it.
          team = video.get('brand') or team

          info = {
              'id': video_id,
              'title': video.get('title') or video.get('headline') or video['shortHeadline'],
              'description': video.get('description'),
              'timestamp': parse_iso8601(video.get('published')),
          }

          subtitles = {}
          captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
          # Every sidecar caption is filed under 'en'.
          for caption_url in captions.values():
              subtitles.setdefault('en', []).append({'url': caption_url})

          formats = []
          mp4_url = video.get('mp4')
          if mp4_url:
              formats.append({
                  'url': mp4_url,
              })

          if extract_all:
              source_url = video.get('videoSource')
              # s3:// sources are skipped; other URLs are checked via _is_valid_url.
              if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
                  formats.append({
                      'format_id': 'source',
                      'url': source_url,
                      'quality': 1,
                  })

              m3u8_url = video.get('m3u8')
              if m3u8_url:
                  if '.akamaihd.net/i/' in m3u8_url:
                      formats.extend(self._extract_akamai_formats(
                          m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
                  else:
                      formats.extend(self._extract_m3u8_formats(
                          m3u8_url, video_id, 'mp4',
                          'm3u8_native', m3u8_id='hls', fatal=False))

              content_xml = video.get('contentXml')
              if team and content_xml:
                  cvp_info = self._extract_nba_cvp_info(
                      team + content_xml, video_id, fatal=False)
                  if cvp_info:
                      formats.extend(cvp_info['formats'])
                      subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
                      info = merge_dicts(info, cvp_info)
          else:
              info.update(self._embed_url_result(team, video['videoId']))

          info.update({
              'formats': formats,
              'subtitles': subtitles,
          })

          return info

      def _real_extract(self, url):
          """Resolve the content id for ``url`` and hand it to the subclass.

          For ``/play#/`` URLs the id is the URL-decoded match group. For other
          URLs the page is downloaded and the id is read from the quoted value
          following ``_CONTENT_ID_REGEX``.
          """
          team, display_id = self._match_valid_url(url).groups()
          if '/play#/' in url:
              display_id = urllib.parse.unquote(display_id)
          else:
              webpage = self._download_webpage(url, display_id)
              display_id = self._search_regex(
                  self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
          return self._extract_url_results(team, display_id)
  class NBAEmbedIE(NBABaseIE):
      """Extractor for the secure.nba.com embed iframe pages."""

      IE_NAME = 'nba:embed'
      _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
      _TESTS = [{
          'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
          'only_matching': True,
      }, {
          'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Extract via the team API, or redirect to the watch site.

          When the ``team`` query parameter is present and non-empty, the first
          API result for the content id is extracted; otherwise the content id
          is handed to ``NBAWatchIE`` as a watch.nba.com URL.
          """
          params = parse_qs(url)
          content_id = params['contentId'][0]
          team = params.get('team', [None])[0]

          if team:
              results = self._call_api(
                  team, content_id, {'videoid': content_id}, 'video')
              return self._extract_video(results[0], team)

          return self.url_result(
              'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
  class NBAIE(NBABaseIE):
      """Extractor for single videos on nba.com team sites."""

      IE_NAME = 'nba'
      # Key in the page source whose quoted value is the content id.
      _CONTENT_ID_REGEX = r'videoID'
      # The negative lookahead keeps channel/series paths out of this extractor.
      _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)'
      _TESTS = [{
          'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
          'info_dict': {
              'id': '45039',
              'ext': 'mp4',
              'title': 'AND WE BACK.',
              'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
              'duration': 94,
              'timestamp': 1607112000,
              'upload_date': '20201218',
          },
      }, {
          'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
          'only_matching': True,
      }, {
          'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
          'only_matching': True,
      }]

      def _extract_url_results(self, team, content_id):
          """Hand the resolved content id to the embed extractor."""
          return self._embed_url_result(team, content_id)
  class NBAChannelIE(NBABaseIE):
      """Playlist extractor for team-site channel and series pages."""

      IE_NAME = 'nba:channel'
      _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)'
      _TESTS = [{
          'url': 'https://www.nba.com/blazers/video/channel/summer_league',
          'info_dict': {
              'title': 'Summer League',
          },
          'playlist_mincount': 138,
      }, {
          'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
          'only_matching': True,
      }]
      # Key in the page source whose quoted value is the channel name.
      _CONTENT_ID_REGEX = r'videoSubCategory'
      _PAGE_SIZE = 100

      def _fetch_page(self, team, channel, page):
          """Yield info dicts for one zero-based ``page`` of ``channel``."""
          offset = page * self._PAGE_SIZE
          page_results = self._call_api(team, channel, {
              'channels': channel,
              'count': self._PAGE_SIZE,
              'offset': offset,
          }, f'page {page + 1}')
          # extract_all=False: entries only carry the direct mp4 plus embed URL.
          yield from (
              self._extract_video(video, team, False) for video in page_results)

      def _extract_url_results(self, team, content_id):
          """Return a paged playlist titled with the channel id."""
          fetch_page = functools.partial(self._fetch_page, team, content_id)
          return self.playlist_result(
              OnDemandPagedList(fetch_page, self._PAGE_SIZE),
              playlist_title=content_id)
  370. return self.playlist_result(entries, playlist_title=content_id)