cbsnews.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. import base64
  2. import re
  3. import urllib.parse
  4. import zlib
  5. from .anvato import AnvatoIE
  6. from .common import InfoExtractor
  7. from .paramountplus import ParamountPlusIE
  8. from ..networking import HEADRequest
  9. from ..utils import (
  10. ExtractorError,
  11. UserNotLive,
  12. determine_ext,
  13. float_or_none,
  14. format_field,
  15. int_or_none,
  16. make_archive_id,
  17. mimetype2ext,
  18. parse_duration,
  19. smuggle_url,
  20. traverse_obj,
  21. url_or_none,
  22. )
  23. class CBSNewsBaseIE(InfoExtractor):
  24. _LOCALES = {
  25. 'atlanta': None,
  26. 'baltimore': 'BAL',
  27. 'boston': 'BOS',
  28. 'chicago': 'CHI',
  29. 'colorado': 'DEN',
  30. 'detroit': 'DET',
  31. 'losangeles': 'LA',
  32. 'miami': 'MIA',
  33. 'minnesota': 'MIN',
  34. 'newyork': 'NY',
  35. 'philadelphia': 'PHI',
  36. 'pittsburgh': 'PIT',
  37. 'sacramento': 'SAC',
  38. 'sanfrancisco': 'SF',
  39. 'texas': 'DAL',
  40. }
  41. _LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
  42. _ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'
  43. def _get_item(self, webpage, display_id):
  44. return traverse_obj(self._search_json(
  45. r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
  46. default={}), ('items', 0, {dict})) or {}
  47. def _get_video_url(self, item):
  48. return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)
  49. def _extract_playlist(self, webpage, playlist_id):
  50. entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
  51. r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
  52. if entries:
  53. return self.playlist_result(
  54. entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
  55. self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
  56. def _extract_video(self, item, video_url, video_id):
  57. if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
  58. formats = [{'url': video_url, 'ext': 'mp4'}]
  59. else:
  60. manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')
  61. anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
  62. # Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
  63. if anvato_id:
  64. return self.url_result(
  65. smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
  66. AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
  67. formats, _ = self._parse_m3u8_formats_and_subtitles(
  68. manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)
  69. def get_subtitles(subs_url):
  70. return {
  71. 'en': [{
  72. 'url': subs_url,
  73. 'ext': 'dfxp', # TTAF1
  74. }],
  75. } if url_or_none(subs_url) else None
  76. episode_meta = traverse_obj(item, {
  77. 'season_number': ('season', {int_or_none}),
  78. 'episode_number': ('episode', {int_or_none}),
  79. }) if item.get('isFullEpisode') else {}
  80. return {
  81. 'id': video_id,
  82. 'formats': formats,
  83. **traverse_obj(item, {
  84. 'title': (None, ('fulltitle', 'title')),
  85. 'description': 'dek',
  86. 'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
  87. 'duration': ('duration', {float_or_none}),
  88. 'subtitles': ('captions', {get_subtitles}),
  89. 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
  90. 'is_live': ('type', {lambda x: x == 'live'}),
  91. }, get_all=False),
  92. **episode_meta,
  93. }
  94. class CBSNewsEmbedIE(CBSNewsBaseIE):
  95. IE_NAME = 'cbsnews:embed'
  96. _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
  97. _TESTS = [{
  98. 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
  99. 'info_dict': {
  100. 'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA',
  101. 'ext': 'mp4',
  102. 'title': 'Cops investigate gorilla incident at Cincinnati Zoo',
  103. 'description': 'md5:fee7441ab8aaeb3c693482394738102b',
  104. 'duration': 350,
  105. 'timestamp': 1464719713,
  106. 'upload_date': '20160531',
  107. 'thumbnail': r're:^https?://.*\.jpg$',
  108. },
  109. 'params': {'skip_download': 'm3u8'},
  110. }]
  111. def _real_extract(self, url):
  112. item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
  113. urllib.parse.unquote(self._match_id(url))),
  114. -zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}
  115. video_id = item['mpxRefId']
  116. video_url = self._get_video_url(item)
  117. if not video_url:
  118. # Old embeds redirect user to ParamountPlus but most links are 404
  119. pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
  120. try:
  121. self._request_webpage(HEADRequest(pplus_url), video_id)
  122. return self.url_result(pplus_url, ParamountPlusIE)
  123. except ExtractorError:
  124. self.raise_no_formats('This video is no longer available', True, video_id)
  125. return self._extract_video(item, video_url, video_id)
  126. class CBSNewsIE(CBSNewsBaseIE):
  127. IE_NAME = 'cbsnews'
  128. IE_DESC = 'CBS News'
  129. _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\w-]+)'
  130. _TESTS = [
  131. {
  132. # 60 minutes
  133. 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
  134. 'info_dict': {
  135. 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
  136. 'ext': 'flv',
  137. 'title': 'Artificial Intelligence, real-life applications',
  138. 'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
  139. 'thumbnail': r're:^https?://.*\.jpg$',
  140. 'duration': 317,
  141. 'uploader': 'CBSI-NEW',
  142. 'timestamp': 1476046464,
  143. 'upload_date': '20161009',
  144. },
  145. 'skip': 'This video is no longer available',
  146. },
  147. {
  148. 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
  149. 'info_dict': {
  150. 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
  151. 'ext': 'mp4',
  152. 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
  153. 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
  154. 'upload_date': '20140404',
  155. 'timestamp': 1396650660,
  156. 'thumbnail': r're:^https?://.*\.jpg$',
  157. 'duration': 205,
  158. 'subtitles': {
  159. 'en': [{
  160. 'ext': 'dfxp',
  161. }],
  162. },
  163. },
  164. 'params': {
  165. 'skip_download': 'm3u8',
  166. },
  167. },
  168. {
  169. # 48 hours
  170. 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
  171. 'info_dict': {
  172. 'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved',
  173. 'title': 'Cold as Ice',
  174. 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
  175. },
  176. 'playlist_mincount': 7,
  177. },
  178. {
  179. 'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/',
  180. 'info_dict': {
  181. 'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE',
  182. 'ext': 'mp4',
  183. 'title': 'CBS Evening News, March 28, 2023',
  184. 'description': 'md5:db20615aae54adc1d55a1fd69dc75d13',
  185. 'duration': 1189,
  186. 'timestamp': 1680042600,
  187. 'upload_date': '20230328',
  188. 'season': 'Season 2023',
  189. 'season_number': 2023,
  190. 'episode': 'Episode 83',
  191. 'episode_number': 83,
  192. 'thumbnail': r're:^https?://.*\.jpg$',
  193. },
  194. 'params': {
  195. 'skip_download': 'm3u8',
  196. },
  197. },
  198. ]
  199. def _real_extract(self, url):
  200. display_id = self._match_id(url)
  201. webpage = self._download_webpage(url, display_id)
  202. playlist = self._extract_playlist(webpage, display_id)
  203. if playlist:
  204. return playlist
  205. item = self._get_item(webpage, display_id)
  206. video_id = item.get('mpxRefId') or display_id
  207. video_url = self._get_video_url(item)
  208. if not video_url:
  209. self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
  210. return self._extract_video(item, video_url, video_id)
  211. class CBSLocalBaseIE(CBSNewsBaseIE):
  212. def _real_extract(self, url):
  213. display_id = self._match_id(url)
  214. webpage = self._download_webpage(url, display_id)
  215. item = self._get_item(webpage, display_id)
  216. video_id = item.get('mpxRefId') or display_id
  217. anvato_id = None
  218. video_url = self._get_video_url(item)
  219. if not video_url:
  220. anv_params = self._search_regex(
  221. r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
  222. webpage, 'Anvato URL', default=None)
  223. if not anv_params:
  224. playlist = self._extract_playlist(webpage, display_id)
  225. if playlist:
  226. return playlist
  227. self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
  228. anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
  229. anvato_id = anv_data['v']
  230. return self.url_result(
  231. smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
  232. 'token': anv_data.get('token') or 'default',
  233. }), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
  234. return self._extract_video(item, video_url, video_id)
  235. class CBSLocalIE(CBSLocalBaseIE):
  236. _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
  237. _TESTS = [{
  238. # Anvato video via defaultPayload JSON
  239. 'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
  240. 'info_dict': {
  241. 'id': '6376747',
  242. 'ext': 'mp4',
  243. 'title': '1st cannabis dispensary opens in Queens',
  244. 'description': 'The dispensary is women-owned and located in Jamaica.',
  245. 'uploader': 'CBS',
  246. 'duration': 20,
  247. 'timestamp': 1680193657,
  248. 'upload_date': '20230330',
  249. 'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
  250. 'tags': 'count:11',
  251. 'thumbnail': 're:^https?://.*',
  252. '_old_archive_ids': ['cbslocal 6376747'],
  253. },
  254. 'params': {'skip_download': 'm3u8'},
  255. }, {
  256. # cbsnews.com video via defaultPayload JSON
  257. 'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
  258. 'info_dict': {
  259. 'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
  260. 'ext': 'mp4',
  261. 'title': 'the city is sounding the alarm on dangerous social media challenges',
  262. 'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
  263. 'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
  264. 'duration': 41.0,
  265. 'timestamp': 1680196615,
  266. 'upload_date': '20230330',
  267. },
  268. 'params': {'skip_download': 'm3u8'},
  269. }]
  270. class CBSLocalArticleIE(CBSLocalBaseIE):
  271. _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
  272. _TESTS = [{
  273. # Anvato video via iframe embed
  274. 'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
  275. 'playlist_count': 2,
  276. 'info_dict': {
  277. 'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
  278. 'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
  279. 'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
  280. },
  281. }, {
  282. 'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
  283. 'md5': 'f0ee3081e3843f575fccef901199b212',
  284. 'info_dict': {
  285. 'id': '3401037',
  286. 'ext': 'mp4',
  287. 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
  288. 'thumbnail': 're:^https?://.*',
  289. 'timestamp': 1463440500,
  290. 'upload_date': '20160516',
  291. },
  292. 'skip': 'Video has been removed',
  293. }]
  294. class CBSNewsLiveBaseIE(CBSNewsBaseIE):
  295. def _get_id(self, url):
  296. raise NotImplementedError('This method must be implemented by subclasses')
  297. def _real_extract(self, url):
  298. video_id = self._get_id(url)
  299. if not video_id:
  300. raise ExtractorError('Livestream is not available', expected=True)
  301. data = traverse_obj(self._download_json(
  302. 'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
  303. 'partner': 'cbsnsite',
  304. 'edition': video_id,
  305. 'type': 'live',
  306. }), ('navigation', 'data', 0, {dict}))
  307. video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
  308. if not video_url:
  309. raise UserNotLive(video_id=video_id)
  310. formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
  311. return {
  312. 'id': video_id,
  313. 'formats': formats,
  314. 'subtitles': subtitles,
  315. 'is_live': True,
  316. **traverse_obj(data, {
  317. 'title': 'headline',
  318. 'description': 'rundown_slug',
  319. 'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
  320. }),
  321. }
  322. class CBSLocalLiveIE(CBSNewsLiveBaseIE):
  323. _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
  324. _TESTS = [{
  325. 'url': 'https://www.cbsnews.com/losangeles/live/',
  326. 'info_dict': {
  327. 'id': 'CBSN-LA',
  328. 'ext': 'mp4',
  329. 'title': str,
  330. 'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
  331. 'thumbnail': r're:^https?://.*\.jpg$',
  332. 'live_status': 'is_live',
  333. },
  334. 'params': {'skip_download': 'm3u8'},
  335. }]
  336. def _get_id(self, url):
  337. return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
  338. class CBSNewsLiveIE(CBSNewsLiveBaseIE):
  339. IE_NAME = 'cbsnews:live'
  340. IE_DESC = 'CBS News Livestream'
  341. _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
  342. _TESTS = [{
  343. 'url': 'https://www.cbsnews.com/live/',
  344. 'info_dict': {
  345. 'id': 'CBSN-US',
  346. 'ext': 'mp4',
  347. 'title': str,
  348. 'description': r're:\w+ \w+ CRISPIN RUNDOWN',
  349. 'thumbnail': r're:^https?://.*\.jpg$',
  350. 'live_status': 'is_live',
  351. },
  352. 'params': {'skip_download': 'm3u8'},
  353. }]
  354. def _get_id(self, url):
  355. return 'CBSN-US'
  356. class CBSNewsLiveVideoIE(InfoExtractor):
  357. IE_NAME = 'cbsnews:livevideo'
  358. IE_DESC = 'CBS News Live Videos'
  359. _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
  360. # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
  361. _TESTS = [{
  362. 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
  363. 'info_dict': {
  364. 'id': 'clinton-sanders-prepare-to-face-off-in-nh',
  365. 'ext': 'mp4',
  366. 'title': 'Clinton, Sanders Prepare To Face Off In NH',
  367. 'duration': 334,
  368. },
  369. 'skip': 'Video gone',
  370. }]
  371. def _real_extract(self, url):
  372. display_id = self._match_id(url)
  373. video_info = self._download_json(
  374. 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
  375. 'device': 'desktop',
  376. 'dvr_slug': display_id,
  377. })
  378. return {
  379. 'id': display_id,
  380. 'display_id': display_id,
  381. 'formats': self._extract_akamai_formats(video_info['url'], display_id),
  382. **traverse_obj(video_info, {
  383. 'title': 'headline',
  384. 'thumbnail': ('thumbnail_url_hd', {url_or_none}),
  385. 'duration': ('segmentDur', {parse_duration}),
  386. }),
  387. }