niconico.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. import datetime as dt
  2. import functools
  3. import itertools
  4. import json
  5. import re
  6. import time
  7. import urllib.parse
  8. from .common import InfoExtractor, SearchInfoExtractor
  9. from ..networking import Request
  10. from ..networking.exceptions import HTTPError
  11. from ..utils import (
  12. ExtractorError,
  13. OnDemandPagedList,
  14. clean_html,
  15. float_or_none,
  16. int_or_none,
  17. join_nonempty,
  18. parse_duration,
  19. parse_iso8601,
  20. parse_resolution,
  21. qualities,
  22. remove_start,
  23. str_or_none,
  24. traverse_obj,
  25. try_get,
  26. unescapeHTML,
  27. update_url_query,
  28. url_or_none,
  29. urlencode_postdata,
  30. urljoin,
  31. )
class NiconicoIE(InfoExtractor):
    """Extractor for single nicovideo.jp / nico.ms watch pages (see _VALID_URL)."""

    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    # Countries passed to raise_geo_restricted() when the site reports a region lock.
    _GEO_COUNTRIES = ['JP']
    # NOTE(review): presumably disables the framework's geo-bypass for this site - confirm in InfoExtractor.
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'thumbnail': r're:https?://.*',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
            'view_count': int,
            'comment_count': int,
            'genres': ['未設定'],
            'tags': [],
            'expected_protocol': str,
        },
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'mp4',
            'title': '【Kagamine Rin】Dance on media【Original】take2!',
            'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
            'thumbnail': r're:https?://.*',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 208.0,
            'comment_count': int,
            'view_count': int,
            'genres': ['音楽・サウンド'],
            'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
            'expected_protocol': str,
        },
    }, {
        # video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'thumbnail': r're:https?://.*',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
            'duration': 304,
            'view_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'thumbnail': r're:https?://.*',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
        'md5': 'f95a3d259172667b293530cc2e41ebda',
        'info_dict': {
            'id': 'sm1151009',
            'ext': 'mp4',
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
            'description': 'md5:f95a3d259172667b293530cc2e41ebda',
            'thumbnail': r're:https?://.*',
            'duration': 184,
            'timestamp': 1190835883,
            'upload_date': '20070926',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'view_count': int,
            'comment_count': int,
            'genres': ['ゲーム'],
            'tags': [],
            'expected_protocol': str,
        },
    }, {
        # "New" HTML5 video
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
        'info_dict': {
            'id': 'sm31464864',
            'ext': 'mp4',
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498481660,
            'upload_date': '20170626',
            'uploader': 'no-namamae',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
            'view_count': int,
            'comment_count': int,
            'genres': ['アニメ'],
            'tags': [],
            'expected_protocol': str,
        },
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
        'info_dict': {
            'id': 'sm18238488',
            'ext': 'mp4',
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
            'timestamp': 1341128008,
            'upload_date': '20120701',
            'thumbnail': r're:https?://.*',
            'duration': 5271,
            'view_count': int,
            'comment_count': int,
            'genres': ['エンターテイメント'],
            'tags': [],
            'expected_protocol': str,
        },
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
    }, {
        'note': 'a video that is only served as an ENCRYPTED HLS.',
        'url': 'https://www.nicovideo.jp/watch/so38016254',
        'only_matching': True,
    }]

    # Matches www./secure./sp.nicovideo.jp/watch/<id> and nico.ms/<id>;
    # the id is an optional two-letter prefix (e.g. "sm", "nm", "so") followed by digits.
    _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

    # Headers sent with the nvapi "watch" tracking request issued by the
    # heartbeat ping in _get_heartbeat_info().
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us',
        'Referer': 'https://www.nicovideo.jp/',
        'Origin': 'https://www.nicovideo.jp',
    }
  186. def _perform_login(self, username, password):
  187. login_ok = True
  188. login_form_strs = {
  189. 'mail_tel': username,
  190. 'password': password,
  191. }
  192. self._request_webpage(
  193. 'https://account.nicovideo.jp/login', None,
  194. note='Acquiring Login session')
  195. page = self._download_webpage(
  196. 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
  197. note='Logging in', errnote='Unable to log in',
  198. data=urlencode_postdata(login_form_strs),
  199. headers={
  200. 'Referer': 'https://account.nicovideo.jp/login',
  201. 'Content-Type': 'application/x-www-form-urlencoded',
  202. })
  203. if 'oneTimePw' in page:
  204. post_url = self._search_regex(
  205. r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
  206. page = self._download_webpage(
  207. urljoin('https://account.nicovideo.jp', post_url), None,
  208. note='Performing MFA', errnote='Unable to complete MFA',
  209. data=urlencode_postdata({
  210. 'otp': self._get_tfa_info('6 digits code'),
  211. }), headers={
  212. 'Content-Type': 'application/x-www-form-urlencoded',
  213. })
  214. if 'oneTimePw' in page or 'formError' in page:
  215. err_msg = self._html_search_regex(
  216. r'formError["\']+>(.*?)</div>', page, 'form_error',
  217. default='There\'s an error but the message can\'t be parsed.',
  218. flags=re.DOTALL)
  219. self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
  220. return False
  221. login_ok = 'class="notice error"' not in page
  222. if not login_ok:
  223. self.report_warning('Unable to log in: bad username or password')
  224. return login_ok
    def _get_heartbeat_info(self, info_dict):
        """Open a DMC session for a format and build its heartbeat description.

        `info_dict` is a format dict as produced by _extract_format_for_quality():
        its 'url' has the form 'niconico_dmc:<video_id>/<video_src_id>/<audio_src_id>'
        and 'expected_protocol' names the DMC protocol ('http' or 'hls').

        The dict is updated in place ('url' becomes the session's content URI,
        'protocol' the matching downloader protocol) and returned together with
        a second dict holding the heartbeat 'url', 'data', 'interval' and a
        'ping' callable.

        Raises ExtractorError for any other DMC protocol.
        """
        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
        dmc_protocol = info_dict['expected_protocol']

        # Reuse the API data stored by _real_extract() when present; otherwise
        # re-download the watch page and parse its data-api-data attribute.
        api_data = (
            info_dict.get('_api_data')
            or self._parse_json(
                self._html_search_regex(
                    'data-api-data="([^"]+)"',
                    self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
                    'API data', default='{}'),
                video_id))

        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])

        def ping():
            # Calls the nvapi "watch" endpoint with the tracking id (when the API
            # data has one) and warns if the reported status is not 200.
            tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
            if tracking_id:
                tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
                watch_request_response = self._download_json(
                    tracking_url, video_id,
                    note='Acquiring permission for downloading video', fatal=False,
                    headers=self._API_HEADERS)
                if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
                    self.report_warning('Failed to acquire permission for playing video. Video download may fail.')

        # The session payload carries booleans as the strings 'yes' / 'no'.
        yesno = lambda x: 'yes' if x else 'no'

        if dmc_protocol == 'http':
            protocol = 'http'
            protocol_parameters = {
                'http_output_download_parameters': {
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                },
            }
        elif dmc_protocol == 'hls':
            protocol = 'm3u8'
            # Taken from the 'segment_duration' extractor argument, defaulting to 6000.
            segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
            parsed_token = self._parse_json(session_api_data['token'], video_id)
            encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
            protocol_parameters = {
                'hls_parameters': {
                    'segment_duration': segment_duration,
                    'transfer_preset': '',
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                },
            }
            if 'hls_encryption' in parsed_token and encryption:
                # Encrypted stream: forward the key material and keep protocol 'm3u8'.
                protocol_parameters['hls_parameters']['encryption'] = {
                    parsed_token['hls_encryption']: {
                        'encrypted_key': encryption['encryptedKey'],
                        'key_uri': encryption['keyUri'],
                    },
                }
            else:
                # No encryption involved: switch to 'm3u8_native'.
                protocol = 'm3u8_native'
        else:
            raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')

        # Create the session by POSTing the description below to the first session endpoint.
        session_response = self._download_json(
            session_api_endpoint['url'], video_id,
            query={'_format': 'json'},
            headers={'Content-Type': 'application/json'},
            note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
            data=json.dumps({
                'session': {
                    'client_info': {
                        'player_id': session_api_data.get('playerId'),
                    },
                    'content_auth': {
                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                        'service_id': 'nicovideo',
                        'service_user_id': session_api_data.get('serviceUserId'),
                    },
                    'content_id': session_api_data.get('contentId'),
                    'content_src_id_sets': [{
                        'content_src_ids': [{
                            'src_id_to_mux': {
                                'audio_src_ids': [audio_src_id],
                                'video_src_ids': [video_src_id],
                            },
                        }],
                    }],
                    'content_type': 'movie',
                    'content_uri': '',
                    'keep_method': {
                        'heartbeat': {
                            'lifetime': session_api_data.get('heartbeatLifetime'),
                        },
                    },
                    'priority': session_api_data['priority'],
                    # The outer protocol name is always 'http'; the http/hls
                    # choice is expressed through protocol_parameters.
                    'protocol': {
                        'name': 'http',
                        'parameters': {
                            'http_parameters': {
                                'parameters': protocol_parameters,
                            },
                        },
                    },
                    'recipe_id': session_api_data.get('recipeId'),
                    'session_operation_auth': {
                        'session_operation_auth_by_signature': {
                            'signature': session_api_data.get('signature'),
                            'token': session_api_data.get('token'),
                        },
                    },
                    'timing_constraint': 'unlimited',
                },
            }).encode())

        info_dict['url'] = session_response['data']['session']['content_uri']
        info_dict['protocol'] = protocol

        # get heartbeat info
        heartbeat_info_dict = {
            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
            'data': json.dumps(session_response['data']),
            # Interval: heartbeatLifetime divided by 3000. Presumably the lifetime is
            # in milliseconds (TODO confirm), so this is one third of it in seconds,
            # leaving a buffer before the session expires.
            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
            'ping': ping,
        }

        return info_dict, heartbeat_info_dict
  343. def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
  344. if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
  345. return None
  346. format_id = '-'.join(
  347. [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
  348. vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
  349. return {
  350. 'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
  351. 'format_id': format_id,
  352. 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
  353. 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
  354. 'acodec': 'aac',
  355. 'vcodec': 'h264',
  356. **traverse_obj(audio_quality, ('metadata', {
  357. 'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
  358. 'asr': ('samplingRate', {int_or_none}),
  359. })),
  360. **traverse_obj(video_quality, ('metadata', {
  361. 'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
  362. 'height': ('resolution', 'height', {int_or_none}),
  363. 'width': ('resolution', 'width', {int_or_none}),
  364. })),
  365. 'quality': -2 if 'low' in video_quality['id'] else None,
  366. 'protocol': 'niconico_dmc',
  367. 'expected_protocol': dmc_protocol, # XXX: This is not a documented field
  368. 'http_headers': {
  369. 'Origin': 'https://www.nicovideo.jp',
  370. 'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
  371. },
  372. }
  373. def _yield_dmc_formats(self, api_data, video_id):
  374. dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
  375. audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
  376. videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
  377. protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
  378. if not all((audios, videos, protocols)):
  379. return
  380. for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
  381. if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
  382. yield fmt
  383. def _yield_dms_formats(self, api_data, video_id):
  384. fmt_filter = lambda _, v: v['isAvailable'] and v['id']
  385. videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
  386. audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
  387. access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
  388. track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
  389. if not all((videos, audios, access_key, track_id)):
  390. return
  391. dms_m3u8_url = self._download_json(
  392. f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
  393. data=json.dumps({
  394. 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
  395. }).encode(), query={'actionTrackId': track_id}, headers={
  396. 'x-access-right-key': access_key,
  397. 'x-frontend-id': 6,
  398. 'x-frontend-version': 0,
  399. 'x-request-with': 'https://www.nicovideo.jp',
  400. })['data']['contentUrl']
  401. # Getting all audio formats results in duplicate video formats which we filter out later
  402. dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
  403. # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
  404. for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
  405. yield {
  406. **audio_fmt,
  407. **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
  408. 'format_id': ('id', {str}),
  409. 'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
  410. 'asr': ('samplingRate', {int_or_none}),
  411. }), get_all=False),
  412. 'acodec': 'aac',
  413. 'ext': 'm4a',
  414. }
  415. # Sort before removing dupes to keep the format dicts with the lowest tbr
  416. video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
  417. self._remove_duplicate_formats(video_fmts)
  418. # Calculate the true vbr/tbr by subtracting the lowest abr
  419. min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
  420. for video_fmt in video_fmts:
  421. video_fmt['tbr'] -= min_abr
  422. video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
  423. yield video_fmt
  424. def _real_extract(self, url):
  425. video_id = self._match_id(url)
  426. try:
  427. webpage, handle = self._download_webpage_handle(
  428. 'https://www.nicovideo.jp/watch/' + video_id, video_id)
  429. if video_id.startswith('so'):
  430. video_id = self._match_id(handle.url)
  431. api_data = self._parse_json(self._html_search_regex(
  432. 'data-api-data="([^"]+)"', webpage,
  433. 'API data', default='{}'), video_id)
  434. except ExtractorError as e:
  435. try:
  436. api_data = self._download_json(
  437. f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
  438. note='Downloading API JSON', errnote='Unable to fetch data')['data']
  439. except ExtractorError:
  440. if not isinstance(e.cause, HTTPError):
  441. raise
  442. webpage = e.cause.response.read().decode('utf-8', 'replace')
  443. error_msg = self._html_search_regex(
  444. r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
  445. webpage, 'error reason', default=None)
  446. if not error_msg:
  447. raise
  448. raise ExtractorError(clean_html(error_msg), expected=True)
  449. availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
  450. 'needs_premium': ('isPremium', {bool}),
  451. 'needs_subscription': ('isAdmission', {bool}),
  452. })) or {'needs_auth': True}))
  453. formats = [*self._yield_dmc_formats(api_data, video_id),
  454. *self._yield_dms_formats(api_data, video_id)]
  455. if not formats:
  456. fail_msg = clean_html(self._html_search_regex(
  457. r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
  458. webpage, 'fail message', default=None, group='msg'))
  459. if fail_msg:
  460. self.to_screen(f'Niconico said: {fail_msg}')
  461. if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
  462. availability = None
  463. self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
  464. elif availability == 'premium_only':
  465. self.raise_login_required('This video requires premium', metadata_available=True)
  466. elif availability == 'subscriber_only':
  467. self.raise_login_required('This video is for members only', metadata_available=True)
  468. elif availability == 'needs_auth':
  469. self.raise_login_required(metadata_available=False)
  470. # Start extracting information
  471. tags = None
  472. if webpage:
  473. # use og:video:tag (not logged in)
  474. og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
  475. tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
  476. if not tags:
  477. # use keywords and split with comma (not logged in)
  478. kwds = self._html_search_meta('keywords', webpage, default=None)
  479. if kwds:
  480. tags = [x for x in kwds.split(',') if x]
  481. if not tags:
  482. # find in json (logged in)
  483. tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
  484. thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
  485. def get_video_info(*items, get_first=True, **kwargs):
  486. return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
  487. return {
  488. 'id': video_id,
  489. '_api_data': api_data,
  490. 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
  491. 'formats': formats,
  492. 'availability': availability,
  493. 'thumbnails': [{
  494. 'id': key,
  495. 'url': url,
  496. 'ext': 'jpg',
  497. 'preference': thumb_prefs(key),
  498. **parse_resolution(url, lenient=True),
  499. } for key, url in (get_video_info('thumbnail') or {}).items() if url],
  500. 'description': clean_html(get_video_info('description')),
  501. 'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
  502. 'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
  503. 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
  504. self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
  505. 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
  506. 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
  507. 'view_count': int_or_none(get_video_info('count', 'view')),
  508. 'tags': tags,
  509. 'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
  510. 'comment_count': get_video_info('count', 'comment', expected_type=int),
  511. 'duration': (
  512. parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
  513. or get_video_info('duration')),
  514. 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
  515. 'subtitles': self.extract_subtitles(video_id, api_data),
  516. }
  517. def _get_subtitles(self, video_id, api_data):
  518. comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
  519. if not comments_info.get('server'):
  520. return
  521. danmaku = traverse_obj(self._download_json(
  522. f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
  523. 'additionals': {},
  524. 'params': comments_info.get('params'),
  525. 'threadKey': comments_info.get('threadKey'),
  526. }).encode(), fatal=False,
  527. headers={
  528. 'Referer': 'https://www.nicovideo.jp/',
  529. 'Origin': 'https://www.nicovideo.jp',
  530. 'Content-Type': 'text/plain;charset=UTF-8',
  531. 'x-client-os-type': 'others',
  532. 'x-frontend-id': '6',
  533. 'x-frontend-version': '0',
  534. },
  535. note='Downloading comments', errnote='Failed to download comments'),
  536. ('data', 'threads', ..., 'comments', ...))
  537. return {
  538. 'comments': [{
  539. 'ext': 'json',
  540. 'data': json.dumps(danmaku),
  541. }],
  542. }
  543. class NiconicoPlaylistBaseIE(InfoExtractor):
  544. _PAGE_SIZE = 100
  545. _API_HEADERS = {
  546. 'X-Frontend-ID': '6',
  547. 'X-Frontend-Version': '0',
  548. 'X-Niconico-Language': 'en-us',
  549. }
  550. def _call_api(self, list_id, resource, query):
  551. raise NotImplementedError('Must be implemented in subclasses')
  552. @staticmethod
  553. def _parse_owner(item):
  554. return {
  555. 'uploader': traverse_obj(item, ('owner', 'name')),
  556. 'uploader_id': traverse_obj(item, ('owner', 'id')),
  557. }
  558. def _fetch_page(self, list_id, page):
  559. page += 1
  560. resp = self._call_api(list_id, f'page {page}', {
  561. 'page': page,
  562. 'pageSize': self._PAGE_SIZE,
  563. })
  564. # this is needed to support both mylist and user
  565. for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
  566. video_id = video.get('id')
  567. if not video_id:
  568. # skip {"video": {"id": "blablabla", ...}}
  569. continue
  570. count = video.get('count') or {}
  571. get_count = lambda x: int_or_none(count.get(x))
  572. yield {
  573. '_type': 'url',
  574. 'id': video_id,
  575. 'title': video.get('title'),
  576. 'url': f'https://www.nicovideo.jp/watch/{video_id}',
  577. 'description': video.get('shortDescription'),
  578. 'duration': int_or_none(video.get('duration')),
  579. 'view_count': get_count('view'),
  580. 'comment_count': get_count('comment'),
  581. 'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
  582. 'ie_key': NiconicoIE.ie_key(),
  583. **self._parse_owner(video),
  584. }
  585. def _entries(self, list_id):
  586. return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
  587. class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
  588. IE_NAME = 'niconico:playlist'
  589. _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'
  590. _TESTS = [{
  591. 'url': 'http://www.nicovideo.jp/mylist/27411728',
  592. 'info_dict': {
  593. 'id': '27411728',
  594. 'title': 'AKB48のオールナイトニッポン',
  595. 'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
  596. 'uploader': 'のっく',
  597. 'uploader_id': '805442',
  598. },
  599. 'playlist_mincount': 291,
  600. }, {
  601. 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
  602. 'only_matching': True,
  603. }, {
  604. 'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
  605. 'only_matching': True,
  606. }]
  607. def _call_api(self, list_id, resource, query):
  608. return self._download_json(
  609. f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
  610. f'Downloading {resource}', query=query,
  611. headers=self._API_HEADERS)['data']['mylist']
  612. def _real_extract(self, url):
  613. list_id = self._match_id(url)
  614. mylist = self._call_api(list_id, 'list', {
  615. 'pageSize': 1,
  616. })
  617. return self.playlist_result(
  618. self._entries(list_id), list_id,
  619. mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))
  620. class NiconicoSeriesIE(InfoExtractor):
  621. IE_NAME = 'niconico:series'
  622. _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
  623. _TESTS = [{
  624. 'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
  625. 'info_dict': {
  626. 'id': '110226',
  627. 'title': 'ご立派ァ!のシリーズ',
  628. },
  629. 'playlist_mincount': 10,
  630. }, {
  631. 'url': 'https://www.nicovideo.jp/series/12312/',
  632. 'info_dict': {
  633. 'id': '12312',
  634. 'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
  635. },
  636. 'playlist_mincount': 103,
  637. }, {
  638. 'url': 'https://nico.ms/series/203559',
  639. 'only_matching': True,
  640. }]
  641. def _real_extract(self, url):
  642. list_id = self._match_id(url)
  643. webpage = self._download_webpage(url, list_id)
  644. title = self._search_regex(
  645. (r'<title>「(.+)(全',
  646. r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
  647. webpage, 'title', fatal=False)
  648. if title:
  649. title = unescapeHTML(title)
  650. json_data = next(self._yield_json_ld(webpage, None, fatal=False))
  651. return self.playlist_from_matches(
  652. traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
  653. class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
  654. IE_NAME = 'niconico:history'
  655. IE_DESC = 'NicoNico user history or likes. Requires cookies.'
  656. _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/(?P<id>history(?:/like)?)'
  657. _TESTS = [{
  658. 'note': 'PC page, with /video',
  659. 'url': 'https://www.nicovideo.jp/my/history/video',
  660. 'only_matching': True,
  661. }, {
  662. 'note': 'PC page, without /video',
  663. 'url': 'https://www.nicovideo.jp/my/history',
  664. 'only_matching': True,
  665. }, {
  666. 'note': 'mobile page, with /video',
  667. 'url': 'https://sp.nicovideo.jp/my/history/video',
  668. 'only_matching': True,
  669. }, {
  670. 'note': 'mobile page, without /video',
  671. 'url': 'https://sp.nicovideo.jp/my/history',
  672. 'only_matching': True,
  673. }, {
  674. 'note': 'PC page',
  675. 'url': 'https://www.nicovideo.jp/my/history/like',
  676. 'only_matching': True,
  677. }, {
  678. 'note': 'Mobile page',
  679. 'url': 'https://sp.nicovideo.jp/my/history/like',
  680. 'only_matching': True,
  681. }]
  682. def _call_api(self, list_id, resource, query):
  683. path = 'likes' if list_id == 'history/like' else 'watch/history'
  684. return self._download_json(
  685. f'https://nvapi.nicovideo.jp/v1/users/me/{path}', list_id,
  686. f'Downloading {resource}', query=query, headers=self._API_HEADERS)['data']
  687. def _real_extract(self, url):
  688. list_id = self._match_id(url)
  689. try:
  690. mylist = self._call_api(list_id, 'list', {'pageSize': 1})
  691. except ExtractorError as e:
  692. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  693. self.raise_login_required('You have to be logged in to get your history')
  694. raise
  695. return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist))
  696. class NicovideoSearchBaseIE(InfoExtractor):
  697. _SEARCH_TYPE = 'search'
  698. def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
  699. query = query or {}
  700. pages = [query['page']] if 'page' in query else itertools.count(1)
  701. for page_num in pages:
  702. query['page'] = str(page_num)
  703. webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
  704. results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
  705. for item in results:
  706. yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
  707. if not results:
  708. break
  709. def _search_results(self, query):
  710. return self._entries(
  711. self._proto_relative_url(f'//www.nicovideo.jp/{self._SEARCH_TYPE}/{query}'), query)
  712. class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
  713. IE_DESC = 'Nico video search'
  714. IE_NAME = 'nicovideo:search'
  715. _SEARCH_KEY = 'nicosearch'
  716. class NicovideoSearchURLIE(NicovideoSearchBaseIE):
  717. IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
  718. IE_DESC = 'Nico video search URLs'
  719. _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
  720. _TESTS = [{
  721. 'url': 'http://www.nicovideo.jp/search/sm9',
  722. 'info_dict': {
  723. 'id': 'sm9',
  724. 'title': 'sm9',
  725. },
  726. 'playlist_mincount': 40,
  727. }, {
  728. 'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
  729. 'info_dict': {
  730. 'id': 'sm9',
  731. 'title': 'sm9',
  732. },
  733. 'playlist_count': 31,
  734. }]
  735. def _real_extract(self, url):
  736. query = self._match_id(url)
  737. return self.playlist_result(self._entries(url, query), query, query)
  738. class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
  739. IE_DESC = 'Nico video search, newest first'
  740. IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
  741. _SEARCH_KEY = 'nicosearchdate'
  742. _TESTS = [{
  743. 'url': 'nicosearchdateall:a',
  744. 'info_dict': {
  745. 'id': 'a',
  746. 'title': 'a',
  747. },
  748. 'playlist_mincount': 1610,
  749. }]
  750. _START_DATE = dt.date(2007, 1, 1)
  751. _RESULTS_PER_PAGE = 32
  752. _MAX_PAGES = 50
  753. def _entries(self, url, item_id, start_date=None, end_date=None):
  754. start_date, end_date = start_date or self._START_DATE, end_date or dt.datetime.now().date()
  755. # If the last page has a full page of videos, we need to break down the query interval further
  756. last_page_len = len(list(self._get_entries_for_date(
  757. url, item_id, start_date, end_date, self._MAX_PAGES,
  758. note=f'Checking number of videos from {start_date} to {end_date}')))
  759. if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
  760. midpoint = start_date + ((end_date - start_date) // 2)
  761. yield from self._entries(url, item_id, midpoint, end_date)
  762. yield from self._entries(url, item_id, start_date, midpoint)
  763. else:
  764. self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
  765. yield from self._get_entries_for_date(
  766. url, item_id, start_date, end_date, note=' Downloading page %(page)s')
  767. def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
  768. query = {
  769. 'start': str(start_date),
  770. 'end': str(end_date or start_date),
  771. 'sort': 'f',
  772. 'order': 'd',
  773. }
  774. if page_num:
  775. query['page'] = str(page_num)
  776. yield from super()._entries(url, item_id, query=query, note=note)
  777. class NicovideoTagURLIE(NicovideoSearchBaseIE):
  778. IE_NAME = 'niconico:tag'
  779. IE_DESC = 'NicoNico video tag URLs'
  780. _SEARCH_TYPE = 'tag'
  781. _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/tag/(?P<id>[^?#&]+)?'
  782. _TESTS = [{
  783. 'url': 'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢',
  784. 'info_dict': {
  785. 'id': 'ドキュメンタリー淫夢',
  786. 'title': 'ドキュメンタリー淫夢',
  787. },
  788. 'playlist_mincount': 400,
  789. }]
  790. def _real_extract(self, url):
  791. query = self._match_id(url)
  792. return self.playlist_result(self._entries(url, query), query, query)
  793. class NiconicoUserIE(InfoExtractor):
  794. _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
  795. _TEST = {
  796. 'url': 'https://www.nicovideo.jp/user/419948',
  797. 'info_dict': {
  798. 'id': '419948',
  799. },
  800. 'playlist_mincount': 101,
  801. }
  802. _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
  803. _PAGE_SIZE = 100
  804. _API_HEADERS = {
  805. 'X-Frontend-ID': '6',
  806. 'X-Frontend-Version': '0',
  807. }
  808. def _entries(self, list_id):
  809. total_count = 1
  810. count = page_num = 0
  811. while count < total_count:
  812. json_parsed = self._download_json(
  813. self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
  814. headers=self._API_HEADERS,
  815. note='Downloading JSON metadata%s' % (f' page {page_num}' if page_num else ''))
  816. if not page_num:
  817. total_count = int_or_none(json_parsed['data'].get('totalCount'))
  818. for entry in json_parsed['data']['items']:
  819. count += 1
  820. yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
  821. page_num += 1
  822. def _real_extract(self, url):
  823. list_id = self._match_id(url)
  824. return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
  825. class NiconicoLiveIE(InfoExtractor):
  826. IE_NAME = 'niconico:live'
  827. IE_DESC = 'ニコニコ生放送'
  828. _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
  829. _TESTS = [{
  830. 'note': 'this test case includes invisible characters for title, pasting them as-is',
  831. 'url': 'https://live.nicovideo.jp/watch/lv339533123',
  832. 'info_dict': {
  833. 'id': 'lv339533123',
  834. 'title': '激辛ペヤング食べます\u202a( ;ᯅ; )\u202c(歌枠オーディション参加中)',
  835. 'view_count': 1526,
  836. 'comment_count': 1772,
  837. 'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
  838. 'uploader': 'もか',
  839. 'channel': 'ゲストさんのコミュニティ',
  840. 'channel_id': 'co5776900',
  841. 'channel_url': 'https://com.nicovideo.jp/community/co5776900',
  842. 'timestamp': 1670677328,
  843. 'is_live': True,
  844. },
  845. 'skip': 'livestream',
  846. }, {
  847. 'url': 'https://live2.nicovideo.jp/watch/lv339533123',
  848. 'only_matching': True,
  849. }, {
  850. 'url': 'https://sp.live.nicovideo.jp/watch/lv339533123',
  851. 'only_matching': True,
  852. }, {
  853. 'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123',
  854. 'only_matching': True,
  855. }]
  856. _KNOWN_LATENCY = ('high', 'low')
  857. def _real_extract(self, url):
  858. video_id = self._match_id(url)
  859. webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
  860. embedded_data = self._parse_json(unescapeHTML(self._search_regex(
  861. r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
  862. ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
  863. if not ws_url:
  864. raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
  865. ws_url = update_url_query(ws_url, {
  866. 'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
  867. })
  868. hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
  869. latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
  870. if latency not in self._KNOWN_LATENCY:
  871. latency = 'high'
  872. ws = self._request_webpage(
  873. Request(ws_url, headers={'Origin': f'https://{hostname}'}),
  874. video_id=video_id, note='Connecting to WebSocket server')
  875. self.write_debug('[debug] Sending HLS server request')
  876. ws.send(json.dumps({
  877. 'type': 'startWatching',
  878. 'data': {
  879. 'stream': {
  880. 'quality': 'abr',
  881. 'protocol': 'hls+fmp4',
  882. 'latency': latency,
  883. 'chasePlay': False,
  884. },
  885. 'room': {
  886. 'protocol': 'webSocket',
  887. 'commentable': True,
  888. },
  889. 'reconnect': False,
  890. },
  891. }))
  892. while True:
  893. recv = ws.recv()
  894. if not recv:
  895. continue
  896. data = json.loads(recv)
  897. if not isinstance(data, dict):
  898. continue
  899. if data.get('type') == 'stream':
  900. m3u8_url = data['data']['uri']
  901. qualities = data['data']['availableQualities']
  902. break
  903. elif data.get('type') == 'disconnect':
  904. self.write_debug(recv)
  905. raise ExtractorError('Disconnected at middle of extraction')
  906. elif data.get('type') == 'error':
  907. self.write_debug(recv)
  908. message = traverse_obj(data, ('body', 'code')) or recv
  909. raise ExtractorError(message)
  910. elif self.get_param('verbose', False):
  911. if len(recv) > 100:
  912. recv = recv[:100] + '...'
  913. self.write_debug(f'Server said: {recv}')
  914. title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
  915. ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
  916. raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
  917. thumbnails = []
  918. for name, value in raw_thumbs.items():
  919. if not isinstance(value, dict):
  920. thumbnails.append({
  921. 'id': name,
  922. 'url': value,
  923. **parse_resolution(value, lenient=True),
  924. })
  925. continue
  926. for k, img_url in value.items():
  927. res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True)
  928. width, height = res.get('width'), res.get('height')
  929. thumbnails.append({
  930. 'id': f'{name}_{width}x{height}',
  931. 'url': img_url,
  932. **res,
  933. })
  934. formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
  935. for fmt, q in zip(formats, reversed(qualities[1:])):
  936. fmt.update({
  937. 'format_id': q,
  938. 'protocol': 'niconico_live',
  939. 'ws': ws,
  940. 'video_id': video_id,
  941. 'live_latency': latency,
  942. 'origin': hostname,
  943. })
  944. return {
  945. 'id': video_id,
  946. 'title': title,
  947. **traverse_obj(embedded_data, {
  948. 'view_count': ('program', 'statistics', 'watchCount'),
  949. 'comment_count': ('program', 'statistics', 'commentCount'),
  950. 'uploader': ('program', 'supplier', 'name'),
  951. 'channel': ('socialGroup', 'name'),
  952. 'channel_id': ('socialGroup', 'id'),
  953. 'channel_url': ('socialGroup', 'socialGroupPageUrl'),
  954. }),
  955. 'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
  956. 'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
  957. 'is_live': True,
  958. 'thumbnails': thumbnails,
  959. 'formats': formats,
  960. }