abematv.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. import io
  2. import json
  3. import time
  4. import hashlib
  5. import hmac
  6. import re
  7. import struct
  8. from base64 import urlsafe_b64encode
  9. from binascii import unhexlify
  10. from .common import InfoExtractor
  11. from ..aes import aes_ecb_decrypt
  12. from ..compat import (
  13. compat_urllib_response,
  14. compat_urllib_parse_urlparse,
  15. compat_urllib_request,
  16. )
  17. from ..utils import (
  18. ExtractorError,
  19. decode_base,
  20. int_or_none,
  21. random_uuidv4,
  22. request_to_url,
  23. time_seconds,
  24. update_url_query,
  25. traverse_obj,
  26. intlist_to_bytes,
  27. bytes_to_intlist,
  28. urljoin,
  29. )
# NOTE: the network-handler code below is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)
  31. def add_opener(ydl, handler):
  32. ''' Add a handler for opening URLs, like _download_webpage '''
  33. # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
  34. # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
  35. assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
  36. ydl._opener.add_handler(handler)
  37. def remove_opener(ydl, handler):
  38. '''
  39. Remove handler(s) for opening URLs
  40. @param handler Either handler object itself or handler type.
  41. Specifying handler type will remove all handler which isinstance returns True.
  42. '''
  43. # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
  44. # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
  45. opener = ydl._opener
  46. assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
  47. if isinstance(handler, (type, tuple)):
  48. find_cp = lambda x: isinstance(x, handler)
  49. else:
  50. find_cp = lambda x: x is handler
  51. removed = []
  52. for meth in dir(handler):
  53. if meth in ["redirect_request", "do_open", "proxy_open"]:
  54. # oops, coincidental match
  55. continue
  56. i = meth.find("_")
  57. protocol = meth[:i]
  58. condition = meth[i + 1:]
  59. if condition.startswith("error"):
  60. j = condition.find("_") + i + 1
  61. kind = meth[j + 1:]
  62. try:
  63. kind = int(kind)
  64. except ValueError:
  65. pass
  66. lookup = opener.handle_error.get(protocol, {})
  67. opener.handle_error[protocol] = lookup
  68. elif condition == "open":
  69. kind = protocol
  70. lookup = opener.handle_open
  71. elif condition == "response":
  72. kind = protocol
  73. lookup = opener.process_response
  74. elif condition == "request":
  75. kind = protocol
  76. lookup = opener.process_request
  77. else:
  78. continue
  79. handlers = lookup.setdefault(kind, [])
  80. if handlers:
  81. handlers[:] = [x for x in handlers if not find_cp(x)]
  82. removed.append(x for x in handlers if find_cp(x))
  83. if removed:
  84. for x in opener.handlers:
  85. if find_cp(x):
  86. x.add_parent(None)
  87. opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
  88. class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
  89. handler_order = 499
  90. STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
  91. HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
  92. def __init__(self, ie: 'AbemaTVIE'):
  93. # the protcol that this should really handle is 'abematv-license://'
  94. # abematv_license_open is just a placeholder for development purposes
  95. # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
  96. setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
  97. self.ie = ie
  98. def _get_videokey_from_ticket(self, ticket):
  99. to_show = self.ie._downloader.params.get('verbose', False)
  100. media_token = self.ie._get_media_token(to_show=to_show)
  101. license_response = self.ie._download_json(
  102. 'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
  103. query={'t': media_token},
  104. data=json.dumps({
  105. 'kv': 'a',
  106. 'lt': ticket
  107. }).encode('utf-8'),
  108. headers={
  109. 'Content-Type': 'application/json',
  110. })
  111. res = decode_base(license_response['k'], self.STRTABLE)
  112. encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
  113. h = hmac.new(
  114. unhexlify(self.HKEY),
  115. (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
  116. digestmod=hashlib.sha256)
  117. enckey = bytes_to_intlist(h.digest())
  118. return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
  119. def abematv_license_open(self, url):
  120. url = request_to_url(url)
  121. ticket = compat_urllib_parse_urlparse(url).netloc
  122. response_data = self._get_videokey_from_ticket(ticket)
  123. return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={
  124. 'Content-Length': len(response_data),
  125. }, url=url, code=200)
  126. class AbemaTVBaseIE(InfoExtractor):
  127. def _extract_breadcrumb_list(self, webpage, video_id):
  128. for jld in re.finditer(
  129. r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
  130. webpage):
  131. jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
  132. if jsonld:
  133. if jsonld.get('@type') != 'BreadcrumbList':
  134. continue
  135. trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
  136. if trav:
  137. return trav
  138. return []
  139. class AbemaTVIE(AbemaTVBaseIE):
  140. _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
  141. _NETRC_MACHINE = 'abematv'
  142. _TESTS = [{
  143. 'url': 'https://abema.tv/video/episode/194-25_s2_p1',
  144. 'info_dict': {
  145. 'id': '194-25_s2_p1',
  146. 'title': '第1話 「チーズケーキ」 「モーニング再び」',
  147. 'series': '異世界食堂2',
  148. 'series_number': 2,
  149. 'episode': '第1話 「チーズケーキ」 「モーニング再び」',
  150. 'episode_number': 1,
  151. },
  152. 'skip': 'expired',
  153. }, {
  154. 'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
  155. 'info_dict': {
  156. 'id': 'E8tvAnMJ7a9a5d',
  157. 'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
  158. 'series': 'ゆるキャン△ SEASON2',
  159. 'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
  160. 'series_number': 2,
  161. 'episode_number': 1,
  162. 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
  163. },
  164. 'skip': 'expired',
  165. }, {
  166. 'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
  167. 'info_dict': {
  168. 'id': 'E8tvAnMJ7a9a5d',
  169. 'title': '第5話『光射す』',
  170. 'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
  171. 'thumbnail': r're:https://hayabusa\.io/.+',
  172. 'series': '相棒',
  173. 'episode': '第5話『光射す』',
  174. },
  175. 'skip': 'expired',
  176. }, {
  177. 'url': 'https://abema.tv/now-on-air/abema-anime',
  178. 'info_dict': {
  179. 'id': 'abema-anime',
  180. # this varies
  181. # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
  182. 'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
  183. 'is_live': True,
  184. },
  185. 'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
  186. }]
  187. _USERTOKEN = None
  188. _DEVICE_ID = None
  189. _TIMETABLE = None
  190. _MEDIATOKEN = None
  191. _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
  192. def _generate_aks(self, deviceid):
  193. deviceid = deviceid.encode('utf-8')
  194. # add 1 hour and then drop minute and secs
  195. ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
  196. time_struct = time.gmtime(ts_1hour)
  197. ts_1hour_str = str(ts_1hour).encode('utf-8')
  198. tmp = None
  199. def mix_once(nonce):
  200. nonlocal tmp
  201. h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256)
  202. h.update(nonce)
  203. tmp = h.digest()
  204. def mix_tmp(count):
  205. nonlocal tmp
  206. for i in range(count):
  207. mix_once(tmp)
  208. def mix_twist(nonce):
  209. nonlocal tmp
  210. mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
  211. mix_once(self._SECRETKEY)
  212. mix_tmp(time_struct.tm_mon)
  213. mix_twist(deviceid)
  214. mix_tmp(time_struct.tm_mday % 5)
  215. mix_twist(ts_1hour_str)
  216. mix_tmp(time_struct.tm_hour % 5)
  217. return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
  218. def _get_device_token(self):
  219. if self._USERTOKEN:
  220. return self._USERTOKEN
  221. self._DEVICE_ID = random_uuidv4()
  222. aks = self._generate_aks(self._DEVICE_ID)
  223. user_data = self._download_json(
  224. 'https://api.abema.io/v1/users', None, note='Authorizing',
  225. data=json.dumps({
  226. 'deviceId': self._DEVICE_ID,
  227. 'applicationKeySecret': aks,
  228. }).encode('utf-8'),
  229. headers={
  230. 'Content-Type': 'application/json',
  231. })
  232. self._USERTOKEN = user_data['token']
  233. # don't allow adding it 2 times or more, though it's guarded
  234. remove_opener(self._downloader, AbemaLicenseHandler)
  235. add_opener(self._downloader, AbemaLicenseHandler(self))
  236. return self._USERTOKEN
  237. def _get_media_token(self, invalidate=False, to_show=True):
  238. if not invalidate and self._MEDIATOKEN:
  239. return self._MEDIATOKEN
  240. self._MEDIATOKEN = self._download_json(
  241. 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
  242. query={
  243. 'osName': 'android',
  244. 'osVersion': '6.0.1',
  245. 'osLang': 'ja_JP',
  246. 'osTimezone': 'Asia/Tokyo',
  247. 'appId': 'tv.abema',
  248. 'appVersion': '3.27.1'
  249. }, headers={
  250. 'Authorization': 'bearer ' + self._get_device_token()
  251. })['token']
  252. return self._MEDIATOKEN
  253. def _perform_login(self, username, password):
  254. if '@' in username: # don't strictly check if it's email address or not
  255. ep, method = 'user/email', 'email'
  256. else:
  257. ep, method = 'oneTimePassword', 'userId'
  258. login_response = self._download_json(
  259. f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
  260. data=json.dumps({
  261. method: username,
  262. 'password': password
  263. }).encode('utf-8'), headers={
  264. 'Authorization': 'bearer ' + self._get_device_token(),
  265. 'Origin': 'https://abema.tv',
  266. 'Referer': 'https://abema.tv/',
  267. 'Content-Type': 'application/json',
  268. })
  269. self._USERTOKEN = login_response['token']
  270. self._get_media_token(True)
  271. def _real_extract(self, url):
  272. # starting download using infojson from this extractor is undefined behavior,
  273. # and never be fixed in the future; you must trigger downloads by directly specifing URL.
  274. # (unless there's a way to hook before downloading by extractor)
  275. video_id, video_type = self._match_valid_url(url).group('id', 'type')
  276. headers = {
  277. 'Authorization': 'Bearer ' + self._get_device_token(),
  278. }
  279. video_type = video_type.split('/')[-1]
  280. webpage = self._download_webpage(url, video_id)
  281. canonical_url = self._search_regex(
  282. r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
  283. default=url)
  284. info = self._search_json_ld(webpage, video_id, default={})
  285. title = self._search_regex(
  286. r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
  287. if not title:
  288. jsonld = None
  289. for jld in re.finditer(
  290. r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
  291. webpage):
  292. jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
  293. if jsonld:
  294. break
  295. if jsonld:
  296. title = jsonld.get('caption')
  297. if not title and video_type == 'now-on-air':
  298. if not self._TIMETABLE:
  299. # cache the timetable because it goes to 5MiB in size (!!)
  300. self._TIMETABLE = self._download_json(
  301. 'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
  302. headers=headers)
  303. now = time_seconds(hours=9)
  304. for slot in self._TIMETABLE.get('slots', []):
  305. if slot.get('channelId') != video_id:
  306. continue
  307. if slot['startAt'] <= now and now < slot['endAt']:
  308. title = slot['title']
  309. break
  310. # read breadcrumb on top of page
  311. breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
  312. if breadcrumb:
  313. # breadcrumb list translates to: (example is 1st test for this IE)
  314. # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
  315. # hence this works
  316. info['series'] = breadcrumb[-2]
  317. info['episode'] = breadcrumb[-1]
  318. if not title:
  319. title = info['episode']
  320. description = self._html_search_regex(
  321. (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
  322. r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
  323. webpage, 'description', default=None, group=1)
  324. if not description:
  325. og_desc = self._html_search_meta(
  326. ('description', 'og:description', 'twitter:description'), webpage)
  327. if og_desc:
  328. description = re.sub(r'''(?sx)
  329. ^(.+?)(?:
  330. アニメの動画を無料で見るならABEMA!| # anime
  331. 等、.+ # applies for most of categories
  332. )?
  333. ''', r'\1', og_desc)
  334. # canonical URL may contain series and episode number
  335. mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
  336. if mobj:
  337. seri = int_or_none(mobj.group(1), default=float('inf'))
  338. epis = int_or_none(mobj.group(2), default=float('inf'))
  339. info['series_number'] = seri if seri < 100 else None
  340. # some anime like Detective Conan (though not available in AbemaTV)
  341. # has more than 1000 episodes (1026 as of 2021/11/15)
  342. info['episode_number'] = epis if epis < 2000 else None
  343. is_live, m3u8_url = False, None
  344. if video_type == 'now-on-air':
  345. is_live = True
  346. channel_url = 'https://api.abema.io/v1/channels'
  347. if video_id == 'news-global':
  348. channel_url = update_url_query(channel_url, {'division': '1'})
  349. onair_channels = self._download_json(channel_url, video_id)
  350. for ch in onair_channels['channels']:
  351. if video_id == ch['id']:
  352. m3u8_url = ch['playback']['hls']
  353. break
  354. else:
  355. raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
  356. elif video_type == 'episode':
  357. api_response = self._download_json(
  358. f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
  359. note='Checking playability',
  360. headers=headers)
  361. ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
  362. if 3 not in ondemand_types:
  363. # cannot acquire decryption key for these streams
  364. self.report_warning('This is a premium-only stream')
  365. m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
  366. elif video_type == 'slots':
  367. api_response = self._download_json(
  368. f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
  369. note='Checking playability',
  370. headers=headers)
  371. if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
  372. self.report_warning('This is a premium-only stream')
  373. m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
  374. else:
  375. raise ExtractorError('Unreachable')
  376. if is_live:
  377. self.report_warning("This is a livestream; yt-dlp doesn't support downloading natively, but FFmpeg cannot handle m3u8 manifests from AbemaTV")
  378. self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
  379. formats = self._extract_m3u8_formats(
  380. m3u8_url, video_id, ext='mp4', live=is_live)
  381. info.update({
  382. 'id': video_id,
  383. 'title': title,
  384. 'description': description,
  385. 'formats': formats,
  386. 'is_live': is_live,
  387. })
  388. return info
  389. class AbemaTVTitleIE(AbemaTVBaseIE):
  390. _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
  391. _TESTS = [{
  392. 'url': 'https://abema.tv/video/title/90-1597',
  393. 'info_dict': {
  394. 'id': '90-1597',
  395. 'title': 'シャッフルアイランド',
  396. },
  397. 'playlist_mincount': 2,
  398. }, {
  399. 'url': 'https://abema.tv/video/title/193-132',
  400. 'info_dict': {
  401. 'id': '193-132',
  402. 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
  403. },
  404. 'playlist_mincount': 16,
  405. }]
  406. def _real_extract(self, url):
  407. video_id = self._match_id(url)
  408. webpage = self._download_webpage(url, video_id)
  409. playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id)
  410. if breadcrumb:
  411. playlist_title = breadcrumb[-1]
  412. playlist = [
  413. self.url_result(urljoin('https://abema.tv/', mobj.group(1)))
  414. for mobj in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage)]
  415. return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id)