iwara.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. import functools
  2. import hashlib
  3. import json
  4. import time
  5. import urllib.parse
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. ExtractorError,
  9. OnDemandPagedList,
  10. int_or_none,
  11. jwt_decode_hs256,
  12. mimetype2ext,
  13. qualities,
  14. traverse_obj,
  15. try_call,
  16. unified_timestamp,
  17. )
  18. class IwaraBaseIE(InfoExtractor):
  19. _NETRC_MACHINE = 'iwara'
  20. _USERTOKEN = None
  21. _MEDIATOKEN = None
  22. def _is_token_expired(self, token, token_type):
  23. # User token TTL == ~3 weeks, Media token TTL == ~1 hour
  24. if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
  25. self.to_screen(f'{token_type} token has expired')
  26. return True
  27. def _get_user_token(self):
  28. username, password = self._get_login_info()
  29. if not username or not password:
  30. return
  31. user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
  32. if not user_token or self._is_token_expired(user_token, 'User'):
  33. response = self._download_json(
  34. 'https://api.iwara.tv/user/login', None, note='Logging in',
  35. headers={'Content-Type': 'application/json'}, data=json.dumps({
  36. 'email': username,
  37. 'password': password,
  38. }).encode(), expected_status=lambda x: True)
  39. user_token = traverse_obj(response, ('token', {str}))
  40. if not user_token:
  41. error = traverse_obj(response, ('message', {str}))
  42. if 'invalidLogin' in error:
  43. raise ExtractorError('Invalid login credentials', expected=True)
  44. else:
  45. raise ExtractorError(f'Iwara API said: {error or "nothing"}')
  46. self.cache.store(self._NETRC_MACHINE, username, user_token)
  47. IwaraBaseIE._USERTOKEN = user_token
  48. def _get_media_token(self):
  49. self._get_user_token()
  50. if not IwaraBaseIE._USERTOKEN:
  51. return # user has not passed credentials
  52. if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
  53. IwaraBaseIE._MEDIATOKEN = self._download_json(
  54. 'https://api.iwara.tv/user/token', None, note='Fetching media token',
  55. data=b'', headers={
  56. 'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
  57. 'Content-Type': 'application/json',
  58. })['accessToken']
  59. return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}
  60. def _perform_login(self, username, password):
  61. self._get_media_token()
  62. class IwaraIE(IwaraBaseIE):
  63. IE_NAME = 'iwara'
  64. _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
  65. _TESTS = [{
  66. 'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
  67. 'info_dict': {
  68. 'id': 'k2ayoueezfkx6gvq',
  69. 'ext': 'mp4',
  70. 'age_limit': 18,
  71. 'title': 'Defeat of Irybelda - アイリベルダの敗北',
  72. 'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
  73. 'uploader': 'Inwerwm',
  74. 'uploader_id': 'inwerwm',
  75. 'tags': 'count:1',
  76. 'like_count': 6133,
  77. 'view_count': 1050343,
  78. 'comment_count': 1,
  79. 'timestamp': 1677843869,
  80. 'modified_timestamp': 1679056362,
  81. },
  82. 'skip': 'this video cannot be played because of migration',
  83. }, {
  84. 'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
  85. 'md5': '7645f966f069b8ec9210efd9130c9aad',
  86. 'info_dict': {
  87. 'id': '1ywe1sbkqwumpdxz5',
  88. 'ext': 'mp4',
  89. 'age_limit': 18,
  90. 'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
  91. 'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
  92. 'uploader': 'Lyu ya',
  93. 'uploader_id': 'user792540',
  94. 'tags': [
  95. 'uncategorized',
  96. ],
  97. 'like_count': int,
  98. 'view_count': int,
  99. 'comment_count': int,
  100. 'timestamp': 1678732213,
  101. 'modified_timestamp': int,
  102. 'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
  103. 'modified_date': '20230614',
  104. 'upload_date': '20230313',
  105. },
  106. }, {
  107. 'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
  108. 'info_dict': {
  109. 'id': 'blggmfno8ghl725bg',
  110. 'ext': 'mp4',
  111. 'age_limit': 18,
  112. 'title': 'お外でおしっこしちゃう猫耳ロリメイド',
  113. 'description': 'md5:0342ba9bf6db09edbbb28729657c3611',
  114. 'uploader': 'Fe_Kurosabi',
  115. 'uploader_id': 'fekurosabi',
  116. 'tags': [
  117. 'pee',
  118. ],
  119. 'like_count': int,
  120. 'view_count': int,
  121. 'comment_count': int,
  122. 'timestamp': 1598880567,
  123. 'modified_timestamp': int,
  124. 'upload_date': '20200831',
  125. 'modified_date': '20230605',
  126. 'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
  127. # 'availability': 'needs_auth',
  128. },
  129. }]
  130. def _extract_formats(self, video_id, fileurl):
  131. up = urllib.parse.urlparse(fileurl)
  132. q = urllib.parse.parse_qs(up.query)
  133. paths = up.path.rstrip('/').split('/')
  134. # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
  135. x_version = hashlib.sha1('_'.join((paths[-1], q['expires'][0], '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
  136. preference = qualities(['preview', '360', '540', 'Source'])
  137. files = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
  138. for fmt in files:
  139. yield traverse_obj(fmt, {
  140. 'format_id': 'name',
  141. 'url': ('src', ('view', 'download'), {self._proto_relative_url}),
  142. 'ext': ('type', {mimetype2ext}),
  143. 'quality': ('name', {preference}),
  144. 'height': ('name', {int_or_none}),
  145. }, get_all=False)
  146. def _real_extract(self, url):
  147. video_id = self._match_id(url)
  148. username, _ = self._get_login_info()
  149. video_data = self._download_json(
  150. f'https://api.iwara.tv/video/{video_id}', video_id,
  151. expected_status=lambda x: True, headers=self._get_media_token())
  152. errmsg = video_data.get('message')
  153. # at this point we can actually get uploaded user info, but do we need it?
  154. if errmsg == 'errors.privateVideo':
  155. self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
  156. elif errmsg == 'errors.notFound' and not username:
  157. self.raise_login_required('Video may need login to view', method='password')
  158. elif errmsg: # None if success
  159. raise ExtractorError(f'Iwara says: {errmsg}')
  160. if not video_data.get('fileUrl'):
  161. if video_data.get('embedUrl'):
  162. return self.url_result(video_data.get('embedUrl'))
  163. raise ExtractorError('This video is unplayable', expected=True)
  164. return {
  165. 'id': video_id,
  166. 'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0, # ecchi is 'sexy' in Japanese
  167. **traverse_obj(video_data, {
  168. 'title': 'title',
  169. 'description': 'body',
  170. 'uploader': ('user', 'name'),
  171. 'uploader_id': ('user', 'username'),
  172. 'tags': ('tags', ..., 'id'),
  173. 'like_count': 'numLikes',
  174. 'view_count': 'numViews',
  175. 'comment_count': 'numComments',
  176. 'timestamp': ('createdAt', {unified_timestamp}),
  177. 'modified_timestamp': ('updatedAt', {unified_timestamp}),
  178. 'thumbnail': ('file', 'id', {str}, {
  179. lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
  180. }),
  181. 'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
  182. }
  183. class IwaraUserIE(IwaraBaseIE):
  184. _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
  185. IE_NAME = 'iwara:user'
  186. _PER_PAGE = 32
  187. _TESTS = [{
  188. 'url': 'https://iwara.tv/profile/user792540/videos',
  189. 'info_dict': {
  190. 'id': 'user792540',
  191. 'title': 'Lyu ya',
  192. },
  193. 'playlist_mincount': 70,
  194. }, {
  195. 'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
  196. 'info_dict': {
  197. 'id': 'theblackbirdcalls',
  198. 'title': 'TheBlackbirdCalls',
  199. },
  200. 'playlist_mincount': 723,
  201. }, {
  202. 'url': 'https://iwara.tv/profile/user792540',
  203. 'only_matching': True,
  204. }, {
  205. 'url': 'https://iwara.tv/profile/theblackbirdcalls',
  206. 'only_matching': True,
  207. }, {
  208. 'url': 'https://www.iwara.tv/profile/lumymmd',
  209. 'info_dict': {
  210. 'id': 'lumymmd',
  211. 'title': 'Lumy MMD',
  212. },
  213. 'playlist_mincount': 1,
  214. }]
  215. def _entries(self, playlist_id, user_id, page):
  216. videos = self._download_json(
  217. 'https://api.iwara.tv/videos', playlist_id,
  218. note=f'Downloading page {page}',
  219. query={
  220. 'page': page,
  221. 'sort': 'date',
  222. 'user': user_id,
  223. 'limit': self._PER_PAGE,
  224. }, headers=self._get_media_token())
  225. for x in traverse_obj(videos, ('results', ..., 'id')):
  226. yield self.url_result(f'https://iwara.tv/video/{x}')
  227. def _real_extract(self, url):
  228. playlist_id = self._match_id(url)
  229. user_info = self._download_json(
  230. f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
  231. note='Requesting user info')
  232. user_id = traverse_obj(user_info, ('user', 'id'))
  233. return self.playlist_result(
  234. OnDemandPagedList(
  235. functools.partial(self._entries, playlist_id, user_id),
  236. self._PER_PAGE),
  237. playlist_id, traverse_obj(user_info, ('user', 'name')))
  238. class IwaraPlaylistIE(IwaraBaseIE):
  239. _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
  240. IE_NAME = 'iwara:playlist'
  241. _PER_PAGE = 32
  242. _TESTS = [{
  243. 'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
  244. 'info_dict': {
  245. 'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
  246. },
  247. 'playlist_mincount': 3,
  248. }]
  249. def _entries(self, playlist_id, first_page, page):
  250. videos = self._download_json(
  251. 'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
  252. query={'page': page, 'limit': self._PER_PAGE},
  253. headers=self._get_media_token()) if page else first_page
  254. for x in traverse_obj(videos, ('results', ..., 'id')):
  255. yield self.url_result(f'https://iwara.tv/video/{x}')
  256. def _real_extract(self, url):
  257. playlist_id = self._match_id(url)
  258. page_0 = self._download_json(
  259. f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
  260. note='Requesting playlist info', headers=self._get_media_token())
  261. return self.playlist_result(
  262. OnDemandPagedList(
  263. functools.partial(self._entries, playlist_id, page_0),
  264. self._PER_PAGE),
  265. playlist_id, traverse_obj(page_0, ('title', 'name')))