yandexmusic.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. import hashlib
  2. import itertools
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. float_or_none,
  7. int_or_none,
  8. try_get,
  9. )
  10. class YandexMusicBaseIE(InfoExtractor):
  11. _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
  12. @staticmethod
  13. def _handle_error(response):
  14. if isinstance(response, dict):
  15. error = response.get('error')
  16. if error:
  17. raise ExtractorError(error, expected=True)
  18. if response.get('type') == 'captcha' or 'captcha' in response:
  19. YandexMusicBaseIE._raise_captcha()
  20. @staticmethod
  21. def _raise_captcha():
  22. raise ExtractorError(
  23. 'YandexMusic has considered yt-dlp requests automated and '
  24. 'asks you to solve a CAPTCHA. You can either wait for some '
  25. 'time until unblocked and optionally use --sleep-interval '
  26. 'in future or alternatively you can go to https://music.yandex.ru/ '
  27. 'solve CAPTCHA, then export cookies and pass cookie file to '
  28. 'yt-dlp with --cookies',
  29. expected=True)
  30. def _download_webpage_handle(self, *args, **kwargs):
  31. webpage = super()._download_webpage_handle(*args, **kwargs)
  32. if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
  33. self._raise_captcha()
  34. return webpage
  35. def _download_json(self, *args, **kwargs):
  36. response = super()._download_json(*args, **kwargs)
  37. self._handle_error(response)
  38. return response
  39. def _call_api(self, ep, tld, url, item_id, note, query):
  40. return self._download_json(
  41. f'https://music.yandex.{tld}/handlers/{ep}.jsx',
  42. item_id, note,
  43. fatal=False,
  44. headers={
  45. 'Referer': url,
  46. 'X-Requested-With': 'XMLHttpRequest',
  47. 'X-Retpath-Y': url,
  48. },
  49. query=query)
  50. class YandexMusicTrackIE(YandexMusicBaseIE):
  51. IE_NAME = 'yandexmusic:track'
  52. IE_DESC = 'Яндекс.Музыка - Трек'
  53. _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
  54. _TESTS = [{
  55. 'url': 'http://music.yandex.ru/album/540508/track/4878838',
  56. 'md5': 'dec8b661f12027ceaba33318787fff76',
  57. 'info_dict': {
  58. 'id': '4878838',
  59. 'ext': 'mp3',
  60. 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
  61. 'filesize': int,
  62. 'duration': 193.04,
  63. 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
  64. 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
  65. 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
  66. 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
  67. 'release_year': 2009,
  68. },
  69. # 'skip': 'Travis CI servers blocked by YandexMusic',
  70. }, {
  71. # multiple disks
  72. 'url': 'http://music.yandex.ru/album/3840501/track/705105',
  73. 'md5': '82a54e9e787301dd45aba093cf6e58c0',
  74. 'info_dict': {
  75. 'id': '705105',
  76. 'ext': 'mp3',
  77. 'title': 'md5:f86d4a9188279860a83000277024c1a6',
  78. 'filesize': int,
  79. 'duration': 239.27,
  80. 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
  81. 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
  82. 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  83. 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  84. 'release_year': 2016,
  85. 'genre': 'pop',
  86. 'disc_number': 2,
  87. 'track_number': 9,
  88. },
  89. # 'skip': 'Travis CI servers blocked by YandexMusic',
  90. }, {
  91. 'url': 'http://music.yandex.com/album/540508/track/4878838',
  92. 'only_matching': True,
  93. }]
  94. def _real_extract(self, url):
  95. mobj = self._match_valid_url(url)
  96. tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
  97. track = self._call_api(
  98. 'track', tld, url, track_id, 'Downloading track JSON',
  99. {'track': f'{track_id}:{album_id}'})['track']
  100. track_title = track['title']
  101. download_data = self._download_json(
  102. f'https://music.yandex.ru/api/v2.1/handlers/track/{track_id}:{album_id}/web-album_track-track-track-main/download/m',
  103. track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})
  104. fd_data = self._download_json(
  105. download_data['src'], track_id,
  106. 'Downloading track location JSON',
  107. query={'format': 'json'})
  108. key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode()).hexdigest()
  109. f_url = 'http://{}/get-mp3/{}/{}?track-id={} '.format(fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
  110. thumbnail = None
  111. cover_uri = track.get('albums', [{}])[0].get('coverUri')
  112. if cover_uri:
  113. thumbnail = cover_uri.replace('%%', 'orig')
  114. if not thumbnail.startswith('http'):
  115. thumbnail = 'http://' + thumbnail
  116. track_info = {
  117. 'id': track_id,
  118. 'ext': 'mp3',
  119. 'url': f_url,
  120. 'filesize': int_or_none(track.get('fileSize')),
  121. 'duration': float_or_none(track.get('durationMs'), 1000),
  122. 'thumbnail': thumbnail,
  123. 'track': track_title,
  124. 'acodec': download_data.get('codec'),
  125. 'abr': int_or_none(download_data.get('bitrate')),
  126. }
  127. def extract_artist_name(artist):
  128. decomposed = artist.get('decomposed')
  129. if not isinstance(decomposed, list):
  130. return artist['name']
  131. parts = [artist['name']]
  132. for element in decomposed:
  133. if isinstance(element, dict) and element.get('name'):
  134. parts.append(element['name'])
  135. elif isinstance(element, str):
  136. parts.append(element)
  137. return ''.join(parts)
  138. def extract_artist(artist_list):
  139. if artist_list and isinstance(artist_list, list):
  140. artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
  141. if artists_names:
  142. return ', '.join(artists_names)
  143. albums = track.get('albums')
  144. if albums and isinstance(albums, list):
  145. album = albums[0]
  146. if isinstance(album, dict):
  147. year = album.get('year')
  148. disc_number = int_or_none(try_get(
  149. album, lambda x: x['trackPosition']['volume']))
  150. track_number = int_or_none(try_get(
  151. album, lambda x: x['trackPosition']['index']))
  152. track_info.update({
  153. 'album': album.get('title'),
  154. 'album_artist': extract_artist(album.get('artists')),
  155. 'release_year': int_or_none(year),
  156. 'genre': album.get('genre'),
  157. 'disc_number': disc_number,
  158. 'track_number': track_number,
  159. })
  160. track_artist = extract_artist(track.get('artists'))
  161. if track_artist:
  162. track_info.update({
  163. 'artist': track_artist,
  164. 'title': f'{track_artist} - {track_title}',
  165. })
  166. else:
  167. track_info['title'] = track_title
  168. return track_info
  169. class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
  170. def _extract_tracks(self, source, item_id, url, tld):
  171. tracks = source['tracks']
  172. track_ids = [str(track_id) for track_id in source['trackIds']]
  173. # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
  174. # missing tracks should be retrieved manually.
  175. if len(tracks) < len(track_ids):
  176. present_track_ids = {
  177. str(track['id'])
  178. for track in tracks if track.get('id')}
  179. missing_track_ids = [
  180. track_id for track_id in track_ids
  181. if track_id not in present_track_ids]
  182. # Request missing tracks in chunks to avoid exceeding max HTTP header size,
  183. # see https://github.com/ytdl-org/youtube-dl/issues/27355
  184. _TRACKS_PER_CHUNK = 250
  185. for chunk_num in itertools.count(0):
  186. start = chunk_num * _TRACKS_PER_CHUNK
  187. end = start + _TRACKS_PER_CHUNK
  188. missing_track_ids_req = missing_track_ids[start:end]
  189. assert missing_track_ids_req
  190. missing_tracks = self._call_api(
  191. 'track-entries', tld, url, item_id,
  192. f'Downloading missing tracks JSON chunk {chunk_num + 1}', {
  193. 'entries': ','.join(missing_track_ids_req),
  194. 'lang': tld,
  195. 'external-domain': f'music.yandex.{tld}',
  196. 'overembed': 'false',
  197. 'strict': 'true',
  198. })
  199. if missing_tracks:
  200. tracks.extend(missing_tracks)
  201. if end >= len(missing_track_ids):
  202. break
  203. return tracks
  204. def _build_playlist(self, tracks):
  205. entries = []
  206. for track in tracks:
  207. track_id = track.get('id') or track.get('realId')
  208. if not track_id:
  209. continue
  210. albums = track.get('albums')
  211. if not albums or not isinstance(albums, list):
  212. continue
  213. album = albums[0]
  214. if not isinstance(album, dict):
  215. continue
  216. album_id = album.get('id')
  217. if not album_id:
  218. continue
  219. entries.append(self.url_result(
  220. f'http://music.yandex.ru/album/{album_id}/track/{track_id}',
  221. ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
  222. return entries
  223. class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
  224. IE_NAME = 'yandexmusic:album'
  225. IE_DESC = 'Яндекс.Музыка - Альбом'
  226. _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<id>\d+)'
  227. _TESTS = [{
  228. 'url': 'http://music.yandex.ru/album/540508',
  229. 'info_dict': {
  230. 'id': '540508',
  231. 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
  232. },
  233. 'playlist_count': 50,
  234. # 'skip': 'Travis CI servers blocked by YandexMusic',
  235. }, {
  236. 'url': 'https://music.yandex.ru/album/3840501',
  237. 'info_dict': {
  238. 'id': '3840501',
  239. 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
  240. },
  241. 'playlist_count': 33,
  242. # 'skip': 'Travis CI servers blocked by YandexMusic',
  243. }, {
  244. # empty artists
  245. 'url': 'https://music.yandex.ru/album/9091882',
  246. 'info_dict': {
  247. 'id': '9091882',
  248. 'title': 'ТЕД на русском',
  249. },
  250. 'playlist_count': 187,
  251. }]
  252. @classmethod
  253. def suitable(cls, url):
  254. return False if YandexMusicTrackIE.suitable(url) else super().suitable(url)
  255. def _real_extract(self, url):
  256. mobj = self._match_valid_url(url)
  257. tld = mobj.group('tld')
  258. album_id = mobj.group('id')
  259. album = self._call_api(
  260. 'album', tld, url, album_id, 'Downloading album JSON',
  261. {'album': album_id})
  262. entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
  263. title = album['title']
  264. artist = try_get(album, lambda x: x['artists'][0]['name'], str)
  265. if artist:
  266. title = f'{artist} - {title}'
  267. year = album.get('year')
  268. if year:
  269. title += f' ({year})'
  270. return self.playlist_result(entries, str(album['id']), title)
  271. class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
  272. IE_NAME = 'yandexmusic:playlist'
  273. IE_DESC = 'Яндекс.Музыка - Плейлист'
  274. _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
  275. _TESTS = [{
  276. 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
  277. 'info_dict': {
  278. 'id': '1245',
  279. 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
  280. 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
  281. },
  282. 'playlist_count': 5,
  283. # 'skip': 'Travis CI servers blocked by YandexMusic',
  284. }, {
  285. 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
  286. 'only_matching': True,
  287. }, {
  288. # playlist exceeding the limit of 150 tracks (see
  289. # https://github.com/ytdl-org/youtube-dl/issues/6666)
  290. 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
  291. 'info_dict': {
  292. 'id': '1364',
  293. 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
  294. },
  295. 'playlist_mincount': 437,
  296. # 'skip': 'Travis CI servers blocked by YandexMusic',
  297. }]
  298. def _real_extract(self, url):
  299. mobj = self._match_valid_url(url)
  300. tld = mobj.group('tld')
  301. user = mobj.group('user')
  302. playlist_id = mobj.group('id')
  303. playlist = self._call_api(
  304. 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
  305. 'owner': user,
  306. 'kinds': playlist_id,
  307. 'light': 'true',
  308. 'lang': tld,
  309. 'external-domain': f'music.yandex.{tld}',
  310. 'overembed': 'false',
  311. })['playlist']
  312. tracks = self._extract_tracks(playlist, playlist_id, url, tld)
  313. return self.playlist_result(
  314. self._build_playlist(tracks),
  315. str(playlist_id),
  316. playlist.get('title'), playlist.get('description'))
  317. class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
  318. def _call_artist(self, tld, url, artist_id):
  319. return self._call_api(
  320. 'artist', tld, url, artist_id,
  321. f'Downloading artist {self._ARTIST_WHAT} JSON', {
  322. 'artist': artist_id,
  323. 'what': self._ARTIST_WHAT,
  324. 'sort': self._ARTIST_SORT or '',
  325. 'dir': '',
  326. 'period': '',
  327. 'lang': tld,
  328. 'external-domain': f'music.yandex.{tld}',
  329. 'overembed': 'false',
  330. })
  331. def _real_extract(self, url):
  332. mobj = self._match_valid_url(url)
  333. tld = mobj.group('tld')
  334. artist_id = mobj.group('id')
  335. data = self._call_artist(tld, url, artist_id)
  336. tracks = self._extract_tracks(data, artist_id, url, tld)
  337. title = try_get(data, lambda x: x['artist']['name'], str)
  338. return self.playlist_result(
  339. self._build_playlist(tracks), artist_id, title)
  340. class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
  341. IE_NAME = 'yandexmusic:artist:tracks'
  342. IE_DESC = 'Яндекс.Музыка - Артист - Треки'
  343. _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/tracks'
  344. _TESTS = [{
  345. 'url': 'https://music.yandex.ru/artist/617526/tracks',
  346. 'info_dict': {
  347. 'id': '617526',
  348. 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
  349. },
  350. 'playlist_count': 507,
  351. # 'skip': 'Travis CI servers blocked by YandexMusic',
  352. }]
  353. _ARTIST_SORT = ''
  354. _ARTIST_WHAT = 'tracks'
  355. def _real_extract(self, url):
  356. mobj = self._match_valid_url(url)
  357. tld = mobj.group('tld')
  358. artist_id = mobj.group('id')
  359. data = self._call_artist(tld, url, artist_id)
  360. tracks = self._extract_tracks(data, artist_id, url, tld)
  361. artist = try_get(data, lambda x: x['artist']['name'], str)
  362. title = '{} - {}'.format(artist or artist_id, 'Треки')
  363. return self.playlist_result(
  364. self._build_playlist(tracks), artist_id, title)
  365. class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
  366. IE_NAME = 'yandexmusic:artist:albums'
  367. IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
  368. _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/albums'
  369. _TESTS = [{
  370. 'url': 'https://music.yandex.ru/artist/617526/albums',
  371. 'info_dict': {
  372. 'id': '617526',
  373. 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
  374. },
  375. 'playlist_count': 8,
  376. # 'skip': 'Travis CI servers blocked by YandexMusic',
  377. }]
  378. _ARTIST_SORT = 'year'
  379. _ARTIST_WHAT = 'albums'
  380. def _real_extract(self, url):
  381. mobj = self._match_valid_url(url)
  382. tld = mobj.group('tld')
  383. artist_id = mobj.group('id')
  384. data = self._call_artist(tld, url, artist_id)
  385. entries = []
  386. for album in data['albums']:
  387. if not isinstance(album, dict):
  388. continue
  389. album_id = album.get('id')
  390. if not album_id:
  391. continue
  392. entries.append(self.url_result(
  393. f'http://music.yandex.ru/album/{album_id}',
  394. ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
  395. artist = try_get(data, lambda x: x['artist']['name'], str)
  396. title = '{} - {}'.format(artist or artist_id, 'Альбомы')
  397. return self.playlist_result(entries, artist_id, title)