jiosaavn.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. import functools
  2. import math
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. InAdvancePagedList,
  7. clean_html,
  8. int_or_none,
  9. make_archive_id,
  10. smuggle_url,
  11. unsmuggle_url,
  12. url_basename,
  13. url_or_none,
  14. urlencode_postdata,
  15. )
  16. from ..utils.traversal import traverse_obj
  17. class JioSaavnBaseIE(InfoExtractor):
  18. _API_URL = 'https://www.jiosaavn.com/api.php'
  19. _VALID_BITRATES = {'16', '32', '64', '128', '320'}
  20. @functools.cached_property
  21. def requested_bitrates(self):
  22. requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
  23. if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES:
  24. raise ValueError(
  25. f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
  26. f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
  27. return requested_bitrates
  28. def _extract_formats(self, song_data):
  29. for bitrate in self.requested_bitrates:
  30. media_data = self._download_json(
  31. self._API_URL, song_data['id'],
  32. f'Downloading format info for {bitrate}',
  33. fatal=False, data=urlencode_postdata({
  34. '__call': 'song.generateAuthToken',
  35. '_format': 'json',
  36. 'bitrate': bitrate,
  37. 'url': song_data['encrypted_media_url'],
  38. }))
  39. if not traverse_obj(media_data, ('auth_url', {url_or_none})):
  40. self.report_warning(f'Unable to extract format info for {bitrate}')
  41. continue
  42. ext = media_data.get('type')
  43. yield {
  44. 'url': media_data['auth_url'],
  45. 'ext': 'm4a' if ext == 'mp4' else ext,
  46. 'format_id': bitrate,
  47. 'abr': int(bitrate),
  48. 'vcodec': 'none',
  49. }
  50. def _extract_song(self, song_data, url=None):
  51. info = traverse_obj(song_data, {
  52. 'id': ('id', {str}),
  53. 'title': ('song', {clean_html}),
  54. 'album': ('album', {clean_html}),
  55. 'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
  56. 'duration': ('duration', {int_or_none}),
  57. 'view_count': ('play_count', {int_or_none}),
  58. 'release_year': ('year', {int_or_none}),
  59. 'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
  60. 'webpage_url': ('perma_url', {url_or_none}),
  61. })
  62. if webpage_url := info.get('webpage_url') or url:
  63. info['display_id'] = url_basename(webpage_url)
  64. info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
  65. return info
  66. def _call_api(self, type_, token, note='API', params={}):
  67. return self._download_json(
  68. self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
  69. query={
  70. '__call': 'webapi.get',
  71. '_format': 'json',
  72. '_marker': '0',
  73. 'ctx': 'web6dot0',
  74. 'token': token,
  75. 'type': type_,
  76. **params,
  77. })
  78. def _yield_songs(self, playlist_data):
  79. for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
  80. song_info = self._extract_song(song_data)
  81. url = smuggle_url(song_info['webpage_url'], {
  82. 'id': song_data['id'],
  83. 'encrypted_media_url': song_data['encrypted_media_url'],
  84. })
  85. yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
  86. class JioSaavnSongIE(JioSaavnBaseIE):
  87. IE_NAME = 'jiosaavn:song'
  88. _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
  89. _TESTS = [{
  90. 'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
  91. 'md5': '3b84396d15ed9e083c3106f1fa589c04',
  92. 'info_dict': {
  93. 'id': 'IcoLuefJ',
  94. 'display_id': 'OQsEfQFVUXk',
  95. 'ext': 'm4a',
  96. 'title': 'Leja Re',
  97. 'album': 'Leja Re',
  98. 'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
  99. 'duration': 205,
  100. 'view_count': int,
  101. 'release_year': 2018,
  102. 'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
  103. '_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
  104. },
  105. }, {
  106. 'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
  107. 'only_matching': True,
  108. }]
  109. def _real_extract(self, url):
  110. url, smuggled_data = unsmuggle_url(url)
  111. song_data = traverse_obj(smuggled_data, ({
  112. 'id': ('id', {str}),
  113. 'encrypted_media_url': ('encrypted_media_url', {str}),
  114. }))
  115. if 'id' in song_data and 'encrypted_media_url' in song_data:
  116. result = {'id': song_data['id']}
  117. else:
  118. # only extract metadata if this is not a url_transparent result
  119. song_data = self._call_api('song', self._match_id(url))['songs'][0]
  120. result = self._extract_song(song_data, url)
  121. result['formats'] = list(self._extract_formats(song_data))
  122. return result
  123. class JioSaavnAlbumIE(JioSaavnBaseIE):
  124. IE_NAME = 'jiosaavn:album'
  125. _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
  126. _TESTS = [{
  127. 'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
  128. 'info_dict': {
  129. 'id': 'buIOjYZDrNA_',
  130. 'title': '96',
  131. },
  132. 'playlist_count': 10,
  133. }]
  134. def _real_extract(self, url):
  135. display_id = self._match_id(url)
  136. album_data = self._call_api('album', display_id)
  137. return self.playlist_result(
  138. self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
  139. class JioSaavnPlaylistIE(JioSaavnBaseIE):
  140. IE_NAME = 'jiosaavn:playlist'
  141. _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
  142. _TESTS = [{
  143. 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
  144. 'info_dict': {
  145. 'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
  146. 'title': 'Mood English',
  147. },
  148. 'playlist_mincount': 301,
  149. }, {
  150. 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
  151. 'info_dict': {
  152. 'id': 'DVR,pFUOwyXqIp77B1JF,A__',
  153. 'title': 'Mood Hindi',
  154. },
  155. 'playlist_mincount': 801,
  156. }]
  157. _PAGE_SIZE = 50
  158. def _fetch_page(self, token, page):
  159. return self._call_api(
  160. 'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
  161. def _entries(self, token, first_page_data, page):
  162. page_data = first_page_data if not page else self._fetch_page(token, page + 1)
  163. yield from self._yield_songs(page_data)
  164. def _real_extract(self, url):
  165. display_id = self._match_id(url)
  166. playlist_data = self._fetch_page(display_id, 1)
  167. total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
  168. return self.playlist_result(InAdvancePagedList(
  169. functools.partial(self._entries, display_id, playlist_data),
  170. total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))