spreaker.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. import itertools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. float_or_none,
  5. int_or_none,
  6. str_or_none,
  7. try_get,
  8. unified_timestamp,
  9. url_or_none,
  10. )
  11. def _extract_episode(data, episode_id=None):
  12. title = data['title']
  13. download_url = data['download_url']
  14. series = try_get(data, lambda x: x['show']['title'], str)
  15. uploader = try_get(data, lambda x: x['author']['fullname'], str)
  16. thumbnails = []
  17. for image in ('image_original', 'image_medium', 'image'):
  18. image_url = url_or_none(data.get(f'{image}_url'))
  19. if image_url:
  20. thumbnails.append({'url': image_url})
  21. def stats(key):
  22. return int_or_none(try_get(
  23. data,
  24. (lambda x: x[f'{key}s_count'],
  25. lambda x: x['stats'][f'{key}s'])))
  26. def duration(key):
  27. return float_or_none(data.get(key), scale=1000)
  28. return {
  29. 'id': str(episode_id or data['episode_id']),
  30. 'url': download_url,
  31. 'display_id': data.get('permalink'),
  32. 'title': title,
  33. 'description': data.get('description'),
  34. 'timestamp': unified_timestamp(data.get('published_at')),
  35. 'uploader': uploader,
  36. 'uploader_id': str_or_none(data.get('author_id')),
  37. 'creator': uploader,
  38. 'duration': duration('duration') or duration('length'),
  39. 'view_count': stats('play'),
  40. 'like_count': stats('like'),
  41. 'comment_count': stats('message'),
  42. 'format': 'MPEG Layer 3',
  43. 'format_id': 'mp3',
  44. 'container': 'mp3',
  45. 'ext': 'mp3',
  46. 'thumbnails': thumbnails,
  47. 'series': series,
  48. 'extractor_key': SpreakerIE.ie_key(),
  49. }
  50. class SpreakerIE(InfoExtractor):
  51. _VALID_URL = r'''(?x)
  52. https?://
  53. api\.spreaker\.com/
  54. (?:
  55. (?:download/)?episode|
  56. v2/episodes
  57. )/
  58. (?P<id>\d+)
  59. '''
  60. _TESTS = [{
  61. 'url': 'https://api.spreaker.com/episode/12534508',
  62. 'info_dict': {
  63. 'id': '12534508',
  64. 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
  65. 'ext': 'mp3',
  66. 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
  67. 'description': 'md5:0588c43e27be46423e183076fa071177',
  68. 'timestamp': 1502250336,
  69. 'upload_date': '20170809',
  70. 'uploader': 'SWM',
  71. 'uploader_id': '9780658',
  72. 'duration': 1063.42,
  73. 'view_count': int,
  74. 'like_count': int,
  75. 'comment_count': int,
  76. 'series': 'Success With Music (SWM)',
  77. },
  78. }, {
  79. 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
  80. 'only_matching': True,
  81. }, {
  82. 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
  83. 'only_matching': True,
  84. }]
  85. def _real_extract(self, url):
  86. episode_id = self._match_id(url)
  87. data = self._download_json(
  88. f'https://api.spreaker.com/v2/episodes/{episode_id}',
  89. episode_id)['response']['episode']
  90. return _extract_episode(data, episode_id)
  91. class SpreakerPageIE(InfoExtractor):
  92. _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
  93. _TESTS = [{
  94. 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
  95. 'only_matching': True,
  96. }]
  97. def _real_extract(self, url):
  98. display_id = self._match_id(url)
  99. webpage = self._download_webpage(url, display_id)
  100. episode_id = self._search_regex(
  101. (r'data-episode_id=["\'](?P<id>\d+)',
  102. r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
  103. return self.url_result(
  104. f'https://api.spreaker.com/episode/{episode_id}',
  105. ie=SpreakerIE.ie_key(), video_id=episode_id)
  106. class SpreakerShowIE(InfoExtractor):
  107. _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
  108. _TESTS = [{
  109. 'url': 'https://api.spreaker.com/show/4652058',
  110. 'info_dict': {
  111. 'id': '4652058',
  112. },
  113. 'playlist_mincount': 118,
  114. }]
  115. def _entries(self, show_id):
  116. for page_num in itertools.count(1):
  117. episodes = self._download_json(
  118. f'https://api.spreaker.com/show/{show_id}/episodes',
  119. show_id, note=f'Downloading JSON page {page_num}', query={
  120. 'page': page_num,
  121. 'max_per_page': 100,
  122. })
  123. pager = try_get(episodes, lambda x: x['response']['pager'], dict)
  124. if not pager:
  125. break
  126. results = pager.get('results')
  127. if not results or not isinstance(results, list):
  128. break
  129. for result in results:
  130. if not isinstance(result, dict):
  131. continue
  132. yield _extract_episode(result)
  133. if page_num == pager.get('last_page'):
  134. break
  135. def _real_extract(self, url):
  136. show_id = self._match_id(url)
  137. return self.playlist_result(self._entries(show_id), playlist_id=show_id)
  138. class SpreakerShowPageIE(InfoExtractor):
  139. _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
  140. _TESTS = [{
  141. 'url': 'https://www.spreaker.com/show/success-with-music',
  142. 'only_matching': True,
  143. }]
  144. def _real_extract(self, url):
  145. display_id = self._match_id(url)
  146. webpage = self._download_webpage(url, display_id)
  147. show_id = self._search_regex(
  148. r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
  149. return self.url_result(
  150. f'https://api.spreaker.com/show/{show_id}',
  151. ie=SpreakerShowIE.ie_key(), video_id=show_id)