acast.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. clean_podcast_url,
  5. int_or_none,
  6. parse_iso8601,
  7. )
  8. class ACastBaseIE(InfoExtractor):
  9. def _extract_episode(self, episode, show_info):
  10. title = episode['title']
  11. info = {
  12. 'id': episode['id'],
  13. 'display_id': episode.get('episodeUrl'),
  14. 'url': clean_podcast_url(episode['url']),
  15. 'title': title,
  16. 'description': clean_html(episode.get('description') or episode.get('summary')),
  17. 'thumbnail': episode.get('image'),
  18. 'timestamp': parse_iso8601(episode.get('publishDate')),
  19. 'duration': int_or_none(episode.get('duration')),
  20. 'filesize': int_or_none(episode.get('contentLength')),
  21. 'season_number': int_or_none(episode.get('season')),
  22. 'episode': title,
  23. 'episode_number': int_or_none(episode.get('episode')),
  24. }
  25. info.update(show_info)
  26. return info
  27. def _extract_show_info(self, show):
  28. return {
  29. 'creator': show.get('author'),
  30. 'series': show.get('title'),
  31. }
  32. def _call_api(self, path, video_id, query=None):
  33. return self._download_json(
  34. 'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
  35. class ACastIE(ACastBaseIE):
  36. IE_NAME = 'acast'
  37. _VALID_URL = r'''(?x:
  38. https?://
  39. (?:
  40. (?:(?:embed|www)\.)?acast\.com/|
  41. play\.acast\.com/s/
  42. )
  43. (?P<channel>[^/]+)/(?P<id>[^/#?"]+)
  44. )'''
  45. _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
  46. _TESTS = [{
  47. 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
  48. 'info_dict': {
  49. 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
  50. 'ext': 'mp3',
  51. 'title': '2. Raggarmordet - Röster ur det förflutna',
  52. 'description': 'md5:013959207e05011ad14a222cf22278cc',
  53. 'timestamp': 1477346700,
  54. 'upload_date': '20161024',
  55. 'duration': 2766,
  56. 'creator': 'Third Ear Studio',
  57. 'series': 'Spår',
  58. 'episode': '2. Raggarmordet - Röster ur det förflutna',
  59. 'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
  60. 'episode_number': 2,
  61. 'display_id': '2.raggarmordet-rosterurdetforflutna',
  62. 'season_number': 4,
  63. 'season': 'Season 4',
  64. },
  65. }, {
  66. 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
  67. 'only_matching': True,
  68. }, {
  69. 'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
  70. 'only_matching': True,
  71. }, {
  72. 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
  73. 'only_matching': True,
  74. }]
  75. _WEBPAGE_TESTS = [{
  76. 'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
  77. 'info_dict': {
  78. 'id': '646c68fb21fbf20011e9c651',
  79. 'ext': 'mp3',
  80. 'creator': 'The Australian National University',
  81. 'display_id': 'can-labor-be-a-long-form-government',
  82. 'duration': 2618,
  83. 'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
  84. 'title': 'Can Labor be a long-form government?',
  85. 'episode': 'Can Labor be a long-form government?',
  86. 'upload_date': '20230523',
  87. 'series': 'Democracy Sausage with Mark Kenny',
  88. 'timestamp': 1684826362,
  89. 'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
  90. },
  91. }]
  92. def _real_extract(self, url):
  93. channel, display_id = self._match_valid_url(url).groups()
  94. episode = self._call_api(
  95. f'{channel}/episodes/{display_id}',
  96. display_id, {'showInfo': 'true'})
  97. return self._extract_episode(
  98. episode, self._extract_show_info(episode.get('show') or {}))
  99. class ACastChannelIE(ACastBaseIE):
  100. IE_NAME = 'acast:channel'
  101. _VALID_URL = r'''(?x)
  102. https?://
  103. (?:
  104. (?:www\.)?acast\.com/|
  105. play\.acast\.com/s/
  106. )
  107. (?P<id>[^/#?]+)
  108. '''
  109. _TESTS = [{
  110. 'url': 'https://www.acast.com/todayinfocus',
  111. 'info_dict': {
  112. 'id': '4efc5294-5385-4847-98bd-519799ce5786',
  113. 'title': 'Today in Focus',
  114. 'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
  115. },
  116. 'playlist_mincount': 200,
  117. }, {
  118. 'url': 'http://play.acast.com/s/ft-banking-weekly',
  119. 'only_matching': True,
  120. }]
  121. @classmethod
  122. def suitable(cls, url):
  123. return False if ACastIE.suitable(url) else super().suitable(url)
  124. def _real_extract(self, url):
  125. show_slug = self._match_id(url)
  126. show = self._call_api(show_slug, show_slug)
  127. show_info = self._extract_show_info(show)
  128. entries = []
  129. for episode in (show.get('episodes') or []):
  130. entries.append(self._extract_episode(episode, show_info))
  131. return self.playlist_result(
  132. entries, show.get('id'), show.get('title'), show.get('description'))