megaphone.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from .common import InfoExtractor
  2. from ..utils import js_to_json
  3. class MegaphoneIE(InfoExtractor):
  4. IE_NAME = 'megaphone.fm'
  5. IE_DESC = 'megaphone.fm embedded players'
  6. _VALID_URL = r'https?://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
  7. _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
  8. _TEST = {
  9. 'url': 'https://player.megaphone.fm/GLT9749789991',
  10. 'md5': '4816a0de523eb3e972dc0dda2c191f96',
  11. 'info_dict': {
  12. 'id': 'GLT9749789991',
  13. 'ext': 'mp3',
  14. 'title': '#97 What Kind Of Idiot Gets Phished?',
  15. 'thumbnail': r're:^https://.*\.png.*$',
  16. 'duration': 1998.36,
  17. 'creators': ['Reply All'],
  18. },
  19. }
  20. def _real_extract(self, url):
  21. video_id = self._match_id(url)
  22. webpage = self._download_webpage(url, video_id)
  23. title = self._og_search_property('audio:title', webpage)
  24. author = self._og_search_property('audio:artist', webpage)
  25. thumbnail = self._og_search_thumbnail(webpage)
  26. episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
  27. episode_data = self._parse_json(episode_json, video_id, js_to_json)
  28. video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
  29. formats = [{
  30. 'url': video_url,
  31. }]
  32. return {
  33. 'id': video_id,
  34. 'thumbnail': thumbnail,
  35. 'title': title,
  36. 'creators': [author] if author else None,
  37. 'duration': episode_data['duration'],
  38. 'formats': formats,
  39. }