thisamericanlife.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from .common import InfoExtractor
  2. class ThisAmericanLifeIE(InfoExtractor):
  3. _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
  4. _TESTS = [{
  5. 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
  6. 'md5': '8f7d2da8926298fdfca2ee37764c11ce',
  7. 'info_dict': {
  8. 'id': '487',
  9. 'ext': 'm4a',
  10. 'title': '487: Harper High School, Part One',
  11. 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
  12. 'thumbnail': r're:^https?://.*\.jpg$',
  13. },
  14. }, {
  15. 'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
  16. 'only_matching': True,
  17. }]
  18. def _real_extract(self, url):
  19. video_id = self._match_id(url)
  20. webpage = self._download_webpage(
  21. f'http://www.thisamericanlife.org/radio-archives/episode/{video_id}', video_id)
  22. return {
  23. 'id': video_id,
  24. 'url': f'http://stream.thisamericanlife.org/{video_id}/stream/{video_id}_64k.m3u8',
  25. 'protocol': 'm3u8_native',
  26. 'ext': 'm4a',
  27. 'acodec': 'aac',
  28. 'vcodec': 'none',
  29. 'abr': 64,
  30. 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
  31. 'description': self._html_search_meta(r'description', webpage, 'description'),
  32. 'thumbnail': self._og_search_thumbnail(webpage),
  33. }