hgtv.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. from .common import InfoExtractor
  2. class HGTVComShowIE(InfoExtractor):
  3. IE_NAME = 'hgtv.com:show'
  4. _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
  5. _TESTS = [{
  6. # data-module="video"
  7. 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
  8. 'info_dict': {
  9. 'id': 'flip-or-flop-full-episodes-season-4-videos',
  10. 'title': 'Flip or Flop Full Episodes',
  11. },
  12. 'playlist_mincount': 15,
  13. }, {
  14. # data-deferred-module="video"
  15. 'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
  16. 'only_matching': True,
  17. }]
  18. def _real_extract(self, url):
  19. display_id = self._match_id(url)
  20. webpage = self._download_webpage(url, display_id)
  21. config = self._parse_json(
  22. self._search_regex(
  23. r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
  24. webpage, 'video config'),
  25. display_id)['channels'][0]
  26. entries = [
  27. self.url_result(video['releaseUrl'])
  28. for video in config['videos'] if video.get('releaseUrl')]
  29. return self.playlist_result(
  30. entries, display_id, config.get('title'), config.get('description'))