tvnoe.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. get_element_by_class,
  5. js_to_json,
  6. )
  class TVNoeIE(InfoExtractor):
      """Extractor for video pages matching ``tvnoe.cz/video/<numeric id>``."""

      # Flag read by the extractor framework; set to False here.
      # NOTE(review): presumably marks the extractor as known-broken -- confirm
      # against the InfoExtractor base class.
      _WORKING = False
      # The numeric path component is captured as the video id.
      _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
      _TEST = {
          'url': 'http://www.tvnoe.cz/video/10362',
          'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
          'info_dict': {
              'id': '10362',
              'ext': 'mp4',
              'series': 'Noční univerzita',
              'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
              'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
          },
      }

      def _real_extract(self, url):
          """Build the info dict for a single video page.

          The page at *url* is downloaded, the ``src`` of the first
          ``<iframe>`` found in it is fetched as the player page, and the
          jwplayer data located there is parsed into the result.  The ``id``,
          ``title``, ``description`` and ``series`` entries are then set from
          the URL and from elements of the outer page, overriding whatever the
          jwplayer parsing produced for those keys.
          """
          vid = self._match_id(url)
          page_html = self._download_webpage(url, vid)

          # The player lives in an iframe; its src is both the next page to
          # fetch and the base URL handed to the jwplayer parser below.
          player_url = self._search_regex(
              r'<iframe[^>]+src="([^"]+)"', page_html, 'iframe URL')
          player_html = self._download_webpage(player_url, vid)

          player_config = self._find_jwplayer_data(
              player_html, vid, transform_source=js_to_json)
          result = self._parse_jwplayer_data(
              player_config, vid, require_title=False, base_url=player_url)

          # Metadata is taken from the outer page by CSS class name.
          subtitle_el = get_element_by_class(
              'field-name-field-podnazev', page_html)
          body_el = get_element_by_class('field-name-body', page_html)
          heading_el = get_element_by_class('title', page_html)

          result['id'] = vid
          result['title'] = clean_html(subtitle_el)
          result['description'] = clean_html(body_el)
          result['series'] = clean_html(heading_el)
          return result