# drbonanza.py (1.8 KB)
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. js_to_json,
  4. parse_duration,
  5. unescapeHTML,
  6. )
  7. class DRBonanzaIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
  9. _TEST = {
  10. 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
  11. 'info_dict': {
  12. 'id': '40312',
  13. 'display_id': 'matador---0824-komme-fremmede-',
  14. 'ext': 'mp4',
  15. 'title': 'MATADOR - 08:24. "Komme fremmede".',
  16. 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
  17. 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
  18. 'duration': 4613,
  19. },
  20. }
  21. def _real_extract(self, url):
  22. mobj = self._match_valid_url(url)
  23. video_id, display_id = mobj.group('id', 'display_id')
  24. webpage = self._download_webpage(url, display_id)
  25. info = self._parse_html5_media_entries(
  26. url, webpage, display_id, m3u8_id='hls',
  27. m3u8_entry_protocol='m3u8_native')[0]
  28. asset = self._parse_json(
  29. self._search_regex(
  30. r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
  31. display_id, transform_source=js_to_json)
  32. title = unescapeHTML(asset['AssetTitle']).strip()
  33. def extract(field):
  34. return self._search_regex(
  35. rf'<div[^>]+>\s*<p>{field}:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>',
  36. webpage, field, default=None)
  37. info.update({
  38. 'id': asset.get('AssetId') or video_id,
  39. 'display_id': display_id,
  40. 'title': title,
  41. 'description': extract('Programinfo'),
  42. 'duration': parse_duration(extract('Tid')),
  43. 'thumbnail': asset.get('AssetImageUrl'),
  44. })
  45. return info