# golem.py (2.0 KB)

  1. import urllib.parse
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. determine_ext,
  5. )
  6. class GolemIE(InfoExtractor):
  7. _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
  8. _TEST = {
  9. 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
  10. 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
  11. 'info_dict': {
  12. 'id': '14095',
  13. 'format_id': 'high',
  14. 'ext': 'mp4',
  15. 'title': 'iPhone 6 und 6 Plus - Test',
  16. 'duration': 300.44,
  17. 'filesize': 65309548,
  18. },
  19. }
  20. _PREFIX = 'http://video.golem.de'
  21. def _real_extract(self, url):
  22. video_id = self._match_id(url)
  23. config = self._download_xml(
  24. f'https://video.golem.de/xml/{video_id}.xml', video_id)
  25. info = {
  26. 'id': video_id,
  27. 'title': config.findtext('./title', 'golem'),
  28. 'duration': self._float(config.findtext('./playtime'), 'duration'),
  29. }
  30. formats = []
  31. for e in config:
  32. url = e.findtext('./url')
  33. if not url:
  34. continue
  35. formats.append({
  36. 'format_id': str(e.tag),
  37. 'url': urllib.parse.urljoin(self._PREFIX, url),
  38. 'height': self._int(e.get('height'), 'height'),
  39. 'width': self._int(e.get('width'), 'width'),
  40. 'filesize': self._int(e.findtext('filesize'), 'filesize'),
  41. 'ext': determine_ext(e.findtext('./filename')),
  42. })
  43. info['formats'] = formats
  44. thumbnails = []
  45. for e in config.findall('.//teaser'):
  46. url = e.findtext('./url')
  47. if not url:
  48. continue
  49. thumbnails.append({
  50. 'url': urllib.parse.urljoin(self._PREFIX, url),
  51. 'width': self._int(e.get('width'), 'thumbnail width'),
  52. 'height': self._int(e.get('height'), 'thumbnail height'),
  53. })
  54. info['thumbnails'] = thumbnails
  55. return info