bild.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. int_or_none,
  4. traverse_obj,
  5. unescapeHTML,
  6. )
  7. class BildIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
  9. IE_DESC = 'Bild.de'
  10. _TESTS = [{
  11. 'note': 'static MP4 only',
  12. 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
  13. 'md5': 'dd495cbd99f2413502a1713a1156ac8a',
  14. 'info_dict': {
  15. 'id': '38184146',
  16. 'ext': 'mp4',
  17. 'title': 'Das können die neuen iPads',
  18. 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
  19. 'thumbnail': r're:^https?://.*\.jpg$',
  20. 'duration': 196,
  21. },
  22. }, {
  23. 'note': 'static MP4 and HLS',
  24. 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
  25. 'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
  26. 'info_dict': {
  27. 'id': '85158620',
  28. 'ext': 'mp4',
  29. 'title': 'Der Sprungturm-Skandal',
  30. 'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
  31. 'thumbnail': r're:^https?://.*\.jpg$',
  32. 'duration': 69,
  33. },
  34. }]
  35. def _real_extract(self, url):
  36. video_id = self._match_id(url)
  37. video_data = self._download_json(
  38. url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
  39. formats = []
  40. for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
  41. src_type = src.get('type')
  42. if src_type == 'application/x-mpegURL':
  43. formats.extend(
  44. self._extract_m3u8_formats(
  45. src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
  46. elif src_type == 'video/mp4':
  47. formats.append({'url': src['src'], 'format_id': 'http-mp4'})
  48. else:
  49. self.report_warning(f'Skipping unsupported format type: "{src_type}"')
  50. return {
  51. 'id': video_id,
  52. 'title': unescapeHTML(video_data['title']).strip(),
  53. 'description': unescapeHTML(video_data.get('description')),
  54. 'formats': formats,
  55. 'thumbnail': video_data.get('poster'),
  56. 'duration': int_or_none(video_data.get('durationSec')),
  57. }