expressen.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. determine_ext,
  4. int_or_none,
  5. unescapeHTML,
  6. unified_timestamp,
  7. )
  8. class ExpressenIE(InfoExtractor):
  9. _VALID_URL = r'''(?x)
  10. https?://
  11. (?:www\.)?(?:expressen|di)\.se/
  12. (?:(?:tvspelare/video|video-?player/embed)/)?
  13. (?:tv|nyheter)/(?:[^/?#]+/)*
  14. (?P<id>[^/?#&]+)
  15. '''
  16. _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
  17. _TESTS = [{
  18. 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
  19. 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
  20. 'info_dict': {
  21. 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
  22. 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
  23. 'ext': 'mp4',
  24. 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
  25. 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
  26. 'thumbnail': r're:^https?://.*\.jpg$',
  27. 'duration': 788,
  28. 'timestamp': 1526639109,
  29. 'upload_date': '20180518',
  30. },
  31. }, {
  32. 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
  33. 'only_matching': True,
  34. }, {
  35. 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  36. 'only_matching': True,
  37. }, {
  38. 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  39. 'only_matching': True,
  40. }, {
  41. 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  42. 'only_matching': True,
  43. }, {
  44. 'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
  45. 'only_matching': True,
  46. }, {
  47. 'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
  48. 'only_matching': True,
  49. }]
  50. def _real_extract(self, url):
  51. display_id = self._match_id(url)
  52. webpage = self._download_webpage(url, display_id)
  53. def extract_data(name):
  54. return self._parse_json(
  55. self._search_regex(
  56. rf'data-{name}=(["\'])(?P<value>(?:(?!\1).)+)\1',
  57. webpage, 'info', group='value'),
  58. display_id, transform_source=unescapeHTML)
  59. info = extract_data('video-tracking-info')
  60. video_id = info['contentId']
  61. data = extract_data('article-data')
  62. stream = data['stream']
  63. if determine_ext(stream) == 'm3u8':
  64. formats = self._extract_m3u8_formats(
  65. stream, display_id, 'mp4', entry_protocol='m3u8_native',
  66. m3u8_id='hls')
  67. else:
  68. formats = [{
  69. 'url': stream,
  70. }]
  71. title = info.get('titleRaw') or data['title']
  72. description = info.get('descriptionRaw')
  73. thumbnail = info.get('socialMediaImage') or data.get('image')
  74. duration = int_or_none(info.get('videoTotalSecondsDuration')
  75. or data.get('totalSecondsDuration'))
  76. timestamp = unified_timestamp(info.get('publishDate'))
  77. return {
  78. 'id': video_id,
  79. 'display_id': display_id,
  80. 'title': title,
  81. 'description': description,
  82. 'thumbnail': thumbnail,
  83. 'duration': duration,
  84. 'timestamp': timestamp,
  85. 'formats': formats,
  86. }