atvat.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import datetime as dt
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. float_or_none,
  6. jwt_encode_hs256,
  7. try_get,
  8. )
  9. class ATVAtIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
  11. _TESTS = [{
  12. 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
  13. 'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
  14. 'info_dict': {
  15. 'id': 'v-ce9cgn1e70n5-1',
  16. 'ext': 'mp4',
  17. 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
  18. },
  19. }, {
  20. 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
  21. 'only_matching': True,
  22. }]
  23. # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
  24. _ACCESS_ID = 'x_atv'
  25. _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'
  26. def _extract_video_info(self, url, content, video):
  27. clip_id = content.get('splitId', content['id'])
  28. formats = []
  29. clip_urls = video['urls']
  30. for protocol, variant in clip_urls.items():
  31. source_url = try_get(variant, lambda x: x['clear']['url'])
  32. if not source_url:
  33. continue
  34. if protocol == 'dash':
  35. formats.extend(self._extract_mpd_formats(
  36. source_url, clip_id, mpd_id=protocol, fatal=False))
  37. elif protocol == 'hls':
  38. formats.extend(self._extract_m3u8_formats(
  39. source_url, clip_id, 'mp4', 'm3u8_native',
  40. m3u8_id=protocol, fatal=False))
  41. else:
  42. formats.append({
  43. 'url': source_url,
  44. 'format_id': protocol,
  45. })
  46. return {
  47. 'id': clip_id,
  48. 'title': content.get('title'),
  49. 'duration': float_or_none(content.get('duration')),
  50. 'series': content.get('tvShowTitle'),
  51. 'formats': formats,
  52. }
  53. def _real_extract(self, url):
  54. video_id = self._match_id(url)
  55. webpage = self._download_webpage(url, video_id)
  56. json_data = self._parse_json(
  57. self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
  58. video_id=video_id)
  59. video_title = json_data['views']['default']['page']['title']
  60. content_resource = json_data['views']['default']['page']['contentResource']
  61. content_id = content_resource[0]['id']
  62. content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
  63. for id_, content in enumerate(content_resource)]
  64. time_of_request = dt.datetime.now()
  65. not_before = time_of_request - dt.timedelta(minutes=5)
  66. expire = time_of_request + dt.timedelta(minutes=5)
  67. payload = {
  68. 'content_ids': {
  69. content_id: content_ids,
  70. },
  71. 'secure_delivery': True,
  72. 'iat': int(time_of_request.timestamp()),
  73. 'nbf': int(not_before.timestamp()),
  74. 'exp': int(expire.timestamp()),
  75. }
  76. jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
  77. videos = self._download_json(
  78. 'https://vas-v4.p7s1video.net/4.0/getsources',
  79. content_id, 'Downloading videos JSON', query={
  80. 'token': jwt_token.decode('utf-8'),
  81. })
  82. video_id, videos_data = next(iter(videos['data'].items()))
  83. error_msg = try_get(videos_data, lambda x: x['error']['title'])
  84. if error_msg == 'Geo check failed':
  85. self.raise_geo_restricted(error_msg)
  86. elif error_msg:
  87. raise ExtractorError(error_msg)
  88. entries = [
  89. self._extract_video_info(url, content_resource[video['id']], video)
  90. for video in videos_data]
  91. return {
  92. '_type': 'multi_video',
  93. 'id': video_id,
  94. 'title': video_title,
  95. 'entries': entries,
  96. }