eurosport.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. from .common import InfoExtractor
  2. from ..utils import traverse_obj
  3. class EurosportIE(InfoExtractor):
  4. _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
  5. _TESTS = [{
  6. 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
  7. 'info_dict': {
  8. 'id': '2480939',
  9. 'ext': 'mp4',
  10. 'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
  11. 'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
  12. 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
  13. 'duration': 195.0,
  14. 'display_id': 'vid1694147',
  15. 'timestamp': 1654446698,
  16. 'upload_date': '20220605',
  17. },
  18. }, {
  19. 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
  20. 'info_dict': {
  21. 'id': '2481254',
  22. 'ext': 'mp4',
  23. 'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
  24. 'duration': 130.0,
  25. 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
  26. 'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
  27. 'display_id': 'vid1694283',
  28. 'timestamp': 1654456090,
  29. 'upload_date': '20220605',
  30. },
  31. }, {
  32. # geo-fence but can bypassed by xff
  33. 'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
  34. 'info_dict': {
  35. 'id': '2582552',
  36. 'ext': 'mp4',
  37. 'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
  38. 'duration': 188.0,
  39. 'display_id': 'vid1722221',
  40. 'timestamp': 1658936167,
  41. 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
  42. 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
  43. 'upload_date': '20220727',
  44. },
  45. }, {
  46. 'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
  47. 'info_dict': {
  48. 'id': '3096477',
  49. 'ext': 'mp4',
  50. 'title': 'md5:82edc17370124c7a19b3cf518517583b',
  51. 'duration': 84.0,
  52. 'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
  53. 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
  54. 'timestamp': 1681292028,
  55. 'upload_date': '20230412',
  56. 'display_id': 'vid1896254',
  57. },
  58. }, {
  59. 'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
  60. 'info_dict': {
  61. 'id': '3149108',
  62. 'ext': 'mp4',
  63. 'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
  64. 'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
  65. 'display_id': 'vid1914115',
  66. 'timestamp': 1684403618,
  67. 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
  68. 'duration': 105.0,
  69. 'upload_date': '20230518',
  70. },
  71. }]
  72. _TOKEN = None
  73. # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
  74. # but this method require to get sha256 hash
  75. _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
  76. def _real_initialize(self):
  77. if EurosportIE._TOKEN is None:
  78. EurosportIE._TOKEN = self._download_json(
  79. 'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
  80. 'Trying to get token')['data']['attributes']['token']
  81. def _real_extract(self, url):
  82. display_id = self._match_id(url)
  83. webpage = self._download_webpage(url, display_id)
  84. json_data = self._download_json(
  85. f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
  86. display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']
  87. json_ld_data = self._search_json_ld(webpage, display_id)
  88. formats, subtitles = [], {}
  89. for stream_type in json_data['attributes']['streaming']:
  90. if stream_type == 'hls':
  91. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  92. traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
  93. elif stream_type == 'dash':
  94. fmts, subs = self._extract_mpd_formats_and_subtitles(
  95. traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
  96. elif stream_type == 'mss':
  97. fmts, subs = self._extract_ism_formats_and_subtitles(
  98. traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
  99. formats.extend(fmts)
  100. self._merge_subtitles(subs, target=subtitles)
  101. return {
  102. 'id': json_data['id'],
  103. 'title': json_ld_data.get('title') or self._og_search_title(webpage),
  104. 'display_id': display_id,
  105. 'formats': formats,
  106. 'subtitles': subtitles,
  107. 'thumbnails': json_ld_data.get('thumbnails'),
  108. 'description': (json_ld_data.get('description')
  109. or self._html_search_meta(['og:description', 'description'], webpage)),
  110. 'duration': json_ld_data.get('duration'),
  111. 'timestamp': json_ld_data.get('timestamp'),
  112. }