skyit.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import urllib.parse
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. dict_get,
  5. int_or_none,
  6. parse_duration,
  7. unified_timestamp,
  8. )
  9. class SkyItPlayerIE(InfoExtractor):
  10. IE_NAME = 'player.sky.it'
  11. _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
  12. _GEO_BYPASS = False
  13. _DOMAIN = 'sky'
  14. _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
  15. # http://static.sky.it/static/skyplayer/conf.json
  16. _TOKEN_MAP = {
  17. 'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
  18. 'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
  19. 'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
  20. 'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
  21. 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
  22. 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
  23. 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
  24. 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
  25. }
  26. def _player_url_result(self, video_id):
  27. return self.url_result(
  28. self._PLAYER_TMPL % (video_id, self._DOMAIN),
  29. SkyItPlayerIE.ie_key(), video_id)
  30. def _parse_video(self, video, video_id):
  31. title = video['title']
  32. is_live = video.get('type') == 'live'
  33. hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
  34. if not hls_url and video.get('geoblock' if is_live else 'geob'):
  35. self.raise_geo_restricted(countries=['IT'])
  36. formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
  37. return {
  38. 'id': video_id,
  39. 'title': title,
  40. 'formats': formats,
  41. 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
  42. 'description': video.get('short_desc') or None,
  43. 'timestamp': unified_timestamp(video.get('create_date')),
  44. 'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
  45. 'is_live': is_live,
  46. }
  47. def _real_extract(self, url):
  48. video_id = self._match_id(url)
  49. domain = urllib.parse.parse_qs(urllib.parse.urlparse(
  50. url).query).get('domain', [None])[0]
  51. token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
  52. video = self._download_json(
  53. 'https://apid.sky.it/vdp/v1/getVideoData',
  54. video_id, query={
  55. 'caller': 'sky',
  56. 'id': video_id,
  57. 'token': token,
  58. }, headers=self.geo_verification_headers())
  59. return self._parse_video(video, video_id)
  60. class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
  61. IE_NAME = 'video.sky.it'
  62. _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
  63. _TESTS = [{
  64. 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
  65. 'md5': '5b858a62d9ffe2ab77b397553024184a',
  66. 'info_dict': {
  67. 'id': '631227',
  68. 'ext': 'mp4',
  69. 'title': 'Uomo ucciso da uno squalo in Australia',
  70. 'timestamp': 1606036192,
  71. 'upload_date': '20201122',
  72. 'duration': 26,
  73. 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
  74. },
  75. 'params': {'skip_download': 'm3u8'},
  76. }, {
  77. 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
  78. 'only_matching': True,
  79. }, {
  80. 'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
  81. 'only_matching': True,
  82. }]
  83. def _real_extract(self, url):
  84. video_id = self._match_id(url)
  85. return self._player_url_result(video_id)
  86. class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
  87. IE_NAME = 'video.sky.it:live'
  88. _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
  89. _TEST = {
  90. 'url': 'https://video.sky.it/diretta/tg24',
  91. 'info_dict': {
  92. 'id': '1',
  93. 'ext': 'mp4',
  94. 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
  95. 'description': r're:(?:Clicca play e )?[Gg]uarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24\.',
  96. 'live_status': 'is_live',
  97. },
  98. 'params': {
  99. # m3u8 download
  100. 'skip_download': True,
  101. },
  102. }
  103. def _real_extract(self, url):
  104. display_id = self._match_id(url)
  105. webpage = self._download_webpage(url, display_id)
  106. asset_id = str(self._search_nextjs_data(webpage, display_id)['props']['initialState']['livePage']['content']['asset_id'])
  107. livestream = self._download_json(
  108. 'https://apid.sky.it/vdp/v1/getLivestream',
  109. asset_id, query={'id': asset_id})
  110. return self._parse_video(livestream, asset_id)
  111. class SkyItIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
  112. IE_NAME = 'sky.it'
  113. _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
  114. _TESTS = [{
  115. 'url': 'https://sport.sky.it/calcio/serie-a/2022/11/03/brozovic-inter-news',
  116. 'info_dict': {
  117. 'id': '789222',
  118. 'ext': 'mp4',
  119. 'title': 'Brozovic con il gruppo: verso convocazione per Juve-Inter',
  120. 'upload_date': '20221103',
  121. 'timestamp': 1667484130,
  122. 'duration': 22,
  123. 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/03/1667480526353_brozovic_videostill_1.jpg',
  124. },
  125. 'params': {'skip_download': 'm3u8'},
  126. }, {
  127. 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
  128. 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
  129. 'info_dict': {
  130. 'id': '631227',
  131. 'ext': 'mp4',
  132. 'title': 'Uomo ucciso da uno squalo in Australia',
  133. 'timestamp': 1606036192,
  134. 'upload_date': '20201122',
  135. 'duration': 26,
  136. 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
  137. },
  138. 'params': {'skip_download': 'm3u8'},
  139. }]
  140. _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
  141. def _real_extract(self, url):
  142. display_id = self._match_id(url)
  143. webpage = self._download_webpage(url, display_id)
  144. video_id = self._search_regex(
  145. self._VIDEO_ID_REGEX, webpage, 'video id')
  146. return self._player_url_result(video_id)
  147. class SkyItArteIE(SkyItIE): # XXX: Do not subclass from concrete IE
  148. IE_NAME = 'arte.sky.it'
  149. _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
  150. _TESTS = [{
  151. 'url': 'https://arte.sky.it/video/oliviero-toscani-torino-galleria-mazzoleni-788962',
  152. 'md5': '515aee97b87d7a018b6c80727d3e7e17',
  153. 'info_dict': {
  154. 'id': '788962',
  155. 'ext': 'mp4',
  156. 'title': 'La fotografia di Oliviero Toscani conquista Torino',
  157. 'upload_date': '20221102',
  158. 'timestamp': 1667399996,
  159. 'duration': 12,
  160. 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/02/1667396388552_oliviero-toscani-torino-galleria-mazzoleni_videostill_1.jpg',
  161. },
  162. 'params': {'skip_download': 'm3u8'},
  163. }]
  164. _DOMAIN = 'skyarte'
  165. _VIDEO_ID_REGEX = r'"embedUrl"\s*:\s*"(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
  166. class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE
  167. IE_NAME = 'cielotv.it'
  168. _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
  169. _TESTS = [{
  170. 'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
  171. 'md5': 'c4deed77552ba901c2a0d9258320304b',
  172. 'info_dict': {
  173. 'id': '499240',
  174. 'ext': 'mp4',
  175. 'title': 'Il lunedì è sempre un dramma',
  176. 'upload_date': '20190329',
  177. 'timestamp': 1553862178,
  178. 'duration': 30,
  179. 'thumbnail': 'https://videoplatform.sky.it/still/2019/03/29/1553858575610_lunedi_dramma_mant_videostill_1.jpg',
  180. },
  181. 'params': {'skip_download': 'm3u8'},
  182. }]
  183. _DOMAIN = 'cielo'
  184. _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
  185. class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
  186. IE_NAME = 'tv8.it'
  187. _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
  188. _TESTS = [{
  189. 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
  190. 'md5': '9ab906a3f75ea342ed928442f9dabd21',
  191. 'info_dict': {
  192. 'id': '630529',
  193. 'ext': 'mp4',
  194. 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
  195. 'timestamp': 1605721374,
  196. 'upload_date': '20201118',
  197. 'duration': 114,
  198. 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg',
  199. },
  200. 'params': {'skip_download': 'm3u8'},
  201. }]
  202. _DOMAIN = 'mtv8'