mediastream.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. clean_html,
  5. filter_dict,
  6. parse_qs,
  7. remove_end,
  8. traverse_obj,
  9. update_url_query,
  10. urljoin,
  11. )
  12. class MediaStreamBaseIE(InfoExtractor):
  13. _EMBED_BASE_URL = 'https://mdstrm.com/embed'
  14. _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
  15. def _extract_mediastream_urls(self, webpage):
  16. yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
  17. lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
  18. {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
  19. for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
  20. yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}'
  21. yield from re.findall(
  22. rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage)
  23. for mobj in re.finditer(
  24. r'''(?x)
  25. <(?:div|ps-mediastream)[^>]+
  26. (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
  27. data-video-id="(?P<video_id>\w+)"
  28. (?:\s*data-video-type="(?P<video_type>[^"]+))?
  29. (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
  30. https://mdstrm\.com/(?P<live>live-stream))?
  31. ''', webpage):
  32. video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed'
  33. yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
  34. class MediaStreamIE(MediaStreamBaseIE):
  35. _VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
  36. _TESTS = [{
  37. 'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
  38. 'md5': '97b4f2634b8e8612cc574dfcd504df05',
  39. 'info_dict': {
  40. 'id': '6318e3f1d1d316083ae48831',
  41. 'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
  42. 'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
  43. 'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
  44. 'ext': 'mp4',
  45. },
  46. 'params': {'skip_download': 'm3u8'},
  47. }]
  48. _WEBPAGE_TESTS = [{
  49. 'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
  50. 'info_dict': {
  51. 'id': '5a7b1e63a8da282c34d65445',
  52. 'title': 're:mmtv-costarica',
  53. 'description': 'mmtv-costarica',
  54. 'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
  55. 'ext': 'mp4',
  56. 'live_status': 'is_live',
  57. },
  58. 'params': {'skip_download': 'Livestream'},
  59. }, {
  60. 'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
  61. 'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
  62. 'info_dict': {
  63. 'id': '63731bab8ec9b308a2c9ed28',
  64. 'title': 'Clases de llaves y castigos ¿Quién sabe más?',
  65. 'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
  66. 'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
  67. 'ext': 'mp4',
  68. },
  69. 'params': {'skip_download': 'm3u8'},
  70. }, {
  71. 'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
  72. 'info_dict': {
  73. 'id': '63756df1c638b008a5659dec',
  74. 'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
  75. 'description': 'md5:9490c034264afd756eef7b2c3adee69e',
  76. 'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
  77. 'ext': 'mp4',
  78. },
  79. 'params': {'skip_download': 'm3u8'},
  80. }, {
  81. 'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
  82. 'info_dict': {
  83. 'id': '637307669609130f74cd3a6e',
  84. 'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
  85. 'description': 'md5:60d71772f1e1496923539ae58aa17124',
  86. 'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
  87. 'ext': 'mp4',
  88. },
  89. 'params': {'skip_download': 'm3u8'},
  90. }]
  91. def _extract_from_webpage(self, url, webpage):
  92. for embed_url in self._extract_mediastream_urls(webpage):
  93. yield self.url_result(embed_url, MediaStreamIE, None)
  94. def _real_extract(self, url):
  95. video_id = self._match_id(url)
  96. webpage = self._download_webpage(url, video_id)
  97. for message in [
  98. 'Debido a tu ubicación no puedes ver el contenido',
  99. 'You are not allowed to watch this video: Geo Fencing Restriction',
  100. 'Este contenido no está disponible en tu zona geográfica.',
  101. 'El contenido sólo está disponible dentro de',
  102. ]:
  103. if message in webpage:
  104. self.raise_geo_restricted()
  105. player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
  106. formats, subtitles = [], {}
  107. for video_format in player_config['src']:
  108. if video_format == 'hls':
  109. params = {
  110. 'at': 'web-app',
  111. 'access_token': traverse_obj(parse_qs(url), ('access_token', 0)),
  112. }
  113. for name, key in (('MDSTRMUID', 'uid'), ('MDSTRMSID', 'sid'), ('MDSTRMPID', 'pid'), ('VERSION', 'av')):
  114. params[key] = self._search_regex(
  115. rf'window\.{name}\s*=\s*["\']([^"\']+)["\'];', webpage, key, default=None)
  116. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  117. update_url_query(player_config['src'][video_format], filter_dict(params)), video_id)
  118. formats.extend(fmts)
  119. self._merge_subtitles(subs, target=subtitles)
  120. elif video_format == 'mpd':
  121. fmts, subs = self._extract_mpd_formats_and_subtitles(player_config['src'][video_format], video_id)
  122. formats.extend(fmts)
  123. self._merge_subtitles(subs, target=subtitles)
  124. else:
  125. formats.append({
  126. 'url': player_config['src'][video_format],
  127. })
  128. return {
  129. 'id': video_id,
  130. 'title': self._og_search_title(webpage) or player_config.get('title'),
  131. 'description': self._og_search_description(webpage),
  132. 'formats': formats,
  133. 'subtitles': subtitles,
  134. 'is_live': player_config.get('type') == 'live',
  135. 'thumbnail': self._og_search_thumbnail(webpage),
  136. }
  137. class WinSportsVideoIE(MediaStreamBaseIE):
  138. _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
  139. _TESTS = [{
  140. 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
  141. 'info_dict': {
  142. 'id': '62dc8357162c4b0821fcfb3c',
  143. 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
  144. 'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
  145. 'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
  146. 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
  147. 'ext': 'mp4',
  148. },
  149. 'params': {'skip_download': 'm3u8'},
  150. }, {
  151. 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
  152. 'info_dict': {
  153. 'id': '62dcb875ef12a5526790b552',
  154. 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
  155. 'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
  156. 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
  157. 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
  158. 'ext': 'mp4',
  159. },
  160. 'params': {'skip_download': 'm3u8'},
  161. }, {
  162. 'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior',
  163. 'info_dict': {
  164. 'id': '63fa7eca72f1741ad3a4d515',
  165. 'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior',
  166. 'title': '⚽ Equidad vuelve a defender su arco de remates de Junior',
  167. 'description': 'Remate de Sierra',
  168. 'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515',
  169. 'ext': 'mp4',
  170. },
  171. 'params': {'skip_download': 'm3u8'},
  172. }, {
  173. 'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
  174. 'info_dict': {
  175. 'id': '6402adb62bbf3b18d454e1b0',
  176. 'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
  177. 'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
  178. 'description': 'Gol anulado Bucaramanga',
  179. 'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
  180. 'ext': 'mp4',
  181. },
  182. 'params': {'skip_download': 'm3u8'},
  183. }]
  184. def _real_extract(self, url):
  185. display_id = self._match_id(url)
  186. webpage = self._download_webpage(url, display_id)
  187. data = self._search_json(
  188. r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
  189. mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
  190. traverse_obj(data, (
  191. (('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
  192. or next(self._extract_mediastream_urls(webpage), None)))
  193. if not mediastream_url:
  194. self.raise_no_formats('No MediaStream embed found in webpage')
  195. title = clean_html(remove_end(
  196. self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
  197. or self._og_search_title(webpage), '| Win Sports'))
  198. return self.url_result(
  199. mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)