jwplatform.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import unsmuggle_url
  class JWPlatformIE(InfoExtractor):
      """Extractor for media addressed by an 8-character JW Platform media ID.

      Accepts ``content.jwplatform.com`` / ``cdn.jwplayer.com`` URLs as well as
      the internal ``jwplatform:<id>`` pseudo-URL (see ``_VALID_URL``).
      """

      _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
      _TESTS = [{
          'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
          'md5': '3aa16e4f6860e6e78b7df5829519aed3',
          'info_dict': {
              'id': 'nPripu9l',
              'ext': 'mp4',
              'title': 'Big Buck Bunny Trailer',
              'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
              'upload_date': '20081127',
              'timestamp': 1227796140,
              'duration': 32.0,
              'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720',
          },
      }, {
          'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
          'only_matching': True,
      }]
      _WEBPAGE_TESTS = [{
          # JWPlatform iframe
          'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
          'info_dict': {
              'id': 'AG26UQXM',
              'ext': 'mp4',
              'upload_date': '20160719',
              'timestamp': 1468923808,
              'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
              'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
              'description': '',
              'duration': 294.0,
          },
      }, {
          # Player url not surrounded by quotes
          'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
          'info_dict': {
              'id': 'jUxh5uin',
              'title': 'Klassenfahrt',
              'ext': 'mp4',
              'upload_date': '20230109',
              'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720',
              'timestamp': 1673270298,
              'description': '',
              'duration': 5193.0,
          },
          'params': {'allowed_extractors': ['generic', 'jwplatform']},
      }, {
          # iframe src attribute includes backslash before URL string
          'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
          'info_dict': {
              'id': 'QD3gsexj',
              'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
              'ext': 'mp4',
              'upload_date': '20230127',
              'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720',
              'timestamp': 1674862986,
              'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
              'duration': 263.0,
          },
      }]

      @classmethod
      def _extract_embed_urls(cls, url, webpage):
          """Find JW Platform player embeds in the HTML of a page.

          Args:
              url: URL of the page being scanned (not used by this method).
              webpage: HTML source of the page, as a string.

          Returns:
              A list of strings. Either every player URL matched by the first
              tag/attribute pattern that hits (the scheme is optional in the
              pattern, so entries may be protocol-relative), or a single
              ``jwplatform:<id>`` pseudo-URL taken from a
              ``<div data-video-jw-id="...">`` attribute, or an empty list when
              nothing matches.
          """
          # Patterns are tried in order and the first one with any hit wins:
          # <input value=URL> is used by hyland.com, but if we find
          # <script>/<iframe> embeds we don't look for <input>.
          for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
              ret = re.findall(
                  # `\\?` tolerates a backslash before the attribute value and
                  # `["\']?` tolerates a missing opening quote.
                  rf'<{tag}[^>]+?{key}=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{{8}})',
                  webpage)
              if ret:
                  return ret

          # Fallback: media ID carried in a data attribute on a <div>.
          mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
          if mobj:
              return [f'jwplatform:{mobj.group(1)}']

          # Explicit empty result so the return type is always a list instead
          # of an implicit None when no embed is present.
          return []

      def _real_extract(self, url):
          """Download the media JSON for the ID in *url* and parse it.

          Args:
              url: A URL matching ``_VALID_URL``; it may carry smuggled data,
                  from which the optional ``geo_countries`` entry is read.

          Returns:
              Whatever ``_parse_jwplayer_data`` produces for the downloaded
              JSON (presumably an info dict -- defined outside this class).
          """
          url, smuggled_data = unsmuggle_url(url, {})
          # Forward any smuggled country list to the geo-bypass setup; this is
          # None when the URL carried no such data.
          self._initialize_geo_bypass({
              'countries': smuggled_data.get('geo_countries'),
          })
          video_id = self._match_id(url)
          json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
          return self._parse_jwplayer_data(json_data, video_id)