boxcast.py 4.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. from .common import InfoExtractor
  2. from ..utils import js_to_json, traverse_obj, unified_timestamp
  3. class BoxCastVideoIE(InfoExtractor):
  4. _VALID_URL = r'''(?x)
  5. https?://boxcast\.tv/(?:
  6. view-embed/|
  7. channel/\w+\?(?:[^#]+&)?b=|
  8. video-portal/(?:\w+/){2}
  9. )(?P<id>[\w-]+)'''
  10. _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
  11. _TESTS = [{
  12. 'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
  13. 'info_dict': {
  14. 'id': 'da1eqqgkacngd5djlqld',
  15. 'ext': 'mp4',
  16. 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
  17. 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
  18. 'release_timestamp': 1670686812,
  19. 'release_date': '20221210',
  20. 'uploader_id': 're8w0v8hohhvpqtbskpe',
  21. 'uploader': 'Children\'s Health Defense',
  22. },
  23. }, {
  24. 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
  25. 'info_dict': {
  26. 'id': 'otbpltj2kzkveo2qz3ad',
  27. 'ext': 'mp4',
  28. 'uploader_id': 'vctwevwntun3o0ikq7af',
  29. 'uploader': 'Legacy Christian Church',
  30. 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
  31. 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg',
  32. },
  33. }, {
  34. 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
  35. 'info_dict': {
  36. 'id': 'ssihlw5gvfij2by8tkev',
  37. 'ext': 'mp4',
  38. 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$',
  39. 'release_date': '20230101',
  40. 'uploader_id': 'ds25vaazhlu4ygcvffid',
  41. 'release_timestamp': 1672543201,
  42. 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
  43. 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
  44. 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
  45. },
  46. }]
  47. _WEBPAGE_TESTS = [{
  48. 'url': 'https://childrenshealthdefense.eu/live-stream/',
  49. 'info_dict': {
  50. 'id': 'da1eqqgkacngd5djlqld',
  51. 'ext': 'mp4',
  52. 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
  53. 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
  54. 'release_timestamp': 1670686812,
  55. 'release_date': '20221210',
  56. 'uploader_id': 're8w0v8hohhvpqtbskpe',
  57. 'uploader': 'Children\'s Health Defense',
  58. },
  59. }]
  60. def _real_extract(self, url):
  61. display_id = self._match_id(url)
  62. webpage = self._download_webpage(url, display_id)
  63. webpage_json_data = self._search_json(
  64. r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
  65. transform_source=js_to_json, default={})
  66. # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
  67. broadcast_json_data = (
  68. traverse_obj(webpage_json_data, ('broadcast', 'data'))
  69. or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
  70. view_json_data = (
  71. traverse_obj(webpage_json_data, ('view', 'data'))
  72. or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
  73. display_id, fatal=False) or {})
  74. formats, subtitles = [], {}
  75. if view_json_data.get('status') == 'recorded':
  76. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  77. view_json_data['playlist'], display_id)
  78. return {
  79. 'id': str(broadcast_json_data['id']),
  80. 'title': (broadcast_json_data.get('name')
  81. or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
  82. 'description': (broadcast_json_data.get('description')
  83. or self._html_search_meta(['og:description', 'twitter:description'], webpage)
  84. or None),
  85. 'thumbnail': (broadcast_json_data.get('preview')
  86. or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
  87. 'formats': formats,
  88. 'subtitles': subtitles,
  89. 'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
  90. 'uploader': broadcast_json_data.get('account_name'),
  91. 'uploader_id': broadcast_json_data.get('account_id'),
  92. }