bibeltv.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. import functools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. clean_html,
  6. determine_ext,
  7. format_field,
  8. int_or_none,
  9. js_to_json,
  10. orderedSet,
  11. parse_iso8601,
  12. traverse_obj,
  13. url_or_none,
  14. )
  15. class BibelTVBaseIE(InfoExtractor):
  16. _GEO_COUNTRIES = ['AT', 'CH', 'DE']
  17. _GEO_BYPASS = False
  18. API_URL = 'https://www.bibeltv.de/mediathek/api'
  19. AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
  20. def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
  21. formats = []
  22. subtitles = {}
  23. for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
  24. media_ext = determine_ext(media_url)
  25. if media_ext == 'm3u8':
  26. m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
  27. media_url, crn_id, live=is_live)
  28. formats.extend(m3u8_formats)
  29. subtitles.update(m3u8_subs)
  30. elif media_ext == 'mpd':
  31. mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
  32. formats.extend(mpd_formats)
  33. subtitles.update(mpd_subs)
  34. elif media_ext == 'mp4':
  35. formats.append({'url': media_url})
  36. else:
  37. self.report_warning(f'Unknown format {media_ext!r}')
  38. return formats, subtitles
  39. @staticmethod
  40. def _extract_base_info(data):
  41. return {
  42. 'id': data['crn'],
  43. **traverse_obj(data, {
  44. 'title': 'title',
  45. 'description': 'description',
  46. 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
  47. 'timestamp': ('schedulingStart', {parse_iso8601}),
  48. 'season_number': 'seasonNumber',
  49. 'episode_number': 'episodeNumber',
  50. 'view_count': 'viewCount',
  51. 'like_count': 'likeCount',
  52. }),
  53. 'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
  54. 'url': ('url', {url_or_none}),
  55. }))),
  56. }
  57. def _extract_url_info(self, data):
  58. return {
  59. '_type': 'url',
  60. 'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
  61. **self._extract_base_info(data),
  62. }
  63. def _extract_video_info(self, data):
  64. crn_id = data['crn']
  65. if data.get('drm'):
  66. self.report_drm(crn_id)
  67. json_data = self._download_json(
  68. format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
  69. headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
  70. errnote='No formats available') or {}
  71. formats, subtitles = self._extract_formats_and_subtitles(
  72. traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
  73. return {
  74. '_type': 'video',
  75. **self._extract_base_info(data),
  76. 'formats': formats,
  77. 'subtitles': subtitles,
  78. }
  79. class BibelTVVideoIE(BibelTVBaseIE):
  80. IE_DESC = 'BibelTV single video'
  81. _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
  82. IE_NAME = 'bibeltv:video'
  83. _TESTS = [{
  84. 'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
  85. 'md5': 'ec1c07efe54353780512e8a4103b612e',
  86. 'info_dict': {
  87. 'id': '344436',
  88. 'ext': 'mp4',
  89. 'title': 'Alte Wege',
  90. 'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
  91. 'timestamp': 1677877071,
  92. 'duration': 150.0,
  93. 'upload_date': '20230303',
  94. 'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
  95. 'episode': 'Episode 1',
  96. 'episode_number': 1,
  97. 'view_count': int,
  98. 'like_count': int,
  99. },
  100. 'params': {
  101. 'format': '6',
  102. },
  103. }]
  104. def _real_extract(self, url):
  105. crn_id = self._match_id(url)
  106. video_data = traverse_obj(
  107. self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
  108. ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
  109. if not video_data:
  110. raise ExtractorError('Missing video data.')
  111. return self._extract_video_info(video_data)
  112. class BibelTVSeriesIE(BibelTVBaseIE):
  113. IE_DESC = 'BibelTV series playlist'
  114. _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
  115. IE_NAME = 'bibeltv:series'
  116. _TESTS = [{
  117. 'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
  118. 'playlist_mincount': 400,
  119. 'info_dict': {
  120. 'id': '333485',
  121. 'title': 'Ein Wunder für jeden Tag',
  122. 'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
  123. },
  124. }]
  125. def _real_extract(self, url):
  126. crn_id = self._match_id(url)
  127. webpage = self._download_webpage(url, crn_id)
  128. nextjs_data = self._search_nextjs_data(webpage, crn_id)
  129. series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
  130. if not series_data:
  131. raise ExtractorError('Missing series data.')
  132. return self.playlist_result(
  133. traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
  134. crn_id, series_data.get('title'), clean_html(series_data.get('description')))
  135. class BibelTVLiveIE(BibelTVBaseIE):
  136. IE_DESC = 'BibelTV live program'
  137. _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
  138. IE_NAME = 'bibeltv:live'
  139. _TESTS = [{
  140. 'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
  141. 'info_dict': {
  142. 'id': 'bibeltv',
  143. 'ext': 'mp4',
  144. 'title': 're:Bibel TV',
  145. 'live_status': 'is_live',
  146. 'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
  147. },
  148. 'params': {'skip_download': 'm3u8'},
  149. }, {
  150. 'url': 'https://www.bibeltv.de/livestreams/impuls/',
  151. 'only_matching': True,
  152. }]
  153. def _real_extract(self, url):
  154. stream_id = self._match_id(url)
  155. webpage = self._download_webpage(url, stream_id)
  156. stream_data = self._search_json(
  157. r'\\"video\\":', webpage, 'bibeltvData', stream_id,
  158. transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
  159. formats, subtitles = self._extract_formats_and_subtitles(
  160. traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
  161. return {
  162. 'id': stream_id,
  163. 'title': stream_data.get('title'),
  164. 'thumbnail': stream_data.get('poster'),
  165. 'is_live': True,
  166. 'formats': formats,
  167. 'subtitles': subtitles,
  168. }