opencast.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. determine_ext,
  6. int_or_none,
  7. parse_iso8601,
  8. traverse_obj,
  9. variadic,
  10. )
  class OpencastBaseIE(InfoExtractor):
      # Shared plumbing for the Opencast extractors: the host/UUID regex
      # fragments, a thin API wrapper and the mediapackage -> info dict parser.
      # `_call_api` reads `self._API_BASE`, which this class does not define;
      # subclasses supply it as a %-format string taking (host, id).

      # Alternation of supported instance hosts. It spans several lines, so it
      # is only meaningful when embedded in a verbose ((?x)) pattern.
      _INSTANCES_RE = r'''(?:
                          opencast\.informatik\.kit\.edu|
                          electures\.uni-muenster\.de|
                          oc-presentation\.ltcc\.tuwien\.ac\.at|
                          medien\.ph-noe\.ac\.at|
                          oc-video\.ruhr-uni-bochum\.de|
                          oc-video1\.ruhr-uni-bochum\.de|
                          opencast\.informatik\.uni-goettingen\.de|
                          heicast\.uni-heidelberg\.de|
                          opencast\.hawk\.de:8080|
                          opencast\.hs-osnabrueck\.de|
                          video[0-9]+\.virtuos\.uni-osnabrueck\.de|
                          opencast\.uni-koeln\.de|
                          media\.opencast\.hochschule-rhein-waal\.de|
                          matterhorn\.dce\.harvard\.edu|
                          hs-harz\.opencast\.uni-halle\.de|
                          videocampus\.urz\.uni-leipzig\.de|
                          media\.uct\.ac\.za|
                          vid\.igb\.illinois\.edu|
                          cursosabertos\.c3sl\.ufpr\.br|
                          mcmedia\.missioncollege\.org|
                          clases\.odon\.edu\.uy
                      )'''
      # Hex UUID in 8-4-4-4-12 layout, either letter case.
      _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'

      def _call_api(self, host, video_id, **kwargs):
          """Download the JSON document at ``self._API_BASE % (host, video_id)``.

          Any extra keyword arguments are passed through to ``_download_json``.
          """
          api_url = self._API_BASE % (host, video_id)
          return self._download_json(api_url, video_id, **kwargs)

      def _parse_mediapackage(self, video):
          """Turn a mediapackage mapping into an info dict.

          Each entry under ``media.track`` becomes one or more formats; entries
          without a ``url`` are skipped, as are RTMP tracks whose URL does not
          match the expected ``rtmp://host/app/ext:playpath`` layout.

          Raises:
              ExtractorError: if the mediapackage has no ``id``.
          """
          video_id = video.get('id')
          if video_id is None:
              raise ExtractorError('Video id was not found')

          formats = []
          # `media.track` is wrapped with variadic, so a lone track mapping and
          # a list of tracks are both iterated the same way.
          tracks = traverse_obj(video, ('media', 'track')) or []
          for track in variadic(tracks):
              track_url = track.get('url')
              if track_url is None:
                  continue

              ext = determine_ext(track_url, None)
              transport = track.get('transport')

              # Transports/extensions that have a dedicated format extractor.
              if transport == 'DASH' or ext == 'mpd':
                  formats.extend(self._extract_mpd_formats(
                      track_url, video_id, mpd_id='dash', fatal=False))
                  continue
              if transport == 'HLS' or ext == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      track_url, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False))
                  continue
              if transport == 'HDS' or ext == 'f4m':
                  formats.extend(self._extract_f4m_formats(
                      track_url, video_id, f4m_id='hds', fatal=False))
                  continue
              if transport == 'SMOOTH':
                  formats.extend(self._extract_ism_formats(
                      track_url, video_id, ism_id='smooth', fatal=False))
                  continue
              if ext == 'smil':
                  formats.extend(self._extract_smil_formats(track_url, video_id, fatal=False))
                  continue

              # Everything else is described directly from the track metadata.
              # A track without a 'video' (or 'audio') section is flagged with
              # the codec value 'none' for that stream.
              if track.get('video'):
                  vcodec = traverse_obj(track, ('video', 'encoder', 'type'))
              else:
                  vcodec = 'none'
              if track.get('audio'):
                  acodec = traverse_obj(track, ('audio', 'encoder', 'type'))
              else:
                  acodec = 'none'

              fmt = {
                  'url': track_url,
                  'ext': ext,
                  'format_note': transport,
                  'resolution': traverse_obj(track, ('video', 'resolution')),
                  'fps': int_or_none(traverse_obj(track, ('video', 'framerate'))),
                  'vbr': int_or_none(traverse_obj(track, ('video', 'bitrate')), scale=1000),
                  'vcodec': vcodec,
                  'abr': int_or_none(traverse_obj(track, ('audio', 'bitrate')), scale=1000),
                  'asr': int_or_none(traverse_obj(track, ('audio', 'samplingrate'))),
                  'acodec': acodec,
              }

              if transport == 'RTMP':
                  rtmp_match = re.search(
                      r'(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<ext>.+):(?P<playpath>.+)', track_url)
                  if rtmp_match is None:
                      continue
                  app, rtmp_ext, playpath = rtmp_match.group('app', 'ext', 'playpath')
                  fmt['app'] = app
                  fmt['ext'] = rtmp_ext
                  fmt['play_path'] = rtmp_ext + ':' + playpath
                  fmt['rtmp_live'] = True
                  fmt['preference'] = -2

              formats.append(fmt)

          return {
              'id': video_id,
              'formats': formats,
              'title': video.get('title'),
              'series': video.get('seriestitle'),
              'season_id': video.get('series'),
              'creator': traverse_obj(video, ('creators', 'creator')),
              'timestamp': parse_iso8601(video.get('start')),
              'thumbnail': traverse_obj(video, ('attachments', 'attachment', ..., 'url'), get_all=False),
          }
  class OpencastIE(OpencastBaseIE):
      # Single-episode extractor: matches
      # https://<instance>/paella/ui/watch.html?...id=<uuid> and looks the
      # episode up through search/episode.json?id=<uuid>.
      _VALID_URL = rf'''(?x)
                      https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\?
                      (?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})'''

      _API_BASE = 'https://%s/search/episode.json?id=%s'

      _TESTS = [
          {
              'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
              'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
              'info_dict': {
                  'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
                  'ext': 'mp4',
                  'title': '11 - Kryptographie - 24.11.2015',
                  'thumbnail': r're:^https?://.*\.jpg$',
                  'timestamp': 1606208400,
                  'upload_date': '20201124',
                  'season_id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                  'series': 'Kryptographie - WiSe 15/16',
                  'creator': 'Alexander May',
              },
          },
      ]

      def _real_extract(self, url):
          """Fetch the episode referenced by *url* and return its info dict.

          Raises:
              ExtractorError: if the API response carries no mediapackage
                  mapping for the requested id.
          """
          host, video_id = self._match_valid_url(url).group('host', 'id')
          response = self._call_api(host, video_id)

          # Walk the response defensively: indexing it directly turns a
          # response without a result into a bare KeyError/TypeError, whereas
          # the mediapackage parser reports missing data via ExtractorError.
          mediapackage = traverse_obj(response, ('search-results', 'result', 'mediapackage'))
          if not isinstance(mediapackage, dict):
              raise ExtractorError('Mediapackage was not found')

          return self._parse_mediapackage(mediapackage)
  class OpencastPlaylistIE(OpencastBaseIE):
      # Series extractor: matches the engage UI (?epFrom=<uuid>) and the LTI
      # tools (?series=<uuid>) URLs and lists the series' episodes through
      # search/episode.json?sid=<uuid>.
      _VALID_URL = rf'''(?x)
                      https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})(?:
                          /engage/ui/index\.html\?(?:[^#]+&)?epFrom=|
                          /ltitools/index\.html\?(?:[^#]+&)?series=
                      )(?P<id>{OpencastBaseIE._UUID_RE})'''

      _API_BASE = 'https://%s/search/episode.json?sid=%s'

      _TESTS = [
          {
              'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
              'info_dict': {
                  'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                  'title': 'Kryptographie - WiSe 15/16',
              },
              'playlist_mincount': 29,
          },
          {
              'url': 'https://oc-video1.ruhr-uni-bochum.de/ltitools/index.html?subtool=series&series=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0&lng=de',
              'info_dict': {
                  'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                  'title': 'Kryptographie - WiSe 15/16',
              },
              'playlist_mincount': 29,
          },
          {
              'url': 'https://electures.uni-muenster.de/engage/ui/index.html?e=1&p=1&epFrom=39391d10-a711-4d23-b21d-afd2ed7d758c',
              'info_dict': {
                  'id': '39391d10-a711-4d23-b21d-afd2ed7d758c',
                  'title': '021670 Theologische Themen bei Hans Blumenberg WiSe 2017/18',
              },
              'playlist_mincount': 13,
          },
      ]

      def _real_extract(self, url):
          """Return a playlist made of every episode found for the series in *url*.

          The playlist title is the ``series`` value of the first parsed entry,
          or None when no entry was produced.
          """
          host, video_id = self._match_valid_url(url).group('host', 'id')
          response = self._call_api(host, video_id)

          # Tolerate a response that has no 'result' entry instead of failing
          # with KeyError; the outcome is then simply an empty playlist.
          # NOTE(review): presumably this is what a series without episodes
          # looks like - confirm against the Opencast search API.
          results = traverse_obj(response, ('search-results', 'result')) or []

          entries = [
              self._parse_mediapackage(episode['mediapackage'])
              # variadic lets a single result mapping and a list of results
              # be iterated the same way.
              for episode in variadic(results)
              if episode.get('mediapackage')
          ]

          return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))
  161. return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))