videocampus_sachsen.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. import functools
  2. import re
  3. from .common import InfoExtractor
  4. from ..networking.exceptions import HTTPError
  5. from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
  6. class VideocampusSachsenIE(InfoExtractor):
  7. IE_NAME = 'ViMP'
  8. _INSTANCES = (
  9. 'bergauf.tv',
  10. 'campus.demo.vimp.com',
  11. 'corporate.demo.vimp.com',
  12. 'dancehalldatabase.com',
  13. 'drehzahl.tv',
  14. 'educhannel.hs-gesundheit.de',
  15. 'emedia.ls.haw-hamburg.de',
  16. 'globale-evolution.net',
  17. 'hohu.tv',
  18. 'htvideos.hightechhigh.org',
  19. 'k210039.vimp.mivitec.net',
  20. 'media.cmslegal.com',
  21. 'media.hs-furtwangen.de',
  22. 'media.hwr-berlin.de',
  23. 'mediathek.dkfz.de',
  24. 'mediathek.htw-berlin.de',
  25. 'mediathek.polizei-bw.de',
  26. 'medien.hs-merseburg.de',
  27. 'mportal.europa-uni.de',
  28. 'pacific.demo.vimp.com',
  29. 'slctv.com',
  30. 'streaming.prairiesouth.ca',
  31. 'tube.isbonline.cn',
  32. 'univideo.uni-kassel.de',
  33. 'ursula2.genetics.emory.edu',
  34. 'ursulablicklevideoarchiv.com',
  35. 'v.agrarumweltpaedagogik.at',
  36. 'video.eplay-tv.de',
  37. 'video.fh-dortmund.de',
  38. 'video.hs-offenburg.de',
  39. 'video.hs-pforzheim.de',
  40. 'video.hspv.nrw.de',
  41. 'video.irtshdf.fr',
  42. 'video.pareygo.de',
  43. 'video.tu-freiberg.de',
  44. 'videocampus.sachsen.de',
  45. 'videoportal.uni-freiburg.de',
  46. 'videoportal.vm.uni-freiburg.de',
  47. 'videos.duoc.cl',
  48. 'videos.uni-paderborn.de',
  49. 'vimp-bemus.udk-berlin.de',
  50. 'vimp.aekwl.de',
  51. 'vimp.hs-mittweida.de',
  52. 'vimp.oth-regensburg.de',
  53. 'vimp.ph-heidelberg.de',
  54. 'vimp.sma-events.com',
  55. 'vimp.weka-fachmedien.de',
  56. 'webtv.univ-montp3.fr',
  57. 'www.b-tu.de/media',
  58. 'www.bergauf.tv',
  59. 'www.bigcitytv.de',
  60. 'www.cad-videos.de',
  61. 'www.drehzahl.tv',
  62. 'www.fh-bielefeld.de/medienportal',
  63. 'www.hohu.tv',
  64. 'www.orvovideo.com',
  65. 'www.rwe.tv',
  66. 'www.salzi.tv',
  67. 'www.wenglor-media.com',
  68. 'www2.univ-sba.dz',
  69. )
  70. _VALID_URL = r'''(?x)https?://(?P<host>{})/(?:
  71. m/(?P<tmp_id>[0-9a-f]+)|
  72. (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{{32}})|
  73. media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{{32}}&?)
  74. )'''.format('|'.join(map(re.escape, _INSTANCES)))
  75. _TESTS = [
  76. {
  77. 'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
  78. 'info_dict': {
  79. 'id': 'e6b9349905c1628631f175712250f2a1',
  80. 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
  81. 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
  82. 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
  83. 'ext': 'mp4',
  84. },
  85. },
  86. {
  87. 'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
  88. 'info_dict': {
  89. 'id': 'fc99c527e4205b121cb7c74433469262',
  90. 'title': 'Was ist selbstgesteuertes Lernen?',
  91. 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
  92. 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
  93. 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
  94. 'ext': 'mp4',
  95. },
  96. },
  97. {
  98. 'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
  99. 'info_dict': {
  100. 'id': '09d4ed029002eb1bdda610f1103dd54c',
  101. 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
  102. 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
  103. 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
  104. 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
  105. 'ext': 'mp4',
  106. },
  107. },
  108. {
  109. 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
  110. 'info_dict': {
  111. 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
  112. 'id': '0183356e41af7bfb83d7667b20d9b6a3',
  113. 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
  114. 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
  115. 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
  116. 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
  117. 'ext': 'mp4',
  118. },
  119. },
  120. {
  121. 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
  122. 'info_dict': {
  123. 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
  124. 'title': 'Preisverleihung »Produkte des Jahres 2022«',
  125. 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
  126. 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
  127. 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
  128. 'ext': 'mp4',
  129. },
  130. },
  131. {
  132. 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
  133. 'info_dict': {
  134. 'id': 'fc99c527e4205b121cb7c74433469262',
  135. 'title': 'Was ist selbstgesteuertes Lernen?',
  136. 'ext': 'mp4',
  137. },
  138. },
  139. ]
  140. def _real_extract(self, url):
  141. host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
  142. 'host', 'id', 'tmp_id', 'display_id', 'embed_id')
  143. webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
  144. if not video_id:
  145. video_id = embed_id or self._html_search_regex(
  146. rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
  147. webpage, 'video_id')
  148. if not (display_id or tmp_id):
  149. # Title, description from embedded page's meta wouldn't be correct
  150. title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
  151. description = None
  152. thumbnail = None
  153. else:
  154. title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
  155. description = self._html_search_meta(
  156. ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
  157. thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
  158. formats, subtitles = [], {}
  159. try:
  160. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  161. f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
  162. video_id, 'mp4', m3u8_id='hls', fatal=True)
  163. except ExtractorError as e:
  164. if not isinstance(e.cause, HTTPError) or e.cause.status not in (404, 500):
  165. raise
  166. formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
  167. return {
  168. 'id': video_id,
  169. 'title': title,
  170. 'description': description,
  171. 'thumbnail': thumbnail,
  172. 'display_id': display_id,
  173. 'formats': formats,
  174. 'subtitles': subtitles,
  175. }
  176. class ViMPPlaylistIE(InfoExtractor):
  177. IE_NAME = 'ViMP:Playlist'
  178. _VALID_URL = r'''(?x)(?P<host>https?://(?:{}))/(?:
  179. album/view/aid/(?P<album_id>[0-9]+)|
  180. (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
  181. )'''.format('|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES)))
  182. _TESTS = [{
  183. 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
  184. 'info_dict': {
  185. 'id': 'channel-3',
  186. 'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
  187. },
  188. 'playlist_mincount': 9,
  189. }, {
  190. 'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
  191. 'info_dict': {
  192. 'id': 'album-208',
  193. 'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
  194. },
  195. 'playlist_mincount': 4,
  196. }, {
  197. 'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
  198. 'info_dict': {
  199. 'id': 'category-91',
  200. 'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
  201. },
  202. 'playlist_mincount': 7,
  203. }]
  204. _PAGE_SIZE = 10
  205. def _fetch_page(self, host, url_part, playlist_id, data, page):
  206. webpage = self._download_webpage(
  207. f'{host}/media/ajax/component/boxList/{url_part}', playlist_id,
  208. query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))
  209. urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage)
  210. for url in urls:
  211. yield self.url_result(host + url, VideocampusSachsenIE)
  212. def _real_extract(self, url):
  213. host, album_id, mode, name, playlist_id = self._match_valid_url(url).group(
  214. 'host', 'album_id', 'mode', 'name', 'id')
  215. webpage = self._download_webpage(url, album_id or playlist_id, fatal=False) or ''
  216. title = (self._html_search_meta('title', webpage, fatal=False)
  217. or self._html_extract_title(webpage))
  218. url_part = (f'aid/{album_id}' if album_id
  219. else f'category/{name}/category_id/{playlist_id}' if mode == 'category'
  220. else f'title/{name}/channel/{playlist_id}')
  221. mode = mode or 'album'
  222. data = {
  223. 'vars[mode]': mode,
  224. f'vars[{mode}]': album_id or playlist_id,
  225. 'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
  226. 'vars[context_id]': album_id or playlist_id,
  227. 'vars[layout]': 'thumb',
  228. 'vars[per_page][thumb]': str(self._PAGE_SIZE),
  229. }
  230. return self.playlist_result(
  231. OnDemandPagedList(functools.partial(
  232. self._fetch_page, host, url_part, album_id or playlist_id, data), self._PAGE_SIZE),
  233. playlist_title=title, id=f'{mode}-{album_id or playlist_id}')