tubetugraz.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. float_or_none,
  4. parse_resolution,
  5. traverse_obj,
  6. urlencode_postdata,
  7. variadic,
  8. )
  9. class TubeTuGrazBaseIE(InfoExtractor):
  10. _NETRC_MACHINE = 'tubetugraz'
  11. _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
  12. _FORMAT_TYPES = ('presentation', 'presenter')
  13. def _perform_login(self, username, password):
  14. urlh = self._request_webpage(
  15. 'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
  16. None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
  17. if not urlh:
  18. return
  19. response = self._download_webpage_handle(
  20. urlh.url, None, fatal=False, headers={'referer': urlh.url},
  21. note='logging in', errnote='unable to log in',
  22. data=urlencode_postdata({
  23. 'lang': 'de',
  24. '_eventId_proceed': '',
  25. 'j_username': username,
  26. 'j_password': password,
  27. }))
  28. if not response:
  29. return
  30. content, urlh = response
  31. if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
  32. return
  33. if not self._html_search_regex(
  34. r'<p\b[^>]*>(Bitte geben Sie einen OTP-Wert ein:)</p>',
  35. content, 'TFA prompt', default=None):
  36. self.report_warning('unable to login: incorrect password')
  37. return
  38. urlh = self._request_webpage(
  39. urlh.url, None, fatal=False, headers={'referer': urlh.url},
  40. note='logging in with TFA', errnote='unable to log in with TFA',
  41. data=urlencode_postdata({
  42. 'lang': 'de',
  43. '_eventId_proceed': '',
  44. 'j_tokenNumber': self._get_tfa_info(),
  45. }))
  46. if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
  47. return
  48. self.report_warning('unable to login: incorrect TFA code')
  49. def _extract_episode(self, episode_info):
  50. video_id = episode_info.get('id')
  51. formats = list(self._extract_formats(
  52. traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id))
  53. title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
  54. series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
  55. creator = ', '.join(variadic(traverse_obj(
  56. episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
  57. return {
  58. 'id': video_id,
  59. 'title': title,
  60. 'creator': creator or None,
  61. 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
  62. 'series': series_title,
  63. 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
  64. 'episode': series_title and title,
  65. 'formats': formats,
  66. }
  67. def _set_format_type(self, formats, fmt_type):
  68. for f in formats:
  69. f['format_note'] = fmt_type
  70. if not fmt_type.startswith(self._FORMAT_TYPES[0]):
  71. f['preference'] = -2
  72. return formats
  73. def _extract_formats(self, format_list, video_id):
  74. has_hls, has_dash = False, False
  75. for format_info in format_list or []:
  76. url = traverse_obj(format_info, ('tags', 'url'), 'url')
  77. if url is None:
  78. continue
  79. fmt_type = format_info.get('type') or 'unknown'
  80. transport = (format_info.get('transport') or 'https').lower()
  81. if transport == 'https':
  82. formats = [{
  83. 'url': url,
  84. 'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
  85. 'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
  86. 'fps': traverse_obj(format_info, ('video', 'framerate')),
  87. **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
  88. }]
  89. elif transport == 'hls':
  90. has_hls, formats = True, self._extract_m3u8_formats(
  91. url, video_id, 'mp4', fatal=False, note=f'downloading {fmt_type} HLS manifest')
  92. elif transport == 'dash':
  93. has_dash, formats = True, self._extract_mpd_formats(
  94. url, video_id, fatal=False, note=f'downloading {fmt_type} DASH manifest')
  95. else:
  96. # RTMP, HDS, SMOOTH, and unknown formats
  97. # - RTMP url fails on every tested entry until now
  98. # - HDS url 404's on every tested entry until now
  99. # - SMOOTH url 404's on every tested entry until now
  100. continue
  101. yield from self._set_format_type(formats, fmt_type)
  102. # TODO: Add test for these
  103. for fmt_type in self._FORMAT_TYPES:
  104. if not has_hls:
  105. hls_formats = self._extract_m3u8_formats(
  106. f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/playlist.m3u8',
  107. video_id, 'mp4', fatal=False, note=f'Downloading {fmt_type} HLS manifest', errnote=False) or []
  108. yield from self._set_format_type(hls_formats, fmt_type)
  109. if not has_dash:
  110. dash_formats = self._extract_mpd_formats(
  111. f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/manifest_mpm4sav_mvlist.mpd',
  112. video_id, fatal=False, note=f'Downloading {fmt_type} DASH manifest', errnote=False)
  113. yield from self._set_format_type(dash_formats, fmt_type)
  114. class TubeTuGrazIE(TubeTuGrazBaseIE):
  115. IE_DESC = 'tube.tugraz.at'
  116. _VALID_URL = r'''(?x)
  117. https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
  118. (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
  119. '''
  120. _TESTS = [
  121. {
  122. 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
  123. 'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
  124. 'info_dict': {
  125. 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
  126. 'ext': 'mp4',
  127. 'title': '#6 (23.11.2017)',
  128. 'episode': '#6 (23.11.2017)',
  129. 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
  130. 'creator': 'Safran C',
  131. 'duration': 3295818,
  132. 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
  133. },
  134. }, {
  135. 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
  136. 'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
  137. 'info_dict': {
  138. 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
  139. 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
  140. 'ext': 'mp4',
  141. },
  142. 'expected_warnings': ['Extractor failed to obtain "title"'],
  143. },
  144. ]
  145. def _real_extract(self, url):
  146. video_id = self._match_id(url)
  147. episode_data = self._download_json(
  148. self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
  149. episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
  150. return self._extract_episode(episode_info)
  151. class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
  152. _VALID_URL = r'''(?x)
  153. https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
  154. (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
  155. '''
  156. _TESTS = [{
  157. 'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  158. 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  159. 'info_dict': {
  160. 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  161. 'title': '[209351] Strassenwesen',
  162. },
  163. 'playlist': [
  164. {
  165. 'info_dict': {
  166. 'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
  167. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  168. 'ext': 'mp4',
  169. 'title': '#4 Detailprojekt',
  170. 'episode': '#4 Detailprojekt',
  171. 'series': '[209351] Strassenwesen',
  172. 'creator': 'Neuhold R',
  173. 'duration': 6127024,
  174. },
  175. },
  176. {
  177. 'info_dict': {
  178. 'id': '87350498-799a-44d3-863f-d1518a98b114',
  179. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  180. 'ext': 'mp4',
  181. 'title': '#3 Generelles Projekt',
  182. 'episode': '#3 Generelles Projekt',
  183. 'series': '[209351] Strassenwesen',
  184. 'creator': 'Neuhold R',
  185. 'duration': 5374422,
  186. },
  187. },
  188. {
  189. 'info_dict': {
  190. 'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
  191. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  192. 'ext': 'mp4',
  193. 'title': '#2 Vorprojekt',
  194. 'episode': '#2 Vorprojekt',
  195. 'series': '[209351] Strassenwesen',
  196. 'creator': 'Neuhold R',
  197. 'duration': 5566404,
  198. },
  199. },
  200. {
  201. 'info_dict': {
  202. 'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
  203. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  204. 'ext': 'mp4',
  205. 'title': '#1 Variantenstudium',
  206. 'episode': '#1 Variantenstudium',
  207. 'series': '[209351] Strassenwesen',
  208. 'creator': 'Neuhold R',
  209. 'duration': 5420200,
  210. },
  211. },
  212. ],
  213. 'min_playlist_count': 4,
  214. }]
  215. def _real_extract(self, url):
  216. playlist_id = self._match_id(url)
  217. episodes_data = self._download_json(
  218. self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list')
  219. series_data = self._download_json(
  220. 'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False,
  221. note='downloading series metadata', errnote='failed to download series metadata',
  222. query={
  223. 'seriesId': playlist_id,
  224. 'count': 1,
  225. 'sort': 'TITLE',
  226. })
  227. return self.playlist_result(
  228. map(self._extract_episode, episodes_data['search-results']['result']), playlist_id,
  229. traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))