digitalconcerthall.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. ExtractorError,
  4. try_get,
  5. url_or_none,
  6. urlencode_postdata,
  7. )
  8. from ..utils.traversal import traverse_obj
  9. class DigitalConcertHallIE(InfoExtractor):
  10. IE_DESC = 'DigitalConcertHall extractor'
  11. _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
  12. _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
  13. _ACCESS_TOKEN = None
  14. _NETRC_MACHINE = 'digitalconcerthall'
  15. _TESTS = [{
  16. 'note': 'Playlist with only one video',
  17. 'url': 'https://www.digitalconcerthall.com/en/concert/53201',
  18. 'info_dict': {
  19. 'id': '53201-1',
  20. 'ext': 'mp4',
  21. 'composer': 'Kurt Weill',
  22. 'title': '[Magic Night]',
  23. 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
  24. 'upload_date': '20210624',
  25. 'timestamp': 1624548600,
  26. 'duration': 2798,
  27. 'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
  28. 'composers': ['Kurt Weill'],
  29. },
  30. 'params': {'skip_download': 'm3u8'},
  31. }, {
  32. 'note': 'Concert with several works and an interview',
  33. 'url': 'https://www.digitalconcerthall.com/en/concert/53785',
  34. 'info_dict': {
  35. 'id': '53785',
  36. 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
  37. 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
  38. 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
  39. },
  40. 'params': {'skip_download': 'm3u8'},
  41. 'playlist_count': 3,
  42. }, {
  43. 'url': 'https://www.digitalconcerthall.com/en/film/388',
  44. 'info_dict': {
  45. 'id': '388',
  46. 'ext': 'mp4',
  47. 'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
  48. 'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
  49. 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
  50. 'upload_date': '20220714',
  51. 'timestamp': 1657785600,
  52. 'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
  53. },
  54. 'params': {'skip_download': 'm3u8'},
  55. }, {
  56. 'note': 'Concert with several works and an interview',
  57. 'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
  58. 'info_dict': {
  59. 'id': '53785',
  60. 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
  61. 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
  62. 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
  63. },
  64. 'params': {'skip_download': 'm3u8'},
  65. 'playlist_count': 1,
  66. }]
  67. def _perform_login(self, username, password):
  68. token_response = self._download_json(
  69. self._OAUTH_URL,
  70. None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
  71. 'affiliate': 'none',
  72. 'grant_type': 'device',
  73. 'device_vendor': 'unknown',
  74. 'app_id': 'dch.webapp',
  75. 'app_version': '1.0.0',
  76. 'client_secret': '2ySLN+2Fwb',
  77. }), headers={
  78. 'Content-Type': 'application/x-www-form-urlencoded',
  79. })
  80. self._ACCESS_TOKEN = token_response['access_token']
  81. try:
  82. self._download_json(
  83. self._OAUTH_URL,
  84. None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
  85. 'grant_type': 'password',
  86. 'username': username,
  87. 'password': password,
  88. }), headers={
  89. 'Content-Type': 'application/x-www-form-urlencoded',
  90. 'Referer': 'https://www.digitalconcerthall.com',
  91. 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
  92. })
  93. except ExtractorError:
  94. self.raise_login_required(msg='Login info incorrect')
  95. def _real_initialize(self):
  96. if not self._ACCESS_TOKEN:
  97. self.raise_login_required(method='password')
  98. def _entries(self, items, language, type_, **kwargs):
  99. for item in items:
  100. video_id = item['id']
  101. stream_info = self._download_json(
  102. self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
  103. 'Accept': 'application/json',
  104. 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
  105. 'Accept-Language': language,
  106. })
  107. formats = []
  108. for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
  109. formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False))
  110. yield {
  111. 'id': video_id,
  112. 'title': item.get('title'),
  113. 'composer': item.get('name_composer'),
  114. 'formats': formats,
  115. 'duration': item.get('duration_total'),
  116. 'timestamp': traverse_obj(item, ('date', 'published')),
  117. 'description': item.get('short_description') or stream_info.get('short_description'),
  118. **kwargs,
  119. 'chapters': [{
  120. 'start_time': chapter.get('time'),
  121. 'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
  122. 'title': chapter.get('text'),
  123. } for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None,
  124. }
  125. def _real_extract(self, url):
  126. language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
  127. if not language:
  128. language = 'en'
  129. api_type = 'concert' if type_ == 'work' else type_
  130. vid_info = self._download_json(
  131. f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
  132. 'Accept': 'application/json',
  133. 'Accept-Language': language,
  134. })
  135. album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name'))
  136. videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
  137. if type_ == 'work':
  138. videos = [videos[int(part) - 1]]
  139. thumbnail = traverse_obj(vid_info, (
  140. 'image', ..., {self._proto_relative_url}, {url_or_none},
  141. {lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
  142. return {
  143. '_type': 'playlist',
  144. 'id': video_id,
  145. 'title': vid_info.get('title'),
  146. 'entries': self._entries(
  147. videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
  148. 'thumbnail': thumbnail,
  149. 'album_artists': album_artists,
  150. }