# medici.py
  1. import urllib.parse
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. filter_dict,
  5. parse_iso8601,
  6. traverse_obj,
  7. try_call,
  8. url_or_none,
  9. )
  10. class MediciIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
  12. _TESTS = [{
  13. 'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
  14. 'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
  15. 'info_dict': {
  16. 'id': '8032',
  17. 'ext': 'mp4',
  18. 'title': 'Thomas Adès\'s The Exterminating Angel',
  19. 'description': 'md5:708ae6350dadc604225b4a6e32482bab',
  20. 'thumbnail': r're:https://.+/.+\.jpg',
  21. 'upload_date': '20240304',
  22. 'timestamp': 1709561766,
  23. 'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
  24. },
  25. 'expected_warnings': [r'preview'],
  26. }, {
  27. 'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
  28. 'md5': '4ef3f4079a6e1c617584463a9eb84f99',
  29. 'info_dict': {
  30. 'id': '7900',
  31. 'ext': 'mp4',
  32. 'title': 'Wagner\'s Lohengrin',
  33. 'description': 'md5:a384a62937866101f86902f21752cd89',
  34. 'thumbnail': r're:https://.+/.+\.jpg',
  35. 'upload_date': '20231017',
  36. 'timestamp': 1697554771,
  37. 'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
  38. },
  39. 'expected_warnings': [r'preview'],
  40. }, {
  41. 'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
  42. 'md5': '9dd757e53b22b2511e85ea9ea60e4815',
  43. 'info_dict': {
  44. 'id': '5712',
  45. 'ext': 'mp4',
  46. 'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
  47. 'thumbnail': r're:https://.+/.+\.jpg',
  48. 'description': 'md5:9411fe44c874bb10e9af288c65816e41',
  49. 'upload_date': '20200323',
  50. 'timestamp': 1584975600,
  51. 'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
  52. },
  53. 'expected_warnings': [r'preview'],
  54. }, {
  55. 'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
  56. 'md5': '40f5e76cb701a97a6d7ba23b62c49990',
  57. 'info_dict': {
  58. 'id': '7857',
  59. 'ext': 'mp4',
  60. 'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
  61. 'thumbnail': r're:https://.+/.+\.jpg',
  62. 'description': 'md5:0f15a15611ed748020c769873e10a8bb',
  63. 'upload_date': '20240223',
  64. 'timestamp': 1708707600,
  65. 'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
  66. },
  67. 'expected_warnings': [r'preview'],
  68. }, {
  69. 'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
  70. 'md5': '87ff198018ce79a34757ab0dd6f21080',
  71. 'info_dict': {
  72. 'id': '7513',
  73. 'ext': 'mp4',
  74. 'title': 'La Sonnambula',
  75. 'thumbnail': r're:https://.+/.+\.jpg',
  76. 'description': 'md5:0caf9109a860fd50cd018df062a67f34',
  77. 'upload_date': '20231103',
  78. 'timestamp': 1699010830,
  79. 'display_id': 'la-sonnambula-liege-2023-documentaire',
  80. },
  81. 'expected_warnings': [r'preview'],
  82. }, {
  83. 'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
  84. 'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
  85. 'info_dict': {
  86. 'id': '3024',
  87. 'ext': 'mp4',
  88. 'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
  89. 'thumbnail': r're:https://.+/.+\.jpg',
  90. 'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
  91. 'upload_date': '20150223',
  92. 'timestamp': 1424706608,
  93. 'display_id': 'yvonne-loriod-olivier-messiaen',
  94. },
  95. 'skip': 'Requires authentication; preview starts in the middle',
  96. }, {
  97. 'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
  98. 'md5': '4cc279a8b06609782747c8f50beea2b3',
  99. 'info_dict': {
  100. 'id': '7922',
  101. 'ext': 'mp4',
  102. 'title': 'NEW: Makaya McCraven in La Rochelle',
  103. 'thumbnail': r're:https://.+/.+\.jpg',
  104. 'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
  105. 'upload_date': '20231228',
  106. 'timestamp': 1703754863,
  107. 'display_id': 'makaya-mccraven-la-rochelle',
  108. },
  109. 'expected_warnings': [r'preview'],
  110. }]
  111. def _real_extract(self, url):
  112. display_id, subdomain = self._match_valid_url(url).group('id', 'sub')
  113. self._request_webpage(url, display_id, 'Requesting CSRF token cookie')
  114. subdomain = 'edu-' if subdomain == 'edu' else ''
  115. origin = f'https://{urllib.parse.urlparse(url).hostname}'
  116. data = self._download_json(
  117. f'https://api.medici.tv/{subdomain}satie/edito/movie-file/{display_id}/', display_id,
  118. headers=filter_dict({
  119. 'Authorization': try_call(
  120. lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value)),
  121. 'Device-Type': 'web',
  122. 'Origin': origin,
  123. 'Referer': f'{origin}/',
  124. 'Accept': 'application/json, text/plain, */*',
  125. }))
  126. if not traverse_obj(data, ('video', 'is_full_video')) and traverse_obj(
  127. data, ('video', 'is_limited_by_user_access')):
  128. self.report_warning(
  129. 'The full video is for subscribers only. Only previews will be downloaded. If you '
  130. 'have used the --cookies-from-browser option, try using the --cookies option instead')
  131. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  132. data['video']['video_url'], display_id, 'mp4')
  133. return {
  134. 'id': str(data['id']),
  135. 'display_id': display_id,
  136. 'formats': formats,
  137. 'subtitles': subtitles,
  138. **traverse_obj(data, {
  139. 'title': ('title', {str}),
  140. 'description': ('subtitle', {str}),
  141. 'thumbnail': ('picture', {url_or_none}),
  142. 'timestamp': ('date_publish', {parse_iso8601}),
  143. }),
  144. }