globalplayer.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. join_nonempty,
  5. parse_duration,
  6. str_or_none,
  7. traverse_obj,
  8. unified_strdate,
  9. unified_timestamp,
  10. urlhandle_detect_ext,
  11. )
  12. class GlobalPlayerBaseIE(InfoExtractor):
  13. def _get_page_props(self, url, video_id):
  14. webpage = self._download_webpage(url, video_id)
  15. return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
  16. def _request_ext(self, url, video_id):
  17. return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests
  18. url, video_id, note='Determining source extension'))
  19. def _extract_audio(self, episode, series):
  20. return {
  21. 'vcodec': 'none',
  22. **traverse_obj(series, {
  23. 'series': 'title',
  24. 'series_id': 'id',
  25. 'thumbnail': 'imageUrl',
  26. 'uploader': 'itunesAuthor', # podcasts only
  27. }),
  28. **traverse_obj(episode, {
  29. 'id': 'id',
  30. 'description': ('description', {clean_html}),
  31. 'duration': ('duration', {parse_duration}),
  32. 'thumbnail': 'imageUrl',
  33. 'url': 'streamUrl',
  34. 'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
  35. 'title': 'title',
  36. }, get_all=False),
  37. }
  38. class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
  39. _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
  40. _TESTS = [{
  41. 'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
  42. 'info_dict': {
  43. 'id': '2mx1E',
  44. 'ext': 'aac',
  45. 'display_id': 'smoothchill-uk',
  46. 'title': 're:^Smooth Chill.+$',
  47. 'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
  48. 'description': 'Music To Chill To',
  49. 'live_status': 'is_live',
  50. },
  51. }, {
  52. # national station
  53. 'url': 'https://www.globalplayer.com/live/heart/uk/',
  54. 'info_dict': {
  55. 'id': '2mwx4',
  56. 'ext': 'aac',
  57. 'description': 'turn up the feel good!',
  58. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  59. 'live_status': 'is_live',
  60. 'title': 're:^Heart UK.+$',
  61. 'display_id': 'heart-uk',
  62. },
  63. }, {
  64. # regional variation
  65. 'url': 'https://www.globalplayer.com/live/heart/london/',
  66. 'info_dict': {
  67. 'id': 'AMqg',
  68. 'ext': 'aac',
  69. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  70. 'title': 're:^Heart London.+$',
  71. 'live_status': 'is_live',
  72. 'display_id': 'heart-london',
  73. 'description': 'turn up the feel good!',
  74. },
  75. }]
  76. def _real_extract(self, url):
  77. video_id = self._match_id(url)
  78. station = self._get_page_props(url, video_id)['station']
  79. stream_url = station['streamUrl']
  80. return {
  81. 'id': station['id'],
  82. 'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'),
  83. 'url': stream_url,
  84. 'ext': self._request_ext(stream_url, video_id),
  85. 'vcodec': 'none',
  86. 'is_live': True,
  87. **traverse_obj(station, {
  88. 'title': (('name', 'brandName'), {str_or_none}),
  89. 'description': 'tagline',
  90. 'thumbnail': 'brandLogo',
  91. }, get_all=False),
  92. }
  93. class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
  94. _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
  95. _TESTS = [{
  96. # "live playlist"
  97. 'url': 'https://www.globalplayer.com/playlists/8bLk/',
  98. 'info_dict': {
  99. 'id': '8bLk',
  100. 'ext': 'aac',
  101. 'live_status': 'is_live',
  102. 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
  103. 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
  104. 'title': 're:^Classic FM Hall of Fame.+$',
  105. },
  106. }]
  107. def _real_extract(self, url):
  108. video_id = self._match_id(url)
  109. station = self._get_page_props(url, video_id)['playlistData']
  110. stream_url = station['streamUrl']
  111. return {
  112. 'id': video_id,
  113. 'url': stream_url,
  114. 'ext': self._request_ext(stream_url, video_id),
  115. 'vcodec': 'none',
  116. 'is_live': True,
  117. **traverse_obj(station, {
  118. 'title': 'title',
  119. 'description': 'description',
  120. 'thumbnail': 'image',
  121. }),
  122. }
  123. class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
  124. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
  125. _TESTS = [{
  126. # podcast
  127. 'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
  128. 'playlist_mincount': 5,
  129. 'info_dict': {
  130. 'id': '42KuaM',
  131. 'title': 'Filthy Ritual',
  132. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  133. 'categories': ['Society & Culture', 'True Crime'],
  134. 'uploader': 'Global',
  135. 'description': 'md5:da5b918eac9ae319454a10a563afacf9',
  136. },
  137. }, {
  138. # radio catchup
  139. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
  140. 'playlist_mincount': 3,
  141. 'info_dict': {
  142. 'id': '46vyD7z',
  143. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  144. 'title': 'Nick Ferrari',
  145. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  146. },
  147. }]
  148. def _real_extract(self, url):
  149. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  150. props = self._get_page_props(url, video_id)
  151. series = props['podcastInfo'] if podcast else props['catchupInfo']
  152. return {
  153. '_type': 'playlist',
  154. 'id': video_id,
  155. 'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
  156. series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
  157. 'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
  158. **traverse_obj(series, {
  159. 'description': 'description',
  160. 'thumbnail': 'imageUrl',
  161. 'title': 'title',
  162. 'uploader': 'itunesAuthor', # podcasts only
  163. }),
  164. }
  165. class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
  166. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
  167. _TESTS = [{
  168. # podcast
  169. 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
  170. 'info_dict': {
  171. 'id': '7DrfNnE',
  172. 'ext': 'mp3',
  173. 'title': 'Filthy Ritual - Trailer',
  174. 'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
  175. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  176. 'duration': 225.0,
  177. 'timestamp': 1681254900,
  178. 'series': 'Filthy Ritual',
  179. 'series_id': '42KuaM',
  180. 'upload_date': '20230411',
  181. 'uploader': 'Global',
  182. },
  183. }, {
  184. # radio catchup
  185. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
  186. 'info_dict': {
  187. 'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
  188. 'ext': 'm4a',
  189. 'timestamp': 1682056800,
  190. 'series': 'Nick Ferrari',
  191. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  192. 'upload_date': '20230421',
  193. 'series_id': '46vyD7z',
  194. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  195. 'title': 'Nick Ferrari',
  196. 'duration': 10800.0,
  197. },
  198. }]
  199. def _real_extract(self, url):
  200. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  201. props = self._get_page_props(url, video_id)
  202. episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
  203. return self._extract_audio(
  204. episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
  205. class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
  206. _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
  207. _TESTS = [{
  208. 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
  209. 'info_dict': {
  210. 'id': '2JsSZ7Gm2uP',
  211. 'ext': 'mp4',
  212. 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
  213. 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
  214. 'upload_date': '20230420',
  215. 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
  216. },
  217. }]
  218. def _real_extract(self, url):
  219. video_id = self._match_id(url)
  220. meta = self._get_page_props(url, video_id)['videoData']
  221. return {
  222. 'id': video_id,
  223. **traverse_obj(meta, {
  224. 'url': 'url',
  225. 'thumbnail': ('image', 'url'),
  226. 'title': 'title',
  227. 'upload_date': ('publish_date', {unified_strdate}),
  228. 'description': 'description',
  229. }),
  230. }