# s4c.py (4.0 KB)
  1. from .common import InfoExtractor
  2. from ..utils import traverse_obj, url_or_none
  3. class S4CIE(InfoExtractor):
  4. _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
  5. _TESTS = [{
  6. 'url': 'https://www.s4c.cymru/clic/programme/861362209',
  7. 'info_dict': {
  8. 'id': '861362209',
  9. 'ext': 'mp4',
  10. 'title': 'Y Swn',
  11. 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
  12. 'duration': 5340,
  13. 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
  14. },
  15. }, {
  16. 'url': 'https://www.s4c.cymru/clic/programme/856636948',
  17. 'info_dict': {
  18. 'id': '856636948',
  19. 'ext': 'mp4',
  20. 'title': 'Am Dro',
  21. 'duration': 2880,
  22. 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
  23. 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
  24. },
  25. }]
  26. def _real_extract(self, url):
  27. video_id = self._match_id(url)
  28. details = self._download_json(
  29. f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
  30. video_id, fatal=False)
  31. player_config = self._download_json(
  32. 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
  33. 'programme_id': video_id,
  34. 'signed': '0',
  35. 'lang': 'en',
  36. 'mode': 'od',
  37. 'appId': 'clic',
  38. 'streamName': '',
  39. }, note='Downloading player config JSON')
  40. subtitles = {}
  41. for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
  42. subtitles.setdefault(sub.get('3', 'en'), []).append({
  43. 'url': sub['0'],
  44. 'name': sub.get('1'),
  45. })
  46. m3u8_url = self._download_json(
  47. 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
  48. 'mode': 'od',
  49. 'application': 'clic',
  50. 'region': 'WW',
  51. 'extra': 'false',
  52. 'thirdParty': 'false',
  53. 'filename': player_config['filename'],
  54. }, note='Downloading streaming urls JSON')['hls']
  55. return {
  56. 'id': video_id,
  57. 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
  58. 'subtitles': subtitles,
  59. 'thumbnail': url_or_none(player_config.get('poster')),
  60. **traverse_obj(details, ('full_prog_details', 0, {
  61. 'title': (('programme_title', 'series_title'), {str}),
  62. 'description': ('full_billing', {str.strip}),
  63. 'duration': ('duration', {lambda x: int(x) * 60}),
  64. }), get_all=False),
  65. }
  66. class S4CSeriesIE(InfoExtractor):
  67. _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
  68. _TESTS = [{
  69. 'url': 'https://www.s4c.cymru/clic/series/864982911',
  70. 'playlist_mincount': 6,
  71. 'info_dict': {
  72. 'id': '864982911',
  73. 'title': 'Iaith ar Daith',
  74. },
  75. }, {
  76. 'url': 'https://www.s4c.cymru/clic/series/866852587',
  77. 'playlist_mincount': 8,
  78. 'info_dict': {
  79. 'id': '866852587',
  80. 'title': 'FFIT Cymru',
  81. },
  82. }]
  83. def _real_extract(self, url):
  84. series_id = self._match_id(url)
  85. series_details = self._download_json(
  86. 'https://www.s4c.cymru/df/series_details', series_id, query={
  87. 'lang': 'e',
  88. 'series_id': series_id,
  89. 'show_prog_in_series': 'Y',
  90. }, note='Downloading series details JSON')
  91. return self.playlist_result(
  92. [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
  93. for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
  94. series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))