rtvslo.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. int_or_none,
  6. parse_duration,
  7. traverse_obj,
  8. unified_timestamp,
  9. url_or_none,
  10. urljoin,
  11. )
  12. class RTVSLOIE(InfoExtractor):
  13. IE_NAME = 'rtvslo.si'
  14. _VALID_URL = r'''(?x)
  15. https?://(?:
  16. (?:365|4d)\.rtvslo.si/arhiv/[^/?#&;]+|
  17. (?:www\.)?rtvslo\.si/rtv365/arhiv
  18. )/(?P<id>\d+)'''
  19. _GEO_COUNTRIES = ['SI']
  20. _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
  21. SUB_LANGS_MAP = {'Slovenski': 'sl'}
  22. _TESTS = [{
  23. 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
  24. 'info_dict': {
  25. 'id': '174842550',
  26. 'ext': 'mp4',
  27. 'release_timestamp': 1643140032,
  28. 'upload_date': '20220125',
  29. 'series': 'Dnevnik',
  30. 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg',
  31. 'description': 'md5:76a18692757aeb8f0f51221106277dd2',
  32. 'timestamp': 1643137046,
  33. 'title': 'Dnevnik',
  34. 'series_id': '92',
  35. 'release_date': '20220125',
  36. 'duration': 1789,
  37. },
  38. }, {
  39. 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754',
  40. 'info_dict': {
  41. 'id': '174843754',
  42. 'ext': 'mp4',
  43. 'series_id': '94',
  44. 'release_date': '20220129',
  45. 'timestamp': 1643484455,
  46. 'title': 'Utrip',
  47. 'duration': 813,
  48. 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg',
  49. 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9',
  50. 'release_timestamp': 1643485825,
  51. 'upload_date': '20220129',
  52. 'series': 'Utrip',
  53. },
  54. }, {
  55. 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609',
  56. 'info_dict': {
  57. 'id': '174844609',
  58. 'ext': 'mp3',
  59. 'series_id': '106615841',
  60. 'title': 'Il giornale della sera',
  61. 'duration': 1328,
  62. 'series': 'Il giornale della sera',
  63. 'timestamp': 1643743800,
  64. 'release_timestamp': 1643745424,
  65. 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg',
  66. 'upload_date': '20220201',
  67. 'tbr': 128000,
  68. 'release_date': '20220201',
  69. },
  70. }, {
  71. 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
  72. 'info_dict': {
  73. 'id': '148350750',
  74. 'ext': 'mp4',
  75. 'title': 'Prvi šolski dan, mozaična oddaja za mlade',
  76. 'series': 'Razred zase',
  77. 'series_id': '148185730',
  78. 'duration': 1481,
  79. 'upload_date': '20121019',
  80. 'timestamp': 1350672122,
  81. 'release_date': '20121019',
  82. 'release_timestamp': 1350672122,
  83. 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
  84. },
  85. }, {
  86. 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
  87. 'only_matching': True,
  88. }]
  89. def _real_extract(self, url):
  90. v_id = self._match_id(url)
  91. meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response']
  92. thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}}
  93. for k, v in (meta.get('images') or {}).items()]
  94. subs = {}
  95. for s in traverse_obj(meta, 'subs', 'subtitles', default=[]):
  96. lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und')
  97. subs.setdefault(lang, []).append({
  98. 'url': s.get('file'),
  99. 'ext': traverse_obj(s, 'format', expected_type=str.lower),
  100. })
  101. jwt = meta.get('jwt')
  102. if not jwt:
  103. raise ExtractorError('Site did not provide an authentication token, cannot proceed.')
  104. media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']
  105. formats = []
  106. skip_protocols = ['smil', 'f4m', 'dash']
  107. adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
  108. if adaptive_url:
  109. formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)
  110. adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
  111. if adaptive_url:
  112. for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
  113. formats.append({
  114. **f,
  115. 'format_id': 'sign-' + f['format_id'],
  116. 'format_note': 'Sign language interpretation', 'preference': -10,
  117. 'language': (
  118. 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none'
  119. else f.get('language')),
  120. })
  121. for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
  122. formats.append(traverse_obj(mediafile, {
  123. 'url': ('streams', 'https'),
  124. 'ext': ('mediaType', {str.lower}),
  125. 'width': ('width', {int_or_none}),
  126. 'height': ('height', {int_or_none}),
  127. 'tbr': ('bitrate', {int_or_none}),
  128. 'filesize': ('filesize', {int_or_none}),
  129. }))
  130. for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
  131. formats.extend(self._extract_wowza_formats(
  132. mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))
  133. if any('intermission.mp4' in x['url'] for x in formats):
  134. self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
  135. if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
  136. raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)
  137. return {
  138. 'id': v_id,
  139. 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
  140. 'title': meta.get('title'),
  141. 'formats': formats,
  142. 'subtitles': subs,
  143. 'thumbnails': thumbs,
  144. 'description': meta.get('description'),
  145. 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))),
  146. 'release_timestamp': unified_timestamp(meta.get('recordingDate')),
  147. 'duration': meta.get('duration') or parse_duration(meta.get('length')),
  148. 'tags': meta.get('genre'),
  149. 'series': meta.get('showName'),
  150. 'series_id': meta.get('showId'),
  151. }
  152. class RTVSLOShowIE(InfoExtractor):
  153. IE_NAME = 'rtvslo.si:show'
  154. _VALID_URL = r'https?://(?:365|4d)\.rtvslo.si/oddaja/[^/?#&]+/(?P<id>\d+)'
  155. _TESTS = [{
  156. 'url': 'https://365.rtvslo.si/oddaja/ekipa-bled/173250997',
  157. 'info_dict': {
  158. 'id': '173250997',
  159. 'title': 'Ekipa Bled',
  160. },
  161. 'playlist_count': 18,
  162. }]
  163. def _real_extract(self, url):
  164. playlist_id = self._match_id(url)
  165. webpage = self._download_webpage(url, playlist_id)
  166. return self.playlist_from_matches(
  167. re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
  168. playlist_id, self._html_extract_title(webpage),
  169. getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE)