ruv.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. determine_ext,
  4. parse_duration,
  5. traverse_obj,
  6. unified_timestamp,
  7. )
  8. class RuvIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
  10. _TESTS = [{
  11. # m3u8
  12. 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
  13. 'md5': '66347652f4e13e71936817102acc1724',
  14. 'info_dict': {
  15. 'id': '1144499',
  16. 'display_id': 'fh-valur/20170516',
  17. 'ext': 'mp4',
  18. 'title': 'FH - Valur',
  19. 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
  20. 'timestamp': 1494963600,
  21. 'upload_date': '20170516',
  22. },
  23. }, {
  24. # mp3
  25. 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
  26. 'md5': '395ea250c8a13e5fdb39d4670ef85378',
  27. 'info_dict': {
  28. 'id': '1153630',
  29. 'display_id': 'morgunutvarpid/20170619',
  30. 'ext': 'mp3',
  31. 'title': 'Morgunútvarpið',
  32. 'description': 'md5:a4cf1202c0a1645ca096b06525915418',
  33. 'timestamp': 1497855000,
  34. 'upload_date': '20170619',
  35. },
  36. }, {
  37. 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
  38. 'only_matching': True,
  39. }, {
  40. 'url': 'http://www.ruv.is/node/1151854',
  41. 'only_matching': True,
  42. }, {
  43. 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
  44. 'only_matching': True,
  45. }, {
  46. 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
  47. 'only_matching': True,
  48. }]
  49. def _real_extract(self, url):
  50. display_id = self._match_id(url)
  51. webpage = self._download_webpage(url, display_id)
  52. title = self._og_search_title(webpage)
  53. FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
  54. media_url = self._html_search_regex(
  55. FIELD_RE % 'src', webpage, 'video URL', group='url')
  56. video_id = self._search_regex(
  57. r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
  58. webpage, 'video id', default=display_id)
  59. ext = determine_ext(media_url)
  60. if ext == 'm3u8':
  61. formats = self._extract_m3u8_formats(
  62. media_url, video_id, 'mp4', entry_protocol='m3u8_native',
  63. m3u8_id='hls')
  64. elif ext == 'mp3':
  65. formats = [{
  66. 'format_id': 'mp3',
  67. 'url': media_url,
  68. 'vcodec': 'none',
  69. }]
  70. else:
  71. formats = [{
  72. 'url': media_url,
  73. }]
  74. description = self._og_search_description(webpage, default=None)
  75. thumbnail = self._og_search_thumbnail(
  76. webpage, default=None) or self._search_regex(
  77. FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
  78. timestamp = unified_timestamp(self._html_search_meta(
  79. 'article:published_time', webpage, 'timestamp', fatal=False))
  80. return {
  81. 'id': video_id,
  82. 'display_id': display_id,
  83. 'title': title,
  84. 'description': description,
  85. 'thumbnail': thumbnail,
  86. 'timestamp': timestamp,
  87. 'formats': formats,
  88. }
  89. class RuvSpilaIE(InfoExtractor):
  90. IE_NAME = 'ruv.is:spila'
  91. _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:(?:sjon|ut)varp|(?:krakka|ung)ruv)/spila/.+/(?P<series_id>[0-9]+)/(?P<id>[a-z0-9]+)'
  92. _TESTS = [{
  93. 'url': 'https://www.ruv.is/sjonvarp/spila/ithrottir/30657/9jcnd4',
  94. 'info_dict': {
  95. 'id': '9jcnd4',
  96. 'ext': 'mp4',
  97. 'title': '01.02.2022',
  98. 'chapters': 'count:4',
  99. 'timestamp': 1643743500,
  100. 'upload_date': '20220201',
  101. 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/94boog-iti3jg.jpg',
  102. 'description': 'Íþróttafréttir.',
  103. 'age_limit': 0,
  104. },
  105. }, {
  106. 'url': 'https://www.ruv.is/utvarp/spila/i-ljosi-sogunnar/23795/7hqkre',
  107. 'info_dict': {
  108. 'id': '7hqkre',
  109. 'ext': 'mp3',
  110. 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/7hqkre-7uepao.jpg',
  111. 'description': 'md5:8d7046549daff35e9a3190dc9901a120',
  112. 'chapters': [],
  113. 'upload_date': '20220204',
  114. 'timestamp': 1643965500,
  115. 'title': 'Nellie Bly II',
  116. 'age_limit': 0,
  117. },
  118. }, {
  119. 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph',
  120. 'only_matching': True,
  121. }, {
  122. 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0',
  123. 'only_matching': True,
  124. }]
  125. def _real_extract(self, url):
  126. display_id, series_id = self._match_valid_url(url).group('id', 'series_id')
  127. program = self._download_json(
  128. 'https://www.ruv.is/gql/', display_id, query={'query': '''{
  129. Program(id: %s){
  130. title image description short_description
  131. episodes(id: {value: "%s"}) {
  132. rating title duration file image firstrun description
  133. clips {
  134. time text
  135. }
  136. subtitles {
  137. name value
  138. }
  139. }
  140. }
  141. }''' % (series_id, display_id)})['data']['Program'] # noqa: UP031
  142. episode = program['episodes'][0]
  143. subs = {}
  144. for trk in episode.get('subtitles'):
  145. if trk.get('name') and trk.get('value'):
  146. subs.setdefault(trk['name'], []).append({'url': trk['value'], 'ext': 'vtt'})
  147. media_url = episode['file']
  148. if determine_ext(media_url) == 'm3u8':
  149. formats = self._extract_m3u8_formats(media_url, display_id)
  150. else:
  151. formats = [{'url': media_url}]
  152. clips = [
  153. {'start_time': parse_duration(c.get('time')), 'title': c.get('text')}
  154. for c in episode.get('clips') or []]
  155. return {
  156. 'id': display_id,
  157. 'title': traverse_obj(program, ('episodes', 0, 'title'), 'title'),
  158. 'description': traverse_obj(
  159. program, ('episodes', 0, 'description'), 'description', 'short_description',
  160. expected_type=lambda x: x or None),
  161. 'subtitles': subs,
  162. 'thumbnail': episode.get('image', '').replace('$$IMAGESIZE$$', '1960') or None,
  163. 'timestamp': unified_timestamp(episode.get('firstrun')),
  164. 'formats': formats,
  165. 'age_limit': episode.get('rating'),
  166. 'chapters': clips,
  167. }