nuum.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. import functools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. OnDemandPagedList,
  6. UserNotLive,
  7. filter_dict,
  8. int_or_none,
  9. parse_iso8601,
  10. str_or_none,
  11. url_or_none,
  12. )
  13. from ..utils.traversal import traverse_obj
  14. class NuumBaseIE(InfoExtractor):
  15. def _call_api(self, path, video_id, description, query={}):
  16. response = self._download_json(
  17. f'https://nuum.ru/api/v2/{path}', video_id, query=query,
  18. note=f'Downloading {description} metadata',
  19. errnote=f'Unable to download {description} metadata')
  20. if error := response.get('error'):
  21. raise ExtractorError(f'API returned error: {error!r}')
  22. return response['result']
  23. def _get_channel_info(self, channel_name):
  24. return self._call_api(
  25. 'broadcasts/public', video_id=channel_name, description='channel',
  26. query={
  27. 'with_extra': 'true',
  28. 'channel_name': channel_name,
  29. 'with_deleted': 'true',
  30. })
  31. def _parse_video_data(self, container, extract_formats=True):
  32. stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
  33. media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
  34. media_url = traverse_obj(media, (
  35. 'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)
  36. video_id = str(container['media_container_id'])
  37. is_live = media.get('media_status') == 'RUNNING'
  38. formats, subtitles = None, None
  39. headers = {'Referer': 'https://nuum.ru/'}
  40. if extract_formats:
  41. formats, subtitles = self._extract_m3u8_formats_and_subtitles(
  42. media_url, video_id, 'mp4', live=is_live, headers=headers)
  43. return filter_dict({
  44. 'id': video_id,
  45. 'is_live': is_live,
  46. 'formats': formats,
  47. 'subtitles': subtitles,
  48. 'http_headers': headers,
  49. **traverse_obj(container, {
  50. 'title': ('media_container_name', {str}),
  51. 'description': ('media_container_description', {str}),
  52. 'timestamp': ('created_at', {parse_iso8601}),
  53. 'channel': ('media_container_channel', 'channel_name', {str}),
  54. 'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
  55. }),
  56. **traverse_obj(stream, {
  57. 'view_count': ('stream_total_viewers', {int_or_none}),
  58. 'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
  59. }),
  60. **traverse_obj(media, {
  61. 'duration': ('media_duration', {int_or_none}),
  62. 'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
  63. }, get_all=False),
  64. })
  65. class NuumMediaIE(NuumBaseIE):
  66. IE_NAME = 'nuum:media'
  67. _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
  68. _TESTS = [{
  69. 'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
  70. 'only_matching': True,
  71. }, {
  72. 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
  73. 'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
  74. 'info_dict': {
  75. 'id': '1567547',
  76. 'ext': 'mp4',
  77. 'title': 'Toxi$ - Hurtz',
  78. 'description': '',
  79. 'timestamp': 1702631651,
  80. 'upload_date': '20231215',
  81. 'thumbnail': r're:^https?://.+\.jpg',
  82. 'view_count': int,
  83. 'concurrent_view_count': int,
  84. 'channel_id': '6911',
  85. 'channel': 'toxis',
  86. 'duration': 116,
  87. },
  88. }, {
  89. 'url': 'https://nuum.ru/clips/1552564-pro-misu',
  90. 'md5': 'b248ae1565b1e55433188f11beeb0ca1',
  91. 'info_dict': {
  92. 'id': '1552564',
  93. 'ext': 'mp4',
  94. 'title': 'Про Мису 🙃',
  95. 'timestamp': 1701971828,
  96. 'upload_date': '20231207',
  97. 'thumbnail': r're:^https?://.+\.jpg',
  98. 'view_count': int,
  99. 'concurrent_view_count': int,
  100. 'channel_id': '3320',
  101. 'channel': 'Misalelik',
  102. 'duration': 41,
  103. },
  104. }]
  105. def _real_extract(self, url):
  106. video_id = self._match_id(url)
  107. video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media')
  108. return self._parse_video_data(video_data)
  109. class NuumLiveIE(NuumBaseIE):
  110. IE_NAME = 'nuum:live'
  111. _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
  112. _TESTS = [{
  113. 'url': 'https://nuum.ru/channel/mts_live',
  114. 'only_matching': True,
  115. }]
  116. def _real_extract(self, url):
  117. channel = self._match_id(url)
  118. channel_info = self._get_channel_info(channel)
  119. if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False:
  120. raise UserNotLive(video_id=channel)
  121. info = self._parse_video_data(channel_info['media_container'])
  122. return {
  123. 'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
  124. 'extractor_key': NuumMediaIE.ie_key(),
  125. 'extractor': NuumMediaIE.IE_NAME,
  126. **info,
  127. }
  128. class NuumTabIE(NuumBaseIE):
  129. IE_NAME = 'nuum:tab'
  130. _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
  131. _TESTS = [{
  132. 'url': 'https://nuum.ru/channel/dankon_/clips',
  133. 'info_dict': {
  134. 'id': 'dankon__clips',
  135. 'title': 'Dankon_',
  136. },
  137. 'playlist_mincount': 29,
  138. }, {
  139. 'url': 'https://nuum.ru/channel/dankon_/videos',
  140. 'info_dict': {
  141. 'id': 'dankon__videos',
  142. 'title': 'Dankon_',
  143. },
  144. 'playlist_mincount': 2,
  145. }, {
  146. 'url': 'https://nuum.ru/channel/dankon_/streams',
  147. 'info_dict': {
  148. 'id': 'dankon__streams',
  149. 'title': 'Dankon_',
  150. },
  151. 'playlist_mincount': 1,
  152. }]
  153. _PAGE_SIZE = 50
  154. def _fetch_page(self, channel_id, tab_type, tab_id, page):
  155. CONTAINER_TYPES = {
  156. 'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
  157. 'videos': ['LONG_VIDEO'],
  158. 'streams': ['SINGLE'],
  159. }
  160. media_containers = self._call_api(
  161. 'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}',
  162. query={
  163. 'limit': self._PAGE_SIZE,
  164. 'offset': page * self._PAGE_SIZE,
  165. 'channel_id': channel_id,
  166. 'media_container_status': 'STOPPED',
  167. 'media_container_type': CONTAINER_TYPES[tab_type],
  168. })
  169. for container in traverse_obj(media_containers, (..., {dict})):
  170. metadata = self._parse_video_data(container, extract_formats=False)
  171. yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata)
  172. def _real_extract(self, url):
  173. channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
  174. tab_id = f'{channel_name}_{tab_type}'
  175. channel_data = self._get_channel_info(channel_name)['channel']
  176. return self.playlist_result(OnDemandPagedList(functools.partial(
  177. self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE),
  178. playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))