# ivi.py (9.8 KB)
  1. import json
  2. import re
  3. from .common import InfoExtractor
  4. from ..dependencies import Cryptodome
  5. from ..utils import ExtractorError, int_or_none, qualities
  6. class IviIE(InfoExtractor):
  7. IE_DESC = 'ivi.ru'
  8. IE_NAME = 'ivi'
  9. _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  10. _EMBED_REGEX = [r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1']
  11. _GEO_BYPASS = False
  12. _GEO_COUNTRIES = ['RU']
  13. _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
  14. _LIGHT_URL = 'https://api.ivi.ru/light/'
  15. _TESTS = [
  16. # Single movie
  17. {
  18. 'url': 'http://www.ivi.ru/watch/53141',
  19. 'md5': '6ff5be2254e796ed346251d117196cf4',
  20. 'info_dict': {
  21. 'id': '53141',
  22. 'ext': 'mp4',
  23. 'title': 'Иван Васильевич меняет профессию',
  24. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  25. 'duration': 5498,
  26. 'thumbnail': r're:^https?://.*\.jpg$',
  27. },
  28. 'skip': 'Only works from Russia',
  29. },
  30. # Serial's series
  31. {
  32. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  33. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  34. 'info_dict': {
  35. 'id': '9549',
  36. 'ext': 'mp4',
  37. 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
  38. 'series': 'Двое из ларца',
  39. 'season': 'Сезон 1',
  40. 'season_number': 1,
  41. 'episode': 'Дело Гольдберга (1 часть)',
  42. 'episode_number': 1,
  43. 'duration': 2655,
  44. 'thumbnail': r're:^https?://.*\.jpg$',
  45. },
  46. 'skip': 'Only works from Russia',
  47. },
  48. {
  49. # with MP4-HD720 format
  50. 'url': 'http://www.ivi.ru/watch/146500',
  51. 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
  52. 'info_dict': {
  53. 'id': '146500',
  54. 'ext': 'mp4',
  55. 'title': 'Кукла',
  56. 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
  57. 'duration': 5599,
  58. 'thumbnail': r're:^https?://.*\.jpg$',
  59. },
  60. 'skip': 'Only works from Russia',
  61. },
  62. {
  63. 'url': 'https://www.ivi.tv/watch/33560/',
  64. 'only_matching': True,
  65. },
  66. ]
  67. # Sorted by quality
  68. _KNOWN_FORMATS = (
  69. 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
  70. 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
  71. def _real_extract(self, url):
  72. video_id = self._match_id(url)
  73. data = json.dumps({
  74. 'method': 'da.content.get',
  75. 'params': [
  76. video_id, {
  77. 'site': 's%d',
  78. 'referrer': f'http://www.ivi.ru/watch/{video_id}',
  79. 'contentid': video_id,
  80. },
  81. ],
  82. })
  83. for site in (353, 183):
  84. content_data = (data % site).encode()
  85. if site == 353:
  86. if not Cryptodome.CMAC:
  87. continue
  88. timestamp = (self._download_json(
  89. self._LIGHT_URL, video_id,
  90. 'Downloading timestamp JSON', data=json.dumps({
  91. 'method': 'da.timestamp.get',
  92. 'params': [],
  93. }).encode(), fatal=False) or {}).get('result')
  94. if not timestamp:
  95. continue
  96. query = {
  97. 'ts': timestamp,
  98. 'sign': Cryptodome.CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data,
  99. Cryptodome.Blowfish).hexdigest(),
  100. }
  101. else:
  102. query = {}
  103. video_json = self._download_json(
  104. self._LIGHT_URL, video_id,
  105. 'Downloading video JSON', data=content_data, query=query)
  106. error = video_json.get('error')
  107. if error:
  108. origin = error.get('origin')
  109. message = error.get('message') or error.get('user_message')
  110. extractor_msg = 'Unable to download video %s'
  111. if origin == 'NotAllowedForLocation':
  112. self.raise_geo_restricted(message, self._GEO_COUNTRIES)
  113. elif origin == 'NoRedisValidData':
  114. extractor_msg = 'Video %s does not exist'
  115. elif site == 353:
  116. continue
  117. elif not Cryptodome.CMAC:
  118. raise ExtractorError('pycryptodomex not found. Please install', expected=True)
  119. elif message:
  120. extractor_msg += ': ' + message
  121. raise ExtractorError(extractor_msg % video_id, expected=True)
  122. else:
  123. break
  124. result = video_json['result']
  125. title = result['title']
  126. quality = qualities(self._KNOWN_FORMATS)
  127. formats = []
  128. for f in result.get('files', []):
  129. f_url = f.get('url')
  130. content_format = f.get('content_format')
  131. if not f_url:
  132. continue
  133. if (not self.get_param('allow_unplayable_formats')
  134. and ('-MDRM-' in content_format or '-FPS-' in content_format)):
  135. continue
  136. formats.append({
  137. 'url': f_url,
  138. 'format_id': content_format,
  139. 'quality': quality(content_format),
  140. 'filesize': int_or_none(f.get('size_in_bytes')),
  141. })
  142. compilation = result.get('compilation')
  143. episode = title if compilation else None
  144. title = f'{compilation} - {title}' if compilation is not None else title
  145. thumbnails = [{
  146. 'url': preview['url'],
  147. 'id': preview.get('content_format'),
  148. } for preview in result.get('preview', []) if preview.get('url')]
  149. webpage = self._download_webpage(url, video_id)
  150. season = self._search_regex(
  151. r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
  152. webpage, 'season', default=None)
  153. season_number = int_or_none(self._search_regex(
  154. r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
  155. webpage, 'season number', default=None))
  156. episode_number = int_or_none(self._search_regex(
  157. r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
  158. webpage, 'episode number', default=None))
  159. description = self._og_search_description(webpage, default=None) or self._html_search_meta(
  160. 'description', webpage, 'description', default=None)
  161. return {
  162. 'id': video_id,
  163. 'title': title,
  164. 'series': compilation,
  165. 'season': season,
  166. 'season_number': season_number,
  167. 'episode': episode,
  168. 'episode_number': episode_number,
  169. 'thumbnails': thumbnails,
  170. 'description': description,
  171. 'duration': int_or_none(result.get('duration')),
  172. 'formats': formats,
  173. }
  174. class IviCompilationIE(InfoExtractor):
  175. IE_DESC = 'ivi.ru compilations'
  176. IE_NAME = 'ivi:compilation'
  177. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  178. _TESTS = [{
  179. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  180. 'info_dict': {
  181. 'id': 'dvoe_iz_lartsa',
  182. 'title': 'Двое из ларца (2006 - 2008)',
  183. },
  184. 'playlist_mincount': 24,
  185. }, {
  186. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  187. 'info_dict': {
  188. 'id': 'dvoe_iz_lartsa/season1',
  189. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  190. },
  191. 'playlist_mincount': 12,
  192. }]
  193. def _extract_entries(self, html, compilation_id):
  194. return [
  195. self.url_result(
  196. f'http://www.ivi.ru/watch/{compilation_id}/{serie}', IviIE.ie_key())
  197. for serie in re.findall(
  198. rf'<a\b[^>]+\bhref=["\']/watch/{compilation_id}/(\d+)["\']', html)]
  199. def _real_extract(self, url):
  200. mobj = self._match_valid_url(url)
  201. compilation_id = mobj.group('compilationid')
  202. season_id = mobj.group('seasonid')
  203. if season_id is not None: # Season link
  204. season_page = self._download_webpage(
  205. url, compilation_id, f'Downloading season {season_id} web page')
  206. playlist_id = f'{compilation_id}/season{season_id}'
  207. playlist_title = self._html_search_meta('title', season_page, 'title')
  208. entries = self._extract_entries(season_page, compilation_id)
  209. else: # Compilation link
  210. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  211. playlist_id = compilation_id
  212. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  213. seasons = re.findall(
  214. rf'<a href="/watch/{compilation_id}/season(\d+)', compilation_page)
  215. if not seasons: # No seasons in this compilation
  216. entries = self._extract_entries(compilation_page, compilation_id)
  217. else:
  218. entries = []
  219. for season_id in seasons:
  220. season_page = self._download_webpage(
  221. f'http://www.ivi.ru/watch/{compilation_id}/season{season_id}',
  222. compilation_id, f'Downloading season {season_id} web page')
  223. entries.extend(self._extract_entries(season_page, compilation_id))
  224. return self.playlist_result(entries, playlist_id, playlist_title)