# crackle.py

  1. import hashlib
  2. import hmac
  3. import re
  4. import time
  5. from .common import InfoExtractor
  6. from ..networking.exceptions import HTTPError
  7. from ..utils import (
  8. ExtractorError,
  9. determine_ext,
  10. float_or_none,
  11. int_or_none,
  12. orderedSet,
  13. parse_age_limit,
  14. parse_duration,
  15. url_or_none,
  16. )
  17. class CrackleIE(InfoExtractor):
  18. _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
  19. _TESTS = [{
  20. # Crackle is available in the United States and territories
  21. 'url': 'https://www.crackle.com/thanksgiving/2510064',
  22. 'info_dict': {
  23. 'id': '2510064',
  24. 'ext': 'mp4',
  25. 'title': 'Touch Football',
  26. 'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
  27. 'duration': 1398,
  28. 'view_count': int,
  29. 'average_rating': 0,
  30. 'age_limit': 17,
  31. 'genre': 'Comedy',
  32. 'creator': 'Daniel Powell',
  33. 'artist': 'Chris Elliott, Amy Sedaris',
  34. 'release_year': 2016,
  35. 'series': 'Thanksgiving',
  36. 'episode': 'Touch Football',
  37. 'season_number': 1,
  38. 'episode_number': 1,
  39. },
  40. 'params': {
  41. # m3u8 download
  42. 'skip_download': True,
  43. },
  44. 'expected_warnings': [
  45. 'Trying with a list of known countries',
  46. ],
  47. }, {
  48. 'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
  49. 'only_matching': True,
  50. }]
  51. _MEDIA_FILE_SLOTS = {
  52. '360p.mp4': {
  53. 'width': 640,
  54. 'height': 360,
  55. },
  56. '480p.mp4': {
  57. 'width': 768,
  58. 'height': 432,
  59. },
  60. '480p_1mbps.mp4': {
  61. 'width': 852,
  62. 'height': 480,
  63. },
  64. }
  65. def _download_json(self, url, *args, **kwargs):
  66. # Authorization generation algorithm is reverse engineered from:
  67. # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
  68. timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
  69. h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
  70. headers = {
  71. 'Accept': 'application/json',
  72. 'Authorization': '|'.join([h, timestamp, '117', '1']),
  73. }
  74. return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)
  75. def _real_extract(self, url):
  76. video_id = self._match_id(url)
  77. geo_bypass_country = self.get_param('geo_bypass_country', None)
  78. countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
  79. num_countries, num = len(countries) - 1, 0
  80. media = {}
  81. for num, country in enumerate(countries):
  82. if num == 1: # start hard-coded list
  83. self.report_warning('%s. Trying with a list of known countries' % (
  84. f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country
  85. else 'No country code was given using --geo-bypass-country'))
  86. elif num == num_countries: # end of list
  87. geo_info = self._download_json(
  88. 'https://web-api-us.crackle.com/Service.svc/geo/country',
  89. video_id, fatal=False, note='Downloading geo-location information from crackle API',
  90. errnote='Unable to fetch geo-location information from crackle') or {}
  91. country = geo_info.get('CountryCode')
  92. if country is None:
  93. continue
  94. self.to_screen(f'{self.IE_NAME} identified country as {country}')
  95. if country in countries:
  96. self.to_screen(f'Downloading from {country} API was already attempted. Skipping...')
  97. continue
  98. if country is None:
  99. continue
  100. try:
  101. media = self._download_json(
  102. f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true',
  103. video_id, note=f'Downloading media JSON from {country} API',
  104. errnote='Unable to download media JSON')
  105. except ExtractorError as e:
  106. # 401 means geo restriction, trying next country
  107. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  108. continue
  109. raise
  110. status = media.get('status')
  111. if status.get('messageCode') != '0':
  112. raise ExtractorError(
  113. '{} said: {} {} - {}'.format(
  114. self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
  115. expected=True)
  116. # Found video formats
  117. if isinstance(media.get('MediaURLs'), list):
  118. break
  119. ignore_no_formats = self.get_param('ignore_no_formats_error')
  120. if not media or (not media.get('MediaURLs') and not ignore_no_formats):
  121. raise ExtractorError(
  122. 'Unable to access the crackle API. Try passing your country code '
  123. 'to --geo-bypass-country. If it still does not work and the '
  124. 'video is available in your country')
  125. title = media['Title']
  126. formats, subtitles = [], {}
  127. has_drm = False
  128. for e in media.get('MediaURLs') or []:
  129. if e.get('UseDRM'):
  130. has_drm = True
  131. format_url = url_or_none(e.get('DRMPath'))
  132. else:
  133. format_url = url_or_none(e.get('Path'))
  134. if not format_url:
  135. continue
  136. ext = determine_ext(format_url)
  137. if ext == 'm3u8':
  138. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  139. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  140. m3u8_id='hls', fatal=False)
  141. formats.extend(fmts)
  142. subtitles = self._merge_subtitles(subtitles, subs)
  143. elif ext == 'mpd':
  144. fmts, subs = self._extract_mpd_formats_and_subtitles(
  145. format_url, video_id, mpd_id='dash', fatal=False)
  146. formats.extend(fmts)
  147. subtitles = self._merge_subtitles(subtitles, subs)
  148. elif format_url.endswith('.ism/Manifest'):
  149. fmts, subs = self._extract_ism_formats_and_subtitles(
  150. format_url, video_id, ism_id='mss', fatal=False)
  151. formats.extend(fmts)
  152. subtitles = self._merge_subtitles(subtitles, subs)
  153. else:
  154. mfs_path = e.get('Type')
  155. mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
  156. if not mfs_info:
  157. continue
  158. formats.append({
  159. 'url': format_url,
  160. 'format_id': 'http-' + mfs_path.split('.')[0],
  161. 'width': mfs_info['width'],
  162. 'height': mfs_info['height'],
  163. })
  164. if not formats and has_drm:
  165. self.report_drm(video_id)
  166. description = media.get('Description')
  167. duration = int_or_none(media.get(
  168. 'DurationInSeconds')) or parse_duration(media.get('Duration'))
  169. view_count = int_or_none(media.get('CountViews'))
  170. average_rating = float_or_none(media.get('UserRating'))
  171. age_limit = parse_age_limit(media.get('Rating'))
  172. genre = media.get('Genre')
  173. release_year = int_or_none(media.get('ReleaseYear'))
  174. creator = media.get('Directors')
  175. artist = media.get('Cast')
  176. if media.get('MediaTypeDisplayValue') == 'Full Episode':
  177. series = media.get('ShowName')
  178. episode = title
  179. season_number = int_or_none(media.get('Season'))
  180. episode_number = int_or_none(media.get('Episode'))
  181. else:
  182. series = episode = season_number = episode_number = None
  183. cc_files = media.get('ClosedCaptionFiles')
  184. if isinstance(cc_files, list):
  185. for cc_file in cc_files:
  186. if not isinstance(cc_file, dict):
  187. continue
  188. cc_url = url_or_none(cc_file.get('Path'))
  189. if not cc_url:
  190. continue
  191. lang = cc_file.get('Locale') or 'en'
  192. subtitles.setdefault(lang, []).append({'url': cc_url})
  193. thumbnails = []
  194. images = media.get('Images')
  195. if isinstance(images, list):
  196. for image_key, image_url in images.items():
  197. mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
  198. if not mobj:
  199. continue
  200. thumbnails.append({
  201. 'url': image_url,
  202. 'width': int(mobj.group(1)),
  203. 'height': int(mobj.group(2)),
  204. })
  205. return {
  206. 'id': video_id,
  207. 'title': title,
  208. 'description': description,
  209. 'duration': duration,
  210. 'view_count': view_count,
  211. 'average_rating': average_rating,
  212. 'age_limit': age_limit,
  213. 'genre': genre,
  214. 'creator': creator,
  215. 'artist': artist,
  216. 'release_year': release_year,
  217. 'series': series,
  218. 'episode': episode,
  219. 'season_number': season_number,
  220. 'episode_number': episode_number,
  221. 'thumbnails': thumbnails,
  222. 'subtitles': subtitles,
  223. 'formats': formats,
  224. }