vice.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. import functools
  2. import hashlib
  3. import json
  4. import random
  5. import time
  6. from .adobepass import AdobePassIE
  7. from .common import InfoExtractor
  8. from .youtube import YoutubeIE
  9. from ..networking.exceptions import HTTPError
  10. from ..utils import (
  11. ExtractorError,
  12. OnDemandPagedList,
  13. clean_html,
  14. int_or_none,
  15. parse_age_limit,
  16. str_or_none,
  17. try_get,
  18. )
  class ViceBaseIE(InfoExtractor):
      """Base class for the Vice extractors; provides the shared GraphQL helper."""

      def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
          """Query the Vice GraphQL endpoint and return the payload for ``resource``.

          The request sent has the shape
          ``{ <resource>(locale: "<locale>", <resource_key>: "<resource_id>"<args>) { <fields> } }``
          and is passed as the ``query`` URL parameter.

          resource     -- GraphQL root field to select; also the key read from the
                          ``data`` object of the response
          resource_key -- name of the argument that identifies the resource
          resource_id  -- value for ``resource_key``; also handed to
                          ``_download_json`` as the id of the download
          locale       -- value for the ``locale`` argument
          fields       -- selection set, inserted verbatim
          args         -- extra argument text inserted verbatim after the key
                          argument (must bring its own leading ``, ``)

          Returns ``response['data'][resource]``. The response is subscripted
          directly, so a reply without those keys raises instead of returning None.
          """
          # Callers pass URL-derived values for locale/resource_id. Encode them as
          # quoted string literals so that a stray `"` or `\` cannot terminate the
          # literal early and corrupt the query. str() mirrors what `%s` did before;
          # ensure_ascii=False keeps every other character exactly as it was sent
          # previously, so ordinary ids and slugs produce an identical query.
          quoted_locale = json.dumps(str(locale), ensure_ascii=False)
          quoted_id = json.dumps(str(resource_id), ensure_ascii=False)

          # %-formatting is kept on purpose: the template is full of literal braces.
          query = '{\n  %s(locale: %s, %s: %s%s) {\n    %s\n  }\n}' % (  # noqa: UP031
              resource, quoted_locale, resource_key, quoted_id, args, fields)

          response = self._download_json(
              'https://video.vice.com/api/v1/graphql', resource_id,
              query={'query': query})
          return response['data'][resource]
  class ViceIE(ViceBaseIE, AdobePassIE):
      """Extractor for single Vice / Viceland / Vice TV videos and embeds."""

      IE_NAME = 'vice'
      _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
      _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
      _TESTS = [{
          'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
          'info_dict': {
              'id': '58c69e38a55424f1227dc3f7',
              'ext': 'mp4',
              'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
              'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1489664942,
              'upload_date': '20170316',
              'age_limit': 14,
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
      }, {
          # geo restricted to US
          'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
          'info_dict': {
              'id': '5816510690b70e6c5fd39a56',
              'ext': 'mp4',
              'uploader': 'vice',
              'title': 'The Signal From Tölva',
              'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1477941983,
              'upload_date': '20161031',
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
      }, {
          'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
          'info_dict': {
              'id': '581b12b60a0e1f4c0fb6ea2f',
              'ext': 'mp4',
              'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
              'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1485368119,
              'upload_date': '20170125',
              'age_limit': 14,
          },
          'params': {
              # AES-encrypted m3u8
              'skip_download': True,
          },
      }, {
          'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
          'only_matching': True,
      }, {
          'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
          'only_matching': True,
      }, {
          'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
          'only_matching': True,
      }, {
          'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Extract formats, subtitles and metadata for a single video.

          Steps, in order:
          1. read ``locale`` and the video id from the URL;
          2. fetch body/locked/rating/thumbnail_url/title over GraphQL;
          3. if the video is flagged ``locked``, obtain an MVPD token and send it
             as ``tvetoken``;
          4. call the signed ``preplay`` endpoint, whose JSON supplies the m3u8
             URL (``playURL``), the ``video`` metadata object and subtitle URLs.

          Raises an expected ExtractorError carrying the server's message when
          preplay answers HTTP 400/401 with a JSON body that contains
          ``error_description`` or ``details``; any other failure propagates as
          the original error.
          """
          locale, video_id = self._match_valid_url(url).groups()

          # Whitespace between the field names is only cosmetic for GraphQL.
          video = self._call_api('videos', 'id', video_id, locale, (
              'body\n'
              '    locked\n'
              '    rating\n'
              '    thumbnail_url\n'
              '    title'))[0]
          title = video['title'].strip()
          rating = video.get('rating')

          query = {}
          if video.get('locked'):
              resource = self._get_mvpd_resource(
                  'VICELAND', title, video_id, rating)
              query['tvetoken'] = self._extract_mvpd_auth(
                  url, video_id, 'VICELAND', resource)

          # signature generation algorithm is reverse engineered from signatureGenerator in
          # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
          # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
          # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
          exp = int(time.time()) + 1440

          query.update({
              'exp': exp,
              'sign': hashlib.sha512(f'{video_id}:GET:{exp}'.encode()).hexdigest(),
              'skipadstitching': 1,
              'platform': 'desktop',
              'rn': random.randint(10000, 100000),
          })

          try:
              preplay = self._download_json(
                  f'https://vms.vice.com/{locale}/video/preplay/{video_id}',
                  video_id, query=query)
          except ExtractorError as e:
              if isinstance(e.cause, HTTPError) and e.cause.status in (400, 401):
                  # The error body is only a courtesy: if it is not JSON, or has
                  # neither message key, fall through and re-raise the original
                  # HTTP error rather than masking it with a decode/KeyError.
                  try:
                      error = json.loads(e.cause.response.read().decode())
                  except ValueError:
                      error = None
                  error_message = None
                  if isinstance(error, dict):
                      error_message = error.get('error_description') or error.get('details')
                  if error_message:
                      raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)
              raise

          video_data = preplay['video']
          formats = self._extract_m3u8_formats(
              preplay['playURL'], video_id, 'mp4', 'm3u8_native')
          # `or {}` so that a null value is treated the same as a missing key.
          episode = video_data.get('episode') or {}
          channel = video_data.get('channel') or {}
          season = video_data.get('season') or {}

          subtitles = {}
          # `or []` for the same reason as above: a `.get` default only covers a
          # missing key, not an explicit null.
          for subtitle in preplay.get('subtitleURLs') or []:
              cc_url = subtitle.get('url')
              if not cc_url:
                  continue
              # Entries without a usable language code are filed under 'en'.
              language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], str) or 'en'
              subtitles.setdefault(language_code, []).append({
                  'url': cc_url,
              })

          return {
              'formats': formats,
              'id': video_id,
              'title': title,
              'description': clean_html(video.get('body')),
              'thumbnail': video.get('thumbnail_url'),
              'duration': int_or_none(video_data.get('video_duration')),
              # NOTE(review): the 1000 presumably scales milliseconds to seconds —
              # confirm against int_or_none's second parameter.
              'timestamp': int_or_none(video_data.get('created_at'), 1000),
              # The preplay rating wins; the GraphQL rating is the fallback.
              'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
              'series': try_get(video_data, lambda x: x['show']['base']['display_title'], str),
              'episode_number': int_or_none(episode.get('episode_number')),
              'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
              'season_number': int_or_none(season.get('season_number')),
              'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
              'uploader': channel.get('name'),
              'uploader_id': str_or_none(channel.get('id')),
              'subtitles': subtitles,
          }
  class ViceShowIE(ViceBaseIE):
      """Playlist extractor for Vice show pages; each entry is delegated to ViceIE."""

      IE_NAME = 'vice:show'
      _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
      _PAGE_SIZE = 25
      _TESTS = [{
          'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
          'info_dict': {
              'id': '57a2040c8cb727dec794c901',
              'title': 'F*ck, That’s Delicious',
              'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
          },
          'playlist_mincount': 64,
      }, {
          'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
          'only_matching': True,
      }]

      def _fetch_page(self, locale, show_id, page):
          """Yield one ``url_result`` per video on a single page of the show.

          The API is asked for page ``page + 1`` with ``_PAGE_SIZE`` items per page.
          """
          paging_args = f', page: {page + 1}, per_page: {self._PAGE_SIZE}'
          wanted_fields = (
              'body\n'
              '    id\n'
              '    url')
          page_videos = self._call_api(
              'videos', 'show_id', show_id, locale, wanted_fields, paging_args)
          yield from (
              self.url_result(item['url'], ViceIE.ie_key(), item.get('id'))
              for item in page_videos)

      def _real_extract(self, url):
          """Resolve the show slug from the URL and return a paged playlist."""
          match = self._match_valid_url(url)
          locale, display_id = match.groups()

          wanted_fields = (
              'dek\n'
              '    id\n'
              '    title')
          # Only the first returned show is used.
          show = self._call_api('shows', 'slug', display_id, locale, wanted_fields)[0]
          show_id = show['id']

          # Pages are requested through _fetch_page with locale and show id pre-bound.
          page_func = functools.partial(self._fetch_page, locale, show_id)
          entries = OnDemandPagedList(page_func, self._PAGE_SIZE)

          # The show's ``dek`` field is passed as the playlist description.
          return self.playlist_result(
              entries, show_id, show.get('title'), show.get('dek'))
  class ViceArticleIE(ViceBaseIE):
      """Extractor for vice.com articles that embed a Vice or YouTube video."""

      IE_NAME = 'vice:article'
      _VALID_URL = r'https?://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
      _TESTS = [{
          'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
          'info_dict': {
              'id': '58dc0a3dee202d2a0ccfcbd8',
              'ext': 'mp4',
              'title': 'Mormon War on Porn',
              'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1491883129,
              'upload_date': '20170411',
              'age_limit': 17,
          },
          'params': {
              # AES-encrypted m3u8
              'skip_download': True,
          },
          'add_ie': [ViceIE.ie_key()],
      }, {
          'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
          'md5': '13010ee0bc694ea87ec40724397c2349',
          'info_dict': {
              'id': '3jstaBeXgAs',
              'ext': 'mp4',
              'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
              'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
              'uploader': 'Motherboard',
              'uploader_id': 'MotherboardTV',
              'upload_date': '20140529',
          },
          'add_ie': [YoutubeIE.ie_key()],
      }, {
          'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
          'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
          'info_dict': {
              'id': '57f41d3556a0a80f54726060',
              'ext': 'mp4',
              'title': "Making The World's First Male Sex Doll",
              'description': 'md5:19b00b215b99961cf869c40fbe9df755',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1476919911,
              'upload_date': '20161019',
              'age_limit': 17,
          },
          'params': {
              'skip_download': True,
          },
          'add_ie': [ViceIE.ie_key()],
      }, {
          'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
          'only_matching': True,
      }, {
          'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Locate the video embedded in an article and hand it to its extractor.

          Lookup order: a Vice embed in the article body, then a YouTube embed in
          the body, and finally the ``data-video-url`` attribute of the article's
          ``embed_code`` (treated as a Vice URL). The result is always a
          ``url_transparent`` dict carrying the article slug as ``display_id``.
          """
          locale, display_id = self._match_valid_url(url).groups()

          wanted_fields = (
              'body\n'
              '    embed_code')
          # Only the first returned article is used.
          article = self._call_api('articles', 'slug', display_id, locale, wanted_fields)[0]
          body = article['body']

          def as_transparent(target_url, extractor):
              # Build the url_transparent result that delegates to `extractor`.
              return {
                  '_type': 'url_transparent',
                  'url': target_url,
                  'display_id': display_id,
                  'ie_key': extractor.ie_key(),
              }

          # Vice is tried before YouTube; the first extractor that finds an embed wins.
          for extractor in (ViceIE, YoutubeIE):
              embedded_url = extractor._extract_url(body)
              if embedded_url:
                  return as_transparent(embedded_url, extractor)

          # No embed in the body: fall back to the separate embed_code markup.
          fallback_url = self._html_search_regex(
              r'data-video-url="([^"]+)"',
              article['embed_code'], 'video URL')
          return as_transparent(fallback_url, ViceIE)
  281. return _url_res(video_url, ViceIE.ie_key())