vimeo.py 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467
  1. import base64
  2. import functools
  3. import itertools
  4. import re
  5. import urllib.parse
  6. from .common import InfoExtractor
  7. from ..networking import HEADRequest, Request
  8. from ..networking.exceptions import HTTPError
  9. from ..utils import (
  10. ExtractorError,
  11. OnDemandPagedList,
  12. clean_html,
  13. determine_ext,
  14. get_element_by_class,
  15. int_or_none,
  16. js_to_json,
  17. merge_dicts,
  18. parse_filesize,
  19. parse_iso8601,
  20. parse_qs,
  21. smuggle_url,
  22. str_or_none,
  23. traverse_obj,
  24. try_get,
  25. unified_timestamp,
  26. unsmuggle_url,
  27. urlencode_postdata,
  28. urlhandle_detect_ext,
  29. urljoin,
  30. )
  31. class VimeoBaseInfoExtractor(InfoExtractor):
  32. _NETRC_MACHINE = 'vimeo'
  33. _LOGIN_REQUIRED = False
  34. _LOGIN_URL = 'https://vimeo.com/log_in'
  35. @staticmethod
  36. def _smuggle_referrer(url, referrer_url):
  37. return smuggle_url(url, {'referer': referrer_url})
  38. def _unsmuggle_headers(self, url):
  39. """@returns (url, smuggled_data, headers)"""
  40. url, data = unsmuggle_url(url, {})
  41. headers = self.get_param('http_headers').copy()
  42. if 'referer' in data:
  43. headers['Referer'] = data['referer']
  44. return url, data, headers
  45. def _perform_login(self, username, password):
  46. viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
  47. data = {
  48. 'action': 'login',
  49. 'email': username,
  50. 'password': password,
  51. 'service': 'vimeo',
  52. 'token': viewer['xsrft'],
  53. }
  54. self._set_vimeo_cookie('vuid', viewer['vuid'])
  55. try:
  56. self._download_webpage(
  57. self._LOGIN_URL, None, 'Logging in',
  58. data=urlencode_postdata(data), headers={
  59. 'Content-Type': 'application/x-www-form-urlencoded',
  60. 'Referer': self._LOGIN_URL,
  61. })
  62. except ExtractorError as e:
  63. if isinstance(e.cause, HTTPError) and e.cause.status == 418:
  64. raise ExtractorError(
  65. 'Unable to log in: bad username or password',
  66. expected=True)
  67. raise ExtractorError('Unable to log in')
  68. def _real_initialize(self):
  69. if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'):
  70. self._raise_login_required()
  71. def _get_video_password(self):
  72. password = self.get_param('videopassword')
  73. if password is None:
  74. raise ExtractorError(
  75. 'This video is protected by a password, use the --video-password option',
  76. expected=True)
  77. return password
  78. def _verify_video_password(self, url, video_id, password, token, vuid):
  79. if url.startswith('http://'):
  80. # vimeo only supports https now, but the user can give an http url
  81. url = url.replace('http://', 'https://')
  82. self._set_vimeo_cookie('vuid', vuid)
  83. return self._download_webpage(
  84. url + '/password', video_id, 'Verifying the password',
  85. 'Wrong password', data=urlencode_postdata({
  86. 'password': password,
  87. 'token': token,
  88. }), headers={
  89. 'Content-Type': 'application/x-www-form-urlencoded',
  90. 'Referer': url,
  91. })
  92. def _extract_xsrft_and_vuid(self, webpage):
  93. xsrft = self._search_regex(
  94. r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
  95. webpage, 'login token', group='xsrft')
  96. vuid = self._search_regex(
  97. r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
  98. webpage, 'vuid', group='vuid')
  99. return xsrft, vuid
  100. def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
  101. vimeo_config = self._search_regex(
  102. r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
  103. webpage, 'vimeo config', *args, **kwargs)
  104. if vimeo_config:
  105. return self._parse_json(vimeo_config, video_id)
  106. def _set_vimeo_cookie(self, name, value):
  107. self._set_cookie('vimeo.com', name, value)
  108. def _parse_config(self, config, video_id):
  109. video_data = config['video']
  110. video_title = video_data.get('title')
  111. live_event = video_data.get('live_event') or {}
  112. live_status = {
  113. 'pending': 'is_upcoming',
  114. 'active': 'is_upcoming',
  115. 'started': 'is_live',
  116. 'ended': 'post_live',
  117. }.get(live_event.get('status'))
  118. is_live = live_status == 'is_live'
  119. request = config.get('request') or {}
  120. formats = []
  121. subtitles = {}
  122. config_files = video_data.get('files') or request.get('files') or {}
  123. for f in (config_files.get('progressive') or []):
  124. video_url = f.get('url')
  125. if not video_url:
  126. continue
  127. formats.append({
  128. 'url': video_url,
  129. 'format_id': 'http-{}'.format(f.get('quality')),
  130. 'source_preference': 10,
  131. 'width': int_or_none(f.get('width')),
  132. 'height': int_or_none(f.get('height')),
  133. 'fps': int_or_none(f.get('fps')),
  134. 'tbr': int_or_none(f.get('bitrate')),
  135. })
  136. # TODO: fix handling of 308 status code returned for live archive manifest requests
  137. sep_pattern = r'/sep/video/'
  138. for files_type in ('hls', 'dash'):
  139. for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
  140. manifest_url = cdn_data.get('url')
  141. if not manifest_url:
  142. continue
  143. format_id = f'{files_type}-{cdn_name}'
  144. sep_manifest_urls = []
  145. if re.search(sep_pattern, manifest_url):
  146. for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
  147. sep_manifest_urls.append((format_id + suffix, re.sub(
  148. sep_pattern, f'/{repl}/', manifest_url)))
  149. else:
  150. sep_manifest_urls = [(format_id, manifest_url)]
  151. for f_id, m_url in sep_manifest_urls:
  152. if files_type == 'hls':
  153. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  154. m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
  155. note=f'Downloading {cdn_name} m3u8 information',
  156. fatal=False)
  157. formats.extend(fmts)
  158. self._merge_subtitles(subs, target=subtitles)
  159. elif files_type == 'dash':
  160. if 'json=1' in m_url:
  161. real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
  162. if real_m_url:
  163. m_url = real_m_url
  164. fmts, subs = self._extract_mpd_formats_and_subtitles(
  165. m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
  166. f'Downloading {cdn_name} MPD information',
  167. fatal=False)
  168. formats.extend(fmts)
  169. self._merge_subtitles(subs, target=subtitles)
  170. live_archive = live_event.get('archive') or {}
  171. live_archive_source_url = live_archive.get('source_url')
  172. if live_archive_source_url and live_archive.get('status') == 'done':
  173. formats.append({
  174. 'format_id': 'live-archive-source',
  175. 'url': live_archive_source_url,
  176. 'quality': 10,
  177. })
  178. for tt in (request.get('text_tracks') or []):
  179. subtitles.setdefault(tt['lang'], []).append({
  180. 'ext': 'vtt',
  181. 'url': urljoin('https://vimeo.com', tt['url']),
  182. })
  183. thumbnails = []
  184. if not is_live:
  185. for key, thumb in (video_data.get('thumbs') or {}).items():
  186. thumbnails.append({
  187. 'id': key,
  188. 'width': int_or_none(key),
  189. 'url': thumb,
  190. })
  191. thumbnail = video_data.get('thumbnail')
  192. if thumbnail:
  193. thumbnails.append({
  194. 'url': thumbnail,
  195. })
  196. owner = video_data.get('owner') or {}
  197. video_uploader_url = owner.get('url')
  198. duration = int_or_none(video_data.get('duration'))
  199. chapter_data = try_get(config, lambda x: x['embed']['chapters']) or []
  200. chapters = [{
  201. 'title': current_chapter.get('title'),
  202. 'start_time': current_chapter.get('timecode'),
  203. 'end_time': next_chapter.get('timecode'),
  204. } for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])]
  205. if chapters and chapters[0]['start_time']: # Chapters may not start from 0
  206. chapters[:0] = [{'title': '<Untitled>', 'start_time': 0, 'end_time': chapters[0]['start_time']}]
  207. return {
  208. 'id': str_or_none(video_data.get('id')) or video_id,
  209. 'title': video_title,
  210. 'uploader': owner.get('name'),
  211. 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
  212. 'uploader_url': video_uploader_url,
  213. 'thumbnails': thumbnails,
  214. 'duration': duration,
  215. 'chapters': chapters or None,
  216. 'formats': formats,
  217. 'subtitles': subtitles,
  218. 'live_status': live_status,
  219. 'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
  220. # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
  221. # at the same time without actual units specified.
  222. '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
  223. }
  224. def _extract_original_format(self, url, video_id, unlisted_hash=None):
  225. query = {'action': 'load_download_config'}
  226. if unlisted_hash:
  227. query['unlisted_hash'] = unlisted_hash
  228. download_data = self._download_json(
  229. url, video_id, fatal=False, query=query,
  230. headers={'X-Requested-With': 'XMLHttpRequest'},
  231. expected_status=(403, 404)) or {}
  232. source_file = download_data.get('source_file')
  233. download_url = try_get(source_file, lambda x: x['download_url'])
  234. if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
  235. source_name = source_file.get('public_name', 'Original')
  236. if self._is_valid_url(download_url, video_id, f'{source_name} video'):
  237. ext = (try_get(
  238. source_file, lambda x: x['extension'],
  239. str) or determine_ext(
  240. download_url, None) or 'mp4').lower()
  241. return {
  242. 'url': download_url,
  243. 'ext': ext,
  244. 'width': int_or_none(source_file.get('width')),
  245. 'height': int_or_none(source_file.get('height')),
  246. 'filesize': parse_filesize(source_file.get('size')),
  247. 'format_id': source_name,
  248. 'quality': 1,
  249. }
  250. jwt_response = self._download_json(
  251. 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
  252. if not jwt_response.get('jwt'):
  253. return
  254. headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
  255. original_response = self._download_json(
  256. f'https://api.vimeo.com/videos/{video_id}', video_id,
  257. headers=headers, fatal=False, expected_status=(403, 404)) or {}
  258. for download_data in original_response.get('download') or []:
  259. download_url = download_data.get('link')
  260. if not download_url or download_data.get('quality') != 'source':
  261. continue
  262. ext = determine_ext(parse_qs(download_url).get('filename', [''])[0].lower(), default_ext=None)
  263. if not ext:
  264. urlh = self._request_webpage(
  265. HEADRequest(download_url), video_id, fatal=False, note='Determining source extension')
  266. ext = urlh and urlhandle_detect_ext(urlh)
  267. return {
  268. 'url': download_url,
  269. 'ext': ext or 'unknown_video',
  270. 'format_id': download_data.get('public_name', 'Original'),
  271. 'width': int_or_none(download_data.get('width')),
  272. 'height': int_or_none(download_data.get('height')),
  273. 'fps': int_or_none(download_data.get('fps')),
  274. 'filesize': int_or_none(download_data.get('size')),
  275. 'quality': 1,
  276. }
  277. class VimeoIE(VimeoBaseInfoExtractor):
  278. """Information extractor for vimeo.com."""
  279. # _VALID_URL matches Vimeo URLs
  280. _VALID_URL = r'''(?x)
  281. https?://
  282. (?:
  283. (?:
  284. www|
  285. player
  286. )
  287. \.
  288. )?
  289. vimeo\.com/
  290. (?:
  291. (?P<u>user)|
  292. (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
  293. (?:.*?/)??
  294. (?P<q>
  295. (?:
  296. play_redirect_hls|
  297. moogaloop\.swf)\?clip_id=
  298. )?
  299. (?:videos?/)?
  300. )
  301. (?P<id>[0-9]+)
  302. (?(u)
  303. /(?!videos|likes)[^/?#]+/?|
  304. (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
  305. )
  306. (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
  307. '''
  308. IE_NAME = 'vimeo'
  309. _EMBED_REGEX = [
  310. # iframe
  311. r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
  312. # Embedded (swf embed) Vimeo player
  313. r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
  314. # Non-standard embedded Vimeo player
  315. r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
  316. ]
  317. _TESTS = [
  318. {
  319. 'url': 'http://vimeo.com/56015672#at=0',
  320. 'md5': '8879b6cc097e987f02484baf890129e5',
  321. 'info_dict': {
  322. 'id': '56015672',
  323. 'ext': 'mp4',
  324. 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
  325. 'description': 'md5:2d3305bad981a06ff79f027f19865021',
  326. 'timestamp': 1355990239,
  327. 'upload_date': '20121220',
  328. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
  329. 'uploader_id': 'user7108434',
  330. 'uploader': 'Filippo Valsorda',
  331. 'duration': 10,
  332. 'license': 'by-sa',
  333. },
  334. 'params': {
  335. 'format': 'best[protocol=https]',
  336. },
  337. 'skip': 'No longer available',
  338. },
  339. {
  340. 'url': 'http://player.vimeo.com/video/54469442',
  341. 'md5': '619b811a4417aa4abe78dc653becf511',
  342. 'note': 'Videos that embed the url in the player page',
  343. 'info_dict': {
  344. 'id': '54469442',
  345. 'ext': 'mp4',
  346. 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
  347. 'uploader': 'Business of Software',
  348. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
  349. 'uploader_id': 'businessofsoftware',
  350. 'duration': 3610,
  351. 'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280',
  352. },
  353. 'params': {
  354. 'format': 'best[protocol=https]',
  355. },
  356. },
  357. {
  358. 'url': 'http://vimeo.com/68375962',
  359. 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
  360. 'note': 'Video protected with password',
  361. 'info_dict': {
  362. 'id': '68375962',
  363. 'ext': 'mp4',
  364. 'title': 'youtube-dl password protected test video',
  365. 'timestamp': 1371200155,
  366. 'upload_date': '20130614',
  367. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
  368. 'uploader_id': 'user18948128',
  369. 'uploader': 'Jaime Marquínez Ferrándiz',
  370. 'duration': 10,
  371. 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
  372. 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
  373. 'view_count': int,
  374. 'comment_count': int,
  375. 'like_count': int,
  376. },
  377. 'params': {
  378. 'format': 'best[protocol=https]',
  379. 'videopassword': 'youtube-dl',
  380. },
  381. },
  382. {
  383. 'url': 'http://vimeo.com/channels/keypeele/75629013',
  384. 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
  385. 'info_dict': {
  386. 'id': '75629013',
  387. 'ext': 'mp4',
  388. 'title': 'Key & Peele: Terrorist Interrogation',
  389. 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
  390. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
  391. 'uploader_id': 'atencio',
  392. 'uploader': 'Peter Atencio',
  393. 'channel_id': 'keypeele',
  394. 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
  395. 'timestamp': 1380339469,
  396. 'upload_date': '20130928',
  397. 'duration': 187,
  398. 'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d_1280',
  399. 'view_count': int,
  400. 'comment_count': int,
  401. 'like_count': int,
  402. },
  403. 'params': {'format': 'http-1080p'},
  404. },
  405. {
  406. 'url': 'http://vimeo.com/76979871',
  407. 'note': 'Video with subtitles',
  408. 'info_dict': {
  409. 'id': '76979871',
  410. 'ext': 'mov',
  411. 'title': 'The New Vimeo Player (You Know, For Videos)',
  412. 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
  413. 'timestamp': 1381846109,
  414. 'upload_date': '20131015',
  415. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
  416. 'uploader_id': 'staff',
  417. 'uploader': 'Vimeo Staff',
  418. 'duration': 62,
  419. 'subtitles': {
  420. 'de': [{'ext': 'vtt'}],
  421. 'en': [{'ext': 'vtt'}],
  422. 'es': [{'ext': 'vtt'}],
  423. 'fr': [{'ext': 'vtt'}],
  424. },
  425. },
  426. 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
  427. },
  428. {
  429. # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
  430. 'url': 'https://player.vimeo.com/video/98044508',
  431. 'note': 'The js code contains assignments to the same variable as the config',
  432. 'info_dict': {
  433. 'id': '98044508',
  434. 'ext': 'mp4',
  435. 'title': 'Pier Solar OUYA Official Trailer',
  436. 'uploader': 'Tulio Gonçalves',
  437. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
  438. 'uploader_id': 'user28849593',
  439. 'duration': 118,
  440. 'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
  441. },
  442. },
  443. {
  444. # contains original format
  445. 'url': 'https://vimeo.com/33951933',
  446. 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
  447. 'info_dict': {
  448. 'id': '33951933',
  449. 'ext': 'mp4',
  450. 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
  451. 'uploader': 'The DMCI',
  452. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
  453. 'uploader_id': 'dmci',
  454. 'timestamp': 1324343742,
  455. 'upload_date': '20111220',
  456. 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
  457. 'duration': 60,
  458. 'comment_count': int,
  459. 'view_count': int,
  460. 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
  461. 'like_count': int,
  462. },
  463. },
  464. {
  465. 'note': 'Contains original format not accessible in webpage',
  466. 'url': 'https://vimeo.com/393756517',
  467. 'md5': 'c464af248b592190a5ffbb5d33f382b0',
  468. 'info_dict': {
  469. 'id': '393756517',
  470. 'ext': 'mov',
  471. 'timestamp': 1582642091,
  472. 'uploader_id': 'frameworkla',
  473. 'title': 'Straight To Hell - Sabrina: Netflix',
  474. 'uploader': 'Framework Studio',
  475. 'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
  476. 'upload_date': '20200225',
  477. 'duration': 176,
  478. 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
  479. 'uploader_url': 'https://vimeo.com/frameworkla',
  480. },
  481. },
  482. {
  483. # only available via https://vimeo.com/channels/tributes/6213729 and
  484. # not via https://vimeo.com/6213729
  485. 'url': 'https://vimeo.com/channels/tributes/6213729',
  486. 'info_dict': {
  487. 'id': '6213729',
  488. 'ext': 'mp4',
  489. 'title': 'Vimeo Tribute: The Shining',
  490. 'uploader': 'Casey Donahue',
  491. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
  492. 'uploader_id': 'caseydonahue',
  493. 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
  494. 'channel_id': 'tributes',
  495. 'timestamp': 1250886430,
  496. 'upload_date': '20090821',
  497. 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
  498. 'duration': 321,
  499. 'comment_count': int,
  500. 'view_count': int,
  501. 'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
  502. 'like_count': int,
  503. },
  504. 'params': {
  505. 'skip_download': True,
  506. },
  507. },
  508. {
  509. # redirects to ondemand extractor and should be passed through it
  510. # for successful extraction
  511. 'url': 'https://vimeo.com/73445910',
  512. 'info_dict': {
  513. 'id': '73445910',
  514. 'ext': 'mp4',
  515. 'title': 'The Reluctant Revolutionary',
  516. 'uploader': '10Ft Films',
  517. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
  518. 'uploader_id': 'tenfootfilms',
  519. 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
  520. 'upload_date': '20130830',
  521. 'timestamp': 1377853339,
  522. },
  523. 'params': {
  524. 'skip_download': True,
  525. },
  526. 'skip': 'this page is no longer available.',
  527. },
  528. {
  529. 'url': 'http://player.vimeo.com/video/68375962',
  530. 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
  531. 'info_dict': {
  532. 'id': '68375962',
  533. 'ext': 'mp4',
  534. 'title': 'youtube-dl password protected test video',
  535. 'timestamp': 1371200155,
  536. 'upload_date': '20130614',
  537. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
  538. 'uploader_id': 'user18948128',
  539. 'uploader': 'Jaime Marquínez Ferrándiz',
  540. 'duration': 10,
  541. 'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
  542. 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
  543. 'view_count': int,
  544. 'comment_count': int,
  545. 'like_count': int,
  546. },
  547. 'params': {
  548. 'format': 'best[protocol=https]',
  549. 'videopassword': 'youtube-dl',
  550. },
  551. },
  552. {
  553. 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
  554. 'only_matching': True,
  555. },
  556. {
  557. 'url': 'https://vimeo.com/109815029',
  558. 'note': 'Video not completely processed, "failed" seed status',
  559. 'only_matching': True,
  560. },
  561. {
  562. 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
  563. 'only_matching': True,
  564. },
  565. {
  566. 'url': 'https://vimeo.com/album/2632481/video/79010983',
  567. 'only_matching': True,
  568. },
  569. {
  570. 'url': 'https://vimeo.com/showcase/3253534/video/119195465',
  571. 'note': 'A video in a password protected album (showcase)',
  572. 'info_dict': {
  573. 'id': '119195465',
  574. 'ext': 'mp4',
  575. 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
  576. 'uploader': 'Philipp Hagemeister',
  577. 'uploader_id': 'user20132939',
  578. 'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
  579. 'upload_date': '20150209',
  580. 'timestamp': 1423518307,
  581. 'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
  582. 'duration': 10,
  583. 'like_count': int,
  584. 'uploader_url': 'https://vimeo.com/user20132939',
  585. 'view_count': int,
  586. 'comment_count': int,
  587. },
  588. 'params': {
  589. 'format': 'best[protocol=https]',
  590. 'videopassword': 'youtube-dl',
  591. },
  592. },
  593. {
  594. # source file returns 403: Forbidden
  595. 'url': 'https://vimeo.com/7809605',
  596. 'only_matching': True,
  597. },
  598. {
  599. 'note': 'Direct URL with hash',
  600. 'url': 'https://vimeo.com/160743502/abd0e13fb4',
  601. 'info_dict': {
  602. 'id': '160743502',
  603. 'ext': 'mp4',
  604. 'uploader': 'Julian Tryba',
  605. 'uploader_id': 'aliniamedia',
  606. 'title': 'Harrisville New Hampshire',
  607. 'timestamp': 1459259666,
  608. 'upload_date': '20160329',
  609. 'release_timestamp': 1459259666,
  610. 'license': 'by-nc',
  611. 'duration': 159,
  612. 'comment_count': int,
  613. 'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d_1280',
  614. 'like_count': int,
  615. 'uploader_url': 'https://vimeo.com/aliniamedia',
  616. 'release_date': '20160329',
  617. },
  618. 'params': {'skip_download': True},
  619. },
  620. {
  621. 'url': 'https://vimeo.com/138909882',
  622. 'info_dict': {
  623. 'id': '138909882',
  624. 'ext': 'mp4',
  625. 'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
  626. 'description': 'md5:5967e090768a831488f6e74b7821b3c1',
  627. 'uploader_id': 'fireworkchampions',
  628. 'uploader': 'Firework Champions',
  629. 'upload_date': '20150910',
  630. 'timestamp': 1441901895,
  631. },
  632. 'params': {
  633. 'skip_download': True,
  634. 'format': 'Original',
  635. },
  636. },
  637. {
  638. 'url': 'https://vimeo.com/channels/staffpicks/143603739',
  639. 'info_dict': {
  640. 'id': '143603739',
  641. 'ext': 'mp4',
  642. 'uploader': 'Karim Huu Do',
  643. 'timestamp': 1445846953,
  644. 'upload_date': '20151026',
  645. 'title': 'The Shoes - Submarine Feat. Blaine Harrison',
  646. 'uploader_id': 'karimhd',
  647. 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843',
  648. 'channel_id': 'staffpicks',
  649. 'duration': 336,
  650. 'comment_count': int,
  651. 'view_count': int,
  652. 'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d_1280',
  653. 'like_count': int,
  654. 'uploader_url': 'https://vimeo.com/karimhd',
  655. 'channel_url': 'https://vimeo.com/channels/staffpicks',
  656. },
  657. 'params': {'skip_download': 'm3u8'},
  658. },
  659. {
  660. # requires passing unlisted_hash(a52724358e) to load_download_config request
  661. 'url': 'https://vimeo.com/392479337/a52724358e',
  662. 'only_matching': True,
  663. },
  664. {
  665. # similar, but all numeric: ID must be 581039021, not 9603038895
  666. # issue #29690
  667. 'url': 'https://vimeo.com/581039021/9603038895',
  668. 'info_dict': {
  669. 'id': '581039021',
  670. 'ext': 'mp4',
  671. 'timestamp': 1627621014,
  672. 'release_timestamp': 1627621014,
  673. 'duration': 976,
  674. 'comment_count': int,
  675. 'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d_1280',
  676. 'like_count': int,
  677. 'uploader_url': 'https://vimeo.com/txwestcapital',
  678. 'release_date': '20210730',
  679. 'uploader': 'Christopher Inks',
  680. 'title': 'Thursday, July 29, 2021 BMA Evening Video Update',
  681. 'uploader_id': 'txwestcapital',
  682. 'upload_date': '20210730',
  683. },
  684. 'params': {
  685. 'skip_download': True,
  686. },
  687. },
  688. {
  689. # user playlist alias -> https://vimeo.com/258705797
  690. 'url': 'https://vimeo.com/user26785108/newspiritualguide',
  691. 'only_matching': True,
  692. },
  693. # https://gettingthingsdone.com/workflowmap/
  694. # vimeo embed with check-password page protected by Referer header
  695. ]
  696. @classmethod
  697. def _extract_embed_urls(cls, url, webpage):
  698. for embed_url in super()._extract_embed_urls(url, webpage):
  699. yield cls._smuggle_referrer(embed_url, url)
  700. @classmethod
  701. def _extract_url(cls, url, webpage):
  702. return next(cls._extract_embed_urls(url, webpage), None)
  703. def _verify_player_video_password(self, url, video_id, headers):
  704. password = self._get_video_password()
  705. data = urlencode_postdata({
  706. 'password': base64.b64encode(password.encode()),
  707. })
  708. headers = merge_dicts(headers, {
  709. 'Content-Type': 'application/x-www-form-urlencoded',
  710. })
  711. checked = self._download_json(
  712. f'{urllib.parse.urlsplit(url)._replace(query=None).geturl()}/check-password',
  713. video_id, 'Verifying the password', data=data, headers=headers)
  714. if checked is False:
  715. raise ExtractorError('Wrong video password', expected=True)
  716. return checked
  717. def _extract_from_api(self, video_id, unlisted_hash=None):
  718. token = self._download_json(
  719. 'https://vimeo.com/_rv/jwt', video_id, headers={
  720. 'X-Requested-With': 'XMLHttpRequest',
  721. })['token']
  722. api_url = 'https://api.vimeo.com/videos/' + video_id
  723. if unlisted_hash:
  724. api_url += ':' + unlisted_hash
  725. video = self._download_json(
  726. api_url, video_id, headers={
  727. 'Authorization': 'jwt ' + token,
  728. 'Accept': 'application/json',
  729. }, query={
  730. 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
  731. })
  732. info = self._parse_config(self._download_json(
  733. video['config_url'], video_id), video_id)
  734. get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
  735. info.update({
  736. 'description': video.get('description'),
  737. 'license': video.get('license'),
  738. 'release_timestamp': get_timestamp('release'),
  739. 'timestamp': get_timestamp('created'),
  740. 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
  741. })
  742. connections = try_get(
  743. video, lambda x: x['metadata']['connections'], dict) or {}
  744. for k in ('comment', 'like'):
  745. info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
  746. return info
  747. def _try_album_password(self, url):
  748. album_id = self._search_regex(
  749. r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
  750. if not album_id:
  751. return
  752. viewer = self._download_json(
  753. 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
  754. if not viewer:
  755. webpage = self._download_webpage(url, album_id)
  756. viewer = self._parse_json(self._search_regex(
  757. r'bootstrap_data\s*=\s*({.+?})</script>',
  758. webpage, 'bootstrap data'), album_id)['viewer']
  759. jwt = viewer['jwt']
  760. album = self._download_json(
  761. 'https://api.vimeo.com/albums/' + album_id,
  762. album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
  763. query={'fields': 'description,name,privacy'})
  764. if try_get(album, lambda x: x['privacy']['view']) == 'password':
  765. password = self.get_param('videopassword')
  766. if not password:
  767. raise ExtractorError(
  768. 'This album is protected by a password, use the --video-password option',
  769. expected=True)
  770. self._set_vimeo_cookie('vuid', viewer['vuid'])
  771. try:
  772. self._download_json(
  773. f'https://vimeo.com/showcase/{album_id}/auth',
  774. album_id, 'Verifying the password', data=urlencode_postdata({
  775. 'password': password,
  776. 'token': viewer['xsrft'],
  777. }), headers={
  778. 'X-Requested-With': 'XMLHttpRequest',
  779. })
  780. except ExtractorError as e:
  781. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  782. raise ExtractorError('Wrong password', expected=True)
  783. raise
    def _real_extract(self, url):
        """Extract a single Vimeo video from ``url``.

        Flow, in order:
          1. Unsmuggle headers/data and default the Referer to the URL itself.
          2. URLs carrying an ``unlisted_hash`` go straight to ``_extract_from_api``.
          3. Download the webpage, translating HTTP 403/429 into explanatory errors.
          4. ``player.vimeo.com/video/`` URLs are resolved from the inline player config.
          5. Otherwise handle the page password form, locate the player config URL
             (falling back to ``_extract_from_api`` when no page config is found),
             and assemble the info dict from the config, the webpage and JSON-LD.

        Returns an info dict, or a ``url_result`` redirecting to the feature
        video when a rented title's trailer was requested.
        """
        url, data, headers = self._unsmuggle_headers(url)
        if 'Referer' not in headers:
            headers['Referer'] = url

        # Extract ID from URL
        mobj = self._match_valid_url(url).groupdict()
        video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash')
        if unlisted_hash:
            return self._extract_from_api(video_id, unlisted_hash)

        # These URL forms are replaced by the canonical https://vimeo.com/<id> page
        if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
            url = 'https://vimeo.com/' + video_id

        self._try_album_password(url)
        # Impersonation is only requested for https URLs (see impersonate= below)
        is_secure = urllib.parse.urlparse(url).scheme == 'https'
        try:
            # Retrieve video webpage to extract further information
            webpage, urlh = self._download_webpage_handle(
                url, video_id, headers=headers, impersonate=is_secure)
            redirect_url = urlh.url
        except ExtractorError as error:
            # Only HTTP 403 and 429 get special treatment; everything else propagates unchanged
            if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429):
                raise
            errmsg = error.cause.response.read()
            if b'Because of its privacy settings, this video cannot be played here' in errmsg:
                raise ExtractorError(
                    'Cannot download embed-only video without embedding URL. Please call yt-dlp '
                    'with the URL of the page that embeds this video.', expected=True)
            # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
            status = error.cause.status
            dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
            # An impersonate target was already in use for this response
            if target := error.cause.response.extensions.get('impersonate'):
                raise ExtractorError(
                    f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}')
            elif not is_secure:
                raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True)
            # https request made without an impersonate target
            raise ExtractorError(
                'This request has been blocked due to its TLS fingerprint. Install a '
                'required impersonation dependency if possible, or else if you are okay with '
                f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
                f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)

        # Player pages carry their config inline; no further page scraping is done for them
        if '://player.vimeo.com/video/' in url:
            config = self._search_json(
                r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
            # view == 4 is handled as a password-protected player: the config is
            # replaced by the response of the password check
            if config.get('view') == 4:
                config = self._verify_player_video_password(
                    redirect_url, video_id, headers)
            return self._parse_config(config, video_id)

        # A password form on the page: verify the password and continue with the returned page
        if re.search(r'<form[^>]+?id="pw_form"', webpage):
            video_password = self._get_video_password()
            token, vuid = self._extract_xsrft_and_vuid(webpage)
            webpage = self._verify_video_password(
                redirect_url, video_id, video_password, token, vuid)

        vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
        if vimeo_config:
            seed_status = vimeo_config.get('seed_status') or {}
            # Surface the site's own message when the seed status reports a failure
            if seed_status.get('state') == 'failed':
                raise ExtractorError(
                    '{} said: {}'.format(self.IE_NAME, seed_status['title']),
                    expected=True)

        cc_license = None
        timestamp = None
        video_description = None
        info_dict = {}
        config_url = None

        # Channel pages: the config URL and description are read from the page markup
        channel_id = self._search_regex(
            r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
        if channel_id:
            config_url = self._html_search_regex(
                r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None)
            video_description = clean_html(get_element_by_class('description', webpage))
            info_dict.update({
                'channel_id': channel_id,
                'channel_url': 'https://vimeo.com/channels/' + channel_id,
            })
        # Non-channel pages, or channel pages without a data-config-url attribute
        if not config_url:
            page_config = self._parse_json(self._search_regex(
                r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
                webpage, 'page config', default='{}'), video_id, fatal=False)
            # No usable page config: fall back to the API-based extraction
            if not page_config:
                return self._extract_from_api(video_id)
            config_url = page_config['player']['config_url']
            cc_license = page_config.get('cc_license')
            clip = page_config.get('clip') or {}
            timestamp = clip.get('uploaded_on')
            video_description = clean_html(
                clip.get('description') or page_config.get('description_html_escaped'))
        config = self._download_json(config_url, video_id)
        video = config.get('video') or {}
        vod = video.get('vod') or {}

        def is_rented():
            # True if the page, the config's user data or any purchase option
            # indicates that the title was rented/purchased
            if '>You rented this title.<' in webpage:
                return True
            if try_get(config, lambda x: x['user']['purchased']):
                return True
            for purchase_option in (vod.get('purchase_options') or []):
                if purchase_option.get('purchased'):
                    return True
                label = purchase_option.get('label_string')
                if label and (label.startswith('You rented this') or label.endswith(' remaining')):
                    return True
            return False

        # For a rented title whose page resolves to the trailer, hand off to the feature video.
        # The smuggled force_feature_id flag is read back from `data` above; presumably it
        # keeps the re-entered extraction from redirecting again.
        if is_rented() and vod.get('is_trailer'):
            feature_id = vod.get('feature_id')
            if feature_id and not data.get('force_feature_id', False):
                return self.url_result(smuggle_url(
                    f'https://player.vimeo.com/player/{feature_id}',
                    {'force_feature_id': True}), 'Vimeo')

        # Description fallbacks: description <div>, then meta tags, then warn
        if not video_description:
            video_description = self._html_search_regex(
                r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
                webpage, 'description', default=None)
        if not video_description:
            video_description = self._html_search_meta(
                ['description', 'og:description', 'twitter:description'],
                webpage, default=None)
        if not video_description:
            self.report_warning('Cannot find video description')

        # Timestamp fallback: first <time datetime="..."> element on the page
        if not timestamp:
            timestamp = self._search_regex(
                r'<time[^>]+datetime="([^"]+)"', webpage,
                'timestamp', default=None)

        # Counters are scraped from the page; each is None when its pattern is absent
        view_count = int_or_none(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count', default=None))
        like_count = int_or_none(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count', default=None))
        comment_count = int_or_none(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count', default=None))

        formats = []

        # The original/source format, when found, is listed before the config formats
        source_format = self._extract_original_format(
            'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
        if source_format:
            formats.append(source_format)

        info_dict_config = self._parse_config(config, video_id)
        formats.extend(info_dict_config['formats'])
        info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields']

        json_ld = self._search_json_ld(webpage, video_id, default={})

        # License fallback: <link rel="license" href="..."> on the page
        if not cc_license:
            cc_license = self._search_regex(
                r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
                webpage, 'license', default=None, group='license')

        info_dict.update({
            'formats': formats,
            'timestamp': unified_timestamp(timestamp),
            'description': video_description,
            'webpage_url': url,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'license': cc_license,
        })

        # Combine page-derived values, config-derived values and JSON-LD into one dict
        return merge_dicts(info_dict, info_dict_config, json_ld)
class VimeoOndemandIE(VimeoIE):  # XXX: Do not subclass from concrete IE
    """Extractor for ``vimeo.com/ondemand/...`` URLs.

    Only the extractor name, URL pattern and tests are defined here; every
    method is inherited from VimeoIE.
    """
    IE_NAME = 'vimeo:ondemand'
    # Optional leading path segment, then the ID as the next segment (anything up to / ? # &)
    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
    _TESTS = [{
        # ondemand video not available via https://vimeo.com/id
        'url': 'https://vimeo.com/ondemand/20704',
        'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
        'info_dict': {
            'id': '105442900',
            'ext': 'mp4',
            'title': 'המעבדה - במאי יותם פלדמן',
            'uploader': 'גם סרטים',
            'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
            'uploader_id': 'gumfilms',
            'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
            'upload_date': '20140906',
            'timestamp': 1410032453,
            'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
            'comment_count': int,
            'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
            'duration': 53,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'format': 'best[protocol=https]',
        },
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        # requires Referer to be passed along with og:video:url
        'url': 'https://vimeo.com/ondemand/36938/126682985',
        'info_dict': {
            'id': '126584684',
            'ext': 'mp4',
            'title': 'Rävlock, rätt läte på rätt plats',
            'uploader': 'Lindroth & Norin',
            'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
            'uploader_id': 'lindrothnorin',
            'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
            'upload_date': '20150502',
            'timestamp': 1430586422,
            'duration': 121,
            'comment_count': int,
            'view_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        'url': 'https://vimeo.com/ondemand/nazmaalik',
        'only_matching': True,
    }, {
        'url': 'https://vimeo.com/ondemand/141692381',
        'only_matching': True,
    }, {
        'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
        'only_matching': True,
    }]
  992. class VimeoChannelIE(VimeoBaseInfoExtractor):
  993. IE_NAME = 'vimeo:channel'
  994. _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
  995. _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
  996. _TITLE = None
  997. _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
  998. _TESTS = [{
  999. 'url': 'https://vimeo.com/channels/tributes',
  1000. 'info_dict': {
  1001. 'id': 'tributes',
  1002. 'title': 'Vimeo Tributes',
  1003. },
  1004. 'playlist_mincount': 22,
  1005. }]
  1006. _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
  1007. def _page_url(self, base_url, pagenum):
  1008. return f'{base_url}/videos/page:{pagenum}/'
  1009. def _extract_list_title(self, webpage):
  1010. return self._TITLE or self._html_search_regex(
  1011. self._TITLE_RE, webpage, 'list title', fatal=False)
  1012. def _title_and_entries(self, list_id, base_url):
  1013. for pagenum in itertools.count(1):
  1014. page_url = self._page_url(base_url, pagenum)
  1015. webpage = self._download_webpage(
  1016. page_url, list_id,
  1017. f'Downloading page {pagenum}')
  1018. if pagenum == 1:
  1019. yield self._extract_list_title(webpage)
  1020. # Try extracting href first since not all videos are available via
  1021. # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
  1022. clips = re.findall(
  1023. r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
  1024. if clips:
  1025. for video_id, video_url, video_title in clips:
  1026. yield self.url_result(
  1027. urllib.parse.urljoin(base_url, video_url),
  1028. VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
  1029. # More relaxed fallback
  1030. else:
  1031. for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
  1032. yield self.url_result(
  1033. f'https://vimeo.com/{video_id}',
  1034. VimeoIE.ie_key(), video_id=video_id)
  1035. if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
  1036. break
  1037. def _extract_videos(self, list_id, base_url):
  1038. title_and_entries = self._title_and_entries(list_id, base_url)
  1039. list_title = next(title_and_entries)
  1040. return self.playlist_result(title_and_entries, list_id, list_title)
  1041. def _real_extract(self, url):
  1042. channel_id = self._match_id(url)
  1043. return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
class VimeoUserIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for ``https://vimeo.com/<user>`` and ``.../<user>/videos`` pages.

    All methods are inherited from VimeoChannelIE; only the URL pattern,
    title regex and base URL template are overridden.
    """
    IE_NAME = 'vimeo:user'
    # The negative lookahead rejects an all-digit or `watchlater` first path segment
    _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
    _TESTS = [{
        'url': 'https://vimeo.com/nkistudio/videos',
        'info_dict': {
            'title': 'Nki',
            'id': 'nkistudio',
        },
        'playlist_mincount': 66,
    }, {
        'url': 'https://vimeo.com/nkistudio/',
        'only_matching': True,
    }]
    _BASE_URL_TEMPL = 'https://vimeo.com/%s'
  1060. class VimeoAlbumIE(VimeoBaseInfoExtractor):
  1061. IE_NAME = 'vimeo:album'
  1062. _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
  1063. _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
  1064. _TESTS = [{
  1065. 'url': 'https://vimeo.com/album/2632481',
  1066. 'info_dict': {
  1067. 'id': '2632481',
  1068. 'title': 'Staff Favorites: November 2013',
  1069. },
  1070. 'playlist_mincount': 13,
  1071. }, {
  1072. 'note': 'Password-protected album',
  1073. 'url': 'https://vimeo.com/album/3253534',
  1074. 'info_dict': {
  1075. 'title': 'test',
  1076. 'id': '3253534',
  1077. },
  1078. 'playlist_count': 1,
  1079. 'params': {
  1080. 'videopassword': 'youtube-dl',
  1081. },
  1082. }]
  1083. _PAGE_SIZE = 100
  1084. def _fetch_page(self, album_id, authorization, hashed_pass, page):
  1085. api_page = page + 1
  1086. query = {
  1087. 'fields': 'link,uri',
  1088. 'page': api_page,
  1089. 'per_page': self._PAGE_SIZE,
  1090. }
  1091. if hashed_pass:
  1092. query['_hashed_pass'] = hashed_pass
  1093. try:
  1094. videos = self._download_json(
  1095. f'https://api.vimeo.com/albums/{album_id}/videos',
  1096. album_id, f'Downloading page {api_page}', query=query, headers={
  1097. 'Authorization': 'jwt ' + authorization,
  1098. 'Accept': 'application/json',
  1099. })['data']
  1100. except ExtractorError as e:
  1101. if isinstance(e.cause, HTTPError) and e.cause.status == 400:
  1102. return
  1103. raise
  1104. for video in videos:
  1105. link = video.get('link')
  1106. if not link:
  1107. continue
  1108. uri = video.get('uri')
  1109. video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
  1110. yield self.url_result(link, VimeoIE.ie_key(), video_id)
  1111. def _real_extract(self, url):
  1112. album_id = self._match_id(url)
  1113. viewer = self._download_json(
  1114. 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
  1115. if not viewer:
  1116. webpage = self._download_webpage(url, album_id)
  1117. viewer = self._parse_json(self._search_regex(
  1118. r'bootstrap_data\s*=\s*({.+?})</script>',
  1119. webpage, 'bootstrap data'), album_id)['viewer']
  1120. jwt = viewer['jwt']
  1121. album = self._download_json(
  1122. 'https://api.vimeo.com/albums/' + album_id,
  1123. album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
  1124. query={'fields': 'description,name,privacy'})
  1125. hashed_pass = None
  1126. if try_get(album, lambda x: x['privacy']['view']) == 'password':
  1127. password = self.get_param('videopassword')
  1128. if not password:
  1129. raise ExtractorError(
  1130. 'This album is protected by a password, use the --video-password option',
  1131. expected=True)
  1132. self._set_vimeo_cookie('vuid', viewer['vuid'])
  1133. try:
  1134. hashed_pass = self._download_json(
  1135. f'https://vimeo.com/showcase/{album_id}/auth',
  1136. album_id, 'Verifying the password', data=urlencode_postdata({
  1137. 'password': password,
  1138. 'token': viewer['xsrft'],
  1139. }), headers={
  1140. 'X-Requested-With': 'XMLHttpRequest',
  1141. })['hashed_pass']
  1142. except ExtractorError as e:
  1143. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  1144. raise ExtractorError('Wrong password', expected=True)
  1145. raise
  1146. entries = OnDemandPagedList(functools.partial(
  1147. self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
  1148. return self.playlist_result(
  1149. entries, album_id, album.get('name'), album.get('description'))
class VimeoGroupsIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for ``https://vimeo.com/groups/<id>`` pages.

    All methods are inherited from VimeoChannelIE; only the URL pattern and
    base URL template are overridden.
    """
    IE_NAME = 'vimeo:group'
    # The lookahead rejects `/video/<digits>` and `/videos/<digits>` sub-paths
    _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
    _TESTS = [{
        'url': 'https://vimeo.com/groups/meetup',
        'info_dict': {
            'id': 'meetup',
            'title': 'Vimeo Meetup!',
        },
        'playlist_mincount': 27,
    }]
    _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
  1162. class VimeoReviewIE(VimeoBaseInfoExtractor):
  1163. IE_NAME = 'vimeo:review'
  1164. IE_DESC = 'Review pages on vimeo'
  1165. _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
  1166. _TESTS = [{
  1167. 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
  1168. 'md5': 'c507a72f780cacc12b2248bb4006d253',
  1169. 'info_dict': {
  1170. 'id': '75524534',
  1171. 'ext': 'mp4',
  1172. 'title': "DICK HARDWICK 'Comedian'",
  1173. 'uploader': 'Richard Hardwick',
  1174. 'uploader_id': 'user21297594',
  1175. 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
  1176. 'duration': 304,
  1177. 'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
  1178. 'uploader_url': 'https://vimeo.com/user21297594',
  1179. },
  1180. }, {
  1181. 'note': 'video player needs Referer',
  1182. 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
  1183. 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
  1184. 'info_dict': {
  1185. 'id': '91613211',
  1186. 'ext': 'mp4',
  1187. 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
  1188. 'uploader': 'DevWeek Events',
  1189. 'duration': 2773,
  1190. 'thumbnail': r're:^https?://.*\.jpg$',
  1191. 'uploader_id': 'user22258446',
  1192. },
  1193. 'skip': 'video gone',
  1194. }, {
  1195. 'note': 'Password protected',
  1196. 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
  1197. 'info_dict': {
  1198. 'id': '138823582',
  1199. 'ext': 'mp4',
  1200. 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
  1201. 'uploader': 'TMB',
  1202. 'uploader_id': 'user37284429',
  1203. },
  1204. 'params': {
  1205. 'videopassword': 'holygrail',
  1206. },
  1207. 'skip': 'video gone',
  1208. }]
  1209. def _real_extract(self, url):
  1210. page_url, video_id = self._match_valid_url(url).groups()
  1211. data = self._download_json(
  1212. page_url.replace('/review/', '/review/data/'), video_id)
  1213. if data.get('isLocked') is True:
  1214. video_password = self._get_video_password()
  1215. viewer = self._download_json(
  1216. 'https://vimeo.com/_rv/viewer', video_id)
  1217. webpage = self._verify_video_password(
  1218. 'https://vimeo.com/' + video_id, video_id,
  1219. video_password, viewer['xsrft'], viewer['vuid'])
  1220. clip_page_config = self._parse_json(self._search_regex(
  1221. r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
  1222. webpage, 'clip page config'), video_id)
  1223. config_url = clip_page_config['player']['config_url']
  1224. clip_data = clip_page_config.get('clip') or {}
  1225. else:
  1226. clip_data = data['clipData']
  1227. config_url = clip_data['configUrl']
  1228. config = self._download_json(config_url, video_id)
  1229. info_dict = self._parse_config(config, video_id)
  1230. source_format = self._extract_original_format(
  1231. page_url + '/action', video_id)
  1232. if source_format:
  1233. info_dict['formats'].append(source_format)
  1234. info_dict['description'] = clean_html(clip_data.get('description'))
  1235. return info_dict
  1236. class VimeoWatchLaterIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
  1237. IE_NAME = 'vimeo:watchlater'
  1238. IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)'
  1239. _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
  1240. _TITLE = 'Watch Later'
  1241. _LOGIN_REQUIRED = True
  1242. _TESTS = [{
  1243. 'url': 'https://vimeo.com/watchlater',
  1244. 'only_matching': True,
  1245. }]
  1246. def _page_url(self, base_url, pagenum):
  1247. url = f'{base_url}/page:{pagenum}/'
  1248. request = Request(url)
  1249. # Set the header to get a partial html page with the ids,
  1250. # the normal page doesn't contain them.
  1251. request.headers['X-Requested-With'] = 'XMLHttpRequest'
  1252. return request
  1253. def _real_extract(self, url):
  1254. return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
  1255. class VimeoLikesIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
  1256. _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
  1257. IE_NAME = 'vimeo:likes'
  1258. IE_DESC = 'Vimeo user likes'
  1259. _TESTS = [{
  1260. 'url': 'https://vimeo.com/user755559/likes/',
  1261. 'playlist_mincount': 293,
  1262. 'info_dict': {
  1263. 'id': 'user755559',
  1264. 'title': 'urza’s Likes',
  1265. },
  1266. }, {
  1267. 'url': 'https://vimeo.com/stormlapse/likes',
  1268. 'only_matching': True,
  1269. }]
  1270. def _page_url(self, base_url, pagenum):
  1271. return f'{base_url}/page:{pagenum}/'
  1272. def _real_extract(self, url):
  1273. user_id = self._match_id(url)
  1274. return self._extract_videos(user_id, f'https://vimeo.com/{user_id}/likes')
  1275. class VHXEmbedIE(VimeoBaseInfoExtractor):
  1276. IE_NAME = 'vhx:embed'
  1277. _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
  1278. _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://embed\.vhx\.tv/videos/\d+[^"]*)"']
  1279. @classmethod
  1280. def _extract_embed_urls(cls, url, webpage):
  1281. for embed_url in super()._extract_embed_urls(url, webpage):
  1282. yield cls._smuggle_referrer(embed_url, url)
  1283. def _real_extract(self, url):
  1284. video_id = self._match_id(url)
  1285. url, _, headers = self._unsmuggle_headers(url)
  1286. webpage = self._download_webpage(url, video_id, headers=headers)
  1287. config_url = self._parse_json(self._search_regex(
  1288. r'window\.OTTData\s*=\s*({.+})', webpage,
  1289. 'ott data'), video_id, js_to_json)['config_url']
  1290. config = self._download_json(config_url, video_id)
  1291. info = self._parse_config(config, video_id)
  1292. info['id'] = video_id
  1293. return info
  1294. class VimeoProIE(VimeoBaseInfoExtractor):
  1295. IE_NAME = 'vimeo:pro'
  1296. _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?'
  1297. _TESTS = [{
  1298. # Vimeo URL derived from video_id
  1299. 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
  1300. 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
  1301. 'note': 'Vimeo Pro video (#1197)',
  1302. 'info_dict': {
  1303. 'id': '68093876',
  1304. 'ext': 'mp4',
  1305. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
  1306. 'uploader_id': 'openstreetmapus',
  1307. 'uploader': 'OpenStreetMap US',
  1308. 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
  1309. 'description': 'md5:2c362968038d4499f4d79f88458590c1',
  1310. 'duration': 1595,
  1311. 'upload_date': '20130610',
  1312. 'timestamp': 1370893156,
  1313. 'license': 'by',
  1314. 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
  1315. 'view_count': int,
  1316. 'comment_count': int,
  1317. 'like_count': int,
  1318. 'tags': 'count:1',
  1319. },
  1320. 'params': {
  1321. 'format': 'best[protocol=https]',
  1322. },
  1323. }, {
  1324. # password-protected VimeoPro page with Vimeo player embed
  1325. 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
  1326. 'info_dict': {
  1327. 'id': '764543723',
  1328. 'ext': 'mp4',
  1329. 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
  1330. 'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
  1331. 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
  1332. 'uploader': 'CADFEM',
  1333. 'uploader_id': 'cadfem',
  1334. 'uploader_url': 'https://vimeo.com/cadfem',
  1335. 'duration': 12505,
  1336. 'chapters': 'count:10',
  1337. },
  1338. 'params': {
  1339. 'videopassword': 'Conference2022',
  1340. 'skip_download': True,
  1341. },
  1342. }]
  1343. def _real_extract(self, url):
  1344. display_id, video_id = self._match_valid_url(url).group('slug', 'id')
  1345. if video_id:
  1346. display_id = video_id
  1347. webpage = self._download_webpage(url, display_id)
  1348. password_form = self._search_regex(
  1349. r'(?is)<form[^>]+?method=["\']post["\'][^>]*>(.+?password.+?)</form>',
  1350. webpage, 'password form', default=None)
  1351. if password_form:
  1352. try:
  1353. webpage = self._download_webpage(url, display_id, data=urlencode_postdata({
  1354. 'password': self._get_video_password(),
  1355. **self._hidden_inputs(password_form),
  1356. }), note='Logging in with video password')
  1357. except ExtractorError as e:
  1358. if isinstance(e.cause, HTTPError) and e.cause.status == 418:
  1359. raise ExtractorError('Wrong video password', expected=True)
  1360. raise
  1361. description = None
  1362. # even if we have video_id, some videos require player URL with portfolio_id query param
  1363. # https://github.com/ytdl-org/youtube-dl/issues/20070
  1364. vimeo_url = VimeoIE._extract_url(url, webpage)
  1365. if vimeo_url:
  1366. description = self._html_search_meta('description', webpage, default=None)
  1367. elif video_id:
  1368. vimeo_url = f'https://vimeo.com/{video_id}'
  1369. else:
  1370. raise ExtractorError(
  1371. 'No Vimeo embed or video ID could be found in VimeoPro page', expected=True)
  1372. return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
  1373. description=description)