external.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. from __future__ import unicode_literals
  2. import os.path
  3. import re
  4. import subprocess
  5. import sys
  6. import time
  7. try:
  8. from Crypto.Cipher import AES
  9. can_decrypt_frag = True
  10. except ImportError:
  11. can_decrypt_frag = False
  12. from .common import FileDownloader
  13. from ..compat import (
  14. compat_setenv,
  15. compat_str,
  16. )
  17. from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
  18. from ..utils import (
  19. cli_option,
  20. cli_valueless_option,
  21. cli_bool_option,
  22. cli_configuration_args,
  23. encodeFilename,
  24. error_to_compat_str,
  25. encodeArgument,
  26. handle_youtubedl_headers,
  27. check_executable,
  28. is_outdated_version,
  29. process_communicate_or_kill,
  30. sanitized_Request,
  31. sanitize_open,
  32. )
  33. class ExternalFD(FileDownloader):
  34. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
  35. def real_download(self, filename, info_dict):
  36. self.report_destination(filename)
  37. tmpfilename = self.temp_name(filename)
  38. try:
  39. started = time.time()
  40. retval = self._call_downloader(tmpfilename, info_dict)
  41. except KeyboardInterrupt:
  42. if not info_dict.get('is_live'):
  43. raise
  44. # Live stream downloading cancellation should be considered as
  45. # correct and expected termination thus all postprocessing
  46. # should take place
  47. retval = 0
  48. self.to_screen('[%s] Interrupted by user' % self.get_basename())
  49. if retval == 0:
  50. status = {
  51. 'filename': filename,
  52. 'status': 'finished',
  53. 'elapsed': time.time() - started,
  54. }
  55. if filename != '-':
  56. fsize = os.path.getsize(encodeFilename(tmpfilename))
  57. self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
  58. self.try_rename(tmpfilename, filename)
  59. status.update({
  60. 'downloaded_bytes': fsize,
  61. 'total_bytes': fsize,
  62. })
  63. self._hook_progress(status)
  64. return True
  65. else:
  66. self.to_stderr('\n')
  67. self.report_error('%s exited with code %d' % (
  68. self.get_basename(), retval))
  69. return False
  70. @classmethod
  71. def get_basename(cls):
  72. return cls.__name__[:-2].lower()
  73. @property
  74. def exe(self):
  75. return self.params.get('external_downloader')
  76. @classmethod
  77. def available(cls, path=None):
  78. return check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
  79. @classmethod
  80. def supports(cls, info_dict):
  81. return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS
  82. @classmethod
  83. def can_download(cls, info_dict, path=None):
  84. return cls.available(path) and cls.supports(info_dict)
  85. def _option(self, command_option, param):
  86. return cli_option(self.params, command_option, param)
  87. def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
  88. return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
  89. def _valueless_option(self, command_option, param, expected_value=True):
  90. return cli_valueless_option(self.params, command_option, param, expected_value)
  91. def _configuration_args(self, *args, **kwargs):
  92. return cli_configuration_args(
  93. self.params.get('external_downloader_args'),
  94. self.get_basename(), *args, **kwargs)
  95. def _call_downloader(self, tmpfilename, info_dict):
  96. """ Either overwrite this or implement _make_cmd """
  97. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  98. self._debug_cmd(cmd)
  99. p = subprocess.Popen(
  100. cmd, stderr=subprocess.PIPE)
  101. _, stderr = process_communicate_or_kill(p)
  102. if p.returncode != 0:
  103. self.to_stderr(stderr.decode('utf-8', 'replace'))
  104. if 'fragments' in info_dict:
  105. file_list = []
  106. dest, _ = sanitize_open(tmpfilename, 'wb')
  107. for i, fragment in enumerate(info_dict['fragments']):
  108. file = '%s-Frag%d' % (tmpfilename, i)
  109. decrypt_info = fragment.get('decrypt_info')
  110. src, _ = sanitize_open(file, 'rb')
  111. if decrypt_info:
  112. if decrypt_info['METHOD'] == 'AES-128':
  113. iv = decrypt_info.get('IV')
  114. decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
  115. self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
  116. encrypted_data = src.read()
  117. decrypted_data = AES.new(
  118. decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(encrypted_data)
  119. dest.write(decrypted_data)
  120. else:
  121. fragment_data = src.read()
  122. dest.write(fragment_data)
  123. else:
  124. fragment_data = src.read()
  125. dest.write(fragment_data)
  126. src.close()
  127. file_list.append(file)
  128. dest.close()
  129. if not self.params.get('keep_fragments', False):
  130. for file_path in file_list:
  131. try:
  132. os.remove(file_path)
  133. except OSError as ose:
  134. self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
  135. try:
  136. file_path = '%s.frag.urls' % tmpfilename
  137. os.remove(file_path)
  138. except OSError as ose:
  139. self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
  140. return p.returncode
  141. def _prepare_url(self, info_dict, url):
  142. headers = info_dict.get('http_headers')
  143. return sanitized_Request(url, None, headers) if headers else url
  144. class CurlFD(ExternalFD):
  145. AVAILABLE_OPT = '-V'
  146. def _make_cmd(self, tmpfilename, info_dict):
  147. cmd = [self.exe, '--location', '-o', tmpfilename]
  148. if info_dict.get('http_headers') is not None:
  149. for key, val in info_dict['http_headers'].items():
  150. cmd += ['--header', '%s: %s' % (key, val)]
  151. cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
  152. cmd += self._valueless_option('--silent', 'noprogress')
  153. cmd += self._valueless_option('--verbose', 'verbose')
  154. cmd += self._option('--limit-rate', 'ratelimit')
  155. retry = self._option('--retry', 'retries')
  156. if len(retry) == 2:
  157. if retry[1] in ('inf', 'infinite'):
  158. retry[1] = '2147483647'
  159. cmd += retry
  160. cmd += self._option('--max-filesize', 'max_filesize')
  161. cmd += self._option('--interface', 'source_address')
  162. cmd += self._option('--proxy', 'proxy')
  163. cmd += self._valueless_option('--insecure', 'nocheckcertificate')
  164. cmd += self._configuration_args()
  165. cmd += ['--', info_dict['url']]
  166. return cmd
  167. def _call_downloader(self, tmpfilename, info_dict):
  168. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  169. self._debug_cmd(cmd)
  170. # curl writes the progress to stderr so don't capture it.
  171. p = subprocess.Popen(cmd)
  172. process_communicate_or_kill(p)
  173. return p.returncode
  174. class AxelFD(ExternalFD):
  175. AVAILABLE_OPT = '-V'
  176. def _make_cmd(self, tmpfilename, info_dict):
  177. cmd = [self.exe, '-o', tmpfilename]
  178. if info_dict.get('http_headers') is not None:
  179. for key, val in info_dict['http_headers'].items():
  180. cmd += ['-H', '%s: %s' % (key, val)]
  181. cmd += self._configuration_args()
  182. cmd += ['--', info_dict['url']]
  183. return cmd
  184. class WgetFD(ExternalFD):
  185. AVAILABLE_OPT = '--version'
  186. def _make_cmd(self, tmpfilename, info_dict):
  187. cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
  188. if info_dict.get('http_headers') is not None:
  189. for key, val in info_dict['http_headers'].items():
  190. cmd += ['--header', '%s: %s' % (key, val)]
  191. cmd += self._option('--limit-rate', 'ratelimit')
  192. retry = self._option('--tries', 'retries')
  193. if len(retry) == 2:
  194. if retry[1] in ('inf', 'infinite'):
  195. retry[1] = '0'
  196. cmd += retry
  197. cmd += self._option('--bind-address', 'source_address')
  198. cmd += self._option('--proxy', 'proxy')
  199. cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
  200. cmd += self._configuration_args()
  201. cmd += ['--', info_dict['url']]
  202. return cmd
  203. class Aria2cFD(ExternalFD):
  204. AVAILABLE_OPT = '-v'
  205. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
  206. @staticmethod
  207. def supports_manifest(manifest):
  208. UNSUPPORTED_FEATURES = [
  209. r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
  210. # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
  211. ]
  212. check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
  213. return all(check_results)
  214. def _make_cmd(self, tmpfilename, info_dict):
  215. cmd = [self.exe, '-c']
  216. dn = os.path.dirname(tmpfilename)
  217. if 'fragments' not in info_dict:
  218. cmd += ['--out', os.path.basename(tmpfilename)]
  219. verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
  220. cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
  221. if dn:
  222. cmd += ['--dir', dn]
  223. if info_dict.get('http_headers') is not None:
  224. for key, val in info_dict['http_headers'].items():
  225. cmd += ['--header', '%s: %s' % (key, val)]
  226. cmd += self._option('--interface', 'source_address')
  227. cmd += self._option('--all-proxy', 'proxy')
  228. cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
  229. cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
  230. cmd += ['--auto-file-renaming=false']
  231. if 'fragments' in info_dict:
  232. cmd += verbose_level_args
  233. cmd += ['--uri-selector', 'inorder', '--download-result=hide']
  234. url_list_file = '%s.frag.urls' % tmpfilename
  235. url_list = []
  236. for i, fragment in enumerate(info_dict['fragments']):
  237. tmpsegmentname = '%s-Frag%d' % (os.path.basename(tmpfilename), i)
  238. url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname))
  239. stream, _ = sanitize_open(url_list_file, 'wb')
  240. stream.write('\n'.join(url_list).encode('utf-8'))
  241. stream.close()
  242. cmd += ['-i', url_list_file]
  243. else:
  244. cmd += ['--', info_dict['url']]
  245. return cmd
  246. class HttpieFD(ExternalFD):
  247. @classmethod
  248. def available(cls, path=None):
  249. return check_executable(path or 'http', ['--version'])
  250. def _make_cmd(self, tmpfilename, info_dict):
  251. cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
  252. if info_dict.get('http_headers') is not None:
  253. for key, val in info_dict['http_headers'].items():
  254. cmd += ['%s:%s' % (key, val)]
  255. return cmd
  256. class FFmpegFD(ExternalFD):
  257. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
  258. @classmethod
  259. def available(cls, path=None): # path is ignored for ffmpeg
  260. return FFmpegPostProcessor().available
  261. def _call_downloader(self, tmpfilename, info_dict):
  262. url = info_dict['url']
  263. ffpp = FFmpegPostProcessor(downloader=self)
  264. if not ffpp.available:
  265. self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
  266. return False
  267. ffpp.check_version()
  268. args = [ffpp.executable, '-y']
  269. for log_level in ('quiet', 'verbose'):
  270. if self.params.get(log_level, False):
  271. args += ['-loglevel', log_level]
  272. break
  273. seekable = info_dict.get('_seekable')
  274. if seekable is not None:
  275. # setting -seekable prevents ffmpeg from guessing if the server
  276. # supports seeking(by adding the header `Range: bytes=0-`), which
  277. # can cause problems in some cases
  278. # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
  279. # http://trac.ffmpeg.org/ticket/6125#comment:10
  280. args += ['-seekable', '1' if seekable else '0']
  281. args += self._configuration_args()
  282. # start_time = info_dict.get('start_time') or 0
  283. # if start_time:
  284. # args += ['-ss', compat_str(start_time)]
  285. # end_time = info_dict.get('end_time')
  286. # if end_time:
  287. # args += ['-t', compat_str(end_time - start_time)]
  288. if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
  289. # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
  290. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
  291. headers = handle_youtubedl_headers(info_dict['http_headers'])
  292. args += [
  293. '-headers',
  294. ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
  295. env = None
  296. proxy = self.params.get('proxy')
  297. if proxy:
  298. if not re.match(r'^[\da-zA-Z]+://', proxy):
  299. proxy = 'http://%s' % proxy
  300. if proxy.startswith('socks'):
  301. self.report_warning(
  302. '%s does not support SOCKS proxies. Downloading is likely to fail. '
  303. 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
  304. # Since December 2015 ffmpeg supports -http_proxy option (see
  305. # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
  306. # We could switch to the following code if we are able to detect version properly
  307. # args += ['-http_proxy', proxy]
  308. env = os.environ.copy()
  309. compat_setenv('HTTP_PROXY', proxy, env=env)
  310. compat_setenv('http_proxy', proxy, env=env)
  311. protocol = info_dict.get('protocol')
  312. if protocol == 'rtmp':
  313. player_url = info_dict.get('player_url')
  314. page_url = info_dict.get('page_url')
  315. app = info_dict.get('app')
  316. play_path = info_dict.get('play_path')
  317. tc_url = info_dict.get('tc_url')
  318. flash_version = info_dict.get('flash_version')
  319. live = info_dict.get('rtmp_live', False)
  320. conn = info_dict.get('rtmp_conn')
  321. if player_url is not None:
  322. args += ['-rtmp_swfverify', player_url]
  323. if page_url is not None:
  324. args += ['-rtmp_pageurl', page_url]
  325. if app is not None:
  326. args += ['-rtmp_app', app]
  327. if play_path is not None:
  328. args += ['-rtmp_playpath', play_path]
  329. if tc_url is not None:
  330. args += ['-rtmp_tcurl', tc_url]
  331. if flash_version is not None:
  332. args += ['-rtmp_flashver', flash_version]
  333. if live:
  334. args += ['-rtmp_live', 'live']
  335. if isinstance(conn, list):
  336. for entry in conn:
  337. args += ['-rtmp_conn', entry]
  338. elif isinstance(conn, compat_str):
  339. args += ['-rtmp_conn', conn]
  340. args += ['-i', url, '-c', 'copy']
  341. if self.params.get('test', False):
  342. args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
  343. if protocol in ('m3u8', 'm3u8_native'):
  344. use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
  345. if use_mpegts is None:
  346. use_mpegts = info_dict.get('is_live')
  347. if use_mpegts:
  348. args += ['-f', 'mpegts']
  349. else:
  350. args += ['-f', 'mp4']
  351. if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
  352. args += ['-bsf:a', 'aac_adtstoasc']
  353. elif protocol == 'rtmp':
  354. args += ['-f', 'flv']
  355. else:
  356. args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
  357. args = [encodeArgument(opt) for opt in args]
  358. args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
  359. self._debug_cmd(args)
  360. proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
  361. try:
  362. retval = proc.wait()
  363. except BaseException as e:
  364. # subprocces.run would send the SIGKILL signal to ffmpeg and the
  365. # mp4 file couldn't be played, but if we ask ffmpeg to quit it
  366. # produces a file that is playable (this is mostly useful for live
  367. # streams). Note that Windows is not affected and produces playable
  368. # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
  369. if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
  370. process_communicate_or_kill(proc, b'q')
  371. else:
  372. proc.kill()
  373. proc.wait()
  374. raise
  375. return retval
  376. class AVconvFD(FFmpegFD):
  377. pass
  378. _BY_NAME = dict(
  379. (klass.get_basename(), klass)
  380. for name, klass in globals().items()
  381. if name.endswith('FD') and name != 'ExternalFD'
  382. )
  383. def list_external_downloaders():
  384. return sorted(_BY_NAME.keys())
  385. def get_external_downloader(external_downloader):
  386. """ Given the name of the executable, see whether we support the given
  387. downloader . """
  388. # Drop .exe extension on Windows
  389. bn = os.path.splitext(os.path.basename(external_downloader))[0]
  390. return _BY_NAME[bn]