test_networking.py 89 KB


  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. import pytest
  6. from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
  7. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. import gzip
  9. import http.client
  10. import http.cookiejar
  11. import http.server
  12. import io
  13. import logging
  14. import pathlib
  15. import random
  16. import ssl
  17. import tempfile
  18. import threading
  19. import time
  20. import urllib.error
  21. import urllib.request
  22. import warnings
  23. import zlib
  24. from email.message import Message
  25. from http.cookiejar import CookieJar
  26. from test.helper import (
  27. FakeYDL,
  28. http_server_port,
  29. validate_and_send,
  30. verify_address_availability,
  31. )
  32. from yt_dlp.cookies import YoutubeDLCookieJar
  33. from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
  34. from yt_dlp.networking import (
  35. HEADRequest,
  36. PUTRequest,
  37. Request,
  38. RequestDirector,
  39. RequestHandler,
  40. Response,
  41. )
  42. from yt_dlp.networking._urllib import UrllibRH
  43. from yt_dlp.networking.exceptions import (
  44. CertificateVerifyError,
  45. HTTPError,
  46. IncompleteRead,
  47. NoSupportingHandlers,
  48. ProxyError,
  49. RequestError,
  50. SSLError,
  51. TransportError,
  52. UnsupportedRequest,
  53. )
  54. from yt_dlp.networking.impersonate import (
  55. ImpersonateRequestHandler,
  56. ImpersonateTarget,
  57. )
  58. from yt_dlp.utils import YoutubeDLError
  59. from yt_dlp.utils._utils import _YDLLogger as FakeLogger
  60. from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
  61. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  62. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  63. protocol_version = 'HTTP/1.1'
  64. default_request_version = 'HTTP/1.1'
  65. def log_message(self, format, *args):
  66. pass
  67. def _headers(self):
  68. payload = str(self.headers).encode()
  69. self.send_response(200)
  70. self.send_header('Content-Type', 'application/json')
  71. self.send_header('Content-Length', str(len(payload)))
  72. self.end_headers()
  73. self.wfile.write(payload)
  74. def _redirect(self):
  75. self.send_response(int(self.path[len('/redirect_'):]))
  76. self.send_header('Location', '/method')
  77. self.send_header('Content-Length', '0')
  78. self.end_headers()
  79. def _method(self, method, payload=None):
  80. self.send_response(200)
  81. self.send_header('Content-Length', str(len(payload or '')))
  82. self.send_header('Method', method)
  83. self.end_headers()
  84. if payload:
  85. self.wfile.write(payload)
  86. def _status(self, status):
  87. payload = f'<html>{status} NOT FOUND</html>'.encode()
  88. self.send_response(int(status))
  89. self.send_header('Content-Type', 'text/html; charset=utf-8')
  90. self.send_header('Content-Length', str(len(payload)))
  91. self.end_headers()
  92. self.wfile.write(payload)
  93. def _read_data(self):
  94. if 'Content-Length' in self.headers:
  95. return self.rfile.read(int(self.headers['Content-Length']))
  96. else:
  97. return b''
  98. def do_POST(self):
  99. data = self._read_data() + str(self.headers).encode()
  100. if self.path.startswith('/redirect_'):
  101. self._redirect()
  102. elif self.path.startswith('/method'):
  103. self._method('POST', data)
  104. elif self.path.startswith('/headers'):
  105. self._headers()
  106. else:
  107. self._status(404)
  108. def do_HEAD(self):
  109. if self.path.startswith('/redirect_'):
  110. self._redirect()
  111. elif self.path.startswith('/method'):
  112. self._method('HEAD')
  113. else:
  114. self._status(404)
  115. def do_PUT(self):
  116. data = self._read_data() + str(self.headers).encode()
  117. if self.path.startswith('/redirect_'):
  118. self._redirect()
  119. elif self.path.startswith('/method'):
  120. self._method('PUT', data)
  121. else:
  122. self._status(404)
  123. def do_GET(self):
  124. if self.path == '/video.html':
  125. payload = b'<html><video src="/vid.mp4" /></html>'
  126. self.send_response(200)
  127. self.send_header('Content-Type', 'text/html; charset=utf-8')
  128. self.send_header('Content-Length', str(len(payload)))
  129. self.end_headers()
  130. self.wfile.write(payload)
  131. elif self.path == '/vid.mp4':
  132. payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
  133. self.send_response(200)
  134. self.send_header('Content-Type', 'video/mp4')
  135. self.send_header('Content-Length', str(len(payload)))
  136. self.end_headers()
  137. self.wfile.write(payload)
  138. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  139. payload = b'<html><video src="/vid.mp4" /></html>'
  140. self.send_response(200)
  141. self.send_header('Content-Type', 'text/html; charset=utf-8')
  142. self.send_header('Content-Length', str(len(payload)))
  143. self.end_headers()
  144. self.wfile.write(payload)
  145. elif self.path == '/%c7%9f':
  146. payload = b'<html><video src="/vid.mp4" /></html>'
  147. self.send_response(200)
  148. self.send_header('Content-Type', 'text/html; charset=utf-8')
  149. self.send_header('Content-Length', str(len(payload)))
  150. self.end_headers()
  151. self.wfile.write(payload)
  152. elif self.path.startswith('/redirect_loop'):
  153. self.send_response(301)
  154. self.send_header('Location', self.path)
  155. self.send_header('Content-Length', '0')
  156. self.end_headers()
  157. elif self.path == '/redirect_dotsegments':
  158. self.send_response(301)
  159. # redirect to /headers but with dot segments before
  160. self.send_header('Location', '/a/b/./../../headers')
  161. self.send_header('Content-Length', '0')
  162. self.end_headers()
  163. elif self.path == '/redirect_dotsegments_absolute':
  164. self.send_response(301)
  165. # redirect to /headers but with dot segments before - absolute url
  166. self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
  167. self.send_header('Content-Length', '0')
  168. self.end_headers()
  169. elif self.path.startswith('/redirect_'):
  170. self._redirect()
  171. elif self.path.startswith('/method'):
  172. self._method('GET', str(self.headers).encode())
  173. elif self.path.startswith('/headers'):
  174. self._headers()
  175. elif self.path.startswith('/308-to-headers'):
  176. self.send_response(308)
  177. # redirect to "localhost" for testing cookie redirection handling
  178. self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers')
  179. self.send_header('Content-Length', '0')
  180. self.end_headers()
  181. elif self.path == '/trailing_garbage':
  182. payload = b'<html><video src="/vid.mp4" /></html>'
  183. self.send_response(200)
  184. self.send_header('Content-Type', 'text/html; charset=utf-8')
  185. self.send_header('Content-Encoding', 'gzip')
  186. buf = io.BytesIO()
  187. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  188. f.write(payload)
  189. compressed = buf.getvalue() + b'trailing garbage'
  190. self.send_header('Content-Length', str(len(compressed)))
  191. self.end_headers()
  192. self.wfile.write(compressed)
  193. elif self.path == '/302-non-ascii-redirect':
  194. new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
  195. self.send_response(301)
  196. self.send_header('Location', new_url)
  197. self.send_header('Content-Length', '0')
  198. self.end_headers()
  199. elif self.path == '/content-encoding':
  200. encodings = self.headers.get('ytdl-encoding', '')
  201. payload = b'<html><video src="/vid.mp4" /></html>'
  202. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  203. if encoding == 'br' and brotli:
  204. payload = brotli.compress(payload)
  205. elif encoding == 'gzip':
  206. buf = io.BytesIO()
  207. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  208. f.write(payload)
  209. payload = buf.getvalue()
  210. elif encoding == 'deflate':
  211. payload = zlib.compress(payload)
  212. elif encoding == 'unsupported':
  213. payload = b'raw'
  214. break
  215. else:
  216. self._status(415)
  217. return
  218. self.send_response(200)
  219. self.send_header('Content-Encoding', encodings)
  220. self.send_header('Content-Length', str(len(payload)))
  221. self.end_headers()
  222. self.wfile.write(payload)
  223. elif self.path.startswith('/gen_'):
  224. payload = b'<html></html>'
  225. self.send_response(int(self.path[len('/gen_'):]))
  226. self.send_header('Content-Type', 'text/html; charset=utf-8')
  227. self.send_header('Content-Length', str(len(payload)))
  228. self.end_headers()
  229. self.wfile.write(payload)
  230. elif self.path.startswith('/incompleteread'):
  231. payload = b'<html></html>'
  232. self.send_response(200)
  233. self.send_header('Content-Type', 'text/html; charset=utf-8')
  234. self.send_header('Content-Length', '234234')
  235. self.end_headers()
  236. self.wfile.write(payload)
  237. self.finish()
  238. elif self.path.startswith('/timeout_'):
  239. time.sleep(int(self.path[len('/timeout_'):]))
  240. self._headers()
  241. elif self.path == '/source_address':
  242. payload = str(self.client_address[0]).encode()
  243. self.send_response(200)
  244. self.send_header('Content-Type', 'text/html; charset=utf-8')
  245. self.send_header('Content-Length', str(len(payload)))
  246. self.end_headers()
  247. self.wfile.write(payload)
  248. self.finish()
  249. elif self.path == '/get_cookie':
  250. self.send_response(200)
  251. self.send_header('Set-Cookie', 'test=ytdlp; path=/')
  252. self.end_headers()
  253. self.finish()
  254. else:
  255. self._status(404)
  256. def send_header(self, keyword, value):
  257. """
  258. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  259. This is against what is defined in RFC 3986, however we need to test we support this
  260. since some sites incorrectly do this.
  261. """
  262. if keyword.lower() == 'connection':
  263. return super().send_header(keyword, value)
  264. if not hasattr(self, '_headers_buffer'):
  265. self._headers_buffer = []
  266. self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
  267. class TestRequestHandlerBase:
  268. @classmethod
  269. def setup_class(cls):
  270. cls.http_httpd = http.server.ThreadingHTTPServer(
  271. ('127.0.0.1', 0), HTTPTestRequestHandler)
  272. cls.http_port = http_server_port(cls.http_httpd)
  273. cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
  274. # FIXME: we should probably stop the http server thread after each test
  275. # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
  276. cls.http_server_thread.daemon = True
  277. cls.http_server_thread.start()
  278. # HTTPS server
  279. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  280. cls.https_httpd = http.server.ThreadingHTTPServer(
  281. ('127.0.0.1', 0), HTTPTestRequestHandler)
  282. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  283. sslctx.load_cert_chain(certfn, None)
  284. cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
  285. cls.https_port = http_server_port(cls.https_httpd)
  286. cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
  287. cls.https_server_thread.daemon = True
  288. cls.https_server_thread.start()
  289. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  290. class TestHTTPRequestHandler(TestRequestHandlerBase):
  291. def test_verify_cert(self, handler):
  292. with handler() as rh:
  293. with pytest.raises(CertificateVerifyError):
  294. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  295. with handler(verify=False) as rh:
  296. r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  297. assert r.status == 200
  298. r.close()
  299. def test_ssl_error(self, handler):
  300. # HTTPS server with too old TLS version
  301. # XXX: is there a better way to test this than to create a new server?
  302. https_httpd = http.server.ThreadingHTTPServer(
  303. ('127.0.0.1', 0), HTTPTestRequestHandler)
  304. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  305. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  306. https_port = http_server_port(https_httpd)
  307. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  308. https_server_thread.daemon = True
  309. https_server_thread.start()
  310. with handler(verify=False) as rh:
  311. with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info:
  312. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  313. assert not issubclass(exc_info.type, CertificateVerifyError)
  314. @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
  315. def test_legacy_ssl_extension(self, handler):
  316. # HTTPS server with old ciphers
  317. # XXX: is there a better way to test this than to create a new server?
  318. https_httpd = http.server.ThreadingHTTPServer(
  319. ('127.0.0.1', 0), HTTPTestRequestHandler)
  320. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  321. sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
  322. sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
  323. sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
  324. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  325. https_port = http_server_port(https_httpd)
  326. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  327. https_server_thread.daemon = True
  328. https_server_thread.start()
  329. with handler(verify=False) as rh:
  330. res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers', extensions={'legacy_ssl': True}))
  331. assert res.status == 200
  332. res.close()
  333. # Ensure only applies to request extension
  334. with pytest.raises(SSLError):
  335. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  336. @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
  337. def test_legacy_ssl_support(self, handler):
  338. # HTTPS server with old ciphers
  339. # XXX: is there a better way to test this than to create a new server?
  340. https_httpd = http.server.ThreadingHTTPServer(
  341. ('127.0.0.1', 0), HTTPTestRequestHandler)
  342. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  343. sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
  344. sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
  345. sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
  346. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  347. https_port = http_server_port(https_httpd)
  348. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  349. https_server_thread.daemon = True
  350. https_server_thread.start()
  351. with handler(verify=False, legacy_ssl_support=True) as rh:
  352. res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  353. assert res.status == 200
  354. res.close()
  355. def test_percent_encode(self, handler):
  356. with handler() as rh:
  357. # Unicode characters should be encoded with uppercase percent-encoding
  358. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
  359. assert res.status == 200
  360. res.close()
  361. # don't normalize existing percent encodings
  362. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
  363. assert res.status == 200
  364. res.close()
  365. @pytest.mark.parametrize('path', [
  366. '/a/b/./../../headers',
  367. '/redirect_dotsegments',
  368. # https://github.com/yt-dlp/yt-dlp/issues/9020
  369. '/redirect_dotsegments_absolute',
  370. ])
  371. def test_remove_dot_segments(self, handler, path):
  372. with handler(verbose=True) as rh:
  373. # This isn't a comprehensive test,
  374. # but it should be enough to check whether the handler is removing dot segments in required scenarios
  375. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
  376. assert res.status == 200
  377. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  378. res.close()
  379. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
  380. def test_unicode_path_redirection(self, handler):
  381. with handler() as rh:
  382. r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
  383. assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
  384. r.close()
  385. def test_raise_http_error(self, handler):
  386. with handler() as rh:
  387. for bad_status in (400, 500, 599, 302):
  388. with pytest.raises(HTTPError):
  389. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))
  390. # Should not raise an error
  391. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()
  392. def test_response_url(self, handler):
  393. with handler() as rh:
  394. # Response url should be that of the last url in redirect chain
  395. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
  396. assert res.url == f'http://127.0.0.1:{self.http_port}/method'
  397. res.close()
  398. res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
  399. assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
  400. res2.close()
  401. # Covers some basic cases we expect some level of consistency between request handlers for
  402. @pytest.mark.parametrize('redirect_status,method,expected', [
  403. # A 303 must either use GET or HEAD for subsequent request
  404. (303, 'POST', ('', 'GET', False)),
  405. (303, 'HEAD', ('', 'HEAD', False)),
  406. # 301 and 302 turn POST only into a GET
  407. (301, 'POST', ('', 'GET', False)),
  408. (301, 'HEAD', ('', 'HEAD', False)),
  409. (302, 'POST', ('', 'GET', False)),
  410. (302, 'HEAD', ('', 'HEAD', False)),
  411. # 307 and 308 should not change method
  412. (307, 'POST', ('testdata', 'POST', True)),
  413. (308, 'POST', ('testdata', 'POST', True)),
  414. (307, 'HEAD', ('', 'HEAD', False)),
  415. (308, 'HEAD', ('', 'HEAD', False)),
  416. ])
  417. def test_redirect(self, handler, redirect_status, method, expected):
  418. with handler() as rh:
  419. data = b'testdata' if method == 'POST' else None
  420. headers = {}
  421. if data is not None:
  422. headers['Content-Type'] = 'application/test'
  423. res = validate_and_send(
  424. rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data,
  425. headers=headers))
  426. headers = b''
  427. data_recv = b''
  428. if data is not None:
  429. data_recv += res.read(len(data))
  430. if data_recv != data:
  431. headers += data_recv
  432. data_recv = b''
  433. headers += res.read()
  434. assert expected[0] == data_recv.decode()
  435. assert expected[1] == res.headers.get('method')
  436. assert expected[2] == ('content-length' in headers.decode().lower())
  437. def test_request_cookie_header(self, handler):
  438. # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
  439. with handler() as rh:
  440. # Specified Cookie header should be used
  441. res = validate_and_send(
  442. rh, Request(
  443. f'http://127.0.0.1:{self.http_port}/headers',
  444. headers={'Cookie': 'test=test'})).read().decode()
  445. assert 'cookie: test=test' in res.lower()
  446. # Specified Cookie header should be removed on any redirect
  447. res = validate_and_send(
  448. rh, Request(
  449. f'http://127.0.0.1:{self.http_port}/308-to-headers',
  450. headers={'Cookie': 'test=test2'})).read().decode()
  451. assert 'cookie: test=test2' not in res.lower()
  452. # Specified Cookie header should override global cookiejar for that request
  453. # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now
  454. cookiejar = YoutubeDLCookieJar()
  455. cookiejar.set_cookie(http.cookiejar.Cookie(
  456. version=0, name='test', value='ytdlp', port=None, port_specified=False,
  457. domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
  458. path_specified=True, secure=False, expires=None, discard=False, comment=None,
  459. comment_url=None, rest={}))
  460. with handler(cookiejar=cookiejar) as rh:
  461. data = validate_and_send(
  462. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read()
  463. assert b'cookie: test=ytdlp' not in data.lower()
  464. assert b'cookie: test=test3' in data.lower()
  465. def test_redirect_loop(self, handler):
  466. with handler() as rh:
  467. with pytest.raises(HTTPError, match='redirect loop'):
  468. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
  469. def test_incompleteread(self, handler):
  470. with handler(timeout=2) as rh:
  471. with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
  472. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()
  473. def test_cookies(self, handler):
  474. cookiejar = YoutubeDLCookieJar()
  475. cookiejar.set_cookie(http.cookiejar.Cookie(
  476. 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
  477. False, '/headers', True, False, None, False, None, None, {}))
  478. with handler(cookiejar=cookiejar) as rh:
  479. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  480. assert b'cookie: test=ytdlp' in data.lower()
  481. # Per request
  482. with handler() as rh:
  483. data = validate_and_send(
  484. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
  485. assert b'cookie: test=ytdlp' in data.lower()
  486. def test_cookie_sync_only_cookiejar(self, handler):
  487. # Ensure that cookies are ONLY being handled by the cookiejar
  488. with handler() as rh:
  489. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()}))
  490. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': YoutubeDLCookieJar()})).read()
  491. assert b'cookie: test=ytdlp' not in data.lower()
  492. def test_cookie_sync_delete_cookie(self, handler):
  493. # Ensure that cookies are ONLY being handled by the cookiejar
  494. cookiejar = YoutubeDLCookieJar()
  495. with handler(cookiejar=cookiejar) as rh:
  496. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie'))
  497. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  498. assert b'cookie: test=ytdlp' in data.lower()
  499. cookiejar.clear_session_cookies()
  500. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  501. assert b'cookie: test=ytdlp' not in data.lower()
  502. def test_headers(self, handler):
  503. with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
  504. # Global Headers
  505. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower()
  506. assert b'test1: test' in data
  507. # Per request headers, merged with global
  508. data = validate_and_send(rh, Request(
  509. f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower()
  510. assert b'test1: test' in data
  511. assert b'test2: changed' in data
  512. assert b'test2: test2' not in data
  513. assert b'test3: test3' in data
  514. def test_read_timeout(self, handler):
  515. with handler() as rh:
  516. # Default timeout is 20 seconds, so this should go through
  517. validate_and_send(
  518. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
  519. with handler(timeout=0.1) as rh:
  520. with pytest.raises(TransportError):
  521. validate_and_send(
  522. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5'))
  523. # Per request timeout, should override handler timeout
  524. validate_and_send(
  525. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
  526. def test_connect_timeout(self, handler):
  527. # nothing should be listening on this port
  528. connect_timeout_url = 'http://10.255.255.255'
  529. with handler(timeout=0.01) as rh, pytest.raises(TransportError):
  530. now = time.time()
  531. validate_and_send(rh, Request(connect_timeout_url))
  532. assert time.time() - now < DEFAULT_TIMEOUT
  533. # Per request timeout, should override handler timeout
  534. request = Request(connect_timeout_url, extensions={'timeout': 0.01})
  535. with handler() as rh, pytest.raises(TransportError):
  536. now = time.time()
  537. validate_and_send(rh, request)
  538. assert time.time() - now < DEFAULT_TIMEOUT
  539. def test_source_address(self, handler):
  540. source_address = f'127.0.0.{random.randint(5, 255)}'
  541. # on some systems these loopback addresses we need for testing may not be available
  542. # see: https://github.com/yt-dlp/yt-dlp/issues/8890
  543. verify_address_availability(source_address)
  544. with handler(source_address=source_address) as rh:
  545. data = validate_and_send(
  546. rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
  547. assert source_address == data
  548. # Not supported by CurlCFFI
  549. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
  550. def test_gzip_trailing_garbage(self, handler):
  551. with handler() as rh:
  552. data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
  553. assert data == '<html><video src="/vid.mp4" /></html>'
  554. @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
  555. @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
  556. def test_brotli(self, handler):
  557. with handler() as rh:
  558. res = validate_and_send(
  559. rh, Request(
  560. f'http://127.0.0.1:{self.http_port}/content-encoding',
  561. headers={'ytdl-encoding': 'br'}))
  562. assert res.headers.get('Content-Encoding') == 'br'
  563. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  564. def test_deflate(self, handler):
  565. with handler() as rh:
  566. res = validate_and_send(
  567. rh, Request(
  568. f'http://127.0.0.1:{self.http_port}/content-encoding',
  569. headers={'ytdl-encoding': 'deflate'}))
  570. assert res.headers.get('Content-Encoding') == 'deflate'
  571. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  572. def test_gzip(self, handler):
  573. with handler() as rh:
  574. res = validate_and_send(
  575. rh, Request(
  576. f'http://127.0.0.1:{self.http_port}/content-encoding',
  577. headers={'ytdl-encoding': 'gzip'}))
  578. assert res.headers.get('Content-Encoding') == 'gzip'
  579. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  580. def test_multiple_encodings(self, handler):
  581. with handler() as rh:
  582. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  583. res = validate_and_send(
  584. rh, Request(
  585. f'http://127.0.0.1:{self.http_port}/content-encoding',
  586. headers={'ytdl-encoding': pair}))
  587. assert res.headers.get('Content-Encoding') == pair
  588. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  589. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
  590. def test_unsupported_encoding(self, handler):
  591. with handler() as rh:
  592. res = validate_and_send(
  593. rh, Request(
  594. f'http://127.0.0.1:{self.http_port}/content-encoding',
  595. headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
  596. assert res.headers.get('Content-Encoding') == 'unsupported'
  597. assert res.read() == b'raw'
  598. def test_read(self, handler):
  599. with handler() as rh:
  600. res = validate_and_send(
  601. rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
  602. assert res.readable()
  603. assert res.read(1) == b'H'
  604. assert res.read(3) == b'ost'
  605. assert res.read().decode().endswith('\n\n')
  606. assert res.read() == b''
  607. def test_request_disable_proxy(self, handler):
  608. for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
  609. # Given the handler is configured with a proxy
  610. with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
  611. # When a proxy is explicitly set to None for the request
  612. res = validate_and_send(
  613. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
  614. # Then no proxy should be used
  615. res.close()
  616. assert res.status == 200
  617. @pytest.mark.skip_handlers_if(
  618. lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
  619. def test_noproxy(self, handler):
  620. for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
  621. # Given the handler is configured with a proxy
  622. with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
  623. for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
  624. # When request no proxy includes the request url host
  625. nop_response = validate_and_send(
  626. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
  627. # Then the proxy should not be used
  628. assert nop_response.status == 200
  629. nop_response.close()
  630. @pytest.mark.skip_handlers_if(
  631. lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
  632. def test_allproxy(self, handler):
  633. # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
  634. # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
  635. with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
  636. with pytest.raises(TransportError):
  637. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
  638. with handler(timeout=0.1) as rh:
  639. with pytest.raises(TransportError):
  640. validate_and_send(
  641. rh, Request(
  642. f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
  643. @pytest.mark.skip_handlers_if(lambda _, handler: handler not in ['Urllib', 'CurlCFFI'], 'handler does not support keep_header_casing')
  644. def test_keep_header_casing(self, handler):
  645. with handler() as rh:
  646. res = validate_and_send(
  647. rh, Request(
  648. f'http://127.0.0.1:{self.http_port}/headers', headers={'X-test-heaDer': 'test'}, extensions={'keep_header_casing': True})).read().decode()
  649. assert 'X-test-heaDer: test' in res
  650. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  651. class TestClientCertificate:
  652. @classmethod
  653. def setup_class(cls):
  654. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  655. cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
  656. cacertfn = os.path.join(cls.certdir, 'ca.crt')
  657. cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
  658. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  659. sslctx.verify_mode = ssl.CERT_REQUIRED
  660. sslctx.load_verify_locations(cafile=cacertfn)
  661. sslctx.load_cert_chain(certfn, None)
  662. cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
  663. cls.port = http_server_port(cls.httpd)
  664. cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
  665. cls.server_thread.daemon = True
  666. cls.server_thread.start()
  667. def _run_test(self, handler, **handler_kwargs):
  668. with handler(
  669. # Disable client-side validation of unacceptable self-signed testcert.pem
  670. # The test is of a check on the server side, so unaffected
  671. verify=False,
  672. **handler_kwargs,
  673. ) as rh:
  674. validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
  675. def test_certificate_combined_nopass(self, handler):
  676. self._run_test(handler, client_cert={
  677. 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
  678. })
  679. def test_certificate_nocombined_nopass(self, handler):
  680. self._run_test(handler, client_cert={
  681. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  682. 'client_certificate_key': os.path.join(self.certdir, 'client.key'),
  683. })
  684. def test_certificate_combined_pass(self, handler):
  685. self._run_test(handler, client_cert={
  686. 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
  687. 'client_certificate_password': 'foobar',
  688. })
  689. def test_certificate_nocombined_pass(self, handler):
  690. self._run_test(handler, client_cert={
  691. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  692. 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
  693. 'client_certificate_password': 'foobar',
  694. })
  695. @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  696. class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
  697. def test_supported_impersonate_targets(self, handler):
  698. with handler(headers=std_headers) as rh:
  699. # note: this assumes the impersonate request handler supports the impersonate extension
  700. for target in rh.supported_targets:
  701. res = validate_and_send(rh, Request(
  702. f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target}))
  703. assert res.status == 200
  704. assert std_headers['user-agent'].lower() not in res.read().decode().lower()
  705. def test_response_extensions(self, handler):
  706. with handler() as rh:
  707. for target in rh.supported_targets:
  708. request = Request(
  709. f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
  710. res = validate_and_send(rh, request)
  711. assert res.extensions['impersonate'] == rh._get_request_target(request)
  712. def test_http_error_response_extensions(self, handler):
  713. with handler() as rh:
  714. for target in rh.supported_targets:
  715. request = Request(
  716. f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
  717. try:
  718. validate_and_send(rh, request)
  719. except HTTPError as e:
  720. res = e.response
  721. assert res.extensions['impersonate'] == rh._get_request_target(request)
  722. class TestRequestHandlerMisc:
  723. """Misc generic tests for request handlers, not related to request or validation testing"""
  724. @pytest.mark.parametrize('handler,logger_name', [
  725. ('Requests', 'urllib3'),
  726. ('Websockets', 'websockets.client'),
  727. ('Websockets', 'websockets.server'),
  728. ], indirect=['handler'])
  729. def test_remove_logging_handler(self, handler, logger_name):
  730. # Ensure any logging handlers, which may contain a YoutubeDL instance,
  731. # are removed when we close the request handler
  732. # See: https://github.com/yt-dlp/yt-dlp/issues/8922
  733. logging_handlers = logging.getLogger(logger_name).handlers
  734. before_count = len(logging_handlers)
  735. rh = handler()
  736. assert len(logging_handlers) == before_count + 1
  737. rh.close()
  738. assert len(logging_handlers) == before_count
  739. def test_wrap_request_errors(self):
  740. class TestRequestHandler(RequestHandler):
  741. def _validate(self, request):
  742. if request.headers.get('x-fail'):
  743. raise UnsupportedRequest('test error')
  744. def _send(self, request: Request):
  745. raise RequestError('test error')
  746. with TestRequestHandler(logger=FakeLogger()) as rh:
  747. with pytest.raises(UnsupportedRequest, match='test error') as exc_info:
  748. rh.validate(Request('http://example.com', headers={'x-fail': '1'}))
  749. assert exc_info.value.handler is rh
  750. with pytest.raises(RequestError, match='test error') as exc_info:
  751. rh.send(Request('http://example.com'))
  752. assert exc_info.value.handler is rh
  753. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  754. class TestUrllibRequestHandler(TestRequestHandlerBase):
  755. def test_file_urls(self, handler):
  756. # See https://github.com/ytdl-org/youtube-dl/issues/8227
  757. tf = tempfile.NamedTemporaryFile(delete=False)
  758. tf.write(b'foobar')
  759. tf.close()
  760. req = Request(pathlib.Path(tf.name).as_uri())
  761. with handler() as rh:
  762. with pytest.raises(UnsupportedRequest):
  763. rh.validate(req)
  764. # Test that urllib never loaded FileHandler
  765. with pytest.raises(TransportError):
  766. rh.send(req)
  767. with handler(enable_file_urls=True) as rh:
  768. res = validate_and_send(rh, req)
  769. assert res.read() == b'foobar'
  770. res.close()
  771. os.unlink(tf.name)
  772. def test_http_error_returns_content(self, handler):
  773. # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
  774. def get_response():
  775. with handler() as rh:
  776. # headers url
  777. try:
  778. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
  779. except HTTPError as e:
  780. return e.response
  781. assert get_response().read() == b'<html></html>'
  782. def test_verify_cert_error_text(self, handler):
  783. # Check the output of the error message
  784. with handler() as rh:
  785. with pytest.raises(
  786. CertificateVerifyError,
  787. match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate',
  788. ):
  789. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  790. @pytest.mark.parametrize('req,match,version_check', [
  791. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
  792. # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
  793. (
  794. Request('http://127.0.0.1', method='GET\n'),
  795. 'method can\'t contain control characters',
  796. lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5),
  797. ),
  798. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
  799. # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
  800. (
  801. Request('http://127.0.0. 1', method='GET'),
  802. 'URL can\'t contain control characters',
  803. lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3),
  804. ),
  805. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
  806. (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
  807. ])
  808. def test_httplib_validation_errors(self, handler, req, match, version_check):
  809. if version_check and version_check(sys.version_info):
  810. pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')
  811. with handler() as rh:
  812. with pytest.raises(RequestError, match=match) as exc_info:
  813. validate_and_send(rh, req)
  814. assert not isinstance(exc_info.value, TransportError)
  815. @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
  816. class TestRequestsRequestHandler(TestRequestHandlerBase):
  817. @pytest.mark.parametrize('raised,expected', [
  818. (lambda: requests.exceptions.ConnectTimeout(), TransportError),
  819. (lambda: requests.exceptions.ReadTimeout(), TransportError),
  820. (lambda: requests.exceptions.Timeout(), TransportError),
  821. (lambda: requests.exceptions.ConnectionError(), TransportError),
  822. (lambda: requests.exceptions.ProxyError(), ProxyError),
  823. (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
  824. (lambda: requests.exceptions.SSLError(), SSLError),
  825. (lambda: requests.exceptions.InvalidURL(), RequestError),
  826. (lambda: requests.exceptions.InvalidHeader(), RequestError),
  827. # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
  828. (lambda: urllib3.exceptions.HTTPError(), TransportError),
  829. (lambda: requests.exceptions.RequestException(), RequestError),
  830. # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
  831. ])
  832. def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
  833. with handler() as rh:
  834. def mock_get_instance(*args, **kwargs):
  835. class MockSession:
  836. def request(self, *args, **kwargs):
  837. raise raised()
  838. return MockSession()
  839. monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
  840. with pytest.raises(expected) as exc_info:
  841. rh.send(Request('http://fake'))
  842. assert exc_info.type is expected
  843. @pytest.mark.parametrize('raised,expected,match', [
  844. (lambda: urllib3.exceptions.SSLError(), SSLError, None),
  845. (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
  846. (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
  847. (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
  848. (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
  849. (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
  850. (
  851. lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
  852. IncompleteRead,
  853. '3 bytes read, 4 more expected',
  854. ),
  855. (
  856. lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
  857. IncompleteRead,
  858. '3 bytes read, 5 more expected',
  859. ),
  860. ])
  861. def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
  862. from requests.models import Response as RequestsResponse
  863. from urllib3.response import HTTPResponse as Urllib3Response
  864. from yt_dlp.networking._requests import RequestsResponseAdapter
  865. requests_res = RequestsResponse()
  866. requests_res.raw = Urllib3Response(body=b'', status=200)
  867. res = RequestsResponseAdapter(requests_res)
  868. def mock_read(*args, **kwargs):
  869. raise raised()
  870. monkeypatch.setattr(res.fp, 'read', mock_read)
  871. with pytest.raises(expected, match=match) as exc_info:
  872. res.read()
  873. assert exc_info.type is expected
  874. def test_close(self, handler, monkeypatch):
  875. rh = handler()
  876. session = rh._get_instance(cookiejar=rh.cookiejar)
  877. called = False
  878. original_close = session.close
  879. def mock_close(*args, **kwargs):
  880. nonlocal called
  881. called = True
  882. return original_close(*args, **kwargs)
  883. monkeypatch.setattr(session, 'close', mock_close)
  884. rh.close()
  885. assert called
  886. @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  887. class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
  888. @pytest.mark.parametrize('params,extensions', [
  889. ({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
  890. ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
  891. ])
  892. def test_impersonate(self, handler, params, extensions):
  893. with handler(headers=std_headers, **params) as rh:
  894. res = validate_and_send(
  895. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode()
  896. assert 'sec-ch-ua: "Chromium";v="110"' in res
  897. # Check that user agent is added over ours
  898. assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res
  899. def test_headers(self, handler):
  900. with handler(headers=std_headers) as rh:
  901. # Ensure curl-impersonate overrides our standard headers (usually added
  902. res = validate_and_send(
  903. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
  904. 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()
  905. assert std_headers['user-agent'].lower() not in res
  906. assert std_headers['accept-language'].lower() not in res
  907. assert std_headers['sec-fetch-mode'].lower() not in res
  908. # other than UA, custom headers that differ from std_headers should be kept
  909. assert 'sec-fetch-mode: custom' in res
  910. assert 'x-custom: test' in res
  911. # but when not impersonating don't remove std_headers
  912. res = validate_and_send(
  913. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
  914. # std_headers should be present
  915. for k, v in std_headers.items():
  916. assert f'{k}: {v}'.lower() in res
  917. @pytest.mark.parametrize('raised,expected,match', [
  918. (lambda: curl_cffi.requests.errors.RequestsError(
  919. '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
  920. (lambda: curl_cffi.requests.errors.RequestsError(
  921. '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
  922. (lambda: curl_cffi.requests.errors.RequestsError(
  923. '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None),
  924. ])
  925. def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
  926. import curl_cffi.requests
  927. from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter
  928. curl_res = curl_cffi.requests.Response()
  929. res = CurlCFFIResponseAdapter(curl_res)
  930. def mock_read(*args, **kwargs):
  931. try:
  932. raise raised()
  933. except Exception as e:
  934. e.response = curl_res
  935. raise
  936. monkeypatch.setattr(res.fp, 'read', mock_read)
  937. with pytest.raises(expected, match=match) as exc_info:
  938. res.read()
  939. assert exc_info.type is expected
  940. @pytest.mark.parametrize('raised,expected,match', [
  941. (lambda: curl_cffi.requests.errors.RequestsError(
  942. '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
  943. (lambda: curl_cffi.requests.errors.RequestsError(
  944. '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None),
  945. (lambda: curl_cffi.requests.errors.RequestsError(
  946. '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None),
  947. (lambda: curl_cffi.requests.errors.RequestsError(
  948. '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None),
  949. (lambda: curl_cffi.requests.errors.RequestsError(
  950. '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None),
  951. ])
  952. def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match):
  953. import curl_cffi.requests
  954. curl_res = curl_cffi.requests.Response()
  955. curl_res.status_code = 301
  956. with handler() as rh:
  957. original_get_instance = rh._get_instance
  958. def mock_get_instance(*args, **kwargs):
  959. instance = original_get_instance(*args, **kwargs)
  960. def request(*_, **__):
  961. try:
  962. raise raised()
  963. except Exception as e:
  964. e.response = curl_res
  965. raise
  966. monkeypatch.setattr(instance, 'request', request)
  967. return instance
  968. monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
  969. with pytest.raises(expected) as exc_info:
  970. rh.send(Request('http://fake'))
  971. assert exc_info.type is expected
  972. def test_response_reader(self, handler):
  973. class FakeResponse:
  974. def __init__(self, raise_error=False):
  975. self.raise_error = raise_error
  976. self.closed = False
  977. def iter_content(self):
  978. yield b'foo'
  979. yield b'bar'
  980. yield b'z'
  981. if self.raise_error:
  982. raise Exception('test')
  983. def close(self):
  984. self.closed = True
  985. from yt_dlp.networking._curlcffi import CurlCFFIResponseReader
  986. res = CurlCFFIResponseReader(FakeResponse())
  987. assert res.readable
  988. assert res.bytes_read == 0
  989. assert res.read(1) == b'f'
  990. assert res.bytes_read == 3
  991. assert res._buffer == b'oo'
  992. assert res.read(2) == b'oo'
  993. assert res.bytes_read == 3
  994. assert res._buffer == b''
  995. assert res.read(2) == b'ba'
  996. assert res.bytes_read == 6
  997. assert res._buffer == b'r'
  998. assert res.read(3) == b'rz'
  999. assert res.bytes_read == 7
  1000. assert res._buffer == b''
  1001. assert res.closed
  1002. assert res._response.closed
  1003. # should handle no size param
  1004. res2 = CurlCFFIResponseReader(FakeResponse())
  1005. assert res2.read() == b'foobarz'
  1006. assert res2.bytes_read == 7
  1007. assert res2._buffer == b''
  1008. assert res2.closed
  1009. # should close on an exception
  1010. res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True))
  1011. with pytest.raises(Exception, match='test'):
  1012. res3.read()
  1013. assert res3._buffer == b''
  1014. assert res3.bytes_read == 7
  1015. assert res3.closed
  1016. # buffer should be cleared on close
  1017. res4 = CurlCFFIResponseReader(FakeResponse())
  1018. res4.read(2)
  1019. assert res4._buffer == b'o'
  1020. res4.close()
  1021. assert res4.closed
  1022. assert res4._buffer == b''
  1023. def run_validation(handler, error, req, **handler_kwargs):
  1024. with handler(**handler_kwargs) as rh:
  1025. if error:
  1026. with pytest.raises(error):
  1027. rh.validate(req)
  1028. else:
  1029. rh.validate(req)
  1030. class TestRequestHandlerValidation:
  1031. class ValidationRH(RequestHandler):
  1032. def _send(self, request):
  1033. raise RequestError('test')
  1034. class NoCheckRH(ValidationRH):
  1035. _SUPPORTED_FEATURES = None
  1036. _SUPPORTED_PROXY_SCHEMES = None
  1037. _SUPPORTED_URL_SCHEMES = None
  1038. def _check_extensions(self, extensions):
  1039. extensions.clear()
  1040. class HTTPSupportedRH(ValidationRH):
  1041. _SUPPORTED_URL_SCHEMES = ('http',)
  1042. URL_SCHEME_TESTS = [
  1043. # scheme, expected to fail, handler kwargs
  1044. ('Urllib', [
  1045. ('http', False, {}),
  1046. ('https', False, {}),
  1047. ('data', False, {}),
  1048. ('ftp', False, {}),
  1049. ('file', UnsupportedRequest, {}),
  1050. ('file', False, {'enable_file_urls': True}),
  1051. ]),
  1052. ('Requests', [
  1053. ('http', False, {}),
  1054. ('https', False, {}),
  1055. ]),
  1056. ('Websockets', [
  1057. ('ws', False, {}),
  1058. ('wss', False, {}),
  1059. ]),
  1060. ('CurlCFFI', [
  1061. ('http', False, {}),
  1062. ('https', False, {}),
  1063. ]),
  1064. (NoCheckRH, [('http', False, {})]),
  1065. (ValidationRH, [('http', UnsupportedRequest, {})]),
  1066. ]
  1067. PROXY_SCHEME_TESTS = [
  1068. # proxy scheme, expected to fail
  1069. ('Urllib', 'http', [
  1070. ('http', False),
  1071. ('https', UnsupportedRequest),
  1072. ('socks4', False),
  1073. ('socks4a', False),
  1074. ('socks5', False),
  1075. ('socks5h', False),
  1076. ('socks', UnsupportedRequest),
  1077. ]),
  1078. ('Requests', 'http', [
  1079. ('http', False),
  1080. ('https', False),
  1081. ('socks4', False),
  1082. ('socks4a', False),
  1083. ('socks5', False),
  1084. ('socks5h', False),
  1085. ]),
  1086. ('CurlCFFI', 'http', [
  1087. ('http', False),
  1088. ('https', False),
  1089. ('socks4', False),
  1090. ('socks4a', False),
  1091. ('socks5', False),
  1092. ('socks5h', False),
  1093. ]),
  1094. ('Websockets', 'ws', [
  1095. ('http', UnsupportedRequest),
  1096. ('https', UnsupportedRequest),
  1097. ('socks4', False),
  1098. ('socks4a', False),
  1099. ('socks5', False),
  1100. ('socks5h', False),
  1101. ]),
  1102. (NoCheckRH, 'http', [('http', False)]),
  1103. (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
  1104. (NoCheckRH, 'http', [('http', False)]),
  1105. (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
  1106. ]
  1107. PROXY_KEY_TESTS = [
  1108. # proxy key, proxy scheme, expected to fail
  1109. ('Urllib', 'http', [
  1110. ('all', 'http', False),
  1111. ('unrelated', 'http', False),
  1112. ]),
  1113. ('Requests', 'http', [
  1114. ('all', 'http', False),
  1115. ('unrelated', 'http', False),
  1116. ]),
  1117. ('CurlCFFI', 'http', [
  1118. ('all', 'http', False),
  1119. ('unrelated', 'http', False),
  1120. ]),
  1121. ('Websockets', 'ws', [
  1122. ('all', 'socks5', False),
  1123. ('unrelated', 'socks5', False),
  1124. ]),
  1125. (NoCheckRH, 'http', [('all', 'http', False)]),
  1126. (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
  1127. (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
  1128. ]
  1129. EXTENSION_TESTS = [
  1130. ('Urllib', 'http', [
  1131. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1132. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1133. ({'cookiejar': CookieJar()}, AssertionError),
  1134. ({'timeout': 1}, False),
  1135. ({'timeout': 'notatimeout'}, AssertionError),
  1136. ({'unsupported': 'value'}, UnsupportedRequest),
  1137. ({'legacy_ssl': False}, False),
  1138. ({'legacy_ssl': True}, False),
  1139. ({'legacy_ssl': 'notabool'}, AssertionError),
  1140. ({'keep_header_casing': True}, UnsupportedRequest),
  1141. ]),
  1142. ('Requests', 'http', [
  1143. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1144. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1145. ({'timeout': 1}, False),
  1146. ({'timeout': 'notatimeout'}, AssertionError),
  1147. ({'unsupported': 'value'}, UnsupportedRequest),
  1148. ({'legacy_ssl': False}, False),
  1149. ({'legacy_ssl': True}, False),
  1150. ({'legacy_ssl': 'notabool'}, AssertionError),
  1151. ({'keep_header_casing': False}, False),
  1152. ({'keep_header_casing': True}, False),
  1153. ({'keep_header_casing': 'notabool'}, AssertionError),
  1154. ]),
  1155. ('CurlCFFI', 'http', [
  1156. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1157. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1158. ({'timeout': 1}, False),
  1159. ({'timeout': 'notatimeout'}, AssertionError),
  1160. ({'unsupported': 'value'}, UnsupportedRequest),
  1161. ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest),
  1162. ({'impersonate': 123}, AssertionError),
  1163. ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False),
  1164. ({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
  1165. ({'impersonate': ImpersonateTarget()}, False),
  1166. ({'impersonate': 'chrome'}, AssertionError),
  1167. ({'legacy_ssl': False}, False),
  1168. ({'legacy_ssl': True}, False),
  1169. ({'legacy_ssl': 'notabool'}, AssertionError),
  1170. ]),
  1171. (NoCheckRH, 'http', [
  1172. ({'cookiejar': 'notacookiejar'}, False),
  1173. ({'somerandom': 'test'}, False), # but any extension is allowed through
  1174. ]),
  1175. ('Websockets', 'ws', [
  1176. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1177. ({'timeout': 2}, False),
  1178. ({'legacy_ssl': False}, False),
  1179. ({'legacy_ssl': True}, False),
  1180. ({'legacy_ssl': 'notabool'}, AssertionError),
  1181. ]),
  1182. ]
  1183. @pytest.mark.parametrize('handler,fail,scheme', [
  1184. ('Urllib', False, 'http'),
  1185. ('Requests', False, 'http'),
  1186. ('CurlCFFI', False, 'http'),
  1187. ('Websockets', False, 'ws'),
  1188. ], indirect=['handler'])
  1189. def test_no_proxy(self, handler, fail, scheme):
  1190. run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
  1191. run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
  1192. @pytest.mark.parametrize('handler,scheme', [
  1193. ('Urllib', 'http'),
  1194. (HTTPSupportedRH, 'http'),
  1195. ('Requests', 'http'),
  1196. ('CurlCFFI', 'http'),
  1197. ('Websockets', 'ws'),
  1198. ], indirect=['handler'])
  1199. def test_empty_proxy(self, handler, scheme):
  1200. run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
  1201. run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
  1202. @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
  1203. @pytest.mark.parametrize('handler,scheme', [
  1204. ('Urllib', 'http'),
  1205. (HTTPSupportedRH, 'http'),
  1206. ('Requests', 'http'),
  1207. ('CurlCFFI', 'http'),
  1208. ('Websockets', 'ws'),
  1209. ], indirect=['handler'])
  1210. def test_invalid_proxy_url(self, handler, scheme, proxy_url):
  1211. run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
  1212. @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
  1213. (handler_tests[0], scheme, fail, handler_kwargs)
  1214. for handler_tests in URL_SCHEME_TESTS
  1215. for scheme, fail, handler_kwargs in handler_tests[1]
  1216. ], indirect=['handler'])
  1217. def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
  1218. run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
  1219. @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
  1220. (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
  1221. for handler_tests in PROXY_KEY_TESTS
  1222. for proxy_key, proxy_scheme, fail in handler_tests[2]
  1223. ], indirect=['handler'])
  1224. def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
  1225. run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
  1226. run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
  1227. @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
  1228. (handler_tests[0], handler_tests[1], scheme, fail)
  1229. for handler_tests in PROXY_SCHEME_TESTS
  1230. for scheme, fail in handler_tests[2]
  1231. ], indirect=['handler'])
  1232. def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
  1233. run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
  1234. run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
  1235. @pytest.mark.parametrize('handler,scheme,extensions,fail', [
  1236. (handler_tests[0], handler_tests[1], extensions, fail)
  1237. for handler_tests in EXTENSION_TESTS
  1238. for extensions, fail in handler_tests[2]
  1239. ], indirect=['handler'])
  1240. def test_extension(self, handler, scheme, extensions, fail):
  1241. run_validation(
  1242. handler, fail, Request(f'{scheme}://', extensions=extensions))
  1243. def test_invalid_request_type(self):
  1244. rh = self.ValidationRH(logger=FakeLogger())
  1245. for method in (rh.validate, rh.send):
  1246. with pytest.raises(TypeError, match='Expected an instance of Request'):
  1247. method('not a request')
  1248. class FakeResponse(Response):
  1249. def __init__(self, request):
  1250. # XXX: we could make request part of standard response interface
  1251. self.request = request
  1252. super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url)
  1253. class FakeRH(RequestHandler):
  1254. def __init__(self, *args, **params):
  1255. self.params = params
  1256. super().__init__(*args, **params)
  1257. def _validate(self, request):
  1258. return
  1259. def _send(self, request: Request):
  1260. if request.url.startswith('ssl://'):
  1261. raise SSLError(request.url[len('ssl://'):])
  1262. return FakeResponse(request)
  1263. class FakeRHYDL(FakeYDL):
  1264. def __init__(self, *args, **kwargs):
  1265. super().__init__(*args, **kwargs)
  1266. self._request_director = self.build_request_director([FakeRH])
  1267. class AllUnsupportedRHYDL(FakeYDL):
  1268. def __init__(self, *args, **kwargs):
  1269. class UnsupportedRH(RequestHandler):
  1270. def _send(self, request: Request):
  1271. pass
  1272. _SUPPORTED_FEATURES = ()
  1273. _SUPPORTED_PROXY_SCHEMES = ()
  1274. _SUPPORTED_URL_SCHEMES = ()
  1275. super().__init__(*args, **kwargs)
  1276. self._request_director = self.build_request_director([UnsupportedRH])
  1277. class TestRequestDirector:
  1278. def test_handler_operations(self):
  1279. director = RequestDirector(logger=FakeLogger())
  1280. handler = FakeRH(logger=FakeLogger())
  1281. director.add_handler(handler)
  1282. assert director.handlers.get(FakeRH.RH_KEY) is handler
  1283. # Handler should overwrite
  1284. handler2 = FakeRH(logger=FakeLogger())
  1285. director.add_handler(handler2)
  1286. assert director.handlers.get(FakeRH.RH_KEY) is not handler
  1287. assert director.handlers.get(FakeRH.RH_KEY) is handler2
  1288. assert len(director.handlers) == 1
  1289. class AnotherFakeRH(FakeRH):
  1290. pass
  1291. director.add_handler(AnotherFakeRH(logger=FakeLogger()))
  1292. assert len(director.handlers) == 2
  1293. assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
  1294. director.handlers.pop(FakeRH.RH_KEY, None)
  1295. assert director.handlers.get(FakeRH.RH_KEY) is None
  1296. assert len(director.handlers) == 1
  1297. # RequestErrors should passthrough
  1298. with pytest.raises(SSLError):
  1299. director.send(Request('ssl://something'))
  1300. def test_send(self):
  1301. director = RequestDirector(logger=FakeLogger())
  1302. with pytest.raises(RequestError):
  1303. director.send(Request('any://'))
  1304. director.add_handler(FakeRH(logger=FakeLogger()))
  1305. assert isinstance(director.send(Request('http://')), FakeResponse)
  1306. def test_unsupported_handlers(self):
  1307. class SupportedRH(RequestHandler):
  1308. _SUPPORTED_URL_SCHEMES = ['http']
  1309. def _send(self, request: Request):
  1310. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  1311. director = RequestDirector(logger=FakeLogger())
  1312. director.add_handler(SupportedRH(logger=FakeLogger()))
  1313. director.add_handler(FakeRH(logger=FakeLogger()))
  1314. # First should take preference
  1315. assert director.send(Request('http://')).read() == b'supported'
  1316. assert director.send(Request('any://')).read() == b''
  1317. director.handlers.pop(FakeRH.RH_KEY)
  1318. with pytest.raises(NoSupportingHandlers):
  1319. director.send(Request('any://'))
  1320. def test_unexpected_error(self):
  1321. director = RequestDirector(logger=FakeLogger())
  1322. class UnexpectedRH(FakeRH):
  1323. def _send(self, request: Request):
  1324. raise TypeError('something')
  1325. director.add_handler(UnexpectedRH(logger=FakeLogger))
  1326. with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
  1327. director.send(Request('any://'))
  1328. director.handlers.clear()
  1329. assert len(director.handlers) == 0
  1330. # Should not be fatal
  1331. director.add_handler(FakeRH(logger=FakeLogger()))
  1332. director.add_handler(UnexpectedRH(logger=FakeLogger))
  1333. assert director.send(Request('any://'))
  def test_preference(self):
      """Preference functions added to the director influence which handler serves a request."""

      class SomeRH(RequestHandler):
          _SUPPORTED_URL_SCHEMES = ['http']

          def _send(self, request: Request):
              body = io.BytesIO(b'supported')
              return Response(fp=body, headers={}, url=request.url)

      def rank_handler(rh, request):
          # Only SomeRH is ranked; any other handler stays at 0
          if not isinstance(rh, SomeRH):
              return 0
          if 'prefer' in request.headers:
              return 100
          return -1

      director = RequestDirector(logger=FakeLogger())
      director.add_handler(FakeRH(logger=FakeLogger()))
      director.add_handler(SomeRH(logger=FakeLogger()))
      director.preferences.add(rank_handler)

      # Without the header SomeRH is ranked -1, so its b'supported' body is not expected
      plain_response = director.send(Request('http://'))
      assert plain_response.read() == b''

      # With the header SomeRH is ranked 100 and serves the request
      preferred_response = director.send(Request('http://', headers={'prefer': '1'}))
      assert preferred_response.read() == b'supported'
  def test_close(self, monkeypatch):
      """Closing the director invokes close() on its registered handler."""
      director = RequestDirector(logger=FakeLogger())
      director.add_handler(FakeRH(logger=FakeLogger()))
      handler = director.handlers[FakeRH.RH_KEY]

      # Record every invocation instead of flipping a nonlocal flag
      close_calls = []

      def record_close(*args, **kwargs):
          close_calls.append((args, kwargs))

      monkeypatch.setattr(handler, 'close', record_close)
      director.close()
      assert close_calls
  1359. # XXX: do we want to move this to test_YoutubeDL.py?
  class TestYoutubeDLNetworking:
      """Checks of how YoutubeDL parameters and requests reach the request handlers."""

      @staticmethod
      def build_handler(ydl, handler: RequestHandler = FakeRH):
          """Return the *handler* instance from a director that *ydl* builds for it alone.

          Returns None if the built director has no entry under the handler's RH_KEY.
          """
          director = ydl.build_request_director([handler])
          return director.handlers.get(handler.RH_KEY)

      def test_compat_opener(self):
          """`_opener` yields a urllib OpenerDirector (DeprecationWarning silenced)."""
          with FakeYDL() as ydl, warnings.catch_warnings():
              warnings.simplefilter('ignore', category=DeprecationWarning)
              assert isinstance(ydl._opener, urllib.request.OpenerDirector)

      @pytest.mark.parametrize(('proxy', 'expected'), [
          ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
          ('', {'all': '__noproxy__'}),
          # env, set https
          (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}),
      ])
      def test_proxy(self, proxy, expected, monkeypatch):
          """`ydl.proxies` reflects the `proxy` param, with HTTP_PROXY set in the environment."""
          monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081')
          with FakeYDL({'proxy': proxy}) as ydl:
              assert ydl.proxies == expected

      def test_compat_request(self):
          """urlopen() accepts a str URL and a urllib Request, and rejects None."""
          with FakeRHYDL() as ydl:
              assert ydl.urlopen('test://')

              legacy_req = urllib.request.Request(
                  'http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
              legacy_req.add_unredirected_header('Cookie', 'bob=bob')
              legacy_req.timeout = 2

              with warnings.catch_warnings():
                  warnings.simplefilter('ignore', category=DeprecationWarning)
                  converted = ydl.urlopen(legacy_req).request
                  # Every field of the urllib request must survive the conversion
                  assert converted.url == legacy_req.get_full_url()
                  assert converted.data == legacy_req.data
                  assert converted.method == legacy_req.get_method()
                  assert 'X-Test' in converted.headers
                  assert 'Cookie' in converted.headers
                  assert converted.extensions.get('timeout') == 2

              with pytest.raises(AssertionError):
                  ydl.urlopen(None)

      def test_extract_basic_auth(self):
          """Credentials in the URL end up in a Basic Authorization header."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('http://user:pass@foo.bar'))
              sent_headers = response.request.headers
              assert sent_headers['Authorization'] == 'Basic dXNlcjpwYXNz'

      def test_sanitize_url(self):
          """A mistyped `httpss` scheme is corrected to `https` in the sent request."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('httpss://foo.bar'))
              sent_url = response.request.url
              assert sent_url == 'https://foo.bar'

      def test_file_urls_error(self):
          """file:// URLs raise RequestError with the 'disabled by default' message."""
          # use urllib handler
          expected_msg = r'file:// URLs are disabled by default'
          with FakeYDL() as ydl, pytest.raises(RequestError, match=expected_msg):
              ydl.urlopen('file://')

      @pytest.mark.parametrize('scheme', ['ws', 'wss'])
      def test_websocket_unavailable_error(self, scheme):
          """ws/wss requests raise RequestError mentioning missing WebSocket support."""
          expected_msg = r'This request requires WebSocket support'
          with AllUnsupportedRHYDL() as ydl, pytest.raises(RequestError, match=expected_msg):
              ydl.urlopen(f'{scheme}://')

      def test_legacy_server_connect_error(self):
          """Specific SSL error strings produce the --legacy-server-connect hint."""
          hint = r'Try using --legacy-server-connect'
          with FakeRHYDL() as ydl:
              for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                  with pytest.raises(RequestError, match=hint):
                      ydl.urlopen(f'ssl://{error}')

              # A different error string is expected to raise SSLError carrying that text
              with pytest.raises(SSLError, match='testerror'):
                  ydl.urlopen('ssl://testerror')

      def test_unsupported_impersonate_target(self):
          """An impersonate extension raises RequestError when only a plain RequestHandler exists."""

          class HTTPRH(RequestHandler):
              _SUPPORTED_URL_SCHEMES = ('http',)
              _SUPPORTED_PROXY_SCHEMES = None

              def _send(self, request: Request):
                  pass

          class PlainHandlerYDL(FakeYDL):
              def __init__(self, *args, **kwargs):
                  super().__init__(*args, **kwargs)
                  self._request_director = self.build_request_director([HTTPRH])

          with PlainHandlerYDL() as ydl, pytest.raises(
                  RequestError, match=r'Impersonate target "test" is not available'):
              target = ImpersonateTarget('test', None, None, None)
              ydl.urlopen(Request('http://', extensions={'impersonate': target}))

      def test_unsupported_impersonate_extension(self):
          """An impersonate extension raises RequestError when the handler's target map lacks it."""

          class IRH(ImpersonateRequestHandler):
              _SUPPORTED_URL_SCHEMES = ('http',)
              _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
              _SUPPORTED_PROXY_SCHEMES = None

              def _send(self, request: Request):
                  pass

          class ImpersonateHandlerYDL(FakeYDL):
              def __init__(self, *args, **kwargs):
                  super().__init__(*args, **kwargs)
                  self._request_director = self.build_request_director([IRH])

          with ImpersonateHandlerYDL() as ydl, pytest.raises(
                  RequestError, match=r'Impersonate target "test" is not available'):
              target = ImpersonateTarget('test', None, None, None)
              ydl.urlopen(Request('http://', extensions={'impersonate': target}))

      def test_raise_impersonate_error(self):
          """Constructing FakeYDL with an unavailable `impersonate` param raises YoutubeDLError."""
          expected_msg = r'Impersonate target "test" is not available'
          with pytest.raises(YoutubeDLError, match=expected_msg):
              FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)})

      def test_pass_impersonate_param(self, monkeypatch):
          """The `impersonate` param is exposed as `impersonate` on the built handler."""

          class IRH(ImpersonateRequestHandler):
              _SUPPORTED_URL_SCHEMES = ('http',)
              _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}

              def _send(self, request: Request):
                  pass

          # Bypass the check on initialize
          original_build = FakeYDL.build_request_director

          def build_with_irh(ydl, handlers, preferences=None):
              return original_build(ydl, handlers=[IRH])

          monkeypatch.setattr(FakeYDL, 'build_request_director', build_with_irh)

          params = {'impersonate': ImpersonateTarget('abc', None, None, None)}
          with FakeYDL(params) as ydl:
              handler = self.build_handler(ydl, IRH)
              assert handler.impersonate == ImpersonateTarget('abc', None, None, None)

      def test_get_impersonate_targets(self):
          """Available targets are gathered from every handler in the director."""
          handlers = []
          for client in ('abc', 'xyz', 'asd'):
              class TestRH(ImpersonateRequestHandler):
                  _SUPPORTED_URL_SCHEMES = ('http',)
                  _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(client): 'test'}
                  RH_KEY = client
                  RH_NAME = client

                  def _send(self, request: Request):
                      pass

              handlers.append(TestRH)

          with FakeYDL() as ydl:
              ydl._request_director = ydl.build_request_director(handlers)

              available = set(ydl._get_available_impersonate_targets())
              assert available == {
                  (ImpersonateTarget(name), name) for name in ('xyz', 'abc', 'asd')
              }

              assert ydl._impersonate_target_available(ImpersonateTarget('abc'))
              assert ydl._impersonate_target_available(ImpersonateTarget())
              assert not ydl._impersonate_target_available(ImpersonateTarget('zxy'))

      @pytest.mark.parametrize(('proxy_key', 'proxy_url', 'expected'), [
          ('http', '__noproxy__', None),
          ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
          ('https', 'example.com', 'http://example.com'),
          ('https', '//example.com', 'http://example.com'),
          ('https', 'socks5://example.com', 'socks5h://example.com'),
          ('http', 'socks://example.com', 'socks4://example.com'),
          ('http', 'socks4://example.com', 'socks4://example.com'),
          # clean_proxies should ignore bad proxies
          ('unrelated', '/bad/proxy', '/bad/proxy'),
      ])
      def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch):
          """Proxy URLs are normalised both per request and when building a handler."""
          # proxies should be cleaned in urlopen()
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url}))
              sent = response.request
              assert sent.proxies[proxy_key] == expected

          # and should also be cleaned when building the handler
          env_name = f'{proxy_key.upper()}_PROXY'
          monkeypatch.setenv(env_name, proxy_url)
          with FakeYDL() as ydl:
              handler = self.build_handler(ydl)
              assert handler.proxies[proxy_key] == expected

      def test_clean_proxy_header(self):
          """The `ytdl-request-proxy` header is removed and turned into an `all` proxy."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'}))
              sent = response.request
              assert 'ytdl-request-proxy' not in sent.headers
              assert sent.proxies == {'all': 'http://foo.bar'}

          with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
              handler = self.build_handler(ydl)
              assert 'ytdl-request-proxy' not in handler.headers
              assert handler.proxies == {'all': 'http://foo.bar'}

      def test_clean_header(self):
          """Internal control headers are stripped from requests and handler headers."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
              sent_headers = response.request.headers
              assert 'Youtubedl-no-compression' not in sent_headers
              assert sent_headers.get('Accept-Encoding') == 'identity'

          with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
              handler = self.build_handler(ydl)
              assert 'Youtubedl-no-compression' not in handler.headers
              assert handler.headers.get('Accept-Encoding') == 'identity'

          with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
              handler = self.build_handler(ydl)
              assert 'Ytdl-socks-proxy' not in handler.headers

      def test_build_handler_params(self):
          """YoutubeDL params are mapped onto the matching attributes of the built handler."""
          params = {
              'http_headers': {'test': 'testtest'},
              'socket_timeout': 2,
              'proxy': 'http://127.0.0.1:8080',
              'source_address': '127.0.0.45',
              'debug_printtraffic': True,
              'compat_opts': ['no-certifi'],
              'nocheckcertificate': True,
              'legacyserverconnect': True,
          }
          with FakeYDL(params) as ydl:
              handler = self.build_handler(ydl)
              assert handler.headers.get('test') == 'testtest'
              assert 'Accept' in handler.headers  # ensure std_headers are still there
              assert handler.timeout == 2
              assert handler.proxies.get('all') == 'http://127.0.0.1:8080'
              assert handler.source_address == '127.0.0.45'
              assert handler.verbose is True
              assert handler.prefer_system_certs is True
              assert handler.verify is False
              assert handler.legacy_ssl_support is True

      @pytest.mark.parametrize('ydl_params', [
          {'client_certificate': 'fakecert.crt'},
          {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
          {
              'client_certificate': 'fakecert.crt',
              'client_certificate_key': 'fakekey.key',
              'client_certificate_password': 'foobar',
          },
          {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
      ])
      def test_client_certificate(self, ydl_params):
          """Client certificate params are handed to the handler unchanged."""
          with FakeYDL(ydl_params) as ydl:
              handler = self.build_handler(ydl)
              assert handler._client_cert == ydl_params  # XXX: Too bound to implementation

      def test_urllib_file_urls(self):
          """`enable_file_urls` is forwarded to the urllib handler for both values."""
          for enabled in (False, True):
              with FakeYDL({'enable_file_urls': enabled}) as ydl:
                  handler = self.build_handler(ydl, UrllibRH)
                  assert handler.enable_file_urls is enabled

      def test_compat_opt_prefer_urllib(self):
          """The `prefer-legacy-http-handler` compat option registers one preference for urllib."""
          # This assumes urllib only has a preference when this compat opt is given
          with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
              director = ydl.build_request_director([UrllibRH])
              assert len(director.preferences) == 1
              preference = director.preferences.pop()
              assert preference(UrllibRH, None)
  class TestRequest:
      """Behaviour of the Request object: URL, query, method, headers, data and copying."""

      def test_query(self):
          """`query` is merged into the URL; updating replaces same-named parameters."""
          request = Request('http://example.com?q=something', query={'v': 'xyz'})
          assert request.url == 'http://example.com?q=something&v=xyz'

          request.update(query={'v': '123'})
          assert request.url == 'http://example.com?q=something&v=123'

          request.update(url='http://example.com', query={'v': 'xyz'})
          assert request.url == 'http://example.com?v=xyz'

      def test_method(self):
          """The method follows the presence of data until it is set explicitly."""
          request = Request('http://example.com')
          assert request.method == 'GET'

          # No explicit method: GET without data, POST with data
          request.data = b'test'
          assert request.method == 'POST'
          request.data = None
          assert request.method == 'GET'

          # Once set explicitly, the method no longer depends on data
          request.data = b'test2'
          request.method = 'PUT'
          assert request.method == 'PUT'
          request.data = None
          assert request.method == 'PUT'

          with pytest.raises(TypeError):
              request.method = 1

      def test_request_helpers(self):
          """HEADRequest and PUTRequest preset their respective methods."""
          for helper, method in ((HEADRequest, 'HEAD'), (PUTRequest, 'PUT')):
              assert helper('http://example.com').method == method

      def test_headers(self):
          """Headers are case-insensitive, mergeable via update() and always an HTTPHeaderDict."""
          request = Request('http://example.com', headers={'tesT': 'test'})
          assert request.headers == HTTPHeaderDict({'test': 'test'})

          request.update(headers={'teSt2': 'test2'})
          assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

          # An HTTPHeaderDict is stored as the very same object
          header_dict = HTTPHeaderDict({'test': 'test'})
          request.headers = header_dict
          assert request.headers == HTTPHeaderDict({'test': 'test'})
          assert request.headers is header_dict

          # test converts dict to case insensitive dict
          plain_dict = {'test2': 'test2'}
          request.headers = plain_dict
          assert isinstance(request.headers, HTTPHeaderDict)
          assert request.headers is not plain_dict

          with pytest.raises(TypeError):
              request.headers = None

      def test_data_type(self):
          """`data` accepts bytes, iterables of bytes and file-like objects, but not str or dict."""
          request = Request('http://example.com')
          assert request.data is None

          # test bytes is allowed
          request.data = b'test'
          assert request.data == b'test'

          # test iterable of bytes is allowed
          chunks = [b'test', b'test2']
          request.data = chunks
          assert request.data == chunks

          # test file-like object is allowed
          stream = io.BytesIO(b'test')
          request.data = stream
          assert request.data == stream

          # common mistake: test str not allowed
          with pytest.raises(TypeError):
              request.data = 'test'
          assert request.data != 'test'

          # common mistake: test dict is not allowed
          with pytest.raises(TypeError):
              request.data = {'test': 'test'}
          assert request.data != {'test': 'test'}

      def test_content_length_header(self):
          """Content-Length is dropped when data changes or when there is no data."""
          request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
          assert request.headers.get('Content-Length') == '0'

          request.data = b'test'
          assert 'Content-Length' not in request.headers

          without_data = Request('http://example.com', headers={'Content-Length': '10'})
          assert 'Content-Length' not in without_data.headers

      def test_content_type_header(self):
          """Content-Type is kept across data changes, removed with data, and defaulted when absent."""
          request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
          assert request.headers.get('Content-Type') == 'test'

          request.data = b'test2'
          assert request.headers.get('Content-Type') == 'test'

          request.data = None
          assert 'Content-Type' not in request.headers

          request.data = b'test3'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_update_req(self):
          """update(data=b'') turns the request into a POST with a default Content-Type."""
          request = Request('http://example.com')
          assert request.data is None
          assert request.method == 'GET'
          assert 'Content-Type' not in request.headers

          # Test that zero-byte payloads will be sent
          request.update(data=b'')
          assert request.data == b''
          assert request.method == 'POST'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_proxies(self):
          """Proxies passed to the constructor are readable from `proxies`."""
          request = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
          assert request.proxies == {'http': 'http://127.0.0.1:8080'}

      def test_extensions(self):
          """Extensions passed to the constructor are readable from `extensions`."""
          request = Request(url='http://example.com', extensions={'timeout': 2})
          assert request.extensions == {'timeout': 2}

      def test_copy(self):
          """copy() duplicates the containers but shares the data object and extension values."""
          original = Request(
              url='http://example.com',
              extensions={'cookiejar': CookieJar()},
              headers={'Accept-Encoding': 'br'},
              proxies={'http': 'http://127.0.0.1'},
              data=[b'123'],
          )
          duplicate = original.copy()
          assert duplicate is not original
          assert duplicate.url == original.url

          # Headers and proxies are equal but distinct objects
          assert duplicate.headers == original.headers
          assert duplicate.headers is not original.headers
          assert duplicate.proxies == original.proxies
          assert duplicate.proxies is not original.proxies

          # Data is not able to be copied
          assert duplicate.data == original.data
          assert duplicate.data is original.data

          # Shallow copy extensions
          assert duplicate.extensions is not original.extensions
          assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

          # Subclasses are copied by default
          class AnotherRequest(Request):
              pass

          subclassed = AnotherRequest(url='http://127.0.0.1')
          assert isinstance(subclassed.copy(), AnotherRequest)

      def test_url(self):
          """URLs are escaped and normalised; assigning a non-string URL raises TypeError."""
          request = Request(url='https://фtest.example.com/ some spaceв?ä=c')
          assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

          scheme_less = Request(url='//example.com')
          assert scheme_less.url == 'http://example.com'

          with pytest.raises(TypeError):
              Request(url='https://').url = None
  class TestResponse:
      """Behaviour of the Response object: reason, headers and the compat accessors."""

      @pytest.mark.parametrize(('reason', 'status', 'expected'), [
          ('custom', 200, 'custom'),
          (None, 404, 'Not Found'),  # fallback status
          ('', 403, 'Forbidden'),
          (None, 999, None),
      ])
      def test_reason(self, reason, status, expected):
          """`reason` is the given value, or derived from the status when it is empty or None."""
          response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
          assert response.reason == expected

      def test_headers(self):
          """Repeated headers are all kept and names are matched case-insensitively."""
          headers = Message()
          for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
              headers.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=headers, url='test://')
          assert response.headers.get_all('test') == ['test', 'test2']
          assert 'Content-Encoding' in response.headers

      def test_get_header(self):
          """get_header() joins repeated values, returns only the first Set-Cookie, and takes a default."""
          headers = Message()
          header_items = (
              ('Set-Cookie', 'cookie1'),
              ('Set-cookie', 'cookie2'),
              ('Test', 'test'),
              ('Test', 'test2'),
          )
          for name, value in header_items:
              headers.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=headers, url='test://')
          assert response.get_header('test') == 'test, test2'
          assert response.get_header('set-Cookie') == 'cookie1'
          assert response.get_header('notexist', 'default') == 'default'

      def test_compat(self):
          """The urllib-style accessors agree with the Response attributes (DeprecationWarning silenced)."""
          response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
          with warnings.catch_warnings():
              warnings.simplefilter('ignore', category=DeprecationWarning)
              assert response.code == response.getcode() == response.status
              assert response.geturl() == response.url
              assert response.info() is response.headers
              assert response.getheader('test') == response.get_header('test')
  class TestImpersonateTarget:
      """Parsing, formatting, validation and comparison of ImpersonateTarget."""

      @pytest.mark.parametrize(('target_str', 'expected'), [
          ('abc', ImpersonateTarget('abc', None, None, None)),
          ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)),
          ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)),
          ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')),
          ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)),
          ('abc:', ImpersonateTarget('abc', None, None, None)),
          ('abc-120:', ImpersonateTarget('abc', '120', None, None)),
          (':xyz', ImpersonateTarget(None, None, 'xyz', None)),
          (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')),
          (':', ImpersonateTarget(None, None, None, None)),
          ('', ImpersonateTarget(None, None, None, None)),
      ])
      def test_target_from_str(self, target_str, expected):
          """from_str() parses each valid target string into the expected target."""
          parsed = ImpersonateTarget.from_str(target_str)
          assert parsed == expected

      @pytest.mark.parametrize('target_str', [
          '-120', ':-12.0', '-12:-12', '-:-',
          '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:',
      ])
      def test_target_from_invalid_str(self, target_str):
          """from_str() raises ValueError for each malformed target string."""
          with pytest.raises(ValueError):
              ImpersonateTarget.from_str(target_str)

      @pytest.mark.parametrize(('target', 'expected'), [
          (ImpersonateTarget('abc', None, None, None), 'abc'),
          (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
          (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
          (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'),
          (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'),
          (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
          (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
          (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'),
          (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'),
          (ImpersonateTarget('abc'), 'abc'),
          (ImpersonateTarget(None, None, None, None), ''),
      ])
      def test_str(self, target, expected):
          """str() renders each target as the expected string."""
          rendered = str(target)
          assert rendered == expected

      @pytest.mark.parametrize('args', [
          ('abc', None, None, '5'),
          ('abc', '120', None, '5'),
          (None, '120', None, None),
          (None, '120', None, '5'),
          (None, None, None, '5'),
          (None, '120', 'xyz', '5'),
      ])
      def test_invalid_impersonate_target(self, args):
          """Each of these argument combinations is rejected with ValueError."""
          with pytest.raises(ValueError):
              ImpersonateTarget(*args)

      @pytest.mark.parametrize(('target1', 'target2', 'is_in', 'is_eq'), [
          (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True),
          (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False),
          (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False),
          (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False),
          (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False),
          (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False),
          (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False),
          (ImpersonateTarget(), ImpersonateTarget(), True, True),
      ])
      def test_impersonate_target_in(self, target1, target2, is_in, is_eq):
          """`in` and `==` between two targets give the expected booleans."""
          contained = target1 in target2
          assert contained is is_in

          equal = target1 == target2
          assert equal is is_eq