# test_networking.py (59 KB)
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. import pytest
  6. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  7. import functools
  8. import gzip
  9. import http.client
  10. import http.cookiejar
  11. import http.server
  12. import inspect
  13. import io
  14. import pathlib
  15. import random
  16. import ssl
  17. import tempfile
  18. import threading
  19. import time
  20. import urllib.error
  21. import urllib.request
  22. import warnings
  23. import zlib
  24. from email.message import Message
  25. from http.cookiejar import CookieJar
  26. from test.helper import FakeYDL, http_server_port
  27. from yt_dlp.dependencies import brotli
  28. from yt_dlp.networking import (
  29. HEADRequest,
  30. PUTRequest,
  31. Request,
  32. RequestDirector,
  33. RequestHandler,
  34. Response,
  35. )
  36. from yt_dlp.networking._urllib import UrllibRH
  37. from yt_dlp.networking.common import _REQUEST_HANDLERS
  38. from yt_dlp.networking.exceptions import (
  39. CertificateVerifyError,
  40. HTTPError,
  41. IncompleteRead,
  42. NoSupportingHandlers,
  43. RequestError,
  44. SSLError,
  45. TransportError,
  46. UnsupportedRequest,
  47. )
  48. from yt_dlp.utils._utils import _YDLLogger as FakeLogger
  49. from yt_dlp.utils.networking import HTTPHeaderDict
  50. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  51. def _build_proxy_handler(name):
  52. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  53. proxy_name = name
  54. def log_message(self, format, *args):
  55. pass
  56. def do_GET(self):
  57. self.send_response(200)
  58. self.send_header('Content-Type', 'text/plain; charset=utf-8')
  59. self.end_headers()
  60. self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
  61. return HTTPTestRequestHandler
  62. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  63. protocol_version = 'HTTP/1.1'
  64. def log_message(self, format, *args):
  65. pass
  66. def _headers(self):
  67. payload = str(self.headers).encode()
  68. self.send_response(200)
  69. self.send_header('Content-Type', 'application/json')
  70. self.send_header('Content-Length', str(len(payload)))
  71. self.end_headers()
  72. self.wfile.write(payload)
  73. def _redirect(self):
  74. self.send_response(int(self.path[len('/redirect_'):]))
  75. self.send_header('Location', '/method')
  76. self.send_header('Content-Length', '0')
  77. self.end_headers()
  78. def _method(self, method, payload=None):
  79. self.send_response(200)
  80. self.send_header('Content-Length', str(len(payload or '')))
  81. self.send_header('Method', method)
  82. self.end_headers()
  83. if payload:
  84. self.wfile.write(payload)
  85. def _status(self, status):
  86. payload = f'<html>{status} NOT FOUND</html>'.encode()
  87. self.send_response(int(status))
  88. self.send_header('Content-Type', 'text/html; charset=utf-8')
  89. self.send_header('Content-Length', str(len(payload)))
  90. self.end_headers()
  91. self.wfile.write(payload)
  92. def _read_data(self):
  93. if 'Content-Length' in self.headers:
  94. return self.rfile.read(int(self.headers['Content-Length']))
  95. def do_POST(self):
  96. data = self._read_data() + str(self.headers).encode()
  97. if self.path.startswith('/redirect_'):
  98. self._redirect()
  99. elif self.path.startswith('/method'):
  100. self._method('POST', data)
  101. elif self.path.startswith('/headers'):
  102. self._headers()
  103. else:
  104. self._status(404)
  105. def do_HEAD(self):
  106. if self.path.startswith('/redirect_'):
  107. self._redirect()
  108. elif self.path.startswith('/method'):
  109. self._method('HEAD')
  110. else:
  111. self._status(404)
  112. def do_PUT(self):
  113. data = self._read_data() + str(self.headers).encode()
  114. if self.path.startswith('/redirect_'):
  115. self._redirect()
  116. elif self.path.startswith('/method'):
  117. self._method('PUT', data)
  118. else:
  119. self._status(404)
  120. def do_GET(self):
  121. if self.path == '/video.html':
  122. payload = b'<html><video src="/vid.mp4" /></html>'
  123. self.send_response(200)
  124. self.send_header('Content-Type', 'text/html; charset=utf-8')
  125. self.send_header('Content-Length', str(len(payload)))
  126. self.end_headers()
  127. self.wfile.write(payload)
  128. elif self.path == '/vid.mp4':
  129. payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
  130. self.send_response(200)
  131. self.send_header('Content-Type', 'video/mp4')
  132. self.send_header('Content-Length', str(len(payload)))
  133. self.end_headers()
  134. self.wfile.write(payload)
  135. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  136. payload = b'<html><video src="/vid.mp4" /></html>'
  137. self.send_response(200)
  138. self.send_header('Content-Type', 'text/html; charset=utf-8')
  139. self.send_header('Content-Length', str(len(payload)))
  140. self.end_headers()
  141. self.wfile.write(payload)
  142. elif self.path == '/%c7%9f':
  143. payload = b'<html><video src="/vid.mp4" /></html>'
  144. self.send_response(200)
  145. self.send_header('Content-Type', 'text/html; charset=utf-8')
  146. self.send_header('Content-Length', str(len(payload)))
  147. self.end_headers()
  148. self.wfile.write(payload)
  149. elif self.path.startswith('/redirect_loop'):
  150. self.send_response(301)
  151. self.send_header('Location', self.path)
  152. self.send_header('Content-Length', '0')
  153. self.end_headers()
  154. elif self.path == '/redirect_dotsegments':
  155. self.send_response(301)
  156. # redirect to /headers but with dot segments before
  157. self.send_header('Location', '/a/b/./../../headers')
  158. self.send_header('Content-Length', '0')
  159. self.end_headers()
  160. elif self.path.startswith('/redirect_'):
  161. self._redirect()
  162. elif self.path.startswith('/method'):
  163. self._method('GET', str(self.headers).encode())
  164. elif self.path.startswith('/headers'):
  165. self._headers()
  166. elif self.path.startswith('/308-to-headers'):
  167. self.send_response(308)
  168. self.send_header('Location', '/headers')
  169. self.send_header('Content-Length', '0')
  170. self.end_headers()
  171. elif self.path == '/trailing_garbage':
  172. payload = b'<html><video src="/vid.mp4" /></html>'
  173. self.send_response(200)
  174. self.send_header('Content-Type', 'text/html; charset=utf-8')
  175. self.send_header('Content-Encoding', 'gzip')
  176. buf = io.BytesIO()
  177. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  178. f.write(payload)
  179. compressed = buf.getvalue() + b'trailing garbage'
  180. self.send_header('Content-Length', str(len(compressed)))
  181. self.end_headers()
  182. self.wfile.write(compressed)
  183. elif self.path == '/302-non-ascii-redirect':
  184. new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
  185. self.send_response(301)
  186. self.send_header('Location', new_url)
  187. self.send_header('Content-Length', '0')
  188. self.end_headers()
  189. elif self.path == '/content-encoding':
  190. encodings = self.headers.get('ytdl-encoding', '')
  191. payload = b'<html><video src="/vid.mp4" /></html>'
  192. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  193. if encoding == 'br' and brotli:
  194. payload = brotli.compress(payload)
  195. elif encoding == 'gzip':
  196. buf = io.BytesIO()
  197. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  198. f.write(payload)
  199. payload = buf.getvalue()
  200. elif encoding == 'deflate':
  201. payload = zlib.compress(payload)
  202. elif encoding == 'unsupported':
  203. payload = b'raw'
  204. break
  205. else:
  206. self._status(415)
  207. return
  208. self.send_response(200)
  209. self.send_header('Content-Encoding', encodings)
  210. self.send_header('Content-Length', str(len(payload)))
  211. self.end_headers()
  212. self.wfile.write(payload)
  213. elif self.path.startswith('/gen_'):
  214. payload = b'<html></html>'
  215. self.send_response(int(self.path[len('/gen_'):]))
  216. self.send_header('Content-Type', 'text/html; charset=utf-8')
  217. self.send_header('Content-Length', str(len(payload)))
  218. self.end_headers()
  219. self.wfile.write(payload)
  220. elif self.path.startswith('/incompleteread'):
  221. payload = b'<html></html>'
  222. self.send_response(200)
  223. self.send_header('Content-Type', 'text/html; charset=utf-8')
  224. self.send_header('Content-Length', '234234')
  225. self.end_headers()
  226. self.wfile.write(payload)
  227. self.finish()
  228. elif self.path.startswith('/timeout_'):
  229. time.sleep(int(self.path[len('/timeout_'):]))
  230. self._headers()
  231. elif self.path == '/source_address':
  232. payload = str(self.client_address[0]).encode()
  233. self.send_response(200)
  234. self.send_header('Content-Type', 'text/html; charset=utf-8')
  235. self.send_header('Content-Length', str(len(payload)))
  236. self.end_headers()
  237. self.wfile.write(payload)
  238. self.finish()
  239. else:
  240. self._status(404)
  241. def send_header(self, keyword, value):
  242. """
  243. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  244. This is against what is defined in RFC 3986, however we need to test we support this
  245. since some sites incorrectly do this.
  246. """
  247. if keyword.lower() == 'connection':
  248. return super().send_header(keyword, value)
  249. if not hasattr(self, '_headers_buffer'):
  250. self._headers_buffer = []
  251. self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
  252. def validate_and_send(rh, req):
  253. rh.validate(req)
  254. return rh.send(req)
  255. class TestRequestHandlerBase:
  256. @classmethod
  257. def setup_class(cls):
  258. cls.http_httpd = http.server.ThreadingHTTPServer(
  259. ('127.0.0.1', 0), HTTPTestRequestHandler)
  260. cls.http_port = http_server_port(cls.http_httpd)
  261. cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
  262. # FIXME: we should probably stop the http server thread after each test
  263. # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
  264. cls.http_server_thread.daemon = True
  265. cls.http_server_thread.start()
  266. # HTTPS server
  267. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  268. cls.https_httpd = http.server.ThreadingHTTPServer(
  269. ('127.0.0.1', 0), HTTPTestRequestHandler)
  270. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  271. sslctx.load_cert_chain(certfn, None)
  272. cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
  273. cls.https_port = http_server_port(cls.https_httpd)
  274. cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
  275. cls.https_server_thread.daemon = True
  276. cls.https_server_thread.start()
  277. @pytest.fixture
  278. def handler(request):
  279. RH_KEY = request.param
  280. if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
  281. handler = RH_KEY
  282. elif RH_KEY in _REQUEST_HANDLERS:
  283. handler = _REQUEST_HANDLERS[RH_KEY]
  284. else:
  285. pytest.skip(f'{RH_KEY} request handler is not available')
  286. return functools.partial(handler, logger=FakeLogger)
  287. class TestHTTPRequestHandler(TestRequestHandlerBase):
  288. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  289. def test_verify_cert(self, handler):
  290. with handler() as rh:
  291. with pytest.raises(CertificateVerifyError):
  292. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  293. with handler(verify=False) as rh:
  294. r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  295. assert r.status == 200
  296. r.close()
  297. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  298. def test_ssl_error(self, handler):
  299. # HTTPS server with too old TLS version
  300. # XXX: is there a better way to test this than to create a new server?
  301. https_httpd = http.server.ThreadingHTTPServer(
  302. ('127.0.0.1', 0), HTTPTestRequestHandler)
  303. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  304. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  305. https_port = http_server_port(https_httpd)
  306. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  307. https_server_thread.daemon = True
  308. https_server_thread.start()
  309. with handler(verify=False) as rh:
  310. with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
  311. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  312. assert not issubclass(exc_info.type, CertificateVerifyError)
  313. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  314. def test_percent_encode(self, handler):
  315. with handler() as rh:
  316. # Unicode characters should be encoded with uppercase percent-encoding
  317. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
  318. assert res.status == 200
  319. res.close()
  320. # don't normalize existing percent encodings
  321. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
  322. assert res.status == 200
  323. res.close()
  324. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  325. def test_remove_dot_segments(self, handler):
  326. with handler() as rh:
  327. # This isn't a comprehensive test,
  328. # but it should be enough to check whether the handler is removing dot segments
  329. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
  330. assert res.status == 200
  331. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  332. res.close()
  333. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
  334. assert res.status == 200
  335. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  336. res.close()
  337. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  338. def test_unicode_path_redirection(self, handler):
  339. with handler() as rh:
  340. r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
  341. assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
  342. r.close()
  343. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  344. def test_raise_http_error(self, handler):
  345. with handler() as rh:
  346. for bad_status in (400, 500, 599, 302):
  347. with pytest.raises(HTTPError):
  348. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status)))
  349. # Should not raise an error
  350. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
  351. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  352. def test_response_url(self, handler):
  353. with handler() as rh:
  354. # Response url should be that of the last url in redirect chain
  355. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
  356. assert res.url == f'http://127.0.0.1:{self.http_port}/method'
  357. res.close()
  358. res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
  359. assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
  360. res2.close()
  361. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  362. def test_redirect(self, handler):
  363. with handler() as rh:
  364. def do_req(redirect_status, method, assert_no_content=False):
  365. data = b'testdata' if method in ('POST', 'PUT') else None
  366. res = validate_and_send(
  367. rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
  368. headers = b''
  369. data_sent = b''
  370. if data is not None:
  371. data_sent += res.read(len(data))
  372. if data_sent != data:
  373. headers += data_sent
  374. data_sent = b''
  375. headers += res.read()
  376. if assert_no_content or data is None:
  377. assert b'Content-Type' not in headers
  378. assert b'Content-Length' not in headers
  379. else:
  380. assert b'Content-Type' in headers
  381. assert b'Content-Length' in headers
  382. return data_sent.decode(), res.headers.get('method', '')
  383. # A 303 must either use GET or HEAD for subsequent request
  384. assert do_req(303, 'POST', True) == ('', 'GET')
  385. assert do_req(303, 'HEAD') == ('', 'HEAD')
  386. assert do_req(303, 'PUT', True) == ('', 'GET')
  387. # 301 and 302 turn POST only into a GET
  388. assert do_req(301, 'POST', True) == ('', 'GET')
  389. assert do_req(301, 'HEAD') == ('', 'HEAD')
  390. assert do_req(302, 'POST', True) == ('', 'GET')
  391. assert do_req(302, 'HEAD') == ('', 'HEAD')
  392. assert do_req(301, 'PUT') == ('testdata', 'PUT')
  393. assert do_req(302, 'PUT') == ('testdata', 'PUT')
  394. # 307 and 308 should not change method
  395. for m in ('POST', 'PUT'):
  396. assert do_req(307, m) == ('testdata', m)
  397. assert do_req(308, m) == ('testdata', m)
  398. assert do_req(307, 'HEAD') == ('', 'HEAD')
  399. assert do_req(308, 'HEAD') == ('', 'HEAD')
  400. # These should not redirect and instead raise an HTTPError
  401. for code in (300, 304, 305, 306):
  402. with pytest.raises(HTTPError):
  403. do_req(code, 'GET')
  404. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  405. def test_request_cookie_header(self, handler):
  406. # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
  407. with handler() as rh:
  408. # Specified Cookie header should be used
  409. res = validate_and_send(
  410. rh, Request(
  411. f'http://127.0.0.1:{self.http_port}/headers',
  412. headers={'Cookie': 'test=test'})).read().decode()
  413. assert 'Cookie: test=test' in res
  414. # Specified Cookie header should be removed on any redirect
  415. res = validate_and_send(
  416. rh, Request(
  417. f'http://127.0.0.1:{self.http_port}/308-to-headers',
  418. headers={'Cookie': 'test=test'})).read().decode()
  419. assert 'Cookie: test=test' not in res
  420. # Specified Cookie header should override global cookiejar for that request
  421. cookiejar = http.cookiejar.CookieJar()
  422. cookiejar.set_cookie(http.cookiejar.Cookie(
  423. version=0, name='test', value='ytdlp', port=None, port_specified=False,
  424. domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
  425. path_specified=True, secure=False, expires=None, discard=False, comment=None,
  426. comment_url=None, rest={}))
  427. with handler(cookiejar=cookiejar) as rh:
  428. data = validate_and_send(
  429. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
  430. assert b'Cookie: test=ytdlp' not in data
  431. assert b'Cookie: test=test' in data
  432. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  433. def test_redirect_loop(self, handler):
  434. with handler() as rh:
  435. with pytest.raises(HTTPError, match='redirect loop'):
  436. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
  437. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  438. def test_incompleteread(self, handler):
  439. with handler(timeout=2) as rh:
  440. with pytest.raises(IncompleteRead):
  441. validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
  442. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  443. def test_cookies(self, handler):
  444. cookiejar = http.cookiejar.CookieJar()
  445. cookiejar.set_cookie(http.cookiejar.Cookie(
  446. 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
  447. False, '/headers', True, False, None, False, None, None, {}))
  448. with handler(cookiejar=cookiejar) as rh:
  449. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  450. assert b'Cookie: test=ytdlp' in data
  451. # Per request
  452. with handler() as rh:
  453. data = validate_and_send(
  454. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
  455. assert b'Cookie: test=ytdlp' in data
  456. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  457. def test_headers(self, handler):
  458. with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
  459. # Global Headers
  460. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  461. assert b'Test1: test' in data
  462. # Per request headers, merged with global
  463. data = validate_and_send(rh, Request(
  464. f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
  465. assert b'Test1: test' in data
  466. assert b'Test2: changed' in data
  467. assert b'Test2: test2' not in data
  468. assert b'Test3: test3' in data
  469. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  470. def test_timeout(self, handler):
  471. with handler() as rh:
  472. # Default timeout is 20 seconds, so this should go through
  473. validate_and_send(
  474. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))
  475. with handler(timeout=0.5) as rh:
  476. with pytest.raises(TransportError):
  477. validate_and_send(
  478. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
  479. # Per request timeout, should override handler timeout
  480. validate_and_send(
  481. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
  482. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  483. def test_source_address(self, handler):
  484. source_address = f'127.0.0.{random.randint(5, 255)}'
  485. with handler(source_address=source_address) as rh:
  486. data = validate_and_send(
  487. rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
  488. assert source_address == data
  489. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  490. def test_gzip_trailing_garbage(self, handler):
  491. with handler() as rh:
  492. data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
  493. assert data == '<html><video src="/vid.mp4" /></html>'
  494. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  495. @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
  496. def test_brotli(self, handler):
  497. with handler() as rh:
  498. res = validate_and_send(
  499. rh, Request(
  500. f'http://127.0.0.1:{self.http_port}/content-encoding',
  501. headers={'ytdl-encoding': 'br'}))
  502. assert res.headers.get('Content-Encoding') == 'br'
  503. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  504. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  505. def test_deflate(self, handler):
  506. with handler() as rh:
  507. res = validate_and_send(
  508. rh, Request(
  509. f'http://127.0.0.1:{self.http_port}/content-encoding',
  510. headers={'ytdl-encoding': 'deflate'}))
  511. assert res.headers.get('Content-Encoding') == 'deflate'
  512. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  513. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  514. def test_gzip(self, handler):
  515. with handler() as rh:
  516. res = validate_and_send(
  517. rh, Request(
  518. f'http://127.0.0.1:{self.http_port}/content-encoding',
  519. headers={'ytdl-encoding': 'gzip'}))
  520. assert res.headers.get('Content-Encoding') == 'gzip'
  521. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  522. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  523. def test_multiple_encodings(self, handler):
  524. with handler() as rh:
  525. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  526. res = validate_and_send(
  527. rh, Request(
  528. f'http://127.0.0.1:{self.http_port}/content-encoding',
  529. headers={'ytdl-encoding': pair}))
  530. assert res.headers.get('Content-Encoding') == pair
  531. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  532. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  533. def test_unsupported_encoding(self, handler):
  534. with handler() as rh:
  535. res = validate_and_send(
  536. rh, Request(
  537. f'http://127.0.0.1:{self.http_port}/content-encoding',
  538. headers={'ytdl-encoding': 'unsupported'}))
  539. assert res.headers.get('Content-Encoding') == 'unsupported'
  540. assert res.read() == b'raw'
  541. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  542. def test_read(self, handler):
  543. with handler() as rh:
  544. res = validate_and_send(
  545. rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
  546. assert res.readable()
  547. assert res.read(1) == b'H'
  548. assert res.read(3) == b'ost'
  549. class TestHTTPProxy(TestRequestHandlerBase):
  550. @classmethod
  551. def setup_class(cls):
  552. super().setup_class()
  553. # HTTP Proxy server
  554. cls.proxy = http.server.ThreadingHTTPServer(
  555. ('127.0.0.1', 0), _build_proxy_handler('normal'))
  556. cls.proxy_port = http_server_port(cls.proxy)
  557. cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
  558. cls.proxy_thread.daemon = True
  559. cls.proxy_thread.start()
  560. # Geo proxy server
  561. cls.geo_proxy = http.server.ThreadingHTTPServer(
  562. ('127.0.0.1', 0), _build_proxy_handler('geo'))
  563. cls.geo_port = http_server_port(cls.geo_proxy)
  564. cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
  565. cls.geo_proxy_thread.daemon = True
  566. cls.geo_proxy_thread.start()
  567. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  568. def test_http_proxy(self, handler):
  569. http_proxy = f'http://127.0.0.1:{self.proxy_port}'
  570. geo_proxy = f'http://127.0.0.1:{self.geo_port}'
  571. # Test global http proxy
  572. # Test per request http proxy
  573. # Test per request http proxy disables proxy
  574. url = 'http://foo.com/bar'
  575. # Global HTTP proxy
  576. with handler(proxies={'http': http_proxy}) as rh:
  577. res = validate_and_send(rh, Request(url)).read().decode()
  578. assert res == f'normal: {url}'
  579. # Per request proxy overrides global
  580. res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
  581. assert res == f'geo: {url}'
  582. # and setting to None disables all proxies for that request
  583. real_url = f'http://127.0.0.1:{self.http_port}/headers'
  584. res = validate_and_send(
  585. rh, Request(real_url, proxies={'http': None})).read().decode()
  586. assert res != f'normal: {real_url}'
  587. assert 'Accept' in res
  @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  def test_noproxy(self, handler):
      """For each ``no`` proxy value, the /headers response must contain 'Accept'."""
      handler_proxies = {'proxy': f'http://127.0.0.1:{self.proxy_port}'}
      # NO_PROXY values to try, one request each
      bypass_values = (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost')

      with handler(proxies=handler_proxies) as rh:
          for bypass in bypass_values:
              request = Request(
                  f'http://127.0.0.1:{self.http_port}/headers',
                  proxies={'no': bypass})
              text = validate_and_send(rh, request).read().decode('utf-8')
              assert 'Accept' in text
  @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  def test_allproxy(self, handler):
      """A per-request ``all`` proxy entry must route the request through the test proxy."""
      target = 'http://foo.com/bar'
      all_proxy = {'all': f'http://127.0.0.1:{self.proxy_port}'}

      with handler() as rh:
          raw = validate_and_send(rh, Request(target, proxies=all_proxy)).read()
          # The assertion expects the test proxy's 'normal: <url>' reply
          assert raw.decode('utf-8') == f'normal: {target}'
  @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  def test_http_proxy_with_idn(self, handler):
      """An internationalised hostname must reach the proxy in its IDNA (punycode) form."""
      proxy_config = {'http': f'http://127.0.0.1:{self.proxy_port}'}
      with handler(proxies=proxy_config) as rh:
          idn_url = 'http://中文.tw/'
          body = rh.send(Request(idn_url)).read().decode()
          # b'xn--fiq228c' is '中文'.encode('idna')
          assert body == 'normal: http://xn--fiq228c.tw/'
  class TestClientCertificate:
      """Client certificate tests against a local TLS server that demands a client cert."""

      @classmethod
      def setup_class(cls):
          """Start an HTTPS test server (bound to port 0, CERT_REQUIRED) in a daemon thread."""
          cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
          server_cert = os.path.join(TEST_DIR, 'testcert.pem')
          ca_bundle = os.path.join(cls.certdir, 'ca.crt')

          cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

          # Server-side context: clients must present a cert signed by ca.crt
          context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
          context.verify_mode = ssl.CERT_REQUIRED
          context.load_verify_locations(cafile=ca_bundle)
          context.load_cert_chain(server_cert, None)
          cls.httpd.socket = context.wrap_socket(cls.httpd.socket, server_side=True)

          cls.port = http_server_port(cls.httpd)
          cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
          cls.server_thread.start()

      def _run_test(self, handler, **handler_kwargs):
          """Fetch /video.html from the TLS server with a handler built from ``handler_kwargs``."""
          # Disable client-side validation of unacceptable self-signed testcert.pem
          # The test is of a check on the server side, so unaffected
          with handler(verify=False, **handler_kwargs) as rh:
              response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
              response.read().decode()

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_certificate_combined_nopass(self, handler):
          """Certificate and unencrypted key in a single file."""
          cert_options = {
              'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
          }
          self._run_test(handler, client_cert=cert_options)

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_certificate_nocombined_nopass(self, handler):
          """Certificate and unencrypted key in separate files."""
          cert_options = {
              'client_certificate': os.path.join(self.certdir, 'client.crt'),
              'client_certificate_key': os.path.join(self.certdir, 'client.key'),
          }
          self._run_test(handler, client_cert=cert_options)

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_certificate_combined_pass(self, handler):
          """Certificate and password-protected key in a single file."""
          cert_options = {
              'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
              'client_certificate_password': 'foobar',
          }
          self._run_test(handler, client_cert=cert_options)

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_certificate_nocombined_pass(self, handler):
          """Certificate plus a separate password-protected key file."""
          cert_options = {
              'client_certificate': os.path.join(self.certdir, 'client.crt'),
              'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
              'client_certificate_password': 'foobar',
          }
          self._run_test(handler, client_cert=cert_options)
  class TestUrllibRequestHandler(TestRequestHandlerBase):
      """Tests that only apply to the urllib-based request handler."""

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_file_urls(self, handler):
          """file:// URLs are rejected by default and readable with ``enable_file_urls=True``."""
          # See https://github.com/ytdl-org/youtube-dl/issues/8227
          with tempfile.NamedTemporaryFile(delete=False) as tf:
              tf.write(b'foobar')

          # The file is created with delete=False, so it must be removed even
          # when one of the assertions below fails.
          try:
              req = Request(pathlib.Path(tf.name).as_uri())
              with handler() as rh:
                  with pytest.raises(UnsupportedRequest):
                      rh.validate(req)

                  # Test that urllib never loaded FileHandler
                  with pytest.raises(TransportError):
                      rh.send(req)

              with handler(enable_file_urls=True) as rh:
                  res = validate_and_send(rh, req)
                  try:
                      assert res.read() == b'foobar'
                  finally:
                      # Close before unlinking: an open handle blocks deletion on Windows
                      res.close()
          finally:
              os.unlink(tf.name)

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_http_error_returns_content(self, handler):
          """The response attached to an HTTPError stays readable after the error goes out of scope."""
          # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
          def get_response():
              with handler() as rh:
                  # headers url
                  try:
                      validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                  except HTTPError as e:
                      return e.response

          assert get_response().read() == b'<html></html>'

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_verify_cert_error_text(self, handler):
          """Certificate verification failures must carry the expected error text."""
          # Check the output of the error message
          with handler() as rh:
              with pytest.raises(
                  CertificateVerifyError,
                  match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
              ):
                  validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      @pytest.mark.parametrize('req,match,version_check', [
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
          # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
          (
              Request('http://127.0.0.1', method='GET\n'),
              'method can\'t contain control characters',
              lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
          ),
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
          # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
          (
              Request('http://127.0.0. 1', method='GET'),
              'URL can\'t contain control characters',
              lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
          ),
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
          (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
      ])
      def test_httplib_validation_errors(self, handler, req, match, version_check):
          """http.client validation failures surface as RequestError, not TransportError.

          ``version_check`` (when given) returns True for interpreter versions
          that lack the validation being tested; the case is skipped there.
          """
          if version_check and version_check(sys.version_info):
              pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

          with handler() as rh:
              with pytest.raises(RequestError, match=match) as exc_info:
                  validate_and_send(rh, req)
              assert not isinstance(exc_info.value, TransportError)
  def run_validation(handler, error, req, **handler_kwargs):
      """Validate ``req`` with a handler built from ``handler_kwargs``.

      When ``error`` is truthy it is the exception type that ``validate`` is
      required to raise; otherwise validation is expected to pass.
      """
      with handler(**handler_kwargs) as rh:
          if not error:
              rh.validate(req)
              return
          with pytest.raises(error):
              rh.validate(req)
  class TestRequestHandlerValidation:
      """Table-driven checks of what ``RequestHandler.validate`` accepts and rejects."""

      class ValidationRH(RequestHandler):
          """Handler used only for validation; sending always fails."""

          def _send(self, request):
              raise RequestError('test')

      class NoCheckRH(ValidationRH):
          """Handler with every supported-* attribute set to None."""

          _SUPPORTED_FEATURES = None
          _SUPPORTED_PROXY_SCHEMES = None
          _SUPPORTED_URL_SCHEMES = None

          def _check_extensions(self, extensions):
              # Empty the mapping in place
              extensions.clear()

      class HTTPSupportedRH(ValidationRH):
          """Handler that declares support for the http URL scheme only."""

          _SUPPORTED_URL_SCHEMES = ('http',)

      URL_SCHEME_TESTS = [
          # scheme, expected to fail, handler kwargs
          ('Urllib', [
              ('http', False, {}),
              ('https', False, {}),
              ('data', False, {}),
              ('ftp', False, {}),
              ('file', UnsupportedRequest, {}),
              ('file', False, {'enable_file_urls': True}),
          ]),
          (NoCheckRH, [('http', False, {})]),
          (ValidationRH, [('http', UnsupportedRequest, {})])
      ]

      PROXY_SCHEME_TESTS = [
          # scheme, expected to fail
          ('Urllib', [
              ('http', False),
              ('https', UnsupportedRequest),
              ('socks4', False),
              ('socks4a', False),
              ('socks5', False),
              ('socks5h', False),
              ('socks', UnsupportedRequest),
          ]),
          (NoCheckRH, [('http', False)]),
          (HTTPSupportedRH, [('http', UnsupportedRequest)]),
      ]

      PROXY_KEY_TESTS = [
          # key, expected to fail
          ('Urllib', [
              ('all', False),
              ('unrelated', False),
          ]),
          (NoCheckRH, [('all', False)]),
          (HTTPSupportedRH, [('all', UnsupportedRequest)]),
          (HTTPSupportedRH, [('no', UnsupportedRequest)]),
      ]

      EXTENSION_TESTS = [
          # extensions, expected to fail
          ('Urllib', [
              ({'cookiejar': 'notacookiejar'}, AssertionError),
              ({'cookiejar': CookieJar()}, False),
              ({'timeout': 1}, False),
              ({'timeout': 'notatimeout'}, AssertionError),
              ({'unsupported': 'value'}, UnsupportedRequest),
          ]),
          (NoCheckRH, [
              ({'cookiejar': 'notacookiejar'}, False),
              ({'somerandom': 'test'}, False),  # but any extension is allowed through
          ]),
      ]

      @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
          (handler_key, scheme, fail, handler_kwargs)
          for handler_key, cases in URL_SCHEME_TESTS
          for scheme, fail, handler_kwargs in cases
      ], indirect=['handler'])
      def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
          """Validate a bare ``<scheme>://`` URL against the expectation table."""
          request = Request(f'{scheme}://')
          run_validation(handler, fail, request, **(handler_kwargs or {}))

      @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
      def test_no_proxy(self, handler, fail):
          """A ``no`` proxy list is validated both on the request and on the handler."""
          no_proxy = {'no': '127.0.0.1,github.com'}
          run_validation(handler, fail, Request('http://', proxies=no_proxy))
          run_validation(handler, fail, Request('http://'), proxies=no_proxy)

      @pytest.mark.parametrize('handler,proxy_key,fail', [
          (handler_key, proxy_key, fail)
          for handler_key, cases in PROXY_KEY_TESTS
          for proxy_key, fail in cases
      ], indirect=['handler'])
      def test_proxy_key(self, handler, proxy_key, fail):
          """Validate proxy mapping keys, supplied per request and per handler."""
          proxy_map = {proxy_key: 'http://example.com'}
          run_validation(handler, fail, Request('http://', proxies=proxy_map))
          run_validation(handler, fail, Request('http://'), proxies=proxy_map)

      @pytest.mark.parametrize('handler,scheme,fail', [
          (handler_key, scheme, fail)
          for handler_key, cases in PROXY_SCHEME_TESTS
          for scheme, fail in cases
      ], indirect=['handler'])
      def test_proxy_scheme(self, handler, scheme, fail):
          """Validate proxy URL schemes, supplied per request and per handler."""
          proxy_map = {'http': f'{scheme}://example.com'}
          run_validation(handler, fail, Request('http://', proxies=proxy_map))
          run_validation(handler, fail, Request('http://'), proxies=proxy_map)

      @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
      def test_empty_proxy(self, handler):
          """A proxy value of None must pass validation on request and handler."""
          disabled = {'http': None}
          run_validation(handler, False, Request('http://', proxies=disabled))
          run_validation(handler, False, Request('http://'), proxies=disabled)

      @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
      @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
      def test_invalid_proxy_url(self, handler, proxy_url):
          """Each of these proxy URLs must be rejected with UnsupportedRequest."""
          request = Request('http://', proxies={'http': proxy_url})
          run_validation(handler, UnsupportedRequest, request)

      @pytest.mark.parametrize('handler,extensions,fail', [
          (handler_key, extensions, fail)
          for handler_key, cases in EXTENSION_TESTS
          for extensions, fail in cases
      ], indirect=['handler'])
      def test_extension(self, handler, extensions, fail):
          """Validate request extensions against the expectation table."""
          request = Request('http://', extensions=extensions)
          run_validation(handler, fail, request)

      def test_invalid_request_type(self):
          """``validate`` and ``send`` both raise TypeError for a non-Request argument."""
          rh = self.ValidationRH(logger=FakeLogger())
          for entry_point in (rh.validate, rh.send):
              with pytest.raises(TypeError, match='Expected an instance of Request'):
                  entry_point('not a request')
  class FakeResponse(Response):
      """Empty-bodied Response that remembers the request it was created for."""

      def __init__(self, request):
          # XXX: we could make request part of standard response interface
          self.request = request
          empty_body = io.BytesIO(b'')
          super().__init__(fp=empty_body, headers={}, url=request.url)
  class FakeRH(RequestHandler):
      """Stub handler: skips validation and answers with a FakeResponse."""

      def _validate(self, request):
          # Nothing is checked
          return None

      def _send(self, request: Request):
          ssl_prefix = 'ssl://'
          if not request.url.startswith(ssl_prefix):
              return FakeResponse(request)
          # The text after the prefix becomes the SSLError message
          raise SSLError(request.url[len(ssl_prefix):])
  class FakeRHYDL(FakeYDL):
      """FakeYDL whose request director is built from FakeRH alone."""

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # Swap in a director built from the stub handler
          fake_only_director = self.build_request_director([FakeRH])
          self._request_director = fake_only_director
  class TestRequestDirector:
      """Tests for handler registration and dispatch in RequestDirector."""

      def test_handler_operations(self):
          """Adding, overwriting and removing handlers keeps ``director.handlers`` consistent."""
          director = RequestDirector(logger=FakeLogger())
          handler = FakeRH(logger=FakeLogger())
          director.add_handler(handler)
          assert director.handlers.get(FakeRH.RH_KEY) is handler

          # Handler should overwrite
          handler2 = FakeRH(logger=FakeLogger())
          director.add_handler(handler2)
          assert director.handlers.get(FakeRH.RH_KEY) is not handler
          assert director.handlers.get(FakeRH.RH_KEY) is handler2
          assert len(director.handlers) == 1

          class AnotherFakeRH(FakeRH):
              pass

          director.add_handler(AnotherFakeRH(logger=FakeLogger()))
          assert len(director.handlers) == 2
          assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY

          director.handlers.pop(FakeRH.RH_KEY, None)
          assert director.handlers.get(FakeRH.RH_KEY) is None
          assert len(director.handlers) == 1

          # RequestErrors should passthrough
          with pytest.raises(SSLError):
              director.send(Request('ssl://something'))

      def test_send(self):
          """Sending with no handlers raises RequestError; with FakeRH it yields a FakeResponse."""
          director = RequestDirector(logger=FakeLogger())
          with pytest.raises(RequestError):
              director.send(Request('any://'))

          director.add_handler(FakeRH(logger=FakeLogger()))
          assert isinstance(director.send(Request('http://')), FakeResponse)

      def test_unsupported_handlers(self):
          """Requests a handler does not support fall through to another handler, or fail."""
          director = RequestDirector(logger=FakeLogger())
          director.add_handler(FakeRH(logger=FakeLogger()))

          class SupportedRH(RequestHandler):
              _SUPPORTED_URL_SCHEMES = ['http']

              def _send(self, request: Request):
                  return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)

          # This handler should by default take preference over FakeRH
          director.add_handler(SupportedRH(logger=FakeLogger()))
          assert director.send(Request('http://')).read() == b'supported'
          assert director.send(Request('any://')).read() == b''

          director.handlers.pop(FakeRH.RH_KEY)
          with pytest.raises(NoSupportingHandlers):
              director.send(Request('any://'))

      def test_unexpected_error(self):
          """A non-RequestError raised by a handler is reported, and is not fatal if another handler works."""
          director = RequestDirector(logger=FakeLogger())

          class UnexpectedRH(FakeRH):
              def _send(self, request: Request):
                  raise TypeError('something')

          # Pass a logger *instance*, as every other handler in this file does;
          # handing over the FakeLogger class would break any logger method call.
          director.add_handler(UnexpectedRH(logger=FakeLogger()))
          with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
              director.send(Request('any://'))

          director.handlers.clear()
          assert len(director.handlers) == 0

          # Should not be fatal
          director.add_handler(FakeRH(logger=FakeLogger()))
          director.add_handler(UnexpectedRH(logger=FakeLogger()))
          assert director.send(Request('any://'))
  916. # XXX: do we want to move this to test_YoutubeDL.py?
  class TestYoutubeDLNetworking:
      """Tests of how YoutubeDL parameters and requests are turned into handler settings."""

      @staticmethod
      def build_handler(ydl, handler: RequestHandler = FakeRH):
          """Build a request director containing only ``handler`` and return that handler instance."""
          return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)

      @staticmethod
      def _restore_env(key, previous):
          """Put ``os.environ[key]`` back to ``previous``; remove it when it was unset (None)."""
          if previous is None:
              os.environ.pop(key, None)
          else:
              os.environ[key] = previous

      def test_compat_opener(self):
          """The deprecated ``_opener`` attribute is still a urllib OpenerDirector."""
          with FakeYDL() as ydl:
              with warnings.catch_warnings():
                  warnings.simplefilter('ignore', category=DeprecationWarning)
                  assert isinstance(ydl._opener, urllib.request.OpenerDirector)

      @pytest.mark.parametrize('proxy,expected', [
          ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
          ('', {'all': '__noproxy__'}),
          (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'})  # env, set https
      ])
      def test_proxy(self, proxy, expected):
          """The ``proxy`` parameter overrides HTTP_PROXY; without it the environment is used."""
          old_http_proxy = os.environ.get('HTTP_PROXY')
          try:
              os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081'  # ensure that provided proxies override env
              with FakeYDL({'proxy': proxy}) as ydl:
                  assert ydl.proxies == expected
          finally:
              # Always undo the change, including when the variable was unset
              # before, so the test value does not leak into later tests.
              self._restore_env('HTTP_PROXY', old_http_proxy)

      def test_compat_request(self):
          """``urlopen`` accepts URL strings and legacy urllib Request objects."""
          with FakeRHYDL() as ydl:
              assert ydl.urlopen('test://')
              urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
              urllib_req.add_unredirected_header('Cookie', 'bob=bob')
              urllib_req.timeout = 2
              with warnings.catch_warnings():
                  warnings.simplefilter('ignore', category=DeprecationWarning)
                  req = ydl.urlopen(urllib_req).request
                  assert req.url == urllib_req.get_full_url()
                  assert req.data == urllib_req.data
                  assert req.method == urllib_req.get_method()
                  assert 'X-Test' in req.headers
                  assert 'Cookie' in req.headers
                  assert req.extensions.get('timeout') == 2

              with pytest.raises(AssertionError):
                  ydl.urlopen(None)

      def test_extract_basic_auth(self):
          """Credentials embedded in the URL become a Basic Authorization header."""
          with FakeRHYDL() as ydl:
              res = ydl.urlopen(Request('http://user:pass@foo.bar'))
              assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'

      def test_sanitize_url(self):
          """The mistyped ``httpss`` scheme is corrected to ``https``."""
          with FakeRHYDL() as ydl:
              res = ydl.urlopen(Request('httpss://foo.bar'))
              assert res.request.url == 'https://foo.bar'

      def test_file_urls_error(self):
          """file:// URLs produce a dedicated error message by default."""
          # use urllib handler
          with FakeYDL() as ydl:
              with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
                  ydl.urlopen('file://')

      def test_legacy_server_connect_error(self):
          """Specific SSL errors get a --legacy-server-connect hint; others pass through unchanged."""
          with FakeRHYDL() as ydl:
              for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                  with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                      ydl.urlopen(f'ssl://{error}')

              with pytest.raises(SSLError, match='testerror'):
                  ydl.urlopen('ssl://testerror')

      @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
          ('http', '__noproxy__', None),
          ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
          ('https', 'example.com', 'http://example.com'),
          ('https', '//example.com', 'http://example.com'),
          ('https', 'socks5://example.com', 'socks5h://example.com'),
          ('http', 'socks://example.com', 'socks4://example.com'),
          ('http', 'socks4://example.com', 'socks4://example.com'),
          ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
      ])
      def test_clean_proxy(self, proxy_key, proxy_url, expected):
          """Proxy URLs are normalised both in ``urlopen`` and when a handler is built from env."""
          # proxies should be cleaned in urlopen()
          with FakeRHYDL() as ydl:
              req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
              assert req.proxies[proxy_key] == expected

          # and should also be cleaned when building the handler
          env_key = f'{proxy_key.upper()}_PROXY'
          old_env_proxy = os.environ.get(env_key)
          try:
              os.environ[env_key] = proxy_url  # ensure that provided proxies override env
              with FakeYDL() as ydl:
                  rh = self.build_handler(ydl)
                  assert rh.proxies[proxy_key] == expected
          finally:
              # Always undo the change, including when the variable was unset
              # before, so later parametrized cases start from a clean environment.
              self._restore_env(env_key, old_env_proxy)

      def test_clean_proxy_header(self):
          """The ``ytdl-request-proxy`` header is removed and turned into an ``all`` proxy."""
          with FakeRHYDL() as ydl:
              req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
              assert 'ytdl-request-proxy' not in req.headers
              assert req.proxies == {'all': 'http://foo.bar'}

          with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
              rh = self.build_handler(ydl)
              assert 'ytdl-request-proxy' not in rh.headers
              assert rh.proxies == {'all': 'http://foo.bar'}

      def test_clean_header(self):
          """``Youtubedl-no-compression`` is removed and replaced by ``Accept-Encoding: identity``."""
          with FakeRHYDL() as ydl:
              res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
              assert 'Youtubedl-no-compression' not in res.request.headers
              assert res.request.headers.get('Accept-Encoding') == 'identity'

          with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
              rh = self.build_handler(ydl)
              assert 'Youtubedl-no-compression' not in rh.headers
              assert rh.headers.get('Accept-Encoding') == 'identity'

      def test_build_handler_params(self):
          """YoutubeDL parameters are mapped onto the matching handler attributes."""
          with FakeYDL({
              'http_headers': {'test': 'testtest'},
              'socket_timeout': 2,
              'proxy': 'http://127.0.0.1:8080',
              'source_address': '127.0.0.45',
              'debug_printtraffic': True,
              'compat_opts': ['no-certifi'],
              'nocheckcertificate': True,
              'legacyserverconnect': True,
          }) as ydl:
              rh = self.build_handler(ydl)
              assert rh.headers.get('test') == 'testtest'
              assert 'Accept' in rh.headers  # ensure std_headers are still there
              assert rh.timeout == 2
              assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
              assert rh.source_address == '127.0.0.45'
              assert rh.verbose is True
              assert rh.prefer_system_certs is True
              assert rh.verify is False
              assert rh.legacy_ssl_support is True

      @pytest.mark.parametrize('ydl_params', [
          {'client_certificate': 'fakecert.crt'},
          {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
          {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
          {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
      ])
      def test_client_certificate(self, ydl_params):
          """Client-certificate parameters reach the handler unchanged."""
          with FakeYDL(ydl_params) as ydl:
              rh = self.build_handler(ydl)
              assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

      def test_urllib_file_urls(self):
          """``enable_file_urls`` is forwarded to the urllib handler."""
          with FakeYDL({'enable_file_urls': False}) as ydl:
              rh = self.build_handler(ydl, UrllibRH)
              assert rh.enable_file_urls is False

          with FakeYDL({'enable_file_urls': True}) as ydl:
              rh = self.build_handler(ydl, UrllibRH)
              assert rh.enable_file_urls is True
  class TestRequest:
      """Behavioural tests for the Request object."""

      def test_query(self):
          """``query`` parameters are merged into the URL at construction and on update."""
          request = Request('http://example.com?q=something', query={'v': 'xyz'})
          assert request.url == 'http://example.com?q=something&v=xyz'

          request.update(query={'v': '123'})
          assert request.url == 'http://example.com?q=something&v=123'

          request.update(url='http://example.com', query={'v': 'xyz'})
          assert request.url == 'http://example.com?v=xyz'

      def test_method(self):
          """The method follows the presence of data until it is set explicitly."""
          request = Request('http://example.com')
          assert request.method == 'GET'

          # Implicit method tracks whether data is set
          request.data = b'test'
          assert request.method == 'POST'
          request.data = None
          assert request.method == 'GET'

          # An explicit method is kept when data changes
          request.data = b'test2'
          request.method = 'PUT'
          assert request.method == 'PUT'
          request.data = None
          assert request.method == 'PUT'

          with pytest.raises(TypeError):
              request.method = 1

      def test_request_helpers(self):
          """HEADRequest and PUTRequest preset their HTTP method."""
          head_request = HEADRequest('http://example.com')
          put_request = PUTRequest('http://example.com')
          assert head_request.method == 'HEAD'
          assert put_request.method == 'PUT'

      def test_headers(self):
          """Headers compare case-insensitively and are stored as an HTTPHeaderDict."""
          request = Request('http://example.com', headers={'tesT': 'test'})
          assert request.headers == HTTPHeaderDict({'test': 'test'})

          request.update(headers={'teSt2': 'test2'})
          assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

          # An HTTPHeaderDict is stored as the very same object
          replacement = HTTPHeaderDict({'test': 'test'})
          request.headers = replacement
          assert request.headers == HTTPHeaderDict({'test': 'test'})
          assert request.headers is replacement

          # test converts dict to case insensitive dict
          plain_dict = {'test2': 'test2'}
          request.headers = plain_dict
          assert isinstance(request.headers, HTTPHeaderDict)
          assert request.headers is not plain_dict

          with pytest.raises(TypeError):
              request.headers = None

      def test_data_type(self):
          """``data`` accepts None, bytes, iterables of bytes and file objects, but not str or dict."""
          request = Request('http://example.com')
          assert request.data is None

          # test bytes is allowed
          request.data = b'test'
          assert request.data == b'test'

          # test iterable of bytes is allowed
          chunks = [b'test', b'test2']
          request.data = chunks
          assert request.data == chunks

          # test file-like object is allowed
          stream = io.BytesIO(b'test')
          request.data = stream
          assert request.data == stream

          # common mistake: test str not allowed
          with pytest.raises(TypeError):
              request.data = 'test'
          assert request.data != 'test'

          # common mistake: test dict is not allowed
          with pytest.raises(TypeError):
              request.data = {'test': 'test'}
          assert request.data != {'test': 'test'}

      def test_content_length_header(self):
          """Content-Length is kept at construction with data, and dropped otherwise."""
          request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
          assert request.headers.get('Content-Length') == '0'

          request.data = b'test'
          assert 'Content-Length' not in request.headers

          request = Request('http://example.com', headers={'Content-Length': '10'})
          assert 'Content-Length' not in request.headers

      def test_content_type_header(self):
          """Content-Type survives data changes, is dropped with the data and defaults when data returns."""
          request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
          assert request.headers.get('Content-Type') == 'test'

          request.data = b'test2'
          assert request.headers.get('Content-Type') == 'test'

          request.data = None
          assert 'Content-Type' not in request.headers

          request.data = b'test3'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_update_req(self):
          """``update(data=b'')`` still counts as having data."""
          request = Request('http://example.com')
          assert request.data is None
          assert request.method == 'GET'
          assert 'Content-Type' not in request.headers

          # Test that zero-byte payloads will be sent
          request.update(data=b'')
          assert request.data == b''
          assert request.method == 'POST'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_proxies(self):
          """Proxies passed to the constructor are exposed unchanged."""
          proxy_map = {'http': 'http://127.0.0.1:8080'}
          request = Request(url='http://example.com', proxies=proxy_map)
          assert request.proxies == {'http': 'http://127.0.0.1:8080'}

      def test_extensions(self):
          """Extensions passed to the constructor are exposed unchanged."""
          request = Request(url='http://example.com', extensions={'timeout': 2})
          assert request.extensions == {'timeout': 2}

      def test_copy(self):
          """``copy`` duplicates the containers, shares ``data`` and keeps the subclass."""
          original = Request(
              url='http://example.com',
              extensions={'cookiejar': CookieJar()},
              headers={'Accept-Encoding': 'br'},
              proxies={'http': 'http://127.0.0.1'},
              data=[b'123']
          )
          duplicate = original.copy()
          assert duplicate is not original
          assert duplicate.url == original.url

          assert duplicate.headers == original.headers
          assert duplicate.headers is not original.headers

          assert duplicate.proxies == original.proxies
          assert duplicate.proxies is not original.proxies

          # Data is not able to be copied
          assert duplicate.data == original.data
          assert duplicate.data is original.data

          # Shallow copy extensions
          assert duplicate.extensions is not original.extensions
          assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

          # Subclasses are copied by default
          class AnotherRequest(Request):
              pass

          subclass_request = AnotherRequest(url='http://127.0.0.1')
          assert isinstance(subclass_request.copy(), AnotherRequest)

      def test_url(self):
          """URLs are IDNA/percent-encoded, scheme-less URLs get http, and None is rejected."""
          request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
          assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

          schemeless = Request(url='//example.com')
          assert schemeless.url == 'http://example.com'

          with pytest.raises(TypeError):
              Request(url='https://').url = None
  class TestResponse:
      """Behavioural tests for the Response object."""

      @pytest.mark.parametrize('reason,status,expected', [
          ('custom', 200, 'custom'),
          (None, 404, 'Not Found'),  # fallback status
          ('', 403, 'Forbidden'),
          (None, 999, None)
      ])
      def test_reason(self, reason, status, expected):
          """A missing or empty reason falls back to the standard phrase for the status."""
          response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
          assert response.reason == expected

      def test_headers(self):
          """Repeated headers are all kept and lookups ignore case."""
          message = Message()
          for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
              message.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=message, url='test://')
          assert response.headers.get_all('test') == ['test', 'test2']
          assert 'Content-Encoding' in response.headers

      def test_get_header(self):
          """``get_header`` joins repeats with ', ', except Set-Cookie which yields the first."""
          message = Message()
          for name, value in (
              ('Set-Cookie', 'cookie1'),
              ('Set-cookie', 'cookie2'),
              ('Test', 'test'),
              ('Test', 'test2'),
          ):
              message.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=message, url='test://')
          assert response.get_header('test') == 'test, test2'
          assert response.get_header('set-Cookie') == 'cookie1'
          assert response.get_header('notexist', 'default') == 'default'

      def test_compat(self):
          """Deprecated urllib-style accessors agree with the current attributes."""
          response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
          with warnings.catch_warnings():
              warnings.simplefilter('ignore', category=DeprecationWarning)
              assert response.code == response.getcode() == response.status
              assert response.geturl() == response.url
              assert response.info() is response.headers
              assert response.getheader('test') == response.get_header('test')
  1219. assert res.getheader('test') == res.get_header('test')