123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- from __future__ import annotations
- import contextlib
- import functools
- import http.client
- import logging
- import re
- import socket
- import warnings
- from ..dependencies import brotli, requests, urllib3
- from ..utils import bug_reports_message, int_or_none, variadic
- from ..utils.networking import normalize_url
# Refuse to import this module when an optional dependency is missing
# or older than the minimum version supported here.
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Parsed urllib3 version as a tuple of ints, compared against version tuples below.
urllib3_version = tuple(
    int_or_none(component, default=0)
    for component in urllib3.__version__.split('.'))

if not urllib3_version >= (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests exposes its version as a packed integer in `__build__`
if not requests.__build__ >= 0x023202:
    raise ImportError('Only requests >= 2.32.2 is supported')
- import requests.adapters
- import requests.utils
- import urllib3.connection
- import urllib3.exceptions
- import urllib3.util
- from ._helper import (
- InstanceStoreMixin,
- add_accept_encoding_header,
- create_connection,
- create_socks_proxy_socket,
- get_redirect_method,
- make_socks_proxy_opts,
- select_proxy,
- )
- from .common import (
- Features,
- RequestHandler,
- Response,
- register_preference,
- register_rh,
- )
- from .exceptions import (
- CertificateVerifyError,
- HTTPError,
- IncompleteRead,
- ProxyError,
- RequestError,
- SSLError,
- TransportError,
- )
- from ..socks import ProxyError as SocksProxyError
# Content encodings offered by this handler; 'br' is included only
# when the optional brotli dependency is available.
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
if brotli is not None:
    SUPPORTED_ENCODINGS += ['br']
- '''
- Override urllib3's behavior to not convert lower-case percent-encoded characters
- to upper-case during url normalization process.
- RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
- and normalizers should convert them to uppercase for consistency [1].
- However, some sites may have an incorrect implementation where they provide
- a percent-encoded url that is then compared case-sensitively.[2]
- While this is a very rare case, since urllib does not do this normalization step, it
- is best to avoid it in requests too for compatibility reasons.
- 1: https://tools.ietf.org/html/rfc3986#section-2.1
- 2: https://github.com/streamlink/streamlink/pull/4003
- '''
class Urllib3PercentREOverride:
    """Proxy around a compiled pattern whose `subn` leaves the string untouched.

    Every attribute other than `subn` is forwarded to the wrapped pattern,
    so the object can stand in for the original `re.Pattern`.
    """

    def __init__(self, r: re.Pattern):
        """Wrap the compiled pattern `r`."""
        self.re = r

    # pass through all other attribute calls to the original re
    def __getattr__(self, item):
        # `__getattr__` only runs when normal lookup failed. If `re` itself is
        # missing (e.g. on an instance created via `__new__` by `copy`),
        # reading `self.re` below would re-enter this method forever.
        if item == 're':
            raise AttributeError(item)
        return getattr(self.re, item)

    def subn(self, repl, string, *args, **kwargs):
        """Return `(string, count)`: the input unchanged plus the real match count.

        The substitution is still performed on the wrapped pattern to obtain
        the count, but its resulting string is discarded.
        """
        return string, self.re.subn(repl, string, *args, **kwargs)[1]
# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url

# Wrap whichever pattern attribute exists, checking `PERCENT_RE` first;
# `_PERCENT_RE` is the name used by urllib3 >= 2.0.0.
for _percent_re_name in ('PERCENT_RE', '_PERCENT_RE'):
    if hasattr(urllib3.util.url, _percent_re_name):
        setattr(
            urllib3.util.url, _percent_re_name,
            Urllib3PercentREOverride(getattr(urllib3.util.url, _percent_re_name)))
        break
else:
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
# Do not leave the loop variable behind in the module namespace
del _percent_re_name
- '''
- Workaround for issue in urllib3.util.ssl_.py: ssl_wrap_socket does not pass
- server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
- however this is an issue because we set check_hostname to True in our SSLContext.
- Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.
- This has been fixed in urllib3 2.0+.
- See: https://github.com/urllib3/urllib3/issues/517
- '''
# Only applied to urllib3 1.x; best-effort, so any failure is ignored.
if (2, 0, 0) > urllib3_version:
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = True
        urllib3.util.ssl_.IS_SECURETRANSPORT = True
# By default, Requests does not handle no_proxy automatically when given
# a proxy dict, because that handling is buggy [1].
# Replace the `select_proxy` used by `requests.adapters` with our own.
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
class RequestsResponseAdapter(Response):
    """Wrap a `requests` response in our `Response` type.

    Reads go straight to the underlying urllib3 response (`res.raw`), and
    urllib3 exceptions raised while reading are translated into ours.
    """

    def __init__(self, res: requests.models.Response):
        response_fields = {
            'fp': res.raw,
            'headers': res.headers,
            'url': res.url,
            'status': res.status_code,
            'reason': res.reason,
        }
        super().__init__(**response_fields)
        # Keep a reference to the original requests response
        self._requests_response = res

    def read(self, amt: int | None = None):
        """Read up to `amt` bytes of decoded content from the urllib3 response.

        Raises:
            SSLError: on `urllib3.exceptions.SSLError`.
            IncompleteRead: when a `ProtocolError` carries an `http.client.IncompleteRead`.
            TransportError: on any other urllib3 `ProtocolError` / `HTTPError`.
        """
        try:
            # Interact with urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            for candidate in (e.__context__, e.__cause__, *variadic(e.args)):
                if not isinstance(candidate, http.client.IncompleteRead):
                    continue
                # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                received = candidate.partial
                if not isinstance(received, int):
                    received = len(received)
                raise IncompleteRead(partial=received, expected=candidate.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTPAdapter that injects our SSL context and source address into its pool managers."""

    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        """
        @param ssl_context        SSLContext passed to every pool manager (if truthy)
        @param proxy_ssl_context  SSLContext for non-SOCKS proxies; falls back to `ssl_context`
        @param source_address     Local address to bind to (port 0 is used)
        @param kwargs             Forwarded to `HTTPAdapter.__init__`
        """
        pool_manager_args = {}
        if ssl_context:
            pool_manager_args['ssl_context'] = ssl_context
        if source_address:
            pool_manager_args['source_address'] = (source_address, 0)
        # These must be set before the parent constructor runs
        self._pm_args = pool_manager_args
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with our extra arguments appended."""
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Create the proxy manager for `proxy` with our extra arguments appended."""
        is_socks = proxy.lower().startswith('socks')
        extra_kwargs = {}
        # `proxy_ssl_context` is only passed for non-SOCKS proxies
        if not is_socks and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    def cert_verify(*args, **kwargs):
        """Do nothing."""
        return None

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        """Return a connection pool for `request.url`, via a proxy manager if one is selected."""
        target_url = urllib3.util.parse_url(request.url).url
        proxy = select_proxy(target_url, proxies)
        if not proxy:
            return self.poolmanager.connection_from_url(target_url)
        return self.proxy_manager_for(proxy).connection_from_url(target_url)
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        """Set the redirected request's method via `get_redirect_method` and normalize its URL."""
        redirect_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        # When the method is kept, pretend the status was 308 and stash the real one.
        method_kept = redirect_method == prepared_request.method
        if method_kept:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = redirect_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        """Restore the real status code (if it was swapped) before delegating to requests."""
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        try:
            real_status = response._real_status_code
        except AttributeError:
            pass
        else:
            response.status_code = real_status
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
class Urllib3LoggingFilter(logging.Filter):
    """Drop log records whose message is the HTTP request format string."""

    # Unformatted message template of the records to drop
    _REQUEST_LOG_FORMAT = '%s://%s:%s "%s %s %s" %s %s'

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        return record.msg != self._REQUEST_LOG_FORMAT
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        """
        @param logger  Object providing `error(msg)` and `stdout(msg)`
        Remaining arguments are forwarded to `logging.Handler`.
        """
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        """Format `record` and forward it: ERROR and above to `error`, the rest to `stdout`."""
        try:
            text = self.format(record)
            is_error = record.levelno >= logging.ERROR
            forward = self._logger.error if is_error else self._logger.stdout
            forward(text)
        except Exception:
            # Standard logging.Handler convention for failures inside emit()
            self.handleError(record)
@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):
    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        """Initialise the handler and hook urllib3's logger up to ours."""
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        urllib3_logger = logging.getLogger('urllib3')
        handler = Urllib3LoggingHandler(logger=self._logger)
        handler.setFormatter(logging.Formatter('requests: %(message)s'))
        handler.addFilter(Urllib3LoggingFilter())
        self.__logging_handler = handler
        urllib3_logger.addHandler(handler)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            urllib3_logger.setLevel(logging.DEBUG)
        else:
            # TODO: Use a logger filter to suppress pool reuse warning instead
            urllib3_logger.setLevel(logging.ERROR)

        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        """Clear stored instances and detach our handler from the urllib3 logger."""
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        """Run the parent check, then remove the `cookiejar` and `timeout` extensions."""
        super()._check_extensions(extensions)
        for handled in ('cookiejar', 'timeout'):
            extensions.pop(handled, None)

    def _create_instance(self, cookiejar):
        """Build a `RequestsSession` that uses our adapter and the given cookiejar."""
        session = RequestsSession()
        adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        # Replace the default adapters and headers with our own
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        for prefix in ('https://', 'http://'):
            session.mount(prefix, adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):
        """Send `request` through a requests session and return the adapted response.

        Raises:
            CertificateVerifyError, SSLError, ProxyError, TransportError,
            RequestError: translated from requests/urllib3 exceptions.
            HTTPError: when the final status is outside 200-299
                (with `redirect_loop=True` after too many redirects).
        """
        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        hit_redirect_limit = False
        session = self._get_instance(cookiejar=self._get_cookiejar(request))

        try:
            raw_response = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            # Continue with the response attached to the exception
            hit_redirect_limit = True
            raw_response = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessary be network related e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(raw_response)
        if 200 <= res.status < 300:
            return res
        raise HTTPError(res, redirect_loop=hit_redirect_limit)
@register_preference(RequestsRH)
def requests_preference(rh, request):
    """Return the constant preference value registered for `RequestsRH`."""
    preference = 100
    return preference
# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    """urllib3 HTTP connection whose socket is opened through a SOCKS proxy."""

    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        """Connect to the proxy and return the resulting socket.

        Raises:
            urllib3.exceptions.ConnectTimeoutError: on a socket timeout.
            urllib3.exceptions.ProxyError: on a SOCKS proxy error.
            urllib3.exceptions.NewConnectionError: on any other OSError.
        """
        try:
            proxy_opts = self._proxy_args
            # The socket factory receives the final destination and the proxy options
            socket_factory = functools.partial(
                create_socks_proxy_socket, (self.host, self.port), proxy_opts)
            return create_connection(
                address=(proxy_opts['addr'], proxy_opts['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=socket_factory)
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    """Combine `SocksHTTPConnection` with urllib3's `HTTPSConnection`; adds nothing of its own."""
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    """HTTP connection pool whose connection class is `SocksHTTPConnection`."""

    ConnectionCls = SocksHTTPConnection
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    """HTTPS connection pool whose connection class is `SocksHTTPSConnection`."""

    ConnectionCls = SocksHTTPSConnection
class SocksProxyManager(urllib3.PoolManager):
    """Pool manager that uses the SOCKS connection pools for both http and https."""

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        """
        @param socks_proxy         Proxy specification passed to `make_socks_proxy_opts`
        @param username, password  Accepted but not used here
        @param num_pools, headers  Forwarded positionally to `PoolManager`
        @param connection_pool_kw  Forwarded to `PoolManager`, plus `_socks_options`
        """
        connection_pool_kw.update(_socks_options=make_socks_proxy_opts(socks_proxy))
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = dict(
            http=SocksHTTPConnectionPool,
            https=SocksHTTPSConnectionPool,
        )


# Replace the `SOCKSProxyManager` attribute on `requests.adapters` with ours
requests.adapters.SOCKSProxyManager = SocksProxyManager
|