common.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. from __future__ import annotations
  2. import abc
  3. import copy
  4. import enum
  5. import functools
  6. import io
  7. import typing
  8. import urllib.parse
  9. import urllib.request
  10. import urllib.response
  11. from collections.abc import Iterable, Mapping
  12. from email.message import Message
  13. from http import HTTPStatus
  14. from ._helper import make_ssl_context, wrap_request_errors
  15. from .exceptions import (
  16. NoSupportingHandlers,
  17. RequestError,
  18. TransportError,
  19. UnsupportedRequest,
  20. )
  21. from ..compat.types import NoneType
  22. from ..cookies import YoutubeDLCookieJar
  23. from ..utils import (
  24. bug_reports_message,
  25. classproperty,
  26. deprecation_warning,
  27. error_to_str,
  28. update_url_query,
  29. )
  30. from ..utils.networking import HTTPHeaderDict, normalize_url
  31. DEFAULT_TIMEOUT = 20
  32. def register_preference(*handlers: type[RequestHandler]):
  33. assert all(issubclass(handler, RequestHandler) for handler in handlers)
  34. def outer(preference: Preference):
  35. @functools.wraps(preference)
  36. def inner(handler, *args, **kwargs):
  37. if not handlers or isinstance(handler, handlers):
  38. return preference(handler, *args, **kwargs)
  39. return 0
  40. _RH_PREFERENCES.add(inner)
  41. return inner
  42. return outer
  43. class RequestDirector:
  44. """RequestDirector class
  45. Helper class that, when given a request, forward it to a RequestHandler that supports it.
  46. Preference functions in the form of func(handler, request) -> int
  47. can be registered into the `preferences` set. These are used to sort handlers
  48. in order of preference.
  49. @param logger: Logger instance.
  50. @param verbose: Print debug request information to stdout.
  51. """
  52. def __init__(self, logger, verbose=False):
  53. self.handlers: dict[str, RequestHandler] = {}
  54. self.preferences: set[Preference] = set()
  55. self.logger = logger # TODO(Grub4k): default logger
  56. self.verbose = verbose
  57. def close(self):
  58. for handler in self.handlers.values():
  59. handler.close()
  60. self.handlers.clear()
  61. def add_handler(self, handler: RequestHandler):
  62. """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
  63. assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
  64. self.handlers[handler.RH_KEY] = handler
  65. def _get_handlers(self, request: Request) -> list[RequestHandler]:
  66. """Sorts handlers by preference, given a request"""
  67. preferences = {
  68. rh: sum(pref(rh, request) for pref in self.preferences)
  69. for rh in self.handlers.values()
  70. }
  71. self._print_verbose('Handler preferences for this request: {}'.format(', '.join(
  72. f'{rh.RH_NAME}={pref}' for rh, pref in preferences.items())))
  73. return sorted(self.handlers.values(), key=preferences.get, reverse=True)
  74. def _print_verbose(self, msg):
  75. if self.verbose:
  76. self.logger.stdout(f'director: {msg}')
  77. def send(self, request: Request) -> Response:
  78. """
  79. Passes a request onto a suitable RequestHandler
  80. """
  81. if not self.handlers:
  82. raise RequestError('No request handlers configured')
  83. assert isinstance(request, Request)
  84. unexpected_errors = []
  85. unsupported_errors = []
  86. for handler in self._get_handlers(request):
  87. self._print_verbose(f'Checking if "{handler.RH_NAME}" supports this request.')
  88. try:
  89. handler.validate(request)
  90. except UnsupportedRequest as e:
  91. self._print_verbose(
  92. f'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
  93. unsupported_errors.append(e)
  94. continue
  95. self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
  96. try:
  97. response = handler.send(request)
  98. except RequestError:
  99. raise
  100. except Exception as e:
  101. self.logger.error(
  102. f'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
  103. is_error=False)
  104. unexpected_errors.append(e)
  105. continue
  106. assert isinstance(response, Response)
  107. return response
  108. raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
  109. _REQUEST_HANDLERS = {}
  110. def register_rh(handler):
  111. """Register a RequestHandler class"""
  112. assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
  113. assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
  114. _REQUEST_HANDLERS[handler.RH_KEY] = handler
  115. return handler
  116. class Features(enum.Enum):
  117. ALL_PROXY = enum.auto()
  118. NO_PROXY = enum.auto()
  119. class RequestHandler(abc.ABC):
  120. """Request Handler class
  121. Request handlers are class that, given a Request,
  122. process the request from start to finish and return a Response.
  123. Concrete subclasses need to redefine the _send(request) method,
  124. which handles the underlying request logic and returns a Response.
  125. RH_NAME class variable may contain a display name for the RequestHandler.
  126. By default, this is generated from the class name.
  127. The concrete request handler MUST have "RH" as the suffix in the class name.
  128. All exceptions raised by a RequestHandler should be an instance of RequestError.
  129. Any other exception raised will be treated as a handler issue.
  130. If a Request is not supported by the handler, an UnsupportedRequest
  131. should be raised with a reason.
  132. By default, some checks are done on the request in _validate() based on the following class variables:
  133. - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
  134. Any Request with an url scheme not in this list will raise an UnsupportedRequest.
  135. - `_SUPPORTED_PROXY_SCHEMES`: a tuple of support proxy url schemes. Any Request that contains
  136. a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
  137. - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
  138. The above may be set to None to disable the checks.
  139. Parameters:
  140. @param logger: logger instance
  141. @param headers: HTTP Headers to include when sending requests.
  142. @param cookiejar: Cookiejar to use for requests.
  143. @param timeout: Socket timeout to use when sending requests.
  144. @param proxies: Proxies to use for sending requests.
  145. @param source_address: Client-side IP address to bind to for requests.
  146. @param verbose: Print debug request and traffic information to stdout.
  147. @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
  148. @param client_cert: SSL client certificate configuration.
  149. dict with {client_certificate, client_certificate_key, client_certificate_password}
  150. @param verify: Verify SSL certificates
  151. @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
  152. Some configuration options may be available for individual Requests too. In this case,
  153. either the Request configuration option takes precedence or they are merged.
  154. Requests may have additional optional parameters defined as extensions.
  155. RequestHandler subclasses may choose to support custom extensions.
  156. If an extension is supported, subclasses should extend _check_extensions(extensions)
  157. to pop and validate the extension.
  158. - Extensions left in `extensions` are treated as unsupported and UnsupportedRequest will be raised.
  159. The following extensions are defined for RequestHandler:
  160. - `cookiejar`: Cookiejar to use for this request.
  161. - `timeout`: socket timeout to use for this request.
  162. To enable these, add extensions.pop('<extension>', None) to _check_extensions
  163. Apart from the url protocol, proxies dict may contain the following keys:
  164. - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
  165. - `no`: comma seperated list of hostnames (optionally with port) to not use a proxy for.
  166. Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
  167. """
  168. _SUPPORTED_URL_SCHEMES = ()
  169. _SUPPORTED_PROXY_SCHEMES = ()
  170. _SUPPORTED_FEATURES = ()
  171. def __init__(
  172. self, *,
  173. logger, # TODO(Grub4k): default logger
  174. headers: HTTPHeaderDict = None,
  175. cookiejar: YoutubeDLCookieJar = None,
  176. timeout: float | int | None = None,
  177. proxies: dict | None = None,
  178. source_address: str | None = None,
  179. verbose: bool = False,
  180. prefer_system_certs: bool = False,
  181. client_cert: dict[str, str | None] | None = None,
  182. verify: bool = True,
  183. legacy_ssl_support: bool = False,
  184. **_,
  185. ):
  186. self._logger = logger
  187. self.headers = headers or {}
  188. self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
  189. self.timeout = float(timeout or DEFAULT_TIMEOUT)
  190. self.proxies = proxies or {}
  191. self.source_address = source_address
  192. self.verbose = verbose
  193. self.prefer_system_certs = prefer_system_certs
  194. self._client_cert = client_cert or {}
  195. self.verify = verify
  196. self.legacy_ssl_support = legacy_ssl_support
  197. super().__init__()
  198. def _make_sslcontext(self):
  199. return make_ssl_context(
  200. verify=self.verify,
  201. legacy_support=self.legacy_ssl_support,
  202. use_certifi=not self.prefer_system_certs,
  203. **self._client_cert,
  204. )
  205. def _merge_headers(self, request_headers):
  206. return HTTPHeaderDict(self.headers, request_headers)
  207. def _calculate_timeout(self, request):
  208. return float(request.extensions.get('timeout') or self.timeout)
  209. def _get_cookiejar(self, request):
  210. return request.extensions.get('cookiejar') or self.cookiejar
  211. def _get_proxies(self, request):
  212. return (request.proxies or self.proxies).copy()
  213. def _check_url_scheme(self, request: Request):
  214. scheme = urllib.parse.urlparse(request.url).scheme.lower()
  215. if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
  216. raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
  217. return scheme # for further processing
  218. def _check_proxies(self, proxies):
  219. for proxy_key, proxy_url in proxies.items():
  220. if proxy_url is None:
  221. continue
  222. if proxy_key == 'no':
  223. if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
  224. raise UnsupportedRequest('"no" proxy is not supported')
  225. continue
  226. if (
  227. proxy_key == 'all'
  228. and self._SUPPORTED_FEATURES is not None
  229. and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
  230. ):
  231. raise UnsupportedRequest('"all" proxy is not supported')
  232. # Unlikely this handler will use this proxy, so ignore.
  233. # This is to allow a case where a proxy may be set for a protocol
  234. # for one handler in which such protocol (and proxy) is not supported by another handler.
  235. if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
  236. continue
  237. if self._SUPPORTED_PROXY_SCHEMES is None:
  238. # Skip proxy scheme checks
  239. continue
  240. try:
  241. if urllib.request._parse_proxy(proxy_url)[0] is None:
  242. # Scheme-less proxies are not supported
  243. raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
  244. except ValueError as e:
  245. # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
  246. raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
  247. scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
  248. if scheme not in self._SUPPORTED_PROXY_SCHEMES:
  249. raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')
  250. def _check_extensions(self, extensions):
  251. """Check extensions for unsupported extensions. Subclasses should extend this."""
  252. assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType))
  253. assert isinstance(extensions.get('timeout'), (float, int, NoneType))
  254. def _validate(self, request):
  255. self._check_url_scheme(request)
  256. self._check_proxies(request.proxies or self.proxies)
  257. extensions = request.extensions.copy()
  258. self._check_extensions(extensions)
  259. if extensions:
  260. # TODO: add support for optional extensions
  261. raise UnsupportedRequest(f'Unsupported extensions: {", ".join(extensions.keys())}')
  262. @wrap_request_errors
  263. def validate(self, request: Request):
  264. if not isinstance(request, Request):
  265. raise TypeError('Expected an instance of Request')
  266. self._validate(request)
  267. @wrap_request_errors
  268. def send(self, request: Request) -> Response:
  269. if not isinstance(request, Request):
  270. raise TypeError('Expected an instance of Request')
  271. return self._send(request)
  272. @abc.abstractmethod
  273. def _send(self, request: Request):
  274. """Handle a request from start to finish. Redefine in subclasses."""
  275. pass
  276. def close(self): # noqa: B027
  277. pass
  278. @classproperty
  279. def RH_NAME(cls):
  280. return cls.__name__[:-2]
  281. @classproperty
  282. def RH_KEY(cls):
  283. assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
  284. return cls.__name__[:-2]
  285. def __enter__(self):
  286. return self
  287. def __exit__(self, *args):
  288. self.close()
  289. class Request:
  290. """
  291. Represents a request to be made.
  292. Partially backwards-compatible with urllib.request.Request.
  293. @param url: url to send. Will be sanitized.
  294. @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
  295. @param headers: headers to send.
  296. @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
  297. @param query: URL query parameters to update the url with.
  298. @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
  299. @param extensions: Dictionary of Request extensions to add, as supported by handlers.
  300. """
  301. def __init__(
  302. self,
  303. url: str,
  304. data: RequestData = None,
  305. headers: typing.Mapping | None = None,
  306. proxies: dict | None = None,
  307. query: dict | None = None,
  308. method: str | None = None,
  309. extensions: dict | None = None,
  310. ):
  311. self._headers = HTTPHeaderDict()
  312. self._data = None
  313. if query:
  314. url = update_url_query(url, query)
  315. self.url = url
  316. self.method = method
  317. if headers:
  318. self.headers = headers
  319. self.data = data # note: must be done after setting headers
  320. self.proxies = proxies or {}
  321. self.extensions = extensions or {}
  322. @property
  323. def url(self):
  324. return self._url
  325. @url.setter
  326. def url(self, url):
  327. if not isinstance(url, str):
  328. raise TypeError('url must be a string')
  329. elif url.startswith('//'):
  330. url = 'http:' + url
  331. self._url = normalize_url(url)
  332. @property
  333. def method(self):
  334. return self._method or ('POST' if self.data is not None else 'GET')
  335. @method.setter
  336. def method(self, method):
  337. if method is None:
  338. self._method = None
  339. elif isinstance(method, str):
  340. self._method = method.upper()
  341. else:
  342. raise TypeError('method must be a string')
  343. @property
  344. def data(self):
  345. return self._data
  346. @data.setter
  347. def data(self, data: RequestData):
  348. # Try catch some common mistakes
  349. if data is not None and (
  350. not isinstance(data, (bytes, io.IOBase, Iterable)) or isinstance(data, (str, Mapping))
  351. ):
  352. raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
  353. if data == self._data and self._data is None:
  354. self.headers.pop('Content-Length', None)
  355. # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
  356. if data != self._data:
  357. if self._data is not None:
  358. self.headers.pop('Content-Length', None)
  359. self._data = data
  360. if self._data is None:
  361. self.headers.pop('Content-Type', None)
  362. if 'Content-Type' not in self.headers and self._data is not None:
  363. self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
  364. @property
  365. def headers(self) -> HTTPHeaderDict:
  366. return self._headers
  367. @headers.setter
  368. def headers(self, new_headers: Mapping):
  369. """Replaces headers of the request. If not a HTTPHeaderDict, it will be converted to one."""
  370. if isinstance(new_headers, HTTPHeaderDict):
  371. self._headers = new_headers
  372. elif isinstance(new_headers, Mapping):
  373. self._headers = HTTPHeaderDict(new_headers)
  374. else:
  375. raise TypeError('headers must be a mapping')
  376. def update(self, url=None, data=None, headers=None, query=None, extensions=None):
  377. self.data = data if data is not None else self.data
  378. self.headers.update(headers or {})
  379. self.extensions.update(extensions or {})
  380. self.url = update_url_query(url or self.url, query or {})
  381. def copy(self):
  382. return self.__class__(
  383. url=self.url,
  384. headers=copy.deepcopy(self.headers),
  385. proxies=copy.deepcopy(self.proxies),
  386. data=self._data,
  387. extensions=copy.copy(self.extensions),
  388. method=self._method,
  389. )
  390. HEADRequest = functools.partial(Request, method='HEAD')
  391. PUTRequest = functools.partial(Request, method='PUT')
  392. class Response(io.IOBase):
  393. """
  394. Base class for HTTP response adapters.
  395. By default, it provides a basic wrapper for a file-like response object.
  396. Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
  397. @param fp: Original, file-like, response.
  398. @param url: URL that this is a response of.
  399. @param headers: response headers.
  400. @param status: Response HTTP status code. Default is 200 OK.
  401. @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
  402. @param extensions: Dictionary of handler-specific response extensions.
  403. """
  404. def __init__(
  405. self,
  406. fp: io.IOBase,
  407. url: str,
  408. headers: Mapping[str, str],
  409. status: int = 200,
  410. reason: str | None = None,
  411. extensions: dict | None = None,
  412. ):
  413. self.fp = fp
  414. self.headers = Message()
  415. for name, value in headers.items():
  416. self.headers.add_header(name, value)
  417. self.status = status
  418. self.url = url
  419. try:
  420. self.reason = reason or HTTPStatus(status).phrase
  421. except ValueError:
  422. self.reason = None
  423. self.extensions = extensions or {}
  424. def readable(self):
  425. return self.fp.readable()
  426. def read(self, amt: int | None = None) -> bytes:
  427. # Expected errors raised here should be of type RequestError or subclasses.
  428. # Subclasses should redefine this method with more precise error handling.
  429. try:
  430. return self.fp.read(amt)
  431. except Exception as e:
  432. raise TransportError(cause=e) from e
  433. def close(self):
  434. self.fp.close()
  435. return super().close()
  436. def get_header(self, name, default=None):
  437. """Get header for name.
  438. If there are multiple matching headers, return all seperated by comma."""
  439. headers = self.headers.get_all(name)
  440. if not headers:
  441. return default
  442. if name.title() == 'Set-Cookie':
  443. # Special case, only get the first one
  444. # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
  445. return headers[0]
  446. return ', '.join(headers)
  447. # The following methods are for compatability reasons and are deprecated
  448. @property
  449. def code(self):
  450. deprecation_warning('Response.code is deprecated, use Response.status', stacklevel=2)
  451. return self.status
  452. def getcode(self):
  453. deprecation_warning('Response.getcode() is deprecated, use Response.status', stacklevel=2)
  454. return self.status
  455. def geturl(self):
  456. deprecation_warning('Response.geturl() is deprecated, use Response.url', stacklevel=2)
  457. return self.url
  458. def info(self):
  459. deprecation_warning('Response.info() is deprecated, use Response.headers', stacklevel=2)
  460. return self.headers
  461. def getheader(self, name, default=None):
  462. deprecation_warning('Response.getheader() is deprecated, use Response.get_header', stacklevel=2)
  463. return self.get_header(name, default)
  464. if typing.TYPE_CHECKING:
  465. RequestData = bytes | Iterable[bytes] | typing.IO | None
  466. Preference = typing.Callable[[RequestHandler, Request], int]
  467. _RH_PREFERENCES: set[Preference] = set()