client.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815
  1. # -*- test-case-name: twisted.web.test.test_webclient,twisted.web.test.test_agent -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. HTTP client.
  6. """
  7. from __future__ import annotations
  8. import collections
  9. import os
  10. import warnings
  11. import zlib
  12. from dataclasses import dataclass
  13. from functools import wraps
  14. from http.cookiejar import CookieJar
  15. from typing import TYPE_CHECKING, Iterable, Optional
  16. from urllib.parse import urldefrag, urljoin, urlunparse as _urlunparse
  17. from zope.interface import implementer
  18. from incremental import Version
  19. from twisted.internet import defer, protocol, task
  20. from twisted.internet.abstract import isIPv6Address
  21. from twisted.internet.defer import Deferred
  22. from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
  23. from twisted.internet.interfaces import IOpenSSLContextFactory, IProtocol
  24. from twisted.logger import Logger
  25. from twisted.python.compat import nativeString, networkString
  26. from twisted.python.components import proxyForInterface
  27. from twisted.python.deprecate import (
  28. deprecatedModuleAttribute,
  29. getDeprecationWarningString,
  30. )
  31. from twisted.python.failure import Failure
  32. from twisted.web import error, http
  33. from twisted.web._newclient import _ensureValidMethod, _ensureValidURI
  34. from twisted.web.http_headers import Headers
  35. from twisted.web.iweb import (
  36. UNKNOWN_LENGTH,
  37. IAgent,
  38. IAgentEndpointFactory,
  39. IBodyProducer,
  40. IPolicyForHTTPS,
  41. IResponse,
  42. )
  43. # For the purpose of type-checking we want our faked-out types to be identical to the types they are replacing.
  44. # For the purpose of the implementation, we want to start
  45. # with a blank slate so that we don't accidentally use
  46. # any of the real implementation.
  47. if TYPE_CHECKING:
  48. from email.message import EmailMessage as _InfoType
  49. from http.client import HTTPResponse as _ResponseBase
  50. from urllib.request import Request as _RequestBase
  51. else:
  52. _RequestBase = object
  53. _ResponseBase = object
  54. _InfoType = object
  55. def urlunparse(parts):
  56. result = _urlunparse(tuple(p.decode("charmap") for p in parts))
  57. return result.encode("charmap")
  58. class PartialDownloadError(error.Error):
  59. """
  60. Page was only partially downloaded, we got disconnected in middle.
  61. @ivar response: All of the response body which was downloaded.
  62. """
  63. class URI:
  64. """
  65. A URI object.
  66. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21}
  67. """
  68. def __init__(self, scheme, netloc, host, port, path, params, query, fragment):
  69. """
  70. @type scheme: L{bytes}
  71. @param scheme: URI scheme specifier.
  72. @type netloc: L{bytes}
  73. @param netloc: Network location component.
  74. @type host: L{bytes}
  75. @param host: Host name. For IPv6 address literals the brackets are
  76. stripped.
  77. @type port: L{int}
  78. @param port: Port number.
  79. @type path: L{bytes}
  80. @param path: Hierarchical path.
  81. @type params: L{bytes}
  82. @param params: Parameters for last path segment.
  83. @type query: L{bytes}
  84. @param query: Query string.
  85. @type fragment: L{bytes}
  86. @param fragment: Fragment identifier.
  87. """
  88. self.scheme = scheme
  89. self.netloc = netloc
  90. self.host = host.strip(b"[]")
  91. self.port = port
  92. self.path = path
  93. self.params = params
  94. self.query = query
  95. self.fragment = fragment
  96. @classmethod
  97. def fromBytes(cls, uri, defaultPort=None):
  98. """
  99. Parse the given URI into a L{URI}.
  100. @type uri: C{bytes}
  101. @param uri: URI to parse.
  102. @type defaultPort: C{int} or L{None}
  103. @param defaultPort: An alternate value to use as the port if the URI
  104. does not include one.
  105. @rtype: L{URI}
  106. @return: Parsed URI instance.
  107. """
  108. uri = uri.strip()
  109. scheme, netloc, path, params, query, fragment = http.urlparse(uri)
  110. if defaultPort is None:
  111. if scheme == b"https":
  112. defaultPort = 443
  113. else:
  114. defaultPort = 80
  115. if b":" in netloc:
  116. host, port = netloc.rsplit(b":", 1)
  117. try:
  118. port = int(port)
  119. except ValueError:
  120. host, port = netloc, defaultPort
  121. else:
  122. host, port = netloc, defaultPort
  123. return cls(scheme, netloc, host, port, path, params, query, fragment)
  124. def toBytes(self):
  125. """
  126. Assemble the individual parts of the I{URI} into a fully formed I{URI}.
  127. @rtype: C{bytes}
  128. @return: A fully formed I{URI}.
  129. """
  130. return urlunparse(
  131. (
  132. self.scheme,
  133. self.netloc,
  134. self.path,
  135. self.params,
  136. self.query,
  137. self.fragment,
  138. )
  139. )
  140. @property
  141. def originForm(self):
  142. """
  143. The absolute I{URI} path including I{URI} parameters, query string and
  144. fragment identifier.
  145. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21#section-5.3}
  146. @return: The absolute path in original form.
  147. @rtype: L{bytes}
  148. """
  149. # The HTTP bis draft says the origin form should not include the
  150. # fragment.
  151. path = urlunparse((b"", b"", self.path, self.params, self.query, b""))
  152. if path == b"":
  153. path = b"/"
  154. return path
  155. def _urljoin(base, url):
  156. """
  157. Construct a full ("absolute") URL by combining a "base URL" with another
  158. URL. Informally, this uses components of the base URL, in particular the
  159. addressing scheme, the network location and (part of) the path, to provide
  160. missing components in the relative URL.
  161. Additionally, the fragment identifier is preserved according to the HTTP
  162. 1.1 bis draft.
  163. @type base: C{bytes}
  164. @param base: Base URL.
  165. @type url: C{bytes}
  166. @param url: URL to combine with C{base}.
  167. @return: An absolute URL resulting from the combination of C{base} and
  168. C{url}.
  169. @see: L{urllib.parse.urljoin()}
  170. @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
  171. """
  172. base, baseFrag = urldefrag(base)
  173. url, urlFrag = urldefrag(urljoin(base, url))
  174. return urljoin(url, b"#" + (urlFrag or baseFrag))
  175. def _makeGetterFactory(url, factoryFactory, contextFactory=None, *args, **kwargs):
  176. """
  177. Create and connect an HTTP page getting factory.
  178. Any additional positional or keyword arguments are used when calling
  179. C{factoryFactory}.
  180. @param factoryFactory: Factory factory that is called with C{url}, C{args}
  181. and C{kwargs} to produce the getter
  182. @param contextFactory: Context factory to use when creating a secure
  183. connection, defaulting to L{None}
  184. @return: The factory created by C{factoryFactory}
  185. """
  186. uri = URI.fromBytes(_ensureValidURI(url.strip()))
  187. factory = factoryFactory(url, *args, **kwargs)
  188. from twisted.internet import reactor
  189. if uri.scheme == b"https":
  190. from twisted.internet import ssl
  191. if contextFactory is None:
  192. contextFactory = ssl.ClientContextFactory()
  193. reactor.connectSSL(nativeString(uri.host), uri.port, factory, contextFactory)
  194. else:
  195. reactor.connectTCP(nativeString(uri.host), uri.port, factory)
  196. return factory
  197. # The code which follows is based on the new HTTP client implementation. It
  198. # should be significantly better than anything above, though it is not yet
  199. # feature equivalent.
  200. from twisted.web._newclient import (
  201. HTTP11ClientProtocol,
  202. PotentialDataLoss,
  203. Request,
  204. RequestGenerationFailed,
  205. RequestNotSent,
  206. RequestTransmissionFailed,
  207. Response,
  208. ResponseDone,
  209. ResponseFailed,
  210. ResponseNeverReceived,
  211. _WrapperException,
  212. )
  213. from twisted.web.error import SchemeNotSupported
  214. try:
  215. from OpenSSL import SSL
  216. except ImportError:
  217. SSL = None # type: ignore[assignment]
  218. else:
  219. from twisted.internet.ssl import (
  220. CertificateOptions,
  221. optionsForClientTLS,
  222. platformTrust,
  223. )
  224. def _requireSSL(decoratee):
  225. """
  226. The decorated method requires pyOpenSSL to be present, or it raises
  227. L{NotImplementedError}.
  228. @param decoratee: A function which requires pyOpenSSL.
  229. @type decoratee: L{callable}
  230. @return: A function which raises L{NotImplementedError} if pyOpenSSL is not
  231. installed; otherwise, if it is installed, simply return C{decoratee}.
  232. @rtype: L{callable}
  233. """
  234. if SSL is None:
  235. @wraps(decoratee)
  236. def raiseNotImplemented(*a, **kw):
  237. """
  238. pyOpenSSL is not available.
  239. @param a: The positional arguments for C{decoratee}.
  240. @param kw: The keyword arguments for C{decoratee}.
  241. @raise NotImplementedError: Always.
  242. """
  243. raise NotImplementedError("SSL support unavailable")
  244. return raiseNotImplemented
  245. return decoratee
  246. class WebClientContextFactory:
  247. """
  248. This class is deprecated. Please simply use L{Agent} as-is, or if you want
  249. to customize something, use L{BrowserLikePolicyForHTTPS}.
  250. A L{WebClientContextFactory} is an HTTPS policy which totally ignores the
  251. hostname and port. It performs basic certificate verification, however the
  252. lack of validation of service identity (e.g. hostname validation) means it
  253. is still vulnerable to man-in-the-middle attacks. Don't use it any more.
  254. """
  255. def _getCertificateOptions(self, hostname, port):
  256. """
  257. Return a L{CertificateOptions}.
  258. @param hostname: ignored
  259. @param port: ignored
  260. @return: A new CertificateOptions instance.
  261. @rtype: L{CertificateOptions}
  262. """
  263. return CertificateOptions(method=SSL.SSLv23_METHOD, trustRoot=platformTrust())
  264. @_requireSSL
  265. def getContext(self, hostname, port):
  266. """
  267. Return an L{OpenSSL.SSL.Context}.
  268. @param hostname: ignored
  269. @param port: ignored
  270. @return: A new SSL context.
  271. @rtype: L{OpenSSL.SSL.Context}
  272. """
  273. return self._getCertificateOptions(hostname, port).getContext()
  274. @implementer(IPolicyForHTTPS)
  275. class BrowserLikePolicyForHTTPS:
  276. """
  277. SSL connection creator for web clients.
  278. """
  279. def __init__(self, trustRoot=None):
  280. self._trustRoot = trustRoot
  281. @_requireSSL
  282. def creatorForNetloc(self, hostname, port):
  283. """
  284. Create a L{client connection creator
  285. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a
  286. given network location.
  287. @param hostname: The hostname part of the URI.
  288. @type hostname: L{bytes}
  289. @param port: The port part of the URI.
  290. @type port: L{int}
  291. @return: a connection creator with appropriate verification
  292. restrictions set
  293. @rtype: L{client connection creator
  294. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>}
  295. """
  296. return optionsForClientTLS(hostname.decode("ascii"), trustRoot=self._trustRoot)
# Mark the module attribute WebClientContextFactory as deprecated since
# Twisted 14.0.0, naming BrowserLikePolicyForHTTPS as its replacement.  Only
# the second "; "-separated piece of the generated deprecation string is
# handed over as the message.
deprecatedModuleAttribute(
    Version("Twisted", 14, 0, 0),
    getDeprecationWarningString(
        WebClientContextFactory,
        Version("Twisted", 14, 0, 0),
        replacement=BrowserLikePolicyForHTTPS,
    ).split("; ")[1],
    WebClientContextFactory.__module__,
    WebClientContextFactory.__name__,
)
  307. @implementer(IPolicyForHTTPS)
  308. class HostnameCachingHTTPSPolicy:
  309. """
  310. IPolicyForHTTPS that wraps a L{IPolicyForHTTPS} and caches the created
  311. L{IOpenSSLClientConnectionCreator}.
  312. This policy will cache up to C{cacheSize}
  313. L{client connection creators <twisted.internet.interfaces.
  314. IOpenSSLClientConnectionCreator>} for reuse in subsequent requests to
  315. the same hostname.
  316. @ivar _policyForHTTPS: See C{policyforHTTPS} parameter of L{__init__}.
  317. @ivar _cache: A cache associating hostnames to their
  318. L{client connection creators <twisted.internet.interfaces.
  319. IOpenSSLClientConnectionCreator>}.
  320. @type _cache: L{collections.OrderedDict}
  321. @ivar _cacheSize: See C{cacheSize} parameter of L{__init__}.
  322. @since: Twisted 19.2.0
  323. """
  324. def __init__(self, policyforHTTPS, cacheSize=20):
  325. """
  326. @param policyforHTTPS: The IPolicyForHTTPS to wrap.
  327. @type policyforHTTPS: L{IPolicyForHTTPS}
  328. @param cacheSize: The maximum size of the hostname cache.
  329. @type cacheSize: L{int}
  330. """
  331. self._policyForHTTPS = policyforHTTPS
  332. self._cache = collections.OrderedDict()
  333. self._cacheSize = cacheSize
  334. def creatorForNetloc(self, hostname, port):
  335. """
  336. Create a L{client connection creator
  337. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a
  338. given network location and cache it for future use.
  339. @param hostname: The hostname part of the URI.
  340. @type hostname: L{bytes}
  341. @param port: The port part of the URI.
  342. @type port: L{int}
  343. @return: a connection creator with appropriate verification
  344. restrictions set
  345. @rtype: L{client connection creator
  346. <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>}
  347. """
  348. host = hostname.decode("ascii")
  349. try:
  350. creator = self._cache.pop(host)
  351. except KeyError:
  352. creator = self._policyForHTTPS.creatorForNetloc(hostname, port)
  353. self._cache[host] = creator
  354. if len(self._cache) > self._cacheSize:
  355. self._cache.popitem(last=False)
  356. return creator
  357. @implementer(IOpenSSLContextFactory)
  358. class _ContextFactoryWithContext:
  359. """
  360. A L{_ContextFactoryWithContext} is like a
  361. L{twisted.internet.ssl.ContextFactory} with a pre-created context.
  362. @ivar _context: A Context.
  363. @type _context: L{OpenSSL.SSL.Context}
  364. """
  365. def __init__(self, context):
  366. """
  367. Initialize a L{_ContextFactoryWithContext} with a context.
  368. @param context: An SSL context.
  369. @type context: L{OpenSSL.SSL.Context}
  370. """
  371. self._context = context
  372. def getContext(self):
  373. """
  374. Return the context created by
  375. L{_DeprecatedToCurrentPolicyForHTTPS._webContextFactory}.
  376. @return: A context.
  377. @rtype: L{OpenSSL.SSL.Context}
  378. """
  379. return self._context
  380. @implementer(IPolicyForHTTPS)
  381. class _DeprecatedToCurrentPolicyForHTTPS:
  382. """
  383. Adapt a web context factory to a normal context factory.
  384. @ivar _webContextFactory: An object providing a getContext method with
  385. C{hostname} and C{port} arguments.
  386. @type _webContextFactory: L{WebClientContextFactory} (or object with a
  387. similar C{getContext} method).
  388. """
  389. def __init__(self, webContextFactory):
  390. """
  391. Wrap a web context factory in an L{IPolicyForHTTPS}.
  392. @param webContextFactory: An object providing a getContext method with
  393. C{hostname} and C{port} arguments.
  394. @type webContextFactory: L{WebClientContextFactory} (or object with a
  395. similar C{getContext} method).
  396. """
  397. self._webContextFactory = webContextFactory
  398. def creatorForNetloc(self, hostname, port):
  399. """
  400. Called the wrapped web context factory's C{getContext} method with a
  401. hostname and port number and return the resulting context object.
  402. @param hostname: The hostname part of the URI.
  403. @type hostname: L{bytes}
  404. @param port: The port part of the URI.
  405. @type port: L{int}
  406. @return: A context factory.
  407. @rtype: L{IOpenSSLContextFactory}
  408. """
  409. context = self._webContextFactory.getContext(hostname, port)
  410. return _ContextFactoryWithContext(context)
  411. @implementer(IBodyProducer)
  412. class FileBodyProducer:
  413. """
  414. L{FileBodyProducer} produces bytes from an input file object incrementally
  415. and writes them to a consumer.
  416. Since file-like objects cannot be read from in an event-driven manner,
  417. L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from
  418. the file. This process is also paused and resumed based on notifications
  419. from the L{IConsumer} provider being written to.
  420. The file is closed after it has been read, or if the producer is stopped
  421. early.
  422. @ivar _inputFile: Any file-like object, bytes read from which will be
  423. written to a consumer.
  424. @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to
  425. schedule all reads.
  426. @ivar _readSize: The number of bytes to read from C{_inputFile} at a time.
  427. """
  428. def __init__(self, inputFile, cooperator=task, readSize=2**16):
  429. self._inputFile = inputFile
  430. self._cooperate = cooperator.cooperate
  431. self._readSize = readSize
  432. self.length = self._determineLength(inputFile)
  433. def _determineLength(self, fObj):
  434. """
  435. Determine how many bytes can be read out of C{fObj} (assuming it is not
  436. modified from this point on). If the determination cannot be made,
  437. return C{UNKNOWN_LENGTH}.
  438. """
  439. try:
  440. seek = fObj.seek
  441. tell = fObj.tell
  442. except AttributeError:
  443. return UNKNOWN_LENGTH
  444. originalPosition = tell()
  445. seek(0, os.SEEK_END)
  446. end = tell()
  447. seek(originalPosition, os.SEEK_SET)
  448. return end - originalPosition
  449. def stopProducing(self):
  450. """
  451. Permanently stop writing bytes from the file to the consumer by
  452. stopping the underlying L{CooperativeTask}.
  453. """
  454. self._inputFile.close()
  455. try:
  456. self._task.stop()
  457. except task.TaskFinished:
  458. pass
  459. def startProducing(self, consumer):
  460. """
  461. Start a cooperative task which will read bytes from the input file and
  462. write them to C{consumer}. Return a L{Deferred} which fires after all
  463. bytes have been written. If this L{Deferred} is cancelled before it is
  464. fired, stop reading and writing bytes.
  465. @param consumer: Any L{IConsumer} provider
  466. """
  467. self._task = self._cooperate(self._writeloop(consumer))
  468. d = self._task.whenDone()
  469. def maybeStopped(reason):
  470. if reason.check(defer.CancelledError):
  471. self.stopProducing()
  472. elif reason.check(task.TaskStopped):
  473. pass
  474. else:
  475. return reason
  476. # IBodyProducer.startProducing's Deferred isn't supposed to fire if
  477. # stopProducing is called.
  478. return defer.Deferred()
  479. d.addCallbacks(lambda ignored: None, maybeStopped)
  480. return d
  481. def _writeloop(self, consumer):
  482. """
  483. Return an iterator which reads one chunk of bytes from the input file
  484. and writes them to the consumer for each time it is iterated.
  485. """
  486. while True:
  487. bytes = self._inputFile.read(self._readSize)
  488. if not bytes:
  489. self._inputFile.close()
  490. break
  491. consumer.write(bytes)
  492. yield None
  493. def pauseProducing(self):
  494. """
  495. Temporarily suspend copying bytes from the input file to the consumer
  496. by pausing the L{CooperativeTask} which drives that activity.
  497. """
  498. self._task.pause()
  499. def resumeProducing(self):
  500. """
  501. Undo the effects of a previous C{pauseProducing} and resume copying
  502. bytes to the consumer by resuming the L{CooperativeTask} which drives
  503. the write activity.
  504. """
  505. self._task.resume()
  506. class _HTTP11ClientFactory(protocol.Factory):
  507. """
  508. A factory for L{HTTP11ClientProtocol}, used by L{HTTPConnectionPool}.
  509. @ivar _quiescentCallback: The quiescent callback to be passed to protocol
  510. instances, used to return them to the connection pool.
  511. @ivar _metadata: Metadata about the low-level connection details,
  512. used to make the repr more useful.
  513. @since: 11.1
  514. """
  515. def __init__(self, quiescentCallback, metadata):
  516. self._quiescentCallback = quiescentCallback
  517. self._metadata = metadata
  518. def __repr__(self) -> str:
  519. return "_HTTP11ClientFactory({}, {})".format(
  520. self._quiescentCallback, self._metadata
  521. )
  522. def buildProtocol(self, addr):
  523. return HTTP11ClientProtocol(self._quiescentCallback)
  524. class _RetryingHTTP11ClientProtocol:
  525. """
  526. A wrapper for L{HTTP11ClientProtocol} that automatically retries requests.
  527. @ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}.
  528. @ivar _newConnection: A callable that creates a new connection for a
  529. retry.
  530. """
  531. def __init__(self, clientProtocol, newConnection):
  532. self._clientProtocol = clientProtocol
  533. self._newConnection = newConnection
  534. def _shouldRetry(self, method, exception, bodyProducer):
  535. """
  536. Indicate whether request should be retried.
  537. Only returns C{True} if method is idempotent, no response was
  538. received, the reason for the failed request was not due to
  539. user-requested cancellation, and no body was sent. The latter
  540. requirement may be relaxed in the future, and PUT added to approved
  541. method list.
  542. @param method: The method of the request.
  543. @type method: L{bytes}
  544. """
  545. if method not in (b"GET", b"HEAD", b"OPTIONS", b"DELETE", b"TRACE"):
  546. return False
  547. if not isinstance(
  548. exception,
  549. (RequestNotSent, RequestTransmissionFailed, ResponseNeverReceived),
  550. ):
  551. return False
  552. if isinstance(exception, _WrapperException):
  553. for aFailure in exception.reasons:
  554. if aFailure.check(defer.CancelledError):
  555. return False
  556. if bodyProducer is not None:
  557. return False
  558. return True
  559. def request(self, request):
  560. """
  561. Do a request, and retry once (with a new connection) if it fails in
  562. a retryable manner.
  563. @param request: A L{Request} instance that will be requested using the
  564. wrapped protocol.
  565. """
  566. d = self._clientProtocol.request(request)
  567. def failed(reason):
  568. if self._shouldRetry(request.method, reason.value, request.bodyProducer):
  569. return self._newConnection().addCallback(
  570. lambda connection: connection.request(request)
  571. )
  572. else:
  573. return reason
  574. d.addErrback(failed)
  575. return d
  576. class HTTPConnectionPool:
  577. """
  578. A pool of persistent HTTP connections.
  579. Features:
  580. - Cached connections will eventually time out.
  581. - Limits on maximum number of persistent connections.
  582. Connections are stored using keys, which should be chosen such that any
  583. connections stored under a given key can be used interchangeably.
  584. Failed requests done using previously cached connections will be retried
  585. once if they use an idempotent method (e.g. GET), in case the HTTP server
  586. timed them out.
  587. @ivar persistent: Boolean indicating whether connections should be
  588. persistent. Connections are persistent by default.
  589. @ivar maxPersistentPerHost: The maximum number of cached persistent
  590. connections for a C{host:port} destination.
  591. @type maxPersistentPerHost: C{int}
  592. @ivar cachedConnectionTimeout: Number of seconds a cached persistent
  593. connection will stay open before disconnecting.
  594. @ivar retryAutomatically: C{boolean} indicating whether idempotent
  595. requests should be retried once if no response was received.
  596. @ivar _factory: The factory used to connect to the proxy.
  597. @ivar _connections: Map (scheme, host, port) to lists of
  598. L{HTTP11ClientProtocol} instances.
  599. @ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a
  600. C{IDelayedCall} instance of their timeout.
  601. @since: 12.1
  602. """
  603. _factory = _HTTP11ClientFactory
  604. maxPersistentPerHost = 2
  605. cachedConnectionTimeout = 240
  606. retryAutomatically = True
  607. _log = Logger()
  608. def __init__(self, reactor, persistent=True):
  609. self._reactor = reactor
  610. self.persistent = persistent
  611. self._connections = {}
  612. self._timeouts = {}
  613. def getConnection(self, key, endpoint):
  614. """
  615. Supply a connection, newly created or retrieved from the pool, to be
  616. used for one HTTP request.
  617. The connection will remain out of the pool (not available to be
  618. returned from future calls to this method) until one HTTP request has
  619. been completed over it.
  620. Afterwards, if the connection is still open, it will automatically be
  621. added to the pool.
  622. @param key: A unique key identifying connections that can be used
  623. interchangeably.
  624. @param endpoint: An endpoint that can be used to open a new connection
  625. if no cached connection is available.
  626. @return: A C{Deferred} that will fire with a L{HTTP11ClientProtocol}
  627. (or a wrapper) that can be used to send a single HTTP request.
  628. """
  629. # Try to get cached version:
  630. connections = self._connections.get(key)
  631. while connections:
  632. connection = connections.pop(0)
  633. # Cancel timeout:
  634. self._timeouts[connection].cancel()
  635. del self._timeouts[connection]
  636. if connection.state == "QUIESCENT":
  637. if self.retryAutomatically:
  638. newConnection = lambda: self._newConnection(key, endpoint)
  639. connection = _RetryingHTTP11ClientProtocol(
  640. connection, newConnection
  641. )
  642. return defer.succeed(connection)
  643. return self._newConnection(key, endpoint)
  644. def _newConnection(self, key, endpoint):
  645. """
  646. Create a new connection.
  647. This implements the new connection code path for L{getConnection}.
  648. """
  649. def quiescentCallback(protocol):
  650. self._putConnection(key, protocol)
  651. factory = self._factory(quiescentCallback, repr(endpoint))
  652. return endpoint.connect(factory)
  653. def _removeConnection(self, key, connection):
  654. """
  655. Remove a connection from the cache and disconnect it.
  656. """
  657. connection.transport.loseConnection()
  658. self._connections[key].remove(connection)
  659. del self._timeouts[connection]
  660. def _putConnection(self, key, connection):
  661. """
  662. Return a persistent connection to the pool. This will be called by
  663. L{HTTP11ClientProtocol} when the connection becomes quiescent.
  664. """
  665. if connection.state != "QUIESCENT":
  666. # Log with traceback for debugging purposes:
  667. try:
  668. raise RuntimeError(
  669. "BUG: Non-quiescent protocol added to connection pool."
  670. )
  671. except BaseException:
  672. self._log.failure(
  673. "BUG: Non-quiescent protocol added to connection pool."
  674. )
  675. return
  676. connections = self._connections.setdefault(key, [])
  677. if len(connections) == self.maxPersistentPerHost:
  678. dropped = connections.pop(0)
  679. dropped.transport.loseConnection()
  680. self._timeouts[dropped].cancel()
  681. del self._timeouts[dropped]
  682. connections.append(connection)
  683. cid = self._reactor.callLater(
  684. self.cachedConnectionTimeout, self._removeConnection, key, connection
  685. )
  686. self._timeouts[connection] = cid
  687. def closeCachedConnections(self):
  688. """
  689. Close all persistent connections and remove them from the pool.
  690. @return: L{defer.Deferred} that fires when all connections have been
  691. closed.
  692. """
  693. results = []
  694. for protocols in self._connections.values():
  695. for p in protocols:
  696. results.append(p.abort())
  697. self._connections = {}
  698. for dc in self._timeouts.values():
  699. dc.cancel()
  700. self._timeouts = {}
  701. return defer.gatherResults(results).addCallback(lambda ign: None)
  class _AgentBase:
      """
      Shared machinery for the L{Agent}-style classes in this module.

      @ivar _reactor: The C{IReactorTime} implementation which will be used by
          the pool, and perhaps by subclasses as well.

      @ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections.
      """

      def __init__(self, reactor, pool):
          """
          @param reactor: The reactor to remember as C{_reactor}.
          @param pool: The connection pool to use, or L{None} to create a
              non-persistent L{HTTPConnectionPool} for this agent.
          """
          self._reactor = reactor
          self._pool = HTTPConnectionPool(reactor, False) if pool is None else pool

      def _computeHostValue(self, scheme, host, port):
          """
          Build the value of the I{Host} header for the given scheme, host
          name, and port number.

          IPv6 literals are wrapped in square brackets, and the port is left
          out when it is the default one for the scheme.
          """
          if isIPv6Address(nativeString(host)):
              host = b"[" + host + b"]"
          usesDefaultPort = (scheme, port) in ((b"http", 80), (b"https", 443))
          return host if usesDefaultPort else b"%b:%d" % (host, port)

      def _requestWithEndpoint(
          self, key, endpoint, method, parsedURI, headers, bodyProducer, requestPath
      ):
          """
          Issue a new request over a connection obtained from the pool.

          @param key: The pool key identifying interchangeable connections.
          @param endpoint: The endpoint used if a new connection is needed.
          @param method: The request method; must be L{bytes}.
          @param parsedURI: The parsed request URI; used for the I{Host}
              header and passed on to the request.
          @param headers: The request headers, or L{None}.
          @param bodyProducer: The request body producer, or L{None}.
          @param requestPath: The path sent as part of the request.

          @raise TypeError: If C{method} is not L{bytes}.

          @return: The C{Deferred} from the pool, with the request chained
              onto it.
          """
          if not isinstance(method, bytes):
              raise TypeError(f"method={method!r} is {type(method)}, but must be bytes")
          method = _ensureValidMethod(method)

          # Make sure there is always a Host header, without mutating the
          # caller's headers object.
          if headers is None:
              headers = Headers()
          if not headers.hasHeader(b"host"):
              headers = headers.copy()
              hostValue = self._computeHostValue(
                  parsedURI.scheme, parsedURI.host, parsedURI.port
              )
              headers.addRawHeader(b"host", hostValue)

          def sendRequest(proto):
              outgoing = Request._construct(
                  method,
                  requestPath,
                  headers,
                  bodyProducer,
                  persistent=self._pool.persistent,
                  parsedURI=parsedURI,
              )
              return proto.request(outgoing)

          connecting = self._pool.getConnection(key, endpoint)
          connecting.addCallback(sendRequest)
          return connecting
  @implementer(IAgentEndpointFactory)
  class _StandardEndpointFactory:
      """
      Standard HTTP endpoint destinations - TCP for HTTP, TCP+TLS for HTTPS.

      @ivar _policyForHTTPS: A web context factory which will be used to create
          SSL context objects for any SSL connections the agent needs to make.

      @ivar _connectTimeout: If not L{None}, the timeout passed to
          L{HostnameEndpoint} for specifying the connection timeout.

      @ivar _bindAddress: If not L{None}, the address passed to
          L{HostnameEndpoint} for specifying the local address to bind to.
      """

      def __init__(self, reactor, contextFactory, connectTimeout, bindAddress):
          """
          @param reactor: A provider to use to create endpoints.
          @type reactor: see L{HostnameEndpoint.__init__} for acceptable
              reactor types.

          @param contextFactory: A factory for TLS contexts, to control the
              verification parameters of OpenSSL.
          @type contextFactory: L{IPolicyForHTTPS}.

          @param connectTimeout: The amount of time that this L{Agent} will
              wait for the peer to accept a connection.
          @type connectTimeout: L{float} or L{None}

          @param bindAddress: The local address for client sockets to bind to.
          @type bindAddress: L{bytes} or L{None}
          """
          self._bindAddress = bindAddress
          self._connectTimeout = connectTimeout
          self._policyForHTTPS = contextFactory
          self._reactor = reactor

      def endpointForURI(self, uri):
          """
          Connect directly over TCP for C{b'http'} scheme, and TLS for
          C{b'https'}.

          @param uri: L{URI} to connect to.

          @raise ValueError: If the host of C{uri} is not ASCII decodable.
          @raise SchemeNotSupported: If the scheme is neither C{b'http'} nor
              C{b'https'}.

          @return: Endpoint to connect to.
          @rtype: L{IStreamClientEndpoint}
          """
          # The timeout is only passed along when one was configured.
          endpointOptions = {}
          if self._connectTimeout is not None:
              endpointOptions["timeout"] = self._connectTimeout
          endpointOptions["bindAddress"] = self._bindAddress

          try:
              hostName = nativeString(uri.host)
          except UnicodeDecodeError:
              template = (
                  "The host of the provided URI ({uri.host!r}) "
                  "contains non-ASCII octets, it should be ASCII "
                  "decodable."
              )
              raise ValueError(template.format(uri=uri))

          tcpEndpoint = HostnameEndpoint(
              self._reactor, hostName, uri.port, **endpointOptions
          )

          if uri.scheme == b"http":
              return tcpEndpoint
          if uri.scheme == b"https":
              # Layer TLS on top of the plain TCP endpoint.
              tlsCreator = self._policyForHTTPS.creatorForNetloc(uri.host, uri.port)
              return wrapClientTLS(tlsCreator, tcpEndpoint)
          raise SchemeNotSupported(f"Unsupported scheme: {uri.scheme!r}")
  @implementer(IAgent)
  class Agent(_AgentBase):
      """
      L{Agent} is a very basic HTTP client. It supports I{HTTP} and I{HTTPS}
      scheme URIs.

      @ivar _pool: An L{HTTPConnectionPool} instance.

      @ivar _endpointFactory: The L{IAgentEndpointFactory} which will
          be used to create endpoints for outgoing connections.

      @since: 9.0
      """

      def __init__(
          self,
          reactor,
          contextFactory=BrowserLikePolicyForHTTPS(),
          connectTimeout=None,
          bindAddress=None,
          pool=None,
      ):
          """
          Create an L{Agent}.

          @param reactor: A reactor for this L{Agent} to place outgoing
              connections.
          @type reactor: see L{HostnameEndpoint.__init__} for acceptable
              reactor types.

          @param contextFactory: A factory for TLS contexts, to control the
              verification parameters of OpenSSL. The default is to use a
              L{BrowserLikePolicyForHTTPS}, so unless you have special
              requirements you can leave this as-is. Objects that do not
              provide L{IPolicyForHTTPS} are still accepted, but trigger a
              C{DeprecationWarning} and are adapted.
          @type contextFactory: L{IPolicyForHTTPS}.

          @param connectTimeout: The amount of time that this L{Agent} will
              wait for the peer to accept a connection.
          @type connectTimeout: L{float}

          @param bindAddress: The local address for client sockets to bind to.
          @type bindAddress: L{bytes}

          @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
              case a non-persistent L{HTTPConnectionPool} instance will be
              created.
          @type pool: L{HTTPConnectionPool}
          """
          if not IPolicyForHTTPS.providedBy(contextFactory):
              deprecationMessage = (
                  repr(contextFactory)
                  + " was passed as the HTTPS policy for an Agent, but it does "
                  "not provide IPolicyForHTTPS. Since Twisted 14.0, you must "
                  "pass a provider of IPolicyForHTTPS."
              )
              warnings.warn(
                  deprecationMessage,
                  stacklevel=2,
                  category=DeprecationWarning,
              )
              # Adapt the legacy object so the rest of the code only ever
              # sees the current policy interface.
              contextFactory = _DeprecatedToCurrentPolicyForHTTPS(contextFactory)

          standardFactory = _StandardEndpointFactory(
              reactor, contextFactory, connectTimeout, bindAddress
          )
          self._init(reactor, standardFactory, pool)

      @classmethod
      def usingEndpointFactory(cls, reactor, endpointFactory, pool=None):
          """
          Alternate constructor: create an L{Agent} that relies on
          C{endpointFactory} to figure out how to connect to the server.

          @param reactor: A reactor for this L{Agent} to place outgoing
              connections.
          @type reactor: see L{HostnameEndpoint.__init__} for acceptable
              reactor types.

          @param endpointFactory: Used to construct endpoints which the
              HTTP client will connect with.
          @type endpointFactory: an L{IAgentEndpointFactory} provider.

          @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
              case a non-persistent L{HTTPConnectionPool} instance will be
              created.
          @type pool: L{HTTPConnectionPool}

          @return: A new L{Agent}.
          """
          # Bypass __init__, which would build a standard endpoint factory.
          newAgent = cls.__new__(cls)
          newAgent._init(reactor, endpointFactory, pool)
          return newAgent

      def _init(self, reactor, endpointFactory, pool):
          """
          Initialize a new L{Agent}; shared by L{__init__} and
          L{usingEndpointFactory}.

          @param reactor: A reactor for this L{Agent} to place outgoing
              connections.
          @type reactor: see L{HostnameEndpoint.__init__} for acceptable
              reactor types.

          @param endpointFactory: Used to construct endpoints which the
              HTTP client will connect with.
          @type endpointFactory: an L{IAgentEndpointFactory} provider.

          @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which
              case a non-persistent L{HTTPConnectionPool} instance will be
              created.
          @type pool: L{HTTPConnectionPool}
          """
          _AgentBase.__init__(self, reactor, pool)
          self._endpointFactory = endpointFactory

      def _getEndpoint(self, uri):
          """
          Ask C{self._endpointFactory} for an endpoint for the given URI.

          @param uri: The URI of the request.
          @type uri: L{URI}

          @return: An endpoint which can be used to connect to given address.
          """
          factory = self._endpointFactory
          return factory.endpointForURI(uri)

      def request(self, method, uri, headers=None, bodyProducer=None):
          """
          Issue a request to the server indicated by the given C{uri}.

          An existing connection from the connection pool may be used or a new
          one may be created.

          I{HTTP} and I{HTTPS} schemes are supported in C{uri}; an unsupported
          scheme is reported through a failed C{Deferred} rather than raised.

          @see: L{twisted.web.iweb.IAgent.request}
          """
          parsedURI = URI.fromBytes(_ensureValidURI(uri.strip()))
          try:
              endpoint = self._getEndpoint(parsedURI)
          except SchemeNotSupported:
              # Failure() captures the exception currently being handled.
              return defer.fail(Failure())
          else:
              # Connections to the same scheme/host/port are interchangeable.
              poolKey = (parsedURI.scheme, parsedURI.host, parsedURI.port)
              return self._requestWithEndpoint(
                  poolKey,
                  endpoint,
                  method,
                  parsedURI,
                  headers,
                  bodyProducer,
                  parsedURI.originForm,
              )
  @implementer(IAgent)
  class ProxyAgent(_AgentBase):
      """
      An HTTP agent able to cross HTTP proxies.

      @ivar _proxyEndpoint: The endpoint used to connect to the proxy.

      @since: 11.1
      """

      def __init__(self, endpoint, reactor=None, pool=None):
          """
          @param endpoint: The endpoint used to connect to the proxy.
          @param reactor: The reactor to use, or L{None} for the global one.
          @param pool: An L{HTTPConnectionPool}, or L{None} to have a
              non-persistent one created.
          """
          if reactor is None:
              # Imported lazily so that the global reactor is only installed
              # when it is actually needed.
              from twisted.internet import reactor
          _AgentBase.__init__(self, reactor, pool)
          self._proxyEndpoint = endpoint

      def request(self, method, uri, headers=None, bodyProducer=None):
          """
          Issue a new request via the configured proxy.

          The full C{uri} is sent as the request path.
          """
          uri = _ensureValidURI(uri.strip())

          # Every request goes to the same destination, the proxy, so all
          # connections are cached under one key.
          #
          # To support proxying HTTPS via CONNECT, we will use key
          # ("http-proxy-CONNECT", scheme, host, port), and an endpoint that
          # wraps _proxyEndpoint with an additional callback to do the CONNECT.
          proxyKey = ("http-proxy", self._proxyEndpoint)
          target = URI.fromBytes(uri)
          return self._requestWithEndpoint(
              proxyKey,
              self._proxyEndpoint,
              method,
              target,
              headers,
              bodyProducer,
              uri,
          )
  class _FakeStdlibRequest(_RequestBase):
      """
      A fake L{urllib.request.Request} object for L{cookiejar} to work with.

      @see: U{https://docs.python.org/3/library/urllib.request.html#urllib.request.Request}

      @ivar uri: Request URI.
      @ivar headers: Request headers.
      @ivar type: The scheme of the URI.
      @ivar host: The host[:port] of the URI.

      @since: 11.1
      """

      uri: str
      type: str
      host: str
      # The received headers managed using Twisted API.
      _twistedHeaders: Headers

      def __init__(self, uri: bytes) -> None:
          """
          Create a fake request.

          @param uri: Request URI.
          """
          self.uri = nativeString(uri)
          self._twistedHeaders = Headers()
          _uri = URI.fromBytes(uri)
          self.type = nativeString(_uri.scheme)
          self.host = nativeString(_uri.host)

          if (_uri.scheme, _uri.port) not in ((b"http", 80), (b"https", 443)):
              # If it's not a scheme on its regular port, add the port.
              self.host += ":" + str(_uri.port)

          self.origin_req_host = nativeString(_uri.host)
          self.unverifiable = False

      def has_header(self, header):
          """
          Tell whether a header named C{header} has been added to this request.
          """
          return self._twistedHeaders.hasHeader(networkString(header))

      def add_unredirected_header(self, name, value):
          """
          Record a header on this request; the cookie jar uses this to hand
          back the I{Cookie} header it computed.
          """
          self._twistedHeaders.addRawHeader(networkString(name), networkString(value))

      def get_full_url(self):
          """
          Return the request URI as a native string.
          """
          return self.uri

      def get_header(self, name, default=None):
          """
          Return the first value of the header C{name} as a native string.

          @param name: The header name.
          @param default: Returned unchanged when the header is not present.
              (It must not be passed through to C{getRawHeaders}: that would
              make a string default be treated as a list of header values.)

          @return: The first header value, or C{default}.
          """
          values = self._twistedHeaders.getRawHeaders(networkString(name))
          if not values:
              return default
          return nativeString(values[0])

      def get_host(self):
          """
          Return the host[:port] of the request URI.
          """
          return self.host

      def get_type(self):
          """
          Return the scheme of the request URI.
          """
          return self.type

      def is_unverifiable(self):
          """
          Always report the request as verifiable.
          """
          # In theory this shouldn't be hardcoded.
          return False
  @dataclass
  class _FakeUrllibResponseInfo(_InfoType):
      """
      The C{info()} object the cookie jar reads response headers from.
      """

      # The Twisted Web response whose headers are exposed.
      response: IResponse

      def get_all(self, name: str, default: bytes) -> list[str]:  # type:ignore[override]
          """
          Return every value of the response header C{name} as native
          strings, falling back to the (converted) items of C{default} when
          the header is absent.
          """
          rawValues = self.response.headers.getRawHeaders(networkString(name), default)
          return [nativeString(rawValue) for rawValue in rawValues]
  class _FakeStdlibResponse(_ResponseBase):
      """
      A fake L{urllib.response.Response} object for L{http.cookiejar} to work
      with.

      @ivar response: Underlying Twisted Web response.

      @since: 11.1
      """

      response: IResponse

      def __init__(self, response: IResponse) -> None:
          """
          @param response: The Twisted Web response to wrap.
          """
          self.response = response

      def info(self) -> _InfoType:
          """
          Return a header accessor wrapping the underlying response.
          """
          return _FakeUrllibResponseInfo(self.response)
  @implementer(IAgent)
  class CookieAgent:
      """
      L{CookieAgent} extends the basic L{Agent} to add RFC-compliant handling of
      HTTP cookies. Cookies are written to and extracted from a L{CookieJar}
      instance.

      The same cookie jar instance will be used for any requests through this
      agent, mutating it whenever a I{Set-Cookie} header appears in a response.

      @ivar _agent: Underlying Twisted Web agent to issue requests through.

      @ivar cookieJar: Initialized cookie jar to read cookies from and store
          cookies to.

      @since: 11.1
      """

      _agent: IAgent
      cookieJar: CookieJar

      def __init__(self, agent: IAgent, cookieJar: CookieJar) -> None:
          """
          @param agent: The agent that actually performs the requests.
          @param cookieJar: The jar cookies are read from and stored to.
          """
          self.cookieJar = cookieJar
          self._agent = agent

      def request(
          self,
          method: bytes,
          uri: bytes,
          headers: Optional[Headers] = None,
          bodyProducer: Optional[IBodyProducer] = None,
      ) -> Deferred[IResponse]:
          """
          Issue a new request to the wrapped L{Agent}.

          Send a I{Cookie} header if a cookie for C{uri} is stored in
          L{CookieAgent.cookieJar}. Cookies found in the response are
          extracted and stored automatically.

          If a C{'cookie'} header appears in C{headers} it will override the
          automatic cookie header obtained from the cookie jar.

          @see: L{Agent.request}
          """
          if headers is None:
              outgoingHeaders = Headers()
          else:
              outgoingHeaders = headers

          stdlibRequest = _FakeStdlibRequest(uri)

          # An explicit cookie header disables the automatic one.
          if not outgoingHeaders.hasHeader(b"cookie"):
              self.cookieJar.add_cookie_header(stdlibRequest)
              jarCookie = stdlibRequest.get_header("Cookie", None)
              if jarCookie is not None:
                  # Copy first so the caller's headers are left untouched.
                  outgoingHeaders = outgoingHeaders.copy()
                  outgoingHeaders.addRawHeader(b"cookie", networkString(jarCookie))

          pending = self._agent.request(method, uri, outgoingHeaders, bodyProducer)
          return pending.addCallback(self._extractCookies, stdlibRequest)

      def _extractCookies(
          self, response: IResponse, request: _FakeStdlibRequest
      ) -> IResponse:
          """
          Store the cookies carried by C{response} in the cookie jar.

          @param response: the Twisted Web response that we are processing.

          @param request: A L{_FakeStdlibRequest} wrapping our Twisted request,
              for L{CookieJar} to extract cookies from.

          @return: C{response}, unchanged.
          """
          stdlibResponse = _FakeStdlibResponse(response)
          self.cookieJar.extract_cookies(stdlibResponse, request)
          return response
  class GzipDecoder(proxyForInterface(IResponse)):  # type: ignore[misc]
      """
      A wrapper for a L{Response} instance which handles gzip'ed body.

      @ivar original: The original L{Response} object.

      @since: 11.1
      """

      def __init__(self, response):
          """
          @param response: The response whose body is gzip-compressed.
          """
          # The decompressed size is not known up front.
          self.length = UNKNOWN_LENGTH
          self.original = response

      def deliverBody(self, protocol):
          """
          Deliver the body to C{protocol} through a L{_GzipProtocol}, so that
          it receives decompressed data.
          """
          decompressing = _GzipProtocol(protocol, self.original)
          self.original.deliverBody(decompressing)
  class _GzipProtocol(proxyForInterface(IProtocol)):  # type: ignore[misc]
      """
      A L{Protocol} implementation which wraps another one, transparently
      decompressing received data.

      @ivar _zlibDecompress: A zlib decompress object used to decompress the
          data stream.

      @ivar _response: A reference to the original response, in case of errors.

      @since: 11.1
      """

      def __init__(self, protocol, response):
          """
          @param protocol: The protocol that receives the decompressed data.
          @param response: The original response, reported with any failure.
          """
          self.original = protocol
          self._response = response
          # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
          self._zlibDecompress = zlib.decompressobj(16 + zlib.MAX_WBITS)

      def dataReceived(self, data):
          """
          Decompress C{data} with the zlib decompressor, forwarding the raw data
          to the original protocol.

          @raise ResponseFailed: If C{data} cannot be decompressed.
          """
          try:
              inflated = self._zlibDecompress.decompress(data)
          except zlib.error:
              raise ResponseFailed([Failure()], self._response)
          else:
              # The decompressor may buffer input and produce nothing yet.
              if inflated:
                  self.original.dataReceived(inflated)

      def connectionLost(self, reason):
          """
          Forward the connection lost event, flushing remaining data from the
          decompressor if any.

          @raise ResponseFailed: If flushing the decompressor fails.
          """
          try:
              remainder = self._zlibDecompress.flush()
          except zlib.error:
              raise ResponseFailed([reason, Failure()], self._response)
          else:
              if remainder:
                  self.original.dataReceived(remainder)
              self.original.connectionLost(reason)
  @implementer(IAgent)
  class ContentDecoderAgent:
      """
      An L{Agent} wrapper to handle encoded content.

      It takes care of declaring the support for content in the
      I{Accept-Encoding} header and automatically decompresses the received data
      if the I{Content-Encoding} header indicates a supported encoding.

      For example::

          agent = ContentDecoderAgent(Agent(reactor),
                                      [(b'gzip', GzipDecoder)])

      @param agent: The agent to wrap
      @type agent: L{IAgent}

      @param decoders: A sequence of (name, decoder) objects. The name
          declares which encoding the decoder supports. The decoder must accept
          an L{IResponse} and return an L{IResponse} when called. The order
          determines how the decoders are advertised to the server. Names must
          be unique.
      @type decoders: sequence of (L{bytes}, L{callable}) tuples

      @since: 11.1

      @see: L{GzipDecoder}
      """

      def __init__(self, agent, decoders):
          # Materialize once: the pairs are walked twice below, which would
          # silently yield an empty Accept-Encoding for a one-shot iterable.
          decoders = list(decoders)
          self._agent = agent
          self._decoders = dict(decoders)
          self._supported = b",".join([name for name, _decoder in decoders])

      def request(self, method, uri, headers=None, bodyProducer=None):
          """
          Send a client request which declares supporting compressed content.

          The caller's C{headers} are copied, never mutated.

          @see: L{Agent.request}.
          """
          if headers is None:
              headers = Headers()
          else:
              headers = headers.copy()
          headers.addRawHeader(b"accept-encoding", self._supported)
          deferred = self._agent.request(method, uri, headers, bodyProducer)
          return deferred.addCallback(self._handleResponse)

      def _handleResponse(self, response):
          """
          Check if the response is encoded, and wrap it to handle decompression.

          Supported encodings are peeled off starting from the last one
          listed; the I{Content-Encoding} header is rewritten to contain only
          the encodings that remain, or removed when none remain.

          @param response: The response received from the wrapped agent.

          @return: C{response}, wrapped once per decoded encoding.
          """
          rawEncodings = response.headers.getRawHeaders(b"content-encoding")
          if not rawEncodings:
              # Not encoded at all: leave the response and its headers
              # untouched rather than inventing an empty Content-Encoding.
              return response

          remaining = b",".join(rawEncodings).split(b",")
          while remaining:
              name = remaining.pop().strip()
              decoder = self._decoders.get(name)
              if decoder is None:
                  # Unsupported encoding: add it back and stop, since the
                  # encodings before it cannot be undone first.
                  remaining.append(name)
                  break
              response = decoder(response)

          if remaining:
              response.headers.setRawHeaders(
                  b"content-encoding", [b",".join(remaining)]
              )
          else:
              response.headers.removeHeader(b"content-encoding")
          return response
  # Converts a header name to the capitalization used for the names below.
  _canonicalHeaderName = Headers()._canonicalNameCaps

  # Headers which L{RedirectAgent} never forwards to a different origin.
  _defaultSensitiveHeaders = frozenset(
      (
          b"Authorization",
          b"Cookie",
          b"Cookie2",
          b"Proxy-Authorization",
          b"WWW-Authenticate",
      )
  )
  @implementer(IAgent)
  class RedirectAgent:
      """
      An L{Agent} wrapper which handles HTTP redirects.

      The implementation is rather strict: 301 and 302 behaves like 307, not
      redirecting automatically on methods different from I{GET} and I{HEAD}.

      See L{BrowserLikeRedirectAgent} for a redirecting Agent that behaves more
      like a web browser.

      @param redirectLimit: The maximum number of times the agent is allowed to
          follow redirects before failing with a L{error.InfiniteRedirection}.

      @param sensitiveHeaderNames: An iterable of C{bytes} enumerating the names
          of headers that must not be transmitted when redirecting to a
          different origin. These will be consulted in addition to the
          protocol-specified set of headers that contain sensitive information.

      @cvar _redirectResponses: A L{list} of HTTP status codes to be redirected
          for I{GET} and I{HEAD} methods.

      @cvar _seeOtherResponses: A L{list} of HTTP status codes to be redirected
          for any method and the method altered to I{GET}.

      @since: 11.1
      """

      _redirectResponses = [
          http.MOVED_PERMANENTLY,
          http.FOUND,
          http.TEMPORARY_REDIRECT,
          http.PERMANENT_REDIRECT,
      ]
      _seeOtherResponses = [http.SEE_OTHER]

      def __init__(
          self,
          agent: IAgent,
          redirectLimit: int = 20,
          sensitiveHeaderNames: Iterable[bytes] = (),
      ):
          self._agent = agent
          self._redirectLimit = redirectLimit
          # Canonicalize the caller's names so they compare equal to the
          # names yielded by C{Headers.getAllRawHeaders}.
          sensitive = {_canonicalHeaderName(each) for each in sensitiveHeaderNames}
          sensitive.update(_defaultSensitiveHeaders)
          self._sensitiveHeaderNames = sensitive

      def request(self, method, uri, headers=None, bodyProducer=None):
          """
          Send a client request following HTTP redirects.

          @see: L{Agent.request}.
          """
          deferred = self._agent.request(method, uri, headers, bodyProducer)
          return deferred.addCallback(self._handleResponse, method, uri, headers, 0)

      def _resolveLocation(self, requestURI, location):
          """
          Resolve the redirect location against the request I{URI}.

          @type requestURI: C{bytes}
          @param requestURI: The request I{URI}.

          @type location: C{bytes}
          @param location: The redirect location.

          @rtype: C{bytes}
          @return: Final resolved I{URI}.
          """
          return _urljoin(requestURI, location)

      def _handleRedirect(self, response, method, uri, headers, redirectCount):
          """
          Handle a redirect response, checking the number of redirects already
          followed, and extracting the location header fields.

          @param response: The redirect response being handled.
          @param method: The method to use for the follow-up request.
          @param uri: The URI of the request which produced C{response}.
          @param headers: The headers sent with that request, or L{None}.
          @param redirectCount: How many redirects were followed so far.

          @raise ResponseFailed: If the redirect limit is reached or the
              response carries no I{Location} header.

          @return: A C{Deferred} for the final, non-redirect response.
          """
          if redirectCount >= self._redirectLimit:
              err = error.InfiniteRedirection(
                  response.code, b"Infinite redirection detected", location=uri
              )
              raise ResponseFailed([Failure(err)], response)

          locationHeaders = response.headers.getRawHeaders(b"location", [])
          if not locationHeaders:
              err = error.RedirectWithNoLocation(
                  response.code, b"No location header field", uri
              )
              raise ResponseFailed([Failure(err)], response)

          location = self._resolveLocation(uri, locationHeaders[0])

          if headers:
              parsedURI = URI.fromBytes(uri)
              parsedLocation = URI.fromBytes(location)
              sameOrigin = (
                  (parsedURI.scheme == parsedLocation.scheme)
                  and (parsedURI.host == parsedLocation.host)
                  and (parsedURI.port == parsedLocation.port)
              )
              if not sameOrigin:
                  # Never leak credentials and cookies to another origin.
                  headers = Headers(
                      {
                          rawName: rawValue
                          for rawName, rawValue in headers.getAllRawHeaders()
                          if rawName not in self._sensitiveHeaderNames
                      }
                  )

          deferred = self._agent.request(method, location, headers)

          def _chainResponse(newResponse):
              newResponse.setPreviousResponse(response)
              return newResponse

          deferred.addCallback(_chainResponse)
          # The follow-up request was sent to ``location``, so that is the URI
          # any further redirect must be resolved and origin-checked against,
          # not the URI of the request that started the chain.
          return deferred.addCallback(
              self._handleResponse, method, location, headers, redirectCount + 1
          )

      def _handleResponse(self, response, method, uri, headers, redirectCount):
          """
          Handle the response, making another request if it indicates a redirect.

          @param response: The response to inspect.
          @param method: The method of the request which produced it.
          @param uri: The URI of the request which produced it.
          @param headers: The headers sent with that request, or L{None}.
          @param redirectCount: How many redirects were followed so far.

          @raise ResponseFailed: If the response is a redirect which may not be
              followed automatically for C{method}.

          @return: C{response} if it is not a redirect, otherwise the result of
              L{_handleRedirect}.
          """
          if response.code in self._redirectResponses:
              if method not in (b"GET", b"HEAD"):
                  err = error.PageRedirect(response.code, location=uri)
                  raise ResponseFailed([Failure(err)], response)
              return self._handleRedirect(response, method, uri, headers, redirectCount)
          elif response.code in self._seeOtherResponses:
              return self._handleRedirect(response, b"GET", uri, headers, redirectCount)
          return response
  class BrowserLikeRedirectAgent(RedirectAgent):
      """
      An L{Agent} wrapper which handles HTTP redirects in the same fashion as web
      browsers.

      Unlike L{RedirectAgent}, the implementation is more relaxed: 301, 302
      and 308 are handled like 303, i.e. they are followed automatically for
      any method and the redirected request is turned into a I{GET}. Only 307
      keeps the stricter I{GET}/I{HEAD}-only handling.

      @see: L{RedirectAgent}

      @since: 13.1
      """

      # Followed for GET and HEAD only.
      _redirectResponses = [http.TEMPORARY_REDIRECT]

      # Followed for any method, switching the method to GET.
      _seeOtherResponses = [
          http.MOVED_PERMANENTLY,
          http.FOUND,
          http.SEE_OTHER,
          http.PERMANENT_REDIRECT,
      ]
  class _ReadBodyProtocol(protocol.Protocol):
      """
      Protocol that collects data sent to it.

      This is a helper for L{IResponse.deliverBody}, which collects the body and
      fires a deferred with it.

      @ivar deferred: See L{__init__}.
      @ivar status: See L{__init__}.
      @ivar message: See L{__init__}.

      @ivar dataBuffer: list of byte-strings received
      @type dataBuffer: L{list} of L{bytes}
      """

      def __init__(self, status, message, deferred):
          """
          @param status: Status of L{IResponse}
          @type status: L{int}

          @param message: Message of L{IResponse}
          @type message: L{bytes}

          @param deferred: deferred to fire when response is complete
          @type deferred: L{Deferred} firing with L{bytes}
          """
          self.dataBuffer = []
          self.status, self.message = status, message
          self.deferred = deferred

      def dataReceived(self, data):
          """
          Accumulate some more bytes from the response.
          """
          self.dataBuffer.append(data)

      def connectionLost(self, reason):
          """
          Fire the waiting L{Deferred}: with the accumulated bytes if the body
          was received completely, with a L{PartialDownloadError} carrying the
          bytes received so far on potential data loss, and with C{reason}
          itself otherwise.
          """
          if reason.check(ResponseDone):
              self.deferred.callback(b"".join(self.dataBuffer))
              return
          if reason.check(PotentialDataLoss):
              partial = PartialDownloadError(
                  self.status, self.message, b"".join(self.dataBuffer)
              )
              self.deferred.errback(partial)
              return
          self.deferred.errback(reason)
  def readBody(response: IResponse) -> defer.Deferred[bytes]:
      """
      Collect the whole body of an L{IResponse} into a single byte string.

      This is a helper function for clients that don't want to incrementally
      receive the body of an HTTP response.

      @param response: The HTTP response for which the body will be read.
      @type response: L{IResponse} provider

      @return: A L{Deferred} which will fire with the body of the response.
          Cancelling it will close the connection to the server immediately.
      """

      def getAbort():
          # Looked up lazily: the transport is only set once the body is
          # being delivered.
          return getattr(collector.transport, "abortConnection", None)

      def cancel(deferred: defer.Deferred[bytes]) -> None:
          """
          Cancel a L{readBody} call, close the connection to the HTTP server
          immediately, if it is still open.

          @param deferred: The cancelled L{defer.Deferred}.
          """
          abortConnection = getAbort()
          if abortConnection is not None:
              abortConnection()

      d: defer.Deferred[bytes] = defer.Deferred(cancel)
      collector = _ReadBodyProtocol(response.code, response.phrase, d)
      response.deliverBody(collector)

      # Without abortConnection, cancellation cannot close the connection.
      if collector.transport is not None and getAbort() is None:
          warnings.warn(
              "Using readBody with a transport that does not have an "
              "abortConnection method",
              category=DeprecationWarning,
              stacklevel=2,
          )

      return d
  # The public names exported by this module.
  __all__ = [
      "Agent",
      "BrowserLikePolicyForHTTPS",
      "BrowserLikeRedirectAgent",
      "ContentDecoderAgent",
      "CookieAgent",
      "GzipDecoder",
      "HTTPConnectionPool",
      "PartialDownloadError",
      "ProxyAgent",
      "readBody",
      "RedirectAgent",
      "RequestGenerationFailed",
      "RequestTransmissionFailed",
      "Response",
      "ResponseDone",
      "ResponseFailed",
      "ResponseNeverReceived",
      "URI",
  ]