proxy.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. # -*- test-case-name: twisted.web.test.test_proxy -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Simplistic HTTP proxy support.
  6. This comes in two main variants - the Proxy and the ReverseProxy.
  7. When a Proxy is in use, a browser trying to connect to a server (say,
  8. www.yahoo.com) will be intercepted by the Proxy, and the proxy will covertly
  9. connect to the server, and return the result.
  10. When a ReverseProxy is in use, the client connects directly to the ReverseProxy
  11. (say, www.yahoo.com) which farms off the request to one of a pool of servers,
  12. and returns the result.
  13. Normally, a Proxy is used on the client end of an Internet connection, while a
  14. ReverseProxy is used on the server end.
  15. """
  16. from __future__ import absolute_import, division
  17. from twisted.python.compat import urllib_parse, urlquote
  18. from twisted.internet import reactor
  19. from twisted.internet.protocol import ClientFactory
  20. from twisted.web.resource import Resource
  21. from twisted.web.server import NOT_DONE_YET
  22. from twisted.web.http import HTTPClient, Request, HTTPChannel, _QUEUED_SENTINEL
  class ProxyClient(HTTPClient):
      """
      Client protocol that forwards one proxied request to the remote server
      and relays the response back to the request that triggered it.

      Used by L{ProxyClientFactory} to implement a simple web proxy.

      @ivar _finished: A flag which records whether the original request has
          already been finished, so that it is finished at most once.
      """

      _finished = False

      def __init__(self, command, rest, version, headers, data, father):
          """
          @param command: the HTTP method sent on the outgoing request line.
          @param rest: the request target sent on the outgoing request line.
          @param version: accepted as part of the signature; not used by
              this class.
          @param headers: mapping of header names to values to send.  It is
              modified in place: C{proxy-connection} and C{keep-alive} are
              removed and C{connection} is set to C{close}.
          @param data: the request body, written after the headers.
          @param father: the original request; the response is relayed to
              it.
          """
          headers.pop(b"proxy-connection", None)
          headers[b"connection"] = b"close"
          headers.pop(b'keep-alive', None)
          self.headers = headers
          self.data = data
          self.command = command
          self.rest = rest
          self.father = father

      def connectionMade(self):
          """
          Send the request line, every stored header and then the request
          body once the outgoing connection is established.
          """
          self.sendCommand(self.command, self.rest)
          for name, content in self.headers.items():
              self.sendHeader(name, content)
          self.endHeaders()
          self.transport.write(self.data)

      def handleStatus(self, version, code, message):
          """
          Copy the status code and message of the remote response onto the
          original request.
          """
          status = int(code)
          self.father.setResponseCode(status, message)

      def handleHeader(self, key, value):
          """
          Copy one header of the remote response onto the original request.
          """
          # t.web.server.Request sets default values for these headers in its
          # 'process' method. When these headers are received from the remote
          # server, they ought to override the defaults, rather than append
          # to them.
          replacesDefault = key.lower() in (
              b'server', b'date', b'content-type')
          responseHeaders = self.father.responseHeaders
          if not replacesDefault:
              responseHeaders.addRawHeader(key, value)
              return
          responseHeaders.setRawHeaders(key, [value])

      def handleResponsePart(self, buffer):
          """
          Write a chunk of the remote response body to the original request.
          """
          self.father.write(buffer)

      def handleResponseEnd(self):
          """
          Finish the original request, indicating that the response has been
          completely written to it, and disconnect the outgoing transport.
          Only the first call has any effect.
          """
          if self._finished:
              return
          self._finished = True
          self.father.finish()
          self.transport.loseConnection()
  class ProxyClientFactory(ClientFactory):
      """
      Client factory that stores the details of one proxied request and hands
      them to every protocol it builds.

      Used by L{ProxyRequest} to implement a simple web proxy.
      """

      protocol = ProxyClient

      def __init__(self, command, rest, version, headers, data, father):
          """
          Store the arguments unchanged; they are passed on, in the same
          order, to C{self.protocol} by L{buildProtocol}.

          @param father: the original request, which also receives the error
              response written by L{clientConnectionFailed}.
          """
          self.command = command
          self.rest = rest
          self.version = version
          self.headers = headers
          self.data = data
          self.father = father

      def buildProtocol(self, addr):
          """
          Create a C{self.protocol} instance from the stored request details.
          C{addr} is not used.
          """
          arguments = (self.command, self.rest, self.version,
                       self.headers, self.data, self.father)
          return self.protocol(*arguments)

      def clientConnectionFailed(self, connector, reason):
          """
          Report a connection failure in a response to the incoming request
          as an error.
          """
          request = self.father
          # NOTE(review): 501 is sent together with the reason phrase
          # "Gateway error"; kept as-is because clients may rely on it.
          request.setResponseCode(501, b"Gateway error")
          request.responseHeaders.addRawHeader(b"Content-Type", b"text/html")
          request.write(b"<H1>Could not connect</H1>")
          request.finish()
  class ProxyRequest(Request):
      """
      Used by Proxy to implement a simple web proxy.

      @ivar protocols: maps a URL scheme to the client factory class used to
          contact the remote server.
      @ivar ports: maps a URL scheme to the port used when the URL does not
          name one.
      @ivar reactor: the reactor used to create connections.
      @type reactor: object providing
          L{twisted.internet.interfaces.IReactorTCP}
      """

      protocols = {b'http': ProxyClientFactory}
      ports = {b'http': 80}

      def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor):
          Request.__init__(self, channel, queued)
          self.reactor = reactor

      def process(self):
          """
          Forward this request to the server named in the request URI.

          The scheme selects the client factory and the default port, the
          authority gives the host and optional port to connect to, and the
          remainder of the URI becomes the request target sent upstream.

          @raise KeyError: if the URI scheme is not a key of C{ports} /
              C{protocols}.
          @raise ValueError: if the port in the URI is not a valid integer.
          """
          parsed = urllib_parse.urlparse(self.uri)
          protocol = parsed[0]
          authority = parsed[1].decode('ascii')
          defaultPort = self.ports[protocol]

          # Split on the LAST colon only, and ignore a colon that belongs to
          # a bracketed IPv6 literal ("[::1]" or "[::1]:8080"); a plain
          # split(':') fails to unpack for such authorities.
          name, port = authority, defaultPort
          head, colon, tail = authority.rpartition(':')
          if colon and not tail.endswith(']'):
              name, port = head, int(tail)

          # connectTCP needs the bare address, without the URI brackets.
          if name.startswith('[') and name.endswith(']'):
              host = name[1:-1]
          else:
              host = name

          rest = urllib_parse.urlunparse((b'', b'') + parsed[2:])
          if not rest:
              rest = rest + b'/'
          class_ = self.protocols[protocol]
          headers = self.getAllHeaders().copy()
          if b'host' not in headers:
              # The port may be omitted from the Host header only when it is
              # the default one for the scheme; otherwise it must be given.
              if port == defaultPort:
                  hostHeader = name
              else:
                  hostHeader = u"%s:%d" % (name, port)
              headers[b'host'] = hostHeader.encode('ascii')
          # Rewind first so that the whole body is forwarded.
          self.content.seek(0, 0)
          s = self.content.read()
          clientFactory = class_(self.method, rest, self.clientproto, headers,
                                 s, self)
          self.reactor.connectTCP(host, port, clientFactory)
  class Proxy(HTTPChannel):
      """
      HTTP channel implementing a simple web proxy: each request it receives
      is handled by a L{ProxyRequest}.

      Because this is a L{twisted.web.http.HTTPChannel} subclass, install it
      as the protocol of an HTTP factory, for example::

          from twisted.web import http
          f = http.HTTPFactory()
          f.protocol = Proxy

      Make the HTTPFactory a listener on a port as per usual, and you have
      a fully-functioning web proxy!
      """

      requestFactory = ProxyRequest
  class ReverseProxyRequest(Request):
      """
      Used by ReverseProxy to implement a simple reverse proxy.

      @ivar proxyClientFactoryClass: a proxy client factory class, used to
          create new connections.
      @type proxyClientFactoryClass: L{ClientFactory}

      @ivar reactor: the reactor used to create connections.
      @type reactor: object providing
          L{twisted.internet.interfaces.IReactorTCP}
      """

      proxyClientFactoryClass = ProxyClientFactory

      def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor):
          Request.__init__(self, channel, queued)
          self.reactor = reactor

      def process(self):
          """
          Handle this request by connecting to the proxied server and
          forwarding it there, then forwarding the response back as the
          response to this request.

          The target is read from C{self.factory.host} and
          C{self.factory.port}.  NOTE(review): this class never assigns
          C{self.factory} itself; presumably the caller must set it before
          the request is processed -- confirm.
          """
          self.requestHeaders.setRawHeaders(
              b"host", [self.factory.host.encode('ascii')])
          # Rewind before reading, as the other proxy request handlers in
          # this module do; otherwise an already-consumed content stream
          # would be forwarded as an empty body.
          self.content.seek(0, 0)
          clientFactory = self.proxyClientFactoryClass(
              self.method, self.uri, self.clientproto, self.getAllHeaders(),
              self.content.read(), self)
          self.reactor.connectTCP(self.factory.host, self.factory.port,
                                  clientFactory)
  class ReverseProxy(HTTPChannel):
      """
      HTTP channel implementing a simple reverse proxy: each request it
      receives is handled by a L{ReverseProxyRequest}.

      For details of usage, see the file examples/reverse-proxy.py.
      """

      requestFactory = ReverseProxyRequest
  class ReverseProxyResource(Resource):
      """
      Resource that renders the results gotten from another server.

      Put this resource in the tree to cause everything below it to be
      relayed to a different server.

      @ivar proxyClientFactoryClass: a proxy client factory class, used to
          create new connections.
      @type proxyClientFactoryClass: L{ClientFactory}

      @ivar reactor: the reactor used to create connections.
      @type reactor: object providing
          L{twisted.internet.interfaces.IReactorTCP}
      """

      proxyClientFactoryClass = ProxyClientFactory

      def __init__(self, host, port, path, reactor=reactor):
          """
          @param host: the host of the web server to proxy.
          @type host: C{str}

          @param port: the port of the web server to proxy.
          @type port: C{int}

          @param path: the base path to fetch data from. Note that you
              shouldn't put any trailing slashes in it, it will be added
              automatically in request. For example, if you put B{/foo}, a
              request on B{/bar} will be proxied to B{/foo/bar}.  Any
              required encoding of special characters (such as " " or "/")
              should have been done already.
          @type path: C{bytes}

          @param reactor: the reactor used to create connections.
          """
          Resource.__init__(self)
          self.reactor = reactor
          self.path = path
          self.port = port
          self.host = host

      def getChild(self, path, request):
          """
          Create and return a proxy resource with the same proxy
          configuration as this one, except that its path also contains the
          segment given by C{path} at the end.
          """
          # Quote everything, "/" included, so the segment stays one segment.
          segment = urlquote(path, safe=b"").encode('utf-8')
          childPath = self.path + b'/' + segment
          return ReverseProxyResource(
              self.host, self.port, childPath, self.reactor)

      def render(self, request):
          """
          Render a request by forwarding it to the proxied server.

          @return: L{NOT_DONE_YET}; the response is written to C{request}
              by the client created from C{proxyClientFactoryClass}.
          """
          # RFC 2616 tells us that we can omit the port if it's the default
          # port, but we have to provide it otherwise
          hostHeader = (
              self.host if self.port == 80
              else u"%s:%d" % (self.host, self.port))
          request.requestHeaders.setRawHeaders(
              b"host", [hostHeader.encode('ascii')])
          request.content.seek(0, 0)
          # Only the query string of the incoming URI is kept; the path sent
          # upstream is this resource's own accumulated path.
          query = urllib_parse.urlparse(request.uri)[4]
          rest = (self.path + b'?' + query) if query else self.path
          clientFactory = self.proxyClientFactoryClass(
              request.method, rest, request.clientproto,
              request.getAllHeaders(), request.content.read(), request)
          self.reactor.connectTCP(self.host, self.port, clientFactory)
          return NOT_DONE_YET