twcgi.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. # -*- test-case-name: twisted.web.test.test_cgi -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. I hold resource classes and helper classes that deal with CGI scripts.
  6. """
  7. # System Imports
  8. import os
  9. import urllib
  10. from typing import AnyStr
  11. # Twisted Imports
  12. from twisted.internet import protocol
  13. from twisted.logger import Logger
  14. from twisted.python import filepath
  15. from twisted.spread import pb
  16. from twisted.web import http, resource, server, static
  17. class CGIDirectory(resource.Resource, filepath.FilePath[AnyStr]):
  18. def __init__(self, pathname):
  19. resource.Resource.__init__(self)
  20. filepath.FilePath.__init__(self, pathname)
  21. def getChild(self, path, request):
  22. fnp = self.child(path)
  23. if not fnp.exists():
  24. return static.File.childNotFound
  25. elif fnp.isdir():
  26. return CGIDirectory(fnp.path)
  27. else:
  28. return CGIScript(fnp.path)
  29. def render(self, request):
  30. notFound = resource.NoResource(
  31. "CGI directories do not support directory listing."
  32. )
  33. return notFound.render(request)
  34. class CGIScript(resource.Resource):
  35. """
  36. L{CGIScript} is a resource which runs child processes according to the CGI
  37. specification.
  38. The implementation is complex due to the fact that it requires asynchronous
  39. IPC with an external process with an unpleasant protocol.
  40. """
  41. isLeaf = 1
  42. def __init__(self, filename, registry=None, reactor=None):
  43. """
  44. Initialize, with the name of a CGI script file.
  45. """
  46. self.filename = filename
  47. if reactor is None:
  48. # This installs a default reactor, if None was installed before.
  49. # We do a late import here, so that importing the current module
  50. # won't directly trigger installing a default reactor.
  51. from twisted.internet import reactor
  52. self._reactor = reactor
  53. def render(self, request):
  54. """
  55. Do various things to conform to the CGI specification.
  56. I will set up the usual slew of environment variables, then spin off a
  57. process.
  58. @type request: L{twisted.web.http.Request}
  59. @param request: An HTTP request.
  60. """
  61. scriptName = b"/" + b"/".join(request.prepath)
  62. serverName = request.getRequestHostname().split(b":")[0]
  63. env = {
  64. "SERVER_SOFTWARE": server.version,
  65. "SERVER_NAME": serverName,
  66. "GATEWAY_INTERFACE": "CGI/1.1",
  67. "SERVER_PROTOCOL": request.clientproto,
  68. "SERVER_PORT": str(request.getHost().port),
  69. "REQUEST_METHOD": request.method,
  70. "SCRIPT_NAME": scriptName,
  71. "SCRIPT_FILENAME": self.filename,
  72. "REQUEST_URI": request.uri,
  73. }
  74. ip = request.getClientAddress().host
  75. if ip is not None:
  76. env["REMOTE_ADDR"] = ip
  77. pp = request.postpath
  78. if pp:
  79. env["PATH_INFO"] = "/" + "/".join(pp)
  80. if hasattr(request, "content"):
  81. # 'request.content' is either a StringIO or a TemporaryFile, and
  82. # the file pointer is sitting at the beginning (seek(0,0))
  83. request.content.seek(0, 2)
  84. length = request.content.tell()
  85. request.content.seek(0, 0)
  86. env["CONTENT_LENGTH"] = str(length)
  87. try:
  88. qindex = request.uri.index(b"?")
  89. except ValueError:
  90. env["QUERY_STRING"] = ""
  91. qargs = []
  92. else:
  93. qs = env["QUERY_STRING"] = request.uri[qindex + 1 :]
  94. if b"=" in qs:
  95. qargs = []
  96. else:
  97. qargs = [urllib.parse.unquote(x.decode()) for x in qs.split(b"+")]
  98. # Propagate HTTP headers
  99. for title, header in request.getAllHeaders().items():
  100. envname = title.replace(b"-", b"_").upper()
  101. if title not in (b"content-type", b"content-length", b"proxy"):
  102. envname = b"HTTP_" + envname
  103. env[envname] = header
  104. # Propagate our environment
  105. for key, value in os.environ.items():
  106. if key not in env:
  107. env[key] = value
  108. # And they're off!
  109. self.runProcess(env, request, qargs)
  110. return server.NOT_DONE_YET
  111. def runProcess(self, env, request, qargs=[]):
  112. """
  113. Run the cgi script.
  114. @type env: A L{dict} of L{str}, or L{None}
  115. @param env: The environment variables to pass to the process that will
  116. get spawned. See
  117. L{twisted.internet.interfaces.IReactorProcess.spawnProcess} for
  118. more information about environments and process creation.
  119. @type request: L{twisted.web.http.Request}
  120. @param request: An HTTP request.
  121. @type qargs: A L{list} of L{str}
  122. @param qargs: The command line arguments to pass to the process that
  123. will get spawned.
  124. """
  125. p = CGIProcessProtocol(request)
  126. self._reactor.spawnProcess(
  127. p,
  128. self.filename,
  129. [self.filename] + qargs,
  130. env,
  131. os.path.dirname(self.filename),
  132. )
  133. class FilteredScript(CGIScript):
  134. """
  135. I am a special version of a CGI script, that uses a specific executable.
  136. This is useful for interfacing with other scripting languages that adhere
  137. to the CGI standard. My C{filter} attribute specifies what executable to
  138. run, and my C{filename} init parameter describes which script to pass to
  139. the first argument of that script.
  140. To customize me for a particular location of a CGI interpreter, override
  141. C{filter}.
  142. @type filter: L{str}
  143. @ivar filter: The absolute path to the executable.
  144. """
  145. filter = "/usr/bin/cat"
  146. def runProcess(self, env, request, qargs=[]):
  147. """
  148. Run a script through the C{filter} executable.
  149. @type env: A L{dict} of L{str}, or L{None}
  150. @param env: The environment variables to pass to the process that will
  151. get spawned. See
  152. L{twisted.internet.interfaces.IReactorProcess.spawnProcess}
  153. for more information about environments and process creation.
  154. @type request: L{twisted.web.http.Request}
  155. @param request: An HTTP request.
  156. @type qargs: A L{list} of L{str}
  157. @param qargs: The command line arguments to pass to the process that
  158. will get spawned.
  159. """
  160. p = CGIProcessProtocol(request)
  161. self._reactor.spawnProcess(
  162. p,
  163. self.filter,
  164. [self.filter, self.filename] + qargs,
  165. env,
  166. os.path.dirname(self.filename),
  167. )
  168. class CGIProcessProtocol(protocol.ProcessProtocol, pb.Viewable):
  169. handling_headers = 1
  170. headers_written = 0
  171. headertext = b""
  172. errortext = b""
  173. _log = Logger()
  174. _requestFinished = False
  175. # Remotely relay producer interface.
  176. def view_resumeProducing(self, issuer):
  177. self.resumeProducing()
  178. def view_pauseProducing(self, issuer):
  179. self.pauseProducing()
  180. def view_stopProducing(self, issuer):
  181. self.stopProducing()
  182. def resumeProducing(self):
  183. self.transport.resumeProducing()
  184. def pauseProducing(self):
  185. self.transport.pauseProducing()
  186. def stopProducing(self):
  187. self.transport.loseConnection()
  188. def __init__(self, request):
  189. self.request = request
  190. self.request.notifyFinish().addBoth(self._finished)
  191. def connectionMade(self):
  192. self.request.registerProducer(self, 1)
  193. self.request.content.seek(0, 0)
  194. content = self.request.content.read()
  195. if content:
  196. self.transport.write(content)
  197. self.transport.closeStdin()
  198. def errReceived(self, error):
  199. self.errortext = self.errortext + error
  200. def outReceived(self, output):
  201. """
  202. Handle a chunk of input
  203. """
  204. # First, make sure that the headers from the script are sorted
  205. # out (we'll want to do some parsing on these later.)
  206. if self.handling_headers:
  207. text = self.headertext + output
  208. headerEnds = []
  209. for delimiter in b"\n\n", b"\r\n\r\n", b"\r\r", b"\n\r\n":
  210. headerend = text.find(delimiter)
  211. if headerend != -1:
  212. headerEnds.append((headerend, delimiter))
  213. if headerEnds:
  214. # The script is entirely in control of response headers;
  215. # disable the default Content-Type value normally provided by
  216. # twisted.web.server.Request.
  217. self.request.defaultContentType = None
  218. headerEnds.sort()
  219. headerend, delimiter = headerEnds[0]
  220. self.headertext = text[:headerend]
  221. # This is a final version of the header text.
  222. linebreak = delimiter[: len(delimiter) // 2]
  223. headers = self.headertext.split(linebreak)
  224. for header in headers:
  225. br = header.find(b": ")
  226. if br == -1:
  227. self._log.error(
  228. "ignoring malformed CGI header: {header!r}", header=header
  229. )
  230. else:
  231. headerName = header[:br].lower()
  232. headerText = header[br + 2 :]
  233. if headerName == b"location":
  234. self.request.setResponseCode(http.FOUND)
  235. if headerName == b"status":
  236. try:
  237. # "XXX <description>" sometimes happens.
  238. statusNum = int(headerText[:3])
  239. except BaseException:
  240. self._log.error("malformed status header")
  241. else:
  242. self.request.setResponseCode(statusNum)
  243. else:
  244. # Don't allow the application to control
  245. # these required headers.
  246. if headerName.lower() not in (b"server", b"date"):
  247. self.request.responseHeaders.addRawHeader(
  248. headerName, headerText
  249. )
  250. output = text[headerend + len(delimiter) :]
  251. self.handling_headers = 0
  252. if self.handling_headers:
  253. self.headertext = text
  254. if not self.handling_headers:
  255. self.request.write(output)
  256. def processEnded(self, reason):
  257. if reason.value.exitCode != 0:
  258. self._log.error(
  259. "CGI {uri} exited with exit code {exitCode}",
  260. uri=self.request.uri,
  261. exitCode=reason.value.exitCode,
  262. )
  263. if self.errortext:
  264. self._log.error(
  265. "Errors from CGI {uri}: {errorText}",
  266. uri=self.request.uri,
  267. errorText=self.errortext,
  268. )
  269. if self.handling_headers:
  270. self._log.error(
  271. "Premature end of headers in {uri}: {headerText}",
  272. uri=self.request.uri,
  273. headerText=self.headertext,
  274. )
  275. if not self._requestFinished:
  276. self.request.write(
  277. resource.ErrorPage(
  278. http.INTERNAL_SERVER_ERROR,
  279. "CGI Script Error",
  280. "Premature end of script headers.",
  281. ).render(self.request)
  282. )
  283. if not self._requestFinished:
  284. self.request.unregisterProducer()
  285. self.request.finish()
  286. def _finished(self, ignored):
  287. """
  288. Record the end of the response generation for the request being
  289. serviced.
  290. """
  291. self._requestFinished = True