_flatten.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. # -*- test-case-name: twisted.web.test.test_flatten,twisted.web.test.test_template -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Context-free flattener/serializer for rendering Python objects, possibly
  6. complex or arbitrarily nested, as strings.
  7. """
  8. from __future__ import annotations
  9. from inspect import iscoroutine
  10. from io import BytesIO
  11. from sys import exc_info
  12. from traceback import extract_tb
  13. from types import GeneratorType
  14. from typing import (
  15. Any,
  16. Callable,
  17. Coroutine,
  18. Generator,
  19. List,
  20. Mapping,
  21. Optional,
  22. Sequence,
  23. Tuple,
  24. TypeVar,
  25. Union,
  26. cast,
  27. )
  28. from twisted.internet.defer import Deferred, ensureDeferred
  29. from twisted.python.compat import nativeString
  30. from twisted.python.failure import Failure
  31. from twisted.web._stan import CDATA, CharRef, Comment, Tag, slot, voidElements
  32. from twisted.web.error import FlattenerError, UnfilledSlot, UnsupportedType
  33. from twisted.web.iweb import IRenderable, IRequest
# Generic type variable; L{_fork} uses it to express that the forked
# L{Deferred} carries the same result type as the one it was given.
T = TypeVar("T")

# Stand-in for "anything flattenable" inside the container members of
# L{Flattenable} below.
FlattenableRecursive = Any
"""
For documentation purposes, read C{FlattenableRecursive} as L{Flattenable}.
However, since mypy doesn't support recursive type definitions (yet?),
we'll put Any in the actual definition.
"""

# Every member listed here has a matching C{isinstance}/C{providedBy} branch
# in L{_flattenElement}; anything else makes it raise L{UnsupportedType}.
Flattenable = Union[
    bytes,
    str,
    slot,
    CDATA,
    Comment,
    Tag,
    Tuple[FlattenableRecursive, ...],
    List[FlattenableRecursive],
    Generator[FlattenableRecursive, None, None],
    CharRef,
    Deferred[FlattenableRecursive],
    Coroutine[Deferred[FlattenableRecursive], object, FlattenableRecursive],
    IRenderable,
]
"""
Type alias containing all types that can be flattened by L{flatten()}.
"""

# The maximum number of bytes to synchronously accumulate in the flattener
# buffer before delivering them onwards.  L{_flattenTree} flushes its buffer
# as soon as the accumulated size reaches this value (64 KiB).
BUFFER_SIZE = 2**16
  62. def escapeForContent(data: Union[bytes, str]) -> bytes:
  63. """
  64. Escape some character or UTF-8 byte data for inclusion in an HTML or XML
  65. document, by replacing metacharacters (C{&<>}) with their entity
  66. equivalents (C{&amp;&lt;&gt;}).
  67. This is used as an input to L{_flattenElement}'s C{dataEscaper} parameter.
  68. @param data: The string to escape.
  69. @return: The quoted form of C{data}. If C{data} is L{str}, return a utf-8
  70. encoded string.
  71. """
  72. if isinstance(data, str):
  73. data = data.encode("utf-8")
  74. data = data.replace(b"&", b"&amp;").replace(b"<", b"&lt;").replace(b">", b"&gt;")
  75. return data
  76. def attributeEscapingDoneOutside(data: Union[bytes, str]) -> bytes:
  77. """
  78. Escape some character or UTF-8 byte data for inclusion in the top level of
  79. an attribute. L{attributeEscapingDoneOutside} actually passes the data
  80. through unchanged, because L{writeWithAttributeEscaping} handles the
  81. quoting of the text within attributes outside the generator returned by
  82. L{_flattenElement}; this is used as the C{dataEscaper} argument to that
  83. L{_flattenElement} call so that that generator does not redundantly escape
  84. its text output.
  85. @param data: The string to escape.
  86. @return: The string, unchanged, except for encoding.
  87. """
  88. if isinstance(data, str):
  89. return data.encode("utf-8")
  90. return data
  91. def writeWithAttributeEscaping(
  92. write: Callable[[bytes], object]
  93. ) -> Callable[[bytes], None]:
  94. """
  95. Decorate a C{write} callable so that all output written is properly quoted
  96. for inclusion within an XML attribute value.
  97. If a L{Tag <twisted.web.template.Tag>} C{x} is flattened within the context
  98. of the contents of another L{Tag <twisted.web.template.Tag>} C{y}, the
  99. metacharacters (C{<>&"}) delimiting C{x} should be passed through
  100. unchanged, but the textual content of C{x} should still be quoted, as
  101. usual. For example: C{<y><x>&amp;</x></y>}. That is the default behavior
  102. of L{_flattenElement} when L{escapeForContent} is passed as the
  103. C{dataEscaper}.
  104. However, when a L{Tag <twisted.web.template.Tag>} C{x} is flattened within
  105. the context of an I{attribute} of another L{Tag <twisted.web.template.Tag>}
  106. C{y}, then the metacharacters delimiting C{x} should be quoted so that it
  107. can be parsed from the attribute's value. In the DOM itself, this is not a
  108. valid thing to do, but given that renderers and slots may be freely moved
  109. around in a L{twisted.web.template} template, it is a condition which may
  110. arise in a document and must be handled in a way which produces valid
  111. output. So, for example, you should be able to get C{<y attr="&lt;x /&gt;"
  112. />}. This should also be true for other XML/HTML meta-constructs such as
  113. comments and CDATA, so if you were to serialize a L{comment
  114. <twisted.web.template.Comment>} in an attribute you should get C{<y
  115. attr="&lt;-- comment --&gt;" />}. Therefore in order to capture these
  116. meta-characters, flattening is done with C{write} callable that is wrapped
  117. with L{writeWithAttributeEscaping}.
  118. The final case, and hopefully the much more common one as compared to
  119. serializing L{Tag <twisted.web.template.Tag>} and arbitrary L{IRenderable}
  120. objects within an attribute, is to serialize a simple string, and those
  121. should be passed through for L{writeWithAttributeEscaping} to quote
  122. without applying a second, redundant level of quoting.
  123. @param write: A callable which will be invoked with the escaped L{bytes}.
  124. @return: A callable that writes data with escaping.
  125. """
  126. def _write(data: bytes) -> None:
  127. write(escapeForContent(data).replace(b'"', b"&quot;"))
  128. return _write
  129. def escapedCDATA(data: Union[bytes, str]) -> bytes:
  130. """
  131. Escape CDATA for inclusion in a document.
  132. @param data: The string to escape.
  133. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  134. encoded string.
  135. """
  136. if isinstance(data, str):
  137. data = data.encode("utf-8")
  138. return data.replace(b"]]>", b"]]]]><![CDATA[>")
  139. def escapedComment(data: Union[bytes, str]) -> bytes:
  140. """
  141. Within comments the sequence C{-->} can be mistaken as the end of the comment.
  142. To ensure consistent parsing and valid output the sequence is replaced with C{--&gt;}.
  143. Furthermore, whitespace is added when a comment ends in a dash. This is done to break
  144. the connection of the ending C{-} with the closing C{-->}.
  145. @param data: The string to escape.
  146. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  147. encoded string.
  148. """
  149. if isinstance(data, str):
  150. data = data.encode("utf-8")
  151. data = data.replace(b"-->", b"--&gt;")
  152. if data and data[-1:] == b"-":
  153. data += b" "
  154. return data
  155. def _getSlotValue(
  156. name: str,
  157. slotData: Sequence[Optional[Mapping[str, Flattenable]]],
  158. default: Optional[Flattenable] = None,
  159. ) -> Flattenable:
  160. """
  161. Find the value of the named slot in the given stack of slot data.
  162. """
  163. for slotFrame in reversed(slotData):
  164. if slotFrame is not None and name in slotFrame:
  165. return slotFrame[name]
  166. else:
  167. if default is not None:
  168. return default
  169. raise UnfilledSlot(name)
  170. def _fork(d: Deferred[T]) -> Deferred[T]:
  171. """
  172. Create a new L{Deferred} based on C{d} that will fire and fail with C{d}'s
  173. result or error, but will not modify C{d}'s callback type.
  174. """
  175. d2: Deferred[T] = Deferred(lambda _: d.cancel())
  176. def callback(result: T) -> T:
  177. d2.callback(result)
  178. return result
  179. def errback(failure: Failure) -> Failure:
  180. d2.errback(failure)
  181. return failure
  182. d.addCallbacks(callback, errback)
  183. return d2
def _flattenElement(
    request: Optional[IRequest],
    root: Flattenable,
    write: Callable[[bytes], object],
    slotData: List[Optional[Mapping[str, Flattenable]]],
    renderFactory: Optional[IRenderable],
    dataEscaper: Callable[[Union[bytes, str]], bytes],
    # This is annotated as Generator[T, None, None] instead of Iterator[T]
    # because mypy does not consider an Iterator to be an instance of
    # GeneratorType.
) -> Generator[Union[Generator[Any, Any, Any], Deferred[Flattenable]], None, None]:
    """
    Make C{root} slightly more flat by writing its immediate string content
    to C{write} and yielding deferreds or generators (recursive calls to
    this function) for everything that needs further flattening.

    Note that L{_flattenTree} reads the local variable named C{root} out of
    this generator's frame when it builds the list of roots for a
    L{FlattenerError}, so that name must not be changed.

    @param request: A request object which will be passed to
        L{IRenderable.render}.

    @param root: An object to be made flatter.  This may be of type
        L{bytes}, L{str}, L{slot}, L{CDATA}, L{Comment}, L{Tag
        <twisted.web.template.Tag>}, L{tuple}, L{list},
        L{types.GeneratorType}, L{CharRef}, L{Deferred}, a coroutine, or an
        object that implements L{IRenderable}.

    @param write: A callable which will be invoked with each L{bytes} produced
        by flattening C{root}.

    @param slotData: A L{list} of L{dict} mapping L{str} slot names to data
        with which those slots will be replaced.  The list is used as a
        stack and is mutated in place while flattening a L{Tag
        <twisted.web.template.Tag>}.

    @param renderFactory: If not L{None}, an object that provides
        L{IRenderable}.  Its C{lookupRenderMethod} is used to resolve the
        C{render} attribute of tags.

    @param dataEscaper: A 1-argument callable which takes L{bytes} or
        L{str} and returns L{bytes}, quoted as appropriate for the
        rendering context.  This is really only one of two values:
        L{attributeEscapingDoneOutside} or L{escapeForContent}, depending on
        whether the rendering context is within an attribute or not.  See the
        explanation in L{writeWithAttributeEscaping}.

    @return: An iterator that eventually writes L{bytes} to C{write}.
        It can yield other iterators or L{Deferred}s; if it yields another
        iterator, the caller will iterate it; if it yields a L{Deferred},
        the result of that L{Deferred} will be another generator, in which
        case it is iterated.  See L{_flattenTree} for the trampoline that
        consumes said values.

    @raise ValueError: If a tag names a renderer but C{renderFactory} is
        L{None}.

    @raise UnsupportedType: If C{root} is not one of the supported types.
    """

    # Recurse into newRoot.  The defaults capture this call's escaper, render
    # factory and writer so that call sites only name what they override.
    def keepGoing(
        newRoot: Flattenable,
        dataEscaper: Callable[[Union[bytes, str]], bytes] = dataEscaper,
        renderFactory: Optional[IRenderable] = renderFactory,
        write: Callable[[bytes], object] = write,
    ) -> Generator[Union[Flattenable, Deferred[Flattenable]], None, None]:
        return _flattenElement(
            request, newRoot, write, slotData, renderFactory, dataEscaper
        )

    # Arrange for the eventual result of the Deferred to be turned into a
    # generator by keepGoing, using the defaults captured above.
    def keepGoingAsync(result: Deferred[Flattenable]) -> Deferred[Flattenable]:
        return result.addCallback(keepGoing)

    if isinstance(root, (bytes, str)):
        # Plain text is the only case that goes through dataEscaper.
        write(dataEscaper(root))
    elif isinstance(root, slot):
        slotValue = _getSlotValue(root.name, slotData, root.default)
        yield keepGoing(slotValue)
    elif isinstance(root, CDATA):
        write(b"<![CDATA[")
        write(escapedCDATA(root.data))
        write(b"]]>")
    elif isinstance(root, Comment):
        write(b"<!--")
        write(escapedComment(root.data))
        write(b"-->")
    elif isinstance(root, Tag):
        # Make the slots filled on this tag visible to everything below it.
        # NOTE(review): the frame pushed here is popped only on the renderer
        # path below; the other two paths through this branch leave it on
        # the stack -- confirm whether that is intended.
        slotData.append(root.slotData)
        rendererName = root.render
        if rendererName is not None:
            if renderFactory is None:
                raise ValueError(
                    f'Tag wants to be rendered by method "{rendererName}" '
                    f"but is not contained in any IRenderable"
                )
            # The render method receives a clone with its render attribute
            # cleared, so flattening its result does not look the same
            # renderer up again.
            rootClone = root.clone(False)
            rootClone.render = None
            renderMethod = renderFactory.lookupRenderMethod(rendererName)
            result = renderMethod(request, rootClone)
            yield keepGoing(result)
            slotData.pop()
            return

        if not root.tagName:
            # A tag without a name contributes only its children.
            yield keepGoing(root.children)
            return

        write(b"<")
        if isinstance(root.tagName, str):
            tagName = root.tagName.encode("ascii")
        else:
            tagName = root.tagName
        write(tagName)
        for k, v in root.attributes.items():
            if isinstance(k, str):
                k = k.encode("ascii")
            write(b" " + k + b'="')
            # Serialize the contents of the attribute, wrapping the results of
            # that serialization so that _everything_ is quoted.
            yield keepGoing(
                v, attributeEscapingDoneOutside, write=writeWithAttributeEscaping(write)
            )
            # This runs only once the caller has exhausted the generator
            # yielded above and resumed this one, so the closing quote
            # follows the complete attribute value.
            write(b'"')
        if root.children or nativeString(tagName) not in voidElements:
            write(b">")
            # Regardless of whether we're in an attribute or not, switch back
            # to the escapeForContent dataEscaper.  The contents of a tag must
            # be quoted no matter what; in the top-level document, just so
            # they're valid, and if they're within an attribute, they have to
            # be quoted so that after applying the *un*-quoting required to re-
            # parse the tag within the attribute, all the quoting is still
            # correct.
            yield keepGoing(root.children, escapeForContent)
            write(b"</" + tagName + b">")
        else:
            # A void element without children is written self-closed.
            write(b" />")
    elif isinstance(root, (tuple, list, GeneratorType)):
        for element in root:
            yield keepGoing(element)
    elif isinstance(root, CharRef):
        # Numeric character reference, e.g. ordinal 38 becomes "&#38;".
        escaped = "&#%d;" % (root.ordinal,)
        write(escaped.encode("ascii"))
    elif isinstance(root, Deferred):
        # Work on a fork so that the callback added by keepGoingAsync is not
        # added to the Deferred that was passed in.
        yield keepGoingAsync(_fork(root))
    elif iscoroutine(root):
        yield keepGoingAsync(
            Deferred.fromCoroutine(
                cast(Coroutine[Deferred[Flattenable], object, Flattenable], root)
            )
        )
    elif IRenderable.providedBy(root):
        result = root.render(request)
        # Tags in the rendered result look their render methods up on root.
        yield keepGoing(result, renderFactory=root)
    else:
        raise UnsupportedType(root)
  314. async def _flattenTree(
  315. request: Optional[IRequest], root: Flattenable, write: Callable[[bytes], object]
  316. ) -> None:
  317. """
  318. Make C{root} into an iterable of L{bytes} and L{Deferred} by doing a depth
  319. first traversal of the tree.
  320. @param request: A request object which will be passed to
  321. L{IRenderable.render}.
  322. @param root: An object to be made flatter. This may be of type C{unicode},
  323. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  324. L{list}, L{types.GeneratorType}, L{Deferred}, or something providing
  325. L{IRenderable}.
  326. @param write: A callable which will be invoked with each L{bytes} produced
  327. by flattening C{root}.
  328. @return: A C{Deferred}-returning coroutine that resolves to C{None}.
  329. """
  330. buf = []
  331. bufSize = 0
  332. # Accumulate some bytes up to the buffer size so that we don't annoy the
  333. # upstream writer with a million tiny string.
  334. def bufferedWrite(bs: bytes) -> None:
  335. nonlocal bufSize
  336. buf.append(bs)
  337. bufSize += len(bs)
  338. if bufSize >= BUFFER_SIZE:
  339. flushBuffer()
  340. # Deliver the buffered content to the upstream writer as a single string.
  341. # This is how a "big enough" buffer gets delivered, how a buffer of any
  342. # size is delivered before execution is suspended to wait for an
  343. # asynchronous value, and how anything left in the buffer when we're
  344. # finished is delivered.
  345. def flushBuffer() -> None:
  346. nonlocal bufSize
  347. if bufSize > 0:
  348. write(b"".join(buf))
  349. del buf[:]
  350. bufSize = 0
  351. stack: List[Generator[Any, Any, Any]] = [
  352. _flattenElement(request, root, bufferedWrite, [], None, escapeForContent)
  353. ]
  354. while stack:
  355. try:
  356. frame = stack[-1].gi_frame
  357. element = next(stack[-1])
  358. if isinstance(element, Deferred):
  359. # Before suspending flattening for an unknown amount of time,
  360. # flush whatever data we have collected so far.
  361. flushBuffer()
  362. element = await element
  363. except StopIteration:
  364. stack.pop()
  365. except Exception as e:
  366. stack.pop()
  367. roots = []
  368. for generator in stack:
  369. roots.append(generator.gi_frame.f_locals["root"])
  370. roots.append(frame.f_locals["root"])
  371. raise FlattenerError(e, roots, extract_tb(exc_info()[2]))
  372. else:
  373. stack.append(element)
  374. # Flush any data that remains in the buffer before finishing.
  375. flushBuffer()
  376. def flatten(
  377. request: Optional[IRequest], root: Flattenable, write: Callable[[bytes], object]
  378. ) -> Deferred[None]:
  379. """
  380. Incrementally write out a string representation of C{root} using C{write}.
  381. In order to create a string representation, C{root} will be decomposed into
  382. simpler objects which will themselves be decomposed and so on until strings
  383. or objects which can easily be converted to strings are encountered.
  384. @param request: A request object which will be passed to the C{render}
  385. method of any L{IRenderable} provider which is encountered.
  386. @param root: An object to be made flatter. This may be of type L{str},
  387. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  388. L{list}, L{types.GeneratorType}, L{Deferred}, or something that
  389. provides L{IRenderable}.
  390. @param write: A callable which will be invoked with each L{bytes} produced
  391. by flattening C{root}.
  392. @return: A L{Deferred} which will be called back with C{None} when C{root}
  393. has been completely flattened into C{write} or which will be errbacked
  394. if an unexpected exception occurs.
  395. """
  396. return ensureDeferred(_flattenTree(request, root, write))
  397. def flattenString(request: Optional[IRequest], root: Flattenable) -> Deferred[bytes]:
  398. """
  399. Collate a string representation of C{root} into a single string.
  400. This is basically gluing L{flatten} to an L{io.BytesIO} and returning
  401. the results. See L{flatten} for the exact meanings of C{request} and
  402. C{root}.
  403. @return: A L{Deferred} which will be called back with a single UTF-8 encoded
  404. string as its result when C{root} has been completely flattened or which
  405. will be errbacked if an unexpected exception occurs.
  406. """
  407. io = BytesIO()
  408. d = flatten(request, root, io.write)
  409. d.addCallback(lambda _: io.getvalue())
  410. return cast(Deferred[bytes], d)