ElementInclude.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #
  2. # ElementTree
  3. # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
  4. #
  5. # limited xinclude support for element trees
  6. #
  7. # history:
  8. # 2003-08-15 fl created
  9. # 2003-11-14 fl fixed default loader
  10. #
  11. # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
  12. #
  13. # fredrik@pythonware.com
  14. # http://www.pythonware.com
  15. #
  16. # --------------------------------------------------------------------
  17. # The ElementTree toolkit is
  18. #
  19. # Copyright (c) 1999-2008 by Fredrik Lundh
  20. #
  21. # By obtaining, using, and/or copying this software and/or its
  22. # associated documentation, you agree that you have read, understood,
  23. # and will comply with the following terms and conditions:
  24. #
  25. # Permission to use, copy, modify, and distribute this software and
  26. # its associated documentation for any purpose and without fee is
  27. # hereby granted, provided that the above copyright notice appears in
  28. # all copies, and that both that copyright notice and this permission
  29. # notice appear in supporting documentation, and that the name of
  30. # Secret Labs AB or the author not be used in advertising or publicity
  31. # pertaining to distribution of the software without specific, written
  32. # prior permission.
  33. #
  34. # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  35. # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  36. # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  37. # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  38. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  39. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  40. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  41. # OF THIS SOFTWARE.
  42. # --------------------------------------------------------------------
  43. # Licensed to PSF under a Contributor Agreement.
  44. # See https://www.python.org/psf/license for licensing details.
  45. ##
  46. # Limited XInclude support for the ElementTree package.
  47. ##
  48. import copy
  49. from . import ElementTree
  50. from urllib.parse import urljoin
  51. XINCLUDE = "{http://www.w3.org/2001/XInclude}"
  52. XINCLUDE_INCLUDE = XINCLUDE + "include"
  53. XINCLUDE_FALLBACK = XINCLUDE + "fallback"
  54. # For security reasons, the inclusion depth is limited to this read-only value by default.
  55. DEFAULT_MAX_INCLUSION_DEPTH = 6
  56. ##
  57. # Fatal include error.
  58. class FatalIncludeError(SyntaxError):
  59. pass
  60. class LimitedRecursiveIncludeError(FatalIncludeError):
  61. pass
  62. ##
  63. # Default loader. This loader reads an included resource from disk.
  64. #
  65. # @param href Resource reference.
  66. # @param parse Parse mode. Either "xml" or "text".
  67. # @param encoding Optional text encoding (UTF-8 by default for "text").
  68. # @return The expanded resource. If the parse mode is "xml", this
  69. # is an Element instance. If the parse mode is "text", this
  70. # is a string. If the loader fails, it can return None
  71. # or raise an OSError exception.
  72. # @throws OSError If the loader fails to load the resource.
  73. def default_loader(href, parse, encoding=None):
  74. if parse == "xml":
  75. with open(href, 'rb') as file:
  76. data = ElementTree.parse(file).getroot()
  77. else:
  78. if not encoding:
  79. encoding = 'UTF-8'
  80. with open(href, 'r', encoding=encoding) as file:
  81. data = file.read()
  82. return data
  83. ##
  84. # Expand XInclude directives.
  85. #
  86. # @param elem Root Element or any ElementTree of a tree to be expanded
  87. # @param loader Optional resource loader. If omitted, it defaults
  88. # to {@link default_loader}. If given, it should be a callable
  89. # that implements the same interface as <b>default_loader</b>.
  90. # @param base_url The base URL of the original file, to resolve
  91. # relative include file references.
  92. # @param max_depth The maximum number of recursive inclusions.
  93. # Limited to reduce the risk of malicious content explosion.
  94. # Pass None to disable the limitation.
  95. # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
  96. # @throws FatalIncludeError If the function fails to include a given
  97. # resource, or if the tree contains malformed XInclude elements.
  98. # @throws OSError If the function fails to load a given resource.
  99. # @throws ValueError If negative {@link max_depth} is passed.
# @returns None. Modifies the tree pointed to by {@link elem} in place.
  101. def include(elem, loader=None, base_url=None,
  102. max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
  103. if max_depth is None:
  104. max_depth = -1
  105. elif max_depth < 0:
  106. raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
  107. if hasattr(elem, 'getroot'):
  108. elem = elem.getroot()
  109. if loader is None:
  110. loader = default_loader
  111. _include(elem, loader, base_url, max_depth, set())
  112. def _include(elem, loader, base_url, max_depth, _parent_hrefs):
  113. # look for xinclude elements
  114. i = 0
  115. while i < len(elem):
  116. e = elem[i]
  117. if e.tag == XINCLUDE_INCLUDE:
  118. # process xinclude directive
  119. href = e.get("href")
  120. if base_url:
  121. href = urljoin(base_url, href)
  122. parse = e.get("parse", "xml")
  123. if parse == "xml":
  124. if href in _parent_hrefs:
  125. raise FatalIncludeError("recursive include of %s" % href)
  126. if max_depth == 0:
  127. raise LimitedRecursiveIncludeError(
  128. "maximum xinclude depth reached when including file %s" % href)
  129. _parent_hrefs.add(href)
  130. node = loader(href, parse)
  131. if node is None:
  132. raise FatalIncludeError(
  133. "cannot load %r as %r" % (href, parse)
  134. )
  135. node = copy.copy(node) # FIXME: this makes little sense with recursive includes
  136. _include(node, loader, href, max_depth - 1, _parent_hrefs)
  137. _parent_hrefs.remove(href)
  138. if e.tail:
  139. node.tail = (node.tail or "") + e.tail
  140. elem[i] = node
  141. elif parse == "text":
  142. text = loader(href, parse, e.get("encoding"))
  143. if text is None:
  144. raise FatalIncludeError(
  145. "cannot load %r as %r" % (href, parse)
  146. )
  147. if e.tail:
  148. text += e.tail
  149. if i:
  150. node = elem[i-1]
  151. node.tail = (node.tail or "") + text
  152. else:
  153. elem.text = (elem.text or "") + text
  154. del elem[i]
  155. continue
  156. else:
  157. raise FatalIncludeError(
  158. "unknown parse type in xi:include tag (%r)" % parse
  159. )
  160. elif e.tag == XINCLUDE_FALLBACK:
  161. raise FatalIncludeError(
  162. "xi:fallback tag must be child of xi:include (%r)" % e.tag
  163. )
  164. else:
  165. _include(e, loader, base_url, max_depth, _parent_hrefs)
  166. i += 1