xpath.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. # -*- test-case-name: twisted.words.test.test_xpath -*-
  2. #
  3. # Copyright (c) Twisted Matrix Laboratories.
  4. # See LICENSE for details.
  5. """
  6. XPath query support.
  7. This module provides L{XPathQuery} to match
  8. L{domish.Element<twisted.words.xish.domish.Element>} instances against
  9. XPath-like expressions.
  10. """
  11. from io import StringIO
  12. class LiteralValue(str):
  13. def value(self, elem):
  14. return self
  15. class IndexValue:
  16. def __init__(self, index):
  17. self.index = int(index) - 1
  18. def value(self, elem):
  19. return elem.children[self.index]
  20. class AttribValue:
  21. def __init__(self, attribname):
  22. self.attribname = attribname
  23. if self.attribname == "xmlns":
  24. self.value = self.value_ns
  25. def value_ns(self, elem):
  26. return elem.uri
  27. def value(self, elem):
  28. if self.attribname in elem.attributes:
  29. return elem.attributes[self.attribname]
  30. else:
  31. return None
  32. class CompareValue:
  33. def __init__(self, lhs, op, rhs):
  34. self.lhs = lhs
  35. self.rhs = rhs
  36. if op == "=":
  37. self.value = self._compareEqual
  38. else:
  39. self.value = self._compareNotEqual
  40. def _compareEqual(self, elem):
  41. return self.lhs.value(elem) == self.rhs.value(elem)
  42. def _compareNotEqual(self, elem):
  43. return self.lhs.value(elem) != self.rhs.value(elem)
  44. class BooleanValue:
  45. """
  46. Provide boolean XPath expression operators.
  47. @ivar lhs: Left hand side expression of the operator.
  48. @ivar op: The operator. One of C{'and'}, C{'or'}.
  49. @ivar rhs: Right hand side expression of the operator.
  50. @ivar value: Reference to the method that will calculate the value of
  51. this expression given an element.
  52. """
  53. def __init__(self, lhs, op, rhs):
  54. self.lhs = lhs
  55. self.rhs = rhs
  56. if op == "and":
  57. self.value = self._booleanAnd
  58. else:
  59. self.value = self._booleanOr
  60. def _booleanAnd(self, elem):
  61. """
  62. Calculate boolean and of the given expressions given an element.
  63. @param elem: The element to calculate the value of the expression from.
  64. """
  65. return self.lhs.value(elem) and self.rhs.value(elem)
  66. def _booleanOr(self, elem):
  67. """
  68. Calculate boolean or of the given expressions given an element.
  69. @param elem: The element to calculate the value of the expression from.
  70. """
  71. return self.lhs.value(elem) or self.rhs.value(elem)
  72. def Function(fname):
  73. """
  74. Internal method which selects the function object
  75. """
  76. klassname = "_%s_Function" % fname
  77. c = globals()[klassname]()
  78. return c
  79. class _not_Function:
  80. def __init__(self):
  81. self.baseValue = None
  82. def setParams(self, baseValue):
  83. self.baseValue = baseValue
  84. def value(self, elem):
  85. return not self.baseValue.value(elem)
  86. class _text_Function:
  87. def setParams(self):
  88. pass
  89. def value(self, elem):
  90. return str(elem)
  91. class _Location:
  92. def __init__(self):
  93. self.predicates = []
  94. self.elementName = None
  95. self.childLocation = None
  96. def matchesPredicates(self, elem):
  97. if self.elementName != None and self.elementName != elem.name:
  98. return 0
  99. for p in self.predicates:
  100. if not p.value(elem):
  101. return 0
  102. return 1
  103. def matches(self, elem):
  104. if not self.matchesPredicates(elem):
  105. return 0
  106. if self.childLocation != None:
  107. for c in elem.elements():
  108. if self.childLocation.matches(c):
  109. return 1
  110. else:
  111. return 1
  112. return 0
  113. def queryForString(self, elem, resultbuf):
  114. if not self.matchesPredicates(elem):
  115. return
  116. if self.childLocation != None:
  117. for c in elem.elements():
  118. self.childLocation.queryForString(c, resultbuf)
  119. else:
  120. resultbuf.write(str(elem))
  121. def queryForNodes(self, elem, resultlist):
  122. if not self.matchesPredicates(elem):
  123. return
  124. if self.childLocation != None:
  125. for c in elem.elements():
  126. self.childLocation.queryForNodes(c, resultlist)
  127. else:
  128. resultlist.append(elem)
  129. def queryForStringList(self, elem, resultlist):
  130. if not self.matchesPredicates(elem):
  131. return
  132. if self.childLocation != None:
  133. for c in elem.elements():
  134. self.childLocation.queryForStringList(c, resultlist)
  135. else:
  136. for c in elem.children:
  137. if isinstance(c, str):
  138. resultlist.append(c)
  139. class _AnyLocation:
  140. def __init__(self):
  141. self.predicates = []
  142. self.elementName = None
  143. self.childLocation = None
  144. def matchesPredicates(self, elem):
  145. for p in self.predicates:
  146. if not p.value(elem):
  147. return 0
  148. return 1
  149. def listParents(self, elem, parentlist):
  150. if elem.parent != None:
  151. self.listParents(elem.parent, parentlist)
  152. parentlist.append(elem.name)
  153. def isRootMatch(self, elem):
  154. if (
  155. self.elementName == None or self.elementName == elem.name
  156. ) and self.matchesPredicates(elem):
  157. if self.childLocation != None:
  158. for c in elem.elements():
  159. if self.childLocation.matches(c):
  160. return True
  161. else:
  162. return True
  163. return False
  164. def findFirstRootMatch(self, elem):
  165. if (
  166. self.elementName == None or self.elementName == elem.name
  167. ) and self.matchesPredicates(elem):
  168. # Thus far, the name matches and the predicates match,
  169. # now check into the children and find the first one
  170. # that matches the rest of the structure
  171. # the rest of the structure
  172. if self.childLocation != None:
  173. for c in elem.elements():
  174. if self.childLocation.matches(c):
  175. return c
  176. return None
  177. else:
  178. # No children locations; this is a match!
  179. return elem
  180. else:
  181. # Ok, predicates or name didn't match, so we need to start
  182. # down each child and treat it as the root and try
  183. # again
  184. for c in elem.elements():
  185. if self.matches(c):
  186. return c
  187. # No children matched...
  188. return None
  189. def matches(self, elem):
  190. if self.isRootMatch(elem):
  191. return True
  192. else:
  193. # Ok, initial element isn't an exact match, walk
  194. # down each child and treat it as the root and try
  195. # again
  196. for c in elem.elements():
  197. if self.matches(c):
  198. return True
  199. # No children matched...
  200. return False
  201. def queryForString(self, elem, resultbuf):
  202. raise NotImplementedError("queryForString is not implemented for any location")
  203. def queryForNodes(self, elem, resultlist):
  204. # First check to see if _this_ element is a root
  205. if self.isRootMatch(elem):
  206. resultlist.append(elem)
  207. # Now check each child
  208. for c in elem.elements():
  209. self.queryForNodes(c, resultlist)
  210. def queryForStringList(self, elem, resultlist):
  211. if self.isRootMatch(elem):
  212. for c in elem.children:
  213. if isinstance(c, str):
  214. resultlist.append(c)
  215. for c in elem.elements():
  216. self.queryForStringList(c, resultlist)
  217. class XPathQuery:
  218. def __init__(self, queryStr):
  219. self.queryStr = queryStr
  220. # Prevent a circular import issue, as xpathparser imports this module.
  221. from twisted.words.xish.xpathparser import XPathParser, XPathParserScanner
  222. parser = XPathParser(XPathParserScanner(queryStr))
  223. self.baseLocation = getattr(parser, "XPATH")()
  224. def __hash__(self):
  225. return self.queryStr.__hash__()
  226. def matches(self, elem):
  227. return self.baseLocation.matches(elem)
  228. def queryForString(self, elem):
  229. result = StringIO()
  230. self.baseLocation.queryForString(elem, result)
  231. return result.getvalue()
  232. def queryForNodes(self, elem):
  233. result = []
  234. self.baseLocation.queryForNodes(elem, result)
  235. if len(result) == 0:
  236. return None
  237. else:
  238. return result
  239. def queryForStringList(self, elem):
  240. result = []
  241. self.baseLocation.queryForStringList(elem, result)
  242. if len(result) == 0:
  243. return None
  244. else:
  245. return result
  246. __internedQueries = {}
  247. def internQuery(queryString):
  248. if queryString not in __internedQueries:
  249. __internedQueries[queryString] = XPathQuery(queryString)
  250. return __internedQueries[queryString]
  251. def matches(xpathstr, elem):
  252. return internQuery(xpathstr).matches(elem)
  253. def queryForStringList(xpathstr, elem):
  254. return internQuery(xpathstr).queryForStringList(elem)
  255. def queryForString(xpathstr, elem):
  256. return internQuery(xpathstr).queryForString(elem)
  257. def queryForNodes(xpathstr, elem):
  258. return internQuery(xpathstr).queryForNodes(elem)