xpath.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. # -*- test-case-name: twisted.words.test.test_xpath -*-
  2. #
  3. # Copyright (c) Twisted Matrix Laboratories.
  4. # See LICENSE for details.
  5. """
  6. XPath query support.
  7. This module provides L{XPathQuery} to match
  8. L{domish.Element<twisted.words.xish.domish.Element>} instances against
  9. XPath-like expressions.
  10. """
  11. from __future__ import absolute_import, division
  12. from io import StringIO
  13. from twisted.python.compat import StringType, unicode
  14. class LiteralValue(unicode):
  15. def value(self, elem):
  16. return self
  17. class IndexValue:
  18. def __init__(self, index):
  19. self.index = int(index) - 1
  20. def value(self, elem):
  21. return elem.children[self.index]
  22. class AttribValue:
  23. def __init__(self, attribname):
  24. self.attribname = attribname
  25. if self.attribname == "xmlns":
  26. self.value = self.value_ns
  27. def value_ns(self, elem):
  28. return elem.uri
  29. def value(self, elem):
  30. if self.attribname in elem.attributes:
  31. return elem.attributes[self.attribname]
  32. else:
  33. return None
  34. class CompareValue:
  35. def __init__(self, lhs, op, rhs):
  36. self.lhs = lhs
  37. self.rhs = rhs
  38. if op == "=":
  39. self.value = self._compareEqual
  40. else:
  41. self.value = self._compareNotEqual
  42. def _compareEqual(self, elem):
  43. return self.lhs.value(elem) == self.rhs.value(elem)
  44. def _compareNotEqual(self, elem):
  45. return self.lhs.value(elem) != self.rhs.value(elem)
  46. class BooleanValue:
  47. """
  48. Provide boolean XPath expression operators.
  49. @ivar lhs: Left hand side expression of the operator.
  50. @ivar op: The operator. One of C{'and'}, C{'or'}.
  51. @ivar rhs: Right hand side expression of the operator.
  52. @ivar value: Reference to the method that will calculate the value of
  53. this expression given an element.
  54. """
  55. def __init__(self, lhs, op, rhs):
  56. self.lhs = lhs
  57. self.rhs = rhs
  58. if op == "and":
  59. self.value = self._booleanAnd
  60. else:
  61. self.value = self._booleanOr
  62. def _booleanAnd(self, elem):
  63. """
  64. Calculate boolean and of the given expressions given an element.
  65. @param elem: The element to calculate the value of the expression from.
  66. """
  67. return self.lhs.value(elem) and self.rhs.value(elem)
  68. def _booleanOr(self, elem):
  69. """
  70. Calculate boolean or of the given expressions given an element.
  71. @param elem: The element to calculate the value of the expression from.
  72. """
  73. return self.lhs.value(elem) or self.rhs.value(elem)
  74. def Function(fname):
  75. """
  76. Internal method which selects the function object
  77. """
  78. klassname = "_%s_Function" % fname
  79. c = globals()[klassname]()
  80. return c
  81. class _not_Function:
  82. def __init__(self):
  83. self.baseValue = None
  84. def setParams(self, baseValue):
  85. self.baseValue = baseValue
  86. def value(self, elem):
  87. return not self.baseValue.value(elem)
  88. class _text_Function:
  89. def setParams(self):
  90. pass
  91. def value(self, elem):
  92. return unicode(elem)
  93. class _Location:
  94. def __init__(self):
  95. self.predicates = []
  96. self.elementName = None
  97. self.childLocation = None
  98. def matchesPredicates(self, elem):
  99. if self.elementName != None and self.elementName != elem.name:
  100. return 0
  101. for p in self.predicates:
  102. if not p.value(elem):
  103. return 0
  104. return 1
  105. def matches(self, elem):
  106. if not self.matchesPredicates(elem):
  107. return 0
  108. if self.childLocation != None:
  109. for c in elem.elements():
  110. if self.childLocation.matches(c):
  111. return 1
  112. else:
  113. return 1
  114. return 0
  115. def queryForString(self, elem, resultbuf):
  116. if not self.matchesPredicates(elem):
  117. return
  118. if self.childLocation != None:
  119. for c in elem.elements():
  120. self.childLocation.queryForString(c, resultbuf)
  121. else:
  122. resultbuf.write(unicode(elem))
  123. def queryForNodes(self, elem, resultlist):
  124. if not self.matchesPredicates(elem):
  125. return
  126. if self.childLocation != None:
  127. for c in elem.elements():
  128. self.childLocation.queryForNodes(c, resultlist)
  129. else:
  130. resultlist.append(elem)
  131. def queryForStringList(self, elem, resultlist):
  132. if not self.matchesPredicates(elem):
  133. return
  134. if self.childLocation != None:
  135. for c in elem.elements():
  136. self.childLocation.queryForStringList(c, resultlist)
  137. else:
  138. for c in elem.children:
  139. if isinstance(c, StringType):
  140. resultlist.append(c)
  141. class _AnyLocation:
  142. def __init__(self):
  143. self.predicates = []
  144. self.elementName = None
  145. self.childLocation = None
  146. def matchesPredicates(self, elem):
  147. for p in self.predicates:
  148. if not p.value(elem):
  149. return 0
  150. return 1
  151. def listParents(self, elem, parentlist):
  152. if elem.parent != None:
  153. self.listParents(elem.parent, parentlist)
  154. parentlist.append(elem.name)
  155. def isRootMatch(self, elem):
  156. if (self.elementName == None or self.elementName == elem.name) and \
  157. self.matchesPredicates(elem):
  158. if self.childLocation != None:
  159. for c in elem.elements():
  160. if self.childLocation.matches(c):
  161. return True
  162. else:
  163. return True
  164. return False
  165. def findFirstRootMatch(self, elem):
  166. if (self.elementName == None or self.elementName == elem.name) and \
  167. self.matchesPredicates(elem):
  168. # Thus far, the name matches and the predicates match,
  169. # now check into the children and find the first one
  170. # that matches the rest of the structure
  171. # the rest of the structure
  172. if self.childLocation != None:
  173. for c in elem.elements():
  174. if self.childLocation.matches(c):
  175. return c
  176. return None
  177. else:
  178. # No children locations; this is a match!
  179. return elem
  180. else:
  181. # Ok, predicates or name didn't match, so we need to start
  182. # down each child and treat it as the root and try
  183. # again
  184. for c in elem.elements():
  185. if self.matches(c):
  186. return c
  187. # No children matched...
  188. return None
  189. def matches(self, elem):
  190. if self.isRootMatch(elem):
  191. return True
  192. else:
  193. # Ok, initial element isn't an exact match, walk
  194. # down each child and treat it as the root and try
  195. # again
  196. for c in elem.elements():
  197. if self.matches(c):
  198. return True
  199. # No children matched...
  200. return False
  201. def queryForString(self, elem, resultbuf):
  202. raise NotImplementedError(
  203. "queryForString is not implemented for any location")
  204. def queryForNodes(self, elem, resultlist):
  205. # First check to see if _this_ element is a root
  206. if self.isRootMatch(elem):
  207. resultlist.append(elem)
  208. # Now check each child
  209. for c in elem.elements():
  210. self.queryForNodes(c, resultlist)
  211. def queryForStringList(self, elem, resultlist):
  212. if self.isRootMatch(elem):
  213. for c in elem.children:
  214. if isinstance(c, StringType):
  215. resultlist.append(c)
  216. for c in elem.elements():
  217. self.queryForStringList(c, resultlist)
  218. class XPathQuery:
  219. def __init__(self, queryStr):
  220. self.queryStr = queryStr
  221. # Prevent a circular import issue, as xpathparser imports this module.
  222. from twisted.words.xish.xpathparser import (XPathParser,
  223. XPathParserScanner)
  224. parser = XPathParser(XPathParserScanner(queryStr))
  225. self.baseLocation = getattr(parser, 'XPATH')()
  226. def __hash__(self):
  227. return self.queryStr.__hash__()
  228. def matches(self, elem):
  229. return self.baseLocation.matches(elem)
  230. def queryForString(self, elem):
  231. result = StringIO()
  232. self.baseLocation.queryForString(elem, result)
  233. return result.getvalue()
  234. def queryForNodes(self, elem):
  235. result = []
  236. self.baseLocation.queryForNodes(elem, result)
  237. if len(result) == 0:
  238. return None
  239. else:
  240. return result
  241. def queryForStringList(self, elem):
  242. result = []
  243. self.baseLocation.queryForStringList(elem, result)
  244. if len(result) == 0:
  245. return None
  246. else:
  247. return result
  248. __internedQueries = {}
  249. def internQuery(queryString):
  250. if queryString not in __internedQueries:
  251. __internedQueries[queryString] = XPathQuery(queryString)
  252. return __internedQueries[queryString]
  253. def matches(xpathstr, elem):
  254. return internQuery(xpathstr).matches(elem)
  255. def queryForStringList(xpathstr, elem):
  256. return internQuery(xpathstr).queryForStringList(elem)
  257. def queryForString(xpathstr, elem):
  258. return internQuery(xpathstr).queryForString(elem)
  259. def queryForNodes(xpathstr, elem):
  260. return internQuery(xpathstr).queryForNodes(elem)