core.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. import ast
  2. import builtins
  3. import operator
  4. from collections import ChainMap, OrderedDict, deque
  5. from contextlib import suppress
  6. from types import FrameType
  7. from typing import Any, Tuple, Iterable, List, Mapping, Dict, Union, Set
  8. from pure_eval.my_getattr_static import getattr_static
  9. from pure_eval.utils import (
  10. CannotEval,
  11. has_ast_name,
  12. copy_ast_without_context,
  13. is_standard_types,
  14. of_standard_types,
  15. is_any,
  16. of_type,
  17. ensure_dict,
  18. )
  19. class Evaluator:
  20. def __init__(self, names: Mapping[str, Any]):
  21. """
  22. Construct a new evaluator with the given variable names.
  23. This is a low level API, typically you will use `Evaluator.from_frame(frame)`.
  24. :param names: a mapping from variable names to their values.
  25. """
  26. self.names = names
  27. self._cache = {} # type: Dict[ast.expr, Any]
  28. @classmethod
  29. def from_frame(cls, frame: FrameType) -> 'Evaluator':
  30. """
  31. Construct an Evaluator that can look up variables from the given frame.
  32. :param frame: a frame object, e.g. from a traceback or `inspect.currentframe().f_back`.
  33. """
  34. return cls(ChainMap(
  35. ensure_dict(frame.f_locals),
  36. ensure_dict(frame.f_globals),
  37. ensure_dict(frame.f_builtins),
  38. ))
  39. def __getitem__(self, node: ast.expr) -> Any:
  40. """
  41. Find the value of the given node.
  42. If it cannot be evaluated safely, this raises `CannotEval`.
  43. The result is cached either way.
  44. :param node: an AST expression to evaluate
  45. :return: the value of the node
  46. """
  47. if not isinstance(node, ast.expr):
  48. raise TypeError("node should be an ast.expr, not {!r}".format(type(node).__name__))
  49. with suppress(KeyError):
  50. result = self._cache[node]
  51. if result is CannotEval:
  52. raise CannotEval
  53. else:
  54. return result
  55. try:
  56. self._cache[node] = result = self._handle(node)
  57. return result
  58. except CannotEval:
  59. self._cache[node] = CannotEval
  60. raise
  61. def _handle(self, node: ast.expr) -> Any:
  62. """
  63. This is where the evaluation happens.
  64. Users should use `__getitem__`, i.e. `evaluator[node]`,
  65. as it provides caching.
  66. :param node: an AST expression to evaluate
  67. :return: the value of the node
  68. """
  69. with suppress(Exception):
  70. return ast.literal_eval(node)
  71. if isinstance(node, ast.Name):
  72. try:
  73. return self.names[node.id]
  74. except KeyError:
  75. raise CannotEval
  76. elif isinstance(node, ast.Attribute):
  77. value = self[node.value]
  78. attr = node.attr
  79. return getattr_static(value, attr)
  80. elif isinstance(node, ast.Subscript):
  81. return self._handle_subscript(node)
  82. elif isinstance(node, (ast.List, ast.Tuple, ast.Set, ast.Dict)):
  83. return self._handle_container(node)
  84. elif isinstance(node, ast.UnaryOp):
  85. return self._handle_unary(node)
  86. elif isinstance(node, ast.BinOp):
  87. return self._handle_binop(node)
  88. elif isinstance(node, ast.BoolOp):
  89. return self._handle_boolop(node)
  90. elif isinstance(node, ast.Compare):
  91. return self._handle_compare(node)
  92. elif isinstance(node, ast.Call):
  93. return self._handle_call(node)
  94. raise CannotEval
  95. def _handle_call(self, node):
  96. if node.keywords:
  97. raise CannotEval
  98. func = self[node.func]
  99. args = [self[arg] for arg in node.args]
  100. if (
  101. is_any(
  102. func,
  103. slice,
  104. int,
  105. range,
  106. round,
  107. complex,
  108. list,
  109. tuple,
  110. abs,
  111. hex,
  112. bin,
  113. oct,
  114. bool,
  115. ord,
  116. float,
  117. len,
  118. chr,
  119. )
  120. or len(args) == 0
  121. and is_any(func, set, dict, str, frozenset, bytes, bytearray, object)
  122. or len(args) >= 2
  123. and is_any(func, str, divmod, bytes, bytearray, pow)
  124. ):
  125. args = [
  126. of_standard_types(arg, check_dict_values=False, deep=False)
  127. for arg in args
  128. ]
  129. try:
  130. return func(*args)
  131. except Exception as e:
  132. raise CannotEval from e
  133. if len(args) == 1:
  134. arg = args[0]
  135. if is_any(func, id, type):
  136. try:
  137. return func(arg)
  138. except Exception as e:
  139. raise CannotEval from e
  140. if is_any(func, all, any, sum):
  141. of_type(arg, tuple, frozenset, list, set, dict, OrderedDict, deque)
  142. for x in arg:
  143. of_standard_types(x, check_dict_values=False, deep=False)
  144. try:
  145. return func(arg)
  146. except Exception as e:
  147. raise CannotEval from e
  148. if is_any(
  149. func, sorted, min, max, hash, set, dict, ascii, str, repr, frozenset
  150. ):
  151. of_standard_types(arg, check_dict_values=True, deep=True)
  152. try:
  153. return func(arg)
  154. except Exception as e:
  155. raise CannotEval from e
  156. raise CannotEval
  157. def _handle_compare(self, node):
  158. left = self[node.left]
  159. result = True
  160. for op, right in zip(node.ops, node.comparators):
  161. right = self[right]
  162. op_type = type(op)
  163. op_func = {
  164. ast.Eq: operator.eq,
  165. ast.NotEq: operator.ne,
  166. ast.Lt: operator.lt,
  167. ast.LtE: operator.le,
  168. ast.Gt: operator.gt,
  169. ast.GtE: operator.ge,
  170. ast.Is: operator.is_,
  171. ast.IsNot: operator.is_not,
  172. ast.In: (lambda a, b: a in b),
  173. ast.NotIn: (lambda a, b: a not in b),
  174. }[op_type]
  175. if op_type not in (ast.Is, ast.IsNot):
  176. of_standard_types(left, check_dict_values=False, deep=True)
  177. of_standard_types(right, check_dict_values=False, deep=True)
  178. try:
  179. result = op_func(left, right)
  180. except Exception as e:
  181. raise CannotEval from e
  182. if not result:
  183. return result
  184. left = right
  185. return result
  186. def _handle_boolop(self, node):
  187. left = of_standard_types(
  188. self[node.values[0]], check_dict_values=False, deep=False
  189. )
  190. for right in node.values[1:]:
  191. # We need short circuiting so that the whole operation can be evaluated
  192. # even if the right operand can't
  193. if isinstance(node.op, ast.Or):
  194. left = left or of_standard_types(
  195. self[right], check_dict_values=False, deep=False
  196. )
  197. else:
  198. assert isinstance(node.op, ast.And)
  199. left = left and of_standard_types(
  200. self[right], check_dict_values=False, deep=False
  201. )
  202. return left
  203. def _handle_binop(self, node):
  204. op_type = type(node.op)
  205. op = {
  206. ast.Add: operator.add,
  207. ast.Sub: operator.sub,
  208. ast.Mult: operator.mul,
  209. ast.Div: operator.truediv,
  210. ast.FloorDiv: operator.floordiv,
  211. ast.Mod: operator.mod,
  212. ast.Pow: operator.pow,
  213. ast.LShift: operator.lshift,
  214. ast.RShift: operator.rshift,
  215. ast.BitOr: operator.or_,
  216. ast.BitXor: operator.xor,
  217. ast.BitAnd: operator.and_,
  218. }.get(op_type)
  219. if not op:
  220. raise CannotEval
  221. left = self[node.left]
  222. hash_type = is_any(type(left), set, frozenset, dict, OrderedDict)
  223. left = of_standard_types(left, check_dict_values=False, deep=hash_type)
  224. formatting = type(left) in (str, bytes) and op_type == ast.Mod
  225. right = of_standard_types(
  226. self[node.right],
  227. check_dict_values=formatting,
  228. deep=formatting or hash_type,
  229. )
  230. try:
  231. return op(left, right)
  232. except Exception as e:
  233. raise CannotEval from e
  234. def _handle_unary(self, node: ast.UnaryOp):
  235. value = of_standard_types(
  236. self[node.operand], check_dict_values=False, deep=False
  237. )
  238. op_type = type(node.op)
  239. op = {
  240. ast.USub: operator.neg,
  241. ast.UAdd: operator.pos,
  242. ast.Not: operator.not_,
  243. ast.Invert: operator.invert,
  244. }[op_type]
  245. try:
  246. return op(value)
  247. except Exception as e:
  248. raise CannotEval from e
  249. def _handle_subscript(self, node):
  250. value = self[node.value]
  251. of_standard_types(
  252. value, check_dict_values=False, deep=is_any(type(value), dict, OrderedDict)
  253. )
  254. index = node.slice
  255. if isinstance(index, ast.Slice):
  256. index = slice(
  257. *[
  258. None if p is None else self[p]
  259. for p in [index.lower, index.upper, index.step]
  260. ]
  261. )
  262. elif isinstance(index, ast.ExtSlice):
  263. raise CannotEval
  264. else:
  265. if isinstance(index, ast.Index):
  266. index = index.value
  267. index = self[index]
  268. of_standard_types(index, check_dict_values=False, deep=True)
  269. try:
  270. return value[index]
  271. except Exception:
  272. raise CannotEval
  273. def _handle_container(
  274. self,
  275. node: Union[ast.List, ast.Tuple, ast.Set, ast.Dict]
  276. ) -> Union[List, Tuple, Set, Dict]:
  277. """Handle container nodes, including List, Set, Tuple and Dict"""
  278. if isinstance(node, ast.Dict):
  279. elts = node.keys
  280. if None in elts: # ** unpacking inside {}, not yet supported
  281. raise CannotEval
  282. else:
  283. elts = node.elts
  284. elts = [self[elt] for elt in elts]
  285. if isinstance(node, ast.List):
  286. return elts
  287. if isinstance(node, ast.Tuple):
  288. return tuple(elts)
  289. # Set and Dict
  290. if not all(
  291. is_standard_types(elt, check_dict_values=False, deep=True) for elt in elts
  292. ):
  293. raise CannotEval
  294. if isinstance(node, ast.Set):
  295. try:
  296. return set(elts)
  297. except TypeError:
  298. raise CannotEval
  299. assert isinstance(node, ast.Dict)
  300. pairs = [(elt, self[val]) for elt, val in zip(elts, node.values)]
  301. try:
  302. return dict(pairs)
  303. except TypeError:
  304. raise CannotEval
  305. def find_expressions(self, root: ast.AST) -> Iterable[Tuple[ast.expr, Any]]:
  306. """
  307. Find all expressions in the given tree that can be safely evaluated.
  308. This is a low level API, typically you will use `interesting_expressions_grouped`.
  309. :param root: any AST node
  310. :return: generator of pairs (tuples) of expression nodes and their corresponding values.
  311. """
  312. for node in ast.walk(root):
  313. if not isinstance(node, ast.expr):
  314. continue
  315. try:
  316. value = self[node]
  317. except CannotEval:
  318. continue
  319. yield node, value
  320. def interesting_expressions_grouped(self, root: ast.AST) -> List[Tuple[List[ast.expr], Any]]:
  321. """
  322. Find all interesting expressions in the given tree that can be safely evaluated,
  323. grouping equivalent nodes together.
  324. For more control and details, see:
  325. - Evaluator.find_expressions
  326. - is_expression_interesting
  327. - group_expressions
  328. :param root: any AST node
  329. :return: A list of pairs (tuples) containing:
  330. - A list of equivalent AST expressions
  331. - The value of the first expression node
  332. (which should be the same for all nodes, unless threads are involved)
  333. """
  334. return group_expressions(
  335. pair
  336. for pair in self.find_expressions(root)
  337. if is_expression_interesting(*pair)
  338. )
  339. def is_expression_interesting(node: ast.expr, value: Any) -> bool:
  340. """
  341. Determines if an expression is potentially interesting, at least in my opinion.
  342. Returns False for the following expressions whose value is generally obvious:
  343. - Literals (e.g. 123, 'abc', [1, 2, 3], {'a': (), 'b': ([1, 2], [3])})
  344. - Variables or attributes whose name is equal to the value's __name__.
  345. For example, a function `def foo(): ...` is not interesting when referred to
  346. as `foo` as it usually would, but `bar` can be interesting if `bar is foo`.
  347. Similarly the method `self.foo` is not interesting.
  348. - Builtins (e.g. `len`) referred to by their usual name.
  349. This is a low level API, typically you will use `interesting_expressions_grouped`.
  350. :param node: an AST expression
  351. :param value: the value of the node
  352. :return: a boolean: True if the expression is interesting, False otherwise
  353. """
  354. with suppress(ValueError):
  355. ast.literal_eval(node)
  356. return False
  357. # TODO exclude inner modules, e.g. numpy.random.__name__ == 'numpy.random' != 'random'
  358. # TODO exclude common module abbreviations, e.g. numpy as np, pandas as pd
  359. if has_ast_name(value, node):
  360. return False
  361. if (
  362. isinstance(node, ast.Name)
  363. and getattr(builtins, node.id, object()) is value
  364. ):
  365. return False
  366. return True
  367. def group_expressions(expressions: Iterable[Tuple[ast.expr, Any]]) -> List[Tuple[List[ast.expr], Any]]:
  368. """
  369. Organise expression nodes and their values such that equivalent nodes are together.
  370. Two nodes are considered equivalent if they have the same structure,
  371. ignoring context (Load, Store, or Delete) and location (lineno, col_offset).
  372. For example, this will group together the same variable name mentioned multiple times in an expression.
  373. This will not check the values of the nodes. Equivalent nodes should have the same values,
  374. unless threads are involved.
  375. This is a low level API, typically you will use `interesting_expressions_grouped`.
  376. :param expressions: pairs of AST expressions and their values, as obtained from
  377. `Evaluator.find_expressions`, or `(node, evaluator[node])`.
  378. :return: A list of pairs (tuples) containing:
  379. - A list of equivalent AST expressions
  380. - The value of the first expression node
  381. (which should be the same for all nodes, unless threads are involved)
  382. """
  383. result = {}
  384. for node, value in expressions:
  385. dump = ast.dump(copy_ast_without_context(node))
  386. result.setdefault(dump, ([], value))[0].append(node)
  387. return list(result.values())