fixed subscription table

This commit is contained in:
2025-02-02 00:02:31 -05:00
parent a1ab31acfe
commit ef5f57e678
5389 changed files with 686710 additions and 28 deletions

View File

@@ -0,0 +1,17 @@
from .core import Evaluator, CannotEval, group_expressions, is_expression_interesting
from .my_getattr_static import getattr_static

try:
    from .version import __version__
except ImportError:
    # version.py is auto-generated with the git tag when building
    __version__ = "???"

# Explicit public API of the pure_eval package.
__all__ = [
    "Evaluator",
    "CannotEval",
    "group_expressions",
    "is_expression_interesting",
    "getattr_static",
    "__version__",
]

View File

@@ -0,0 +1,449 @@
import ast
import builtins
import operator
from collections import ChainMap, OrderedDict, deque
from contextlib import suppress
from types import FrameType
from typing import Any, Tuple, Iterable, List, Mapping, Dict, Union, Set
from pure_eval.my_getattr_static import getattr_static
from pure_eval.utils import (
CannotEval,
has_ast_name,
copy_ast_without_context,
is_standard_types,
of_standard_types,
is_any,
of_type,
ensure_dict,
)
class Evaluator:
    """
    Safely evaluates AST expressions against a mapping of variable names.

    Only operations that cannot run arbitrary user code are performed:
    name/attribute/subscript lookups, literals and containers, whitelisted
    operators, and a small set of pure builtin calls.  Anything else raises
    `CannotEval`.
    """

    def __init__(self, names: Mapping[str, Any]):
        """
        Construct a new evaluator with the given variable names.
        This is a low level API, typically you will use `Evaluator.from_frame(frame)`.
        :param names: a mapping from variable names to their values.
        """
        self.names = names
        # Per-node memo; the class CannotEval itself is stored as a marker
        # for nodes that previously failed to evaluate.
        self._cache = {}  # type: Dict[ast.expr, Any]

    @classmethod
    def from_frame(cls, frame: FrameType) -> 'Evaluator':
        """
        Construct an Evaluator that can look up variables from the given frame.
        :param frame: a frame object, e.g. from a traceback or `inspect.currentframe().f_back`.
        """
        # Mirror normal name resolution order: locals, then globals, then builtins.
        return cls(ChainMap(
            ensure_dict(frame.f_locals),
            ensure_dict(frame.f_globals),
            ensure_dict(frame.f_builtins),
        ))

    def __getitem__(self, node: ast.expr) -> Any:
        """
        Find the value of the given node.
        If it cannot be evaluated safely, this raises `CannotEval`.
        The result is cached either way.
        :param node: an AST expression to evaluate
        :return: the value of the node
        """
        if not isinstance(node, ast.expr):
            raise TypeError("node should be an ast.expr, not {!r}".format(type(node).__name__))

        # KeyError here means a cache miss; fall through to evaluation.
        with suppress(KeyError):
            result = self._cache[node]
            if result is CannotEval:
                raise CannotEval
            else:
                return result

        try:
            self._cache[node] = result = self._handle(node)
            return result
        except CannotEval:
            # Cache the failure so repeated lookups of the same node are cheap.
            self._cache[node] = CannotEval
            raise

    def _handle(self, node: ast.expr) -> Any:
        """
        This is where the evaluation happens.
        Users should use `__getitem__`, i.e. `evaluator[node]`,
        as it provides caching.
        :param node: an AST expression to evaluate
        :return: the value of the node
        """
        # Plain literals (including literal containers) are always safe.
        with suppress(Exception):
            return ast.literal_eval(node)

        if isinstance(node, ast.Name):
            try:
                return self.names[node.id]
            except KeyError:
                raise CannotEval
        elif isinstance(node, ast.Attribute):
            value = self[node.value]
            attr = node.attr
            # Static lookup; never triggers __getattr__/__getattribute__.
            return getattr_static(value, attr)
        elif isinstance(node, ast.Subscript):
            return self._handle_subscript(node)
        elif isinstance(node, (ast.List, ast.Tuple, ast.Set, ast.Dict)):
            return self._handle_container(node)
        elif isinstance(node, ast.UnaryOp):
            return self._handle_unary(node)
        elif isinstance(node, ast.BinOp):
            return self._handle_binop(node)
        elif isinstance(node, ast.BoolOp):
            return self._handle_boolop(node)
        elif isinstance(node, ast.Compare):
            return self._handle_compare(node)
        elif isinstance(node, ast.Call):
            return self._handle_call(node)
        raise CannotEval

    def _handle_call(self, node):
        """
        Evaluate a call to one of a whitelist of pure builtins with
        standard-typed positional arguments.  Raises `CannotEval` otherwise.
        """
        if node.keywords:
            raise CannotEval
        func = self[node.func]
        args = [self[arg] for arg in node.args]

        # NOTE: `and` binds tighter than `or`, so each `len(args)` guard
        # below applies only to the `is_any` call that follows it.
        if (
                is_any(
                    func,
                    slice,
                    int,
                    range,
                    round,
                    complex,
                    list,
                    tuple,
                    abs,
                    hex,
                    bin,
                    oct,
                    bool,
                    ord,
                    float,
                    len,
                    chr,
                )
                or len(args) == 0
                and is_any(func, set, dict, str, frozenset, bytes, bytearray, object)
                or len(args) >= 2
                and is_any(func, str, divmod, bytes, bytearray, pow)
        ):
            args = [
                of_standard_types(arg, check_dict_values=False, deep=False)
                for arg in args
            ]
            try:
                return func(*args)
            except Exception as e:
                raise CannotEval from e

        if len(args) == 1:
            arg = args[0]
            if is_any(func, id, type):
                try:
                    return func(arg)
                except Exception as e:
                    raise CannotEval from e

            if is_any(func, all, any, sum):
                # Only iterate plain standard containers of shallow standard values.
                of_type(arg, tuple, frozenset, list, set, dict, OrderedDict, deque)
                for x in arg:
                    of_standard_types(x, check_dict_values=False, deep=False)
                try:
                    return func(arg)
                except Exception as e:
                    raise CannotEval from e

            if is_any(
                    func, sorted, min, max, hash, set, dict, ascii, str, repr, frozenset
            ):
                # These may compare/hash/repr contained values, so check deeply.
                of_standard_types(arg, check_dict_values=True, deep=True)
                try:
                    return func(arg)
                except Exception as e:
                    raise CannotEval from e
        raise CannotEval

    def _handle_compare(self, node):
        """
        Evaluate a (possibly chained) comparison such as `a < b <= c`,
        reproducing Python's short-circuit semantics.
        """
        left = self[node.left]
        result = True

        for op, right in zip(node.ops, node.comparators):
            right = self[right]

            op_type = type(op)
            op_func = {
                ast.Eq: operator.eq,
                ast.NotEq: operator.ne,
                ast.Lt: operator.lt,
                ast.LtE: operator.le,
                ast.Gt: operator.gt,
                ast.GtE: operator.ge,
                ast.Is: operator.is_,
                ast.IsNot: operator.is_not,
                ast.In: (lambda a, b: a in b),
                ast.NotIn: (lambda a, b: a not in b),
            }[op_type]

            if op_type not in (ast.Is, ast.IsNot):
                # Non-identity comparisons may invoke user dunder methods,
                # so only allow them between standard types.
                of_standard_types(left, check_dict_values=False, deep=True)
                of_standard_types(right, check_dict_values=False, deep=True)

            try:
                result = op_func(left, right)
            except Exception as e:
                raise CannotEval from e
            if not result:
                # Chain short-circuits on the first falsy comparison.
                return result
            left = right
        return result

    def _handle_boolop(self, node):
        """Evaluate `and`/`or` chains over standard-typed operands."""
        left = of_standard_types(
            self[node.values[0]], check_dict_values=False, deep=False
        )

        for right in node.values[1:]:
            # We need short circuiting so that the whole operation can be evaluated
            # even if the right operand can't
            if isinstance(node.op, ast.Or):
                left = left or of_standard_types(
                    self[right], check_dict_values=False, deep=False
                )
            else:
                assert isinstance(node.op, ast.And)
                left = left and of_standard_types(
                    self[right], check_dict_values=False, deep=False
                )
        return left

    def _handle_binop(self, node):
        """Evaluate a binary operation between standard-typed operands."""
        op_type = type(node.op)
        op = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
            ast.LShift: operator.lshift,
            ast.RShift: operator.rshift,
            ast.BitOr: operator.or_,
            ast.BitXor: operator.xor,
            ast.BitAnd: operator.and_,
        }.get(op_type)
        if not op:
            raise CannotEval
        left = self[node.left]
        # Set/dict operands hash their contents, so those need a deep check.
        hash_type = is_any(type(left), set, frozenset, dict, OrderedDict)
        left = of_standard_types(left, check_dict_values=False, deep=hash_type)
        # `str % x` / `bytes % x` formatting stringifies values, so dict
        # values must also be standard types in that case.
        formatting = type(left) in (str, bytes) and op_type == ast.Mod

        right = of_standard_types(
            self[node.right],
            check_dict_values=formatting,
            deep=formatting or hash_type,
        )
        try:
            return op(left, right)
        except Exception as e:
            raise CannotEval from e

    def _handle_unary(self, node: ast.UnaryOp):
        """Evaluate a unary operation (-x, +x, not x, ~x) on a standard value."""
        value = of_standard_types(
            self[node.operand], check_dict_values=False, deep=False
        )
        op_type = type(node.op)
        op = {
            ast.USub: operator.neg,
            ast.UAdd: operator.pos,
            ast.Not: operator.not_,
            ast.Invert: operator.invert,
        }[op_type]
        try:
            return op(value)
        except Exception as e:
            raise CannotEval from e

    def _handle_subscript(self, node):
        """Evaluate `value[index]` for standard-typed values and indices."""
        value = self[node.value]
        # Dicts need a deep check since subscripting hashes/compares keys.
        of_standard_types(
            value, check_dict_values=False, deep=is_any(type(value), dict, OrderedDict)
        )
        index = node.slice
        if isinstance(index, ast.Slice):
            index = slice(
                *[
                    None if p is None else self[p]
                    for p in [index.lower, index.upper, index.step]
                ]
            )
        elif isinstance(index, ast.ExtSlice):
            # Multi-dimensional slices (e.g. x[a:b, c]) are not supported.
            raise CannotEval
        else:
            if isinstance(index, ast.Index):
                # ast.Index wrapper only exists on older Python versions (< 3.9).
                index = index.value
            index = self[index]
        of_standard_types(index, check_dict_values=False, deep=True)

        try:
            return value[index]
        except Exception:
            raise CannotEval

    def _handle_container(
            self,
            node: Union[ast.List, ast.Tuple, ast.Set, ast.Dict]
    ) -> Union[List, Tuple, Set, Dict]:
        """Handle container nodes, including List, Set, Tuple and Dict"""
        if isinstance(node, ast.Dict):
            elts = node.keys
            if None in elts:  # ** unpacking inside {}, not yet supported
                raise CannotEval
        else:
            elts = node.elts
        elts = [self[elt] for elt in elts]
        if isinstance(node, ast.List):
            return elts
        if isinstance(node, ast.Tuple):
            return tuple(elts)

        # Set and Dict: elements become keys, so they will be hashed —
        # only standard types may be hashed safely.
        if not all(
                is_standard_types(elt, check_dict_values=False, deep=True) for elt in elts
        ):
            raise CannotEval

        if isinstance(node, ast.Set):
            try:
                return set(elts)
            except TypeError:
                raise CannotEval

        assert isinstance(node, ast.Dict)

        pairs = [(elt, self[val]) for elt, val in zip(elts, node.values)]
        try:
            return dict(pairs)
        except TypeError:
            raise CannotEval

    def find_expressions(self, root: ast.AST) -> Iterable[Tuple[ast.expr, Any]]:
        """
        Find all expressions in the given tree that can be safely evaluated.
        This is a low level API, typically you will use `interesting_expressions_grouped`.
        :param root: any AST node
        :return: generator of pairs (tuples) of expression nodes and their corresponding values.
        """
        for node in ast.walk(root):
            if not isinstance(node, ast.expr):
                continue

            try:
                value = self[node]
            except CannotEval:
                continue

            yield node, value

    def interesting_expressions_grouped(self, root: ast.AST) -> List[Tuple[List[ast.expr], Any]]:
        """
        Find all interesting expressions in the given tree that can be safely evaluated,
        grouping equivalent nodes together.

        For more control and details, see:
          - Evaluator.find_expressions
          - is_expression_interesting
          - group_expressions

        :param root: any AST node
        :return: A list of pairs (tuples) containing:
                   - A list of equivalent AST expressions
                   - The value of the first expression node
                     (which should be the same for all nodes, unless threads are involved)
        """
        return group_expressions(
            pair
            for pair in self.find_expressions(root)
            if is_expression_interesting(*pair)
        )
def is_expression_interesting(node: ast.expr, value: Any) -> bool:
    """
    Decide whether displaying this expression's value adds information.

    Returns False for expressions whose value is generally obvious from the
    source itself:
      - Literals (e.g. 123, 'abc', [1, 2, 3], {'a': (), 'b': ([1, 2], [3])})
      - Variables or attributes whose name equals the value's __name__,
        e.g. a function `def foo(): ...` referred to as `foo` or `self.foo`
        (but `bar` can be interesting even if `bar is foo`).
      - Builtins (e.g. `len`) referred to by their usual name.

    This is a low level API, typically you will use `interesting_expressions_grouped`.
    :param node: an AST expression
    :param value: the value of the node
    :return: a boolean: True if the expression is interesting, False otherwise
    """
    try:
        ast.literal_eval(node)
        # A literal: the value is already spelled out in the source.
        return False
    except ValueError:
        pass

    # TODO exclude inner modules, e.g. numpy.random.__name__ == 'numpy.random' != 'random'
    # TODO exclude common module abbreviations, e.g. numpy as np, pandas as pd
    if has_ast_name(value, node):
        return False

    refers_to_builtin_by_usual_name = (
        isinstance(node, ast.Name)
        and getattr(builtins, node.id, object()) is value
    )
    return not refers_to_builtin_by_usual_name
def group_expressions(expressions: Iterable[Tuple[ast.expr, Any]]) -> List[Tuple[List[ast.expr], Any]]:
    """
    Organise expression nodes and their values so that equivalent nodes are together.

    Two nodes are equivalent if they have the same structure after stripping
    context (Load/Store/Delete) and location (lineno, col_offset) — e.g. the
    same variable name mentioned several times in an expression.  Values are
    not compared; equivalent nodes should have equal values unless threads
    are involved.

    This is a low level API, typically you will use `interesting_expressions_grouped`.

    :param expressions: pairs of AST expressions and their values, as obtained from
      `Evaluator.find_expressions`, or `(node, evaluator[node])`.
    :return: A list of pairs (tuples) containing:
               - A list of equivalent AST expressions
               - The value of the first expression node
                 (which should be the same for all nodes, unless threads are involved)
    """
    grouped = {}
    for expr, val in expressions:
        key = ast.dump(copy_ast_without_context(expr))
        if key not in grouped:
            # Keep the value of the first expression seen for this structure.
            grouped[key] = ([], val)
        grouped[key][0].append(expr)
    return list(grouped.values())

View File

@@ -0,0 +1,140 @@
import types
from pure_eval.utils import of_type, CannotEval
_sentinel = object()
def _static_getmro(klass):
return type.__dict__['__mro__'].__get__(klass)
def _check_instance(obj, attr):
instance_dict = {}
try:
instance_dict = object.__getattribute__(obj, "__dict__")
except AttributeError:
pass
return dict.get(instance_dict, attr, _sentinel)
def _check_class(klass, attr):
    """
    Look up attr in the __dict__ of each class in klass's MRO.
    Stops (returning _sentinel) if an entry's metaclass shadows __dict__,
    since further raw lookups would not be trustworthy.
    """
    for entry in _static_getmro(klass):
        if _shadowed_dict(type(entry)) is not _sentinel:
            break
        try:
            return entry.__dict__[attr]
        except KeyError:
            pass
    return _sentinel
def _is_type(obj):
    """Return True if obj is a class, detected by whether an MRO can be read from it."""
    try:
        _static_getmro(obj)
    except TypeError:
        # Non-types make the __mro__ descriptor raise TypeError.
        return False
    return True
def _shadowed_dict(klass):
    """
    Check whether any class in klass's MRO overrides the standard __dict__
    descriptor.  Returns the shadowing object if so, else _sentinel.
    """
    dict_descriptor = type.__dict__["__dict__"]
    for entry in _static_getmro(klass):
        try:
            class_dict = dict_descriptor.__get__(entry)["__dict__"]
        except KeyError:
            continue
        is_canonical_dict = (
            type(class_dict) is types.GetSetDescriptorType
            and class_dict.__name__ == "__dict__"
            and class_dict.__objclass__ is entry
        )
        if not is_canonical_dict:
            return class_dict
    return _sentinel
def getattr_static(obj, attr):
    """Retrieve attributes without triggering dynamic lookup via the
    descriptor protocol, __getattr__ or __getattribute__.

    Note: this function may not be able to retrieve all attributes
    that getattr can fetch (like dynamically created attributes)
    and may find attributes that getattr can't (like descriptors
    that raise AttributeError). It can also return descriptor objects
    instead of instance members in some cases. See the
    documentation for details.

    Raises CannotEval whenever the attribute cannot be resolved safely
    (presumably adapted from inspect.getattr_static, which returns a
    default instead — verify against the stdlib original).
    """
    instance_result = _sentinel
    if not _is_type(obj):
        klass = type(obj)
        dict_attr = _shadowed_dict(klass)
        if (dict_attr is _sentinel or
                type(dict_attr) is types.MemberDescriptorType):
            instance_result = _check_instance(obj, attr)
        else:
            # A class in the MRO shadows __dict__; instance lookup would not
            # be static, so give up.
            raise CannotEval
    else:
        klass = obj

    klass_result = _check_class(klass, attr)

    if instance_result is not _sentinel and klass_result is not _sentinel:
        if _check_class(type(klass_result), "__get__") is not _sentinel and (
                _check_class(type(klass_result), "__set__") is not _sentinel
                or _check_class(type(klass_result), "__delete__") is not _sentinel
        ):
            # Data descriptors on the class take precedence over the
            # instance __dict__.
            return _resolve_descriptor(klass_result, obj, klass)

    if instance_result is not _sentinel:
        return instance_result
    if klass_result is not _sentinel:
        get = _check_class(type(klass_result), '__get__')
        if get is _sentinel:
            # Plain class attribute, no descriptor protocol involved.
            return klass_result
        else:
            if obj is klass:
                instance = None
            else:
                instance = obj
            return _resolve_descriptor(klass_result, instance, klass)

    if obj is klass:
        # for types we check the metaclass too
        for entry in _static_getmro(type(klass)):
            if _shadowed_dict(type(entry)) is _sentinel:
                try:
                    result = entry.__dict__[attr]
                    get = _check_class(type(result), '__get__')
                    if get is not _sentinel:
                        # Metaclass descriptors could run arbitrary code.
                        raise CannotEval
                    return result
                except KeyError:
                    pass
    raise CannotEval
class _foo:
    """Sample class used only to capture descriptor types: a slot descriptor
    and a plain function stored as a class attribute."""
    __slots__ = ['foo']
    method = lambda: 0


# Representative descriptors whose __get__ implementations are known to be
# side-effect free; their types form the whitelist below.
slot_descriptor = _foo.foo
wrapper_descriptor = str.__dict__['__add__']
method_descriptor = str.__dict__['startswith']
user_method_descriptor = _foo.__dict__['method']

safe_descriptors_raw = [
    slot_descriptor,
    wrapper_descriptor,
    method_descriptor,
    user_method_descriptor,
]

safe_descriptor_types = [type(d) for d in safe_descriptors_raw]
def _resolve_descriptor(d, instance, owner):
    """
    Invoke descriptor d's __get__ for (instance, owner), but only after
    verifying d is one of the known-safe descriptor types; raises CannotEval
    for anything else or if __get__ itself raises AttributeError.
    """
    checked = of_type(d, *safe_descriptor_types)
    try:
        return type(checked).__get__(d, instance, owner)
    except AttributeError as e:
        raise CannotEval from e

View File

@@ -0,0 +1 @@
# Marker file for PEP 561. The pure_eval package uses inline types.

View File

@@ -0,0 +1,206 @@
from collections import OrderedDict, deque
from datetime import date, time, datetime
from decimal import Decimal
from fractions import Fraction
import ast
import enum
import typing
class CannotEval(Exception):
    """Raised (and used as a cache marker) when an expression cannot be
    evaluated safely."""

    def __repr__(self):
        # Just the class name — instances carry no useful message.
        return type(self).__name__

    __str__ = __repr__
def is_any(x, *args):
    """Return True if x is (by identity) any one of args."""
    for candidate in args:
        if x is candidate:
            return True
    return False
def of_type(x, *types):
    """Return x if its exact type is one of types; otherwise raise CannotEval."""
    if not is_any(type(x), *types):
        raise CannotEval
    return x
def of_standard_types(x, *, check_dict_values: bool, deep: bool):
    """Return x if is_standard_types accepts it; otherwise raise CannotEval."""
    if not is_standard_types(x, check_dict_values=check_dict_values, deep=deep):
        raise CannotEval
    return x
def is_standard_types(x, *, check_dict_values: bool, deep: bool):
    """
    Check whether x is built only from standard, safe types.
    A RecursionError (e.g. self-referential containers) counts as unsafe.
    """
    try:
        ok, _ = _is_standard_types_deep(x, check_dict_values, deep)
    except RecursionError:
        return False
    return ok
def _is_standard_types_deep(x, check_dict_values: bool, deep: bool):
    """
    Recursive worker for `is_standard_types`.

    Returns a pair (is_standard, length) where `length` counts the contained
    elements visited; traversal is aborted (returning False) once more than
    100000 elements have been seen, bounding the cost for huge structures.
    """
    typ = type(x)
    # Exact standard scalar/immutable types are always safe.
    if is_any(
            typ,
            str,
            int,
            bool,
            float,
            bytes,
            complex,
            date,
            time,
            datetime,
            Fraction,
            Decimal,
            type(None),
            object,
    ):
        return True, 0

    if is_any(typ, tuple, frozenset, list, set, dict, OrderedDict, deque, slice):
        if typ in [slice]:
            # A slice has exactly three components, not a length.
            length = 0
        else:
            length = len(x)

        assert isinstance(deep, bool)
        if not deep:
            return True, length

        # Choose which contained values to recurse into.  Note that for
        # dicts without check_dict_values, only the keys are checked.
        if check_dict_values and typ in (dict, OrderedDict):
            items = (v for pair in x.items() for v in pair)
        elif typ is slice:
            items = [x.start, x.stop, x.step]
        else:
            items = x

        for item in items:
            if length > 100000:
                # Too big to check exhaustively; treat as unsafe.
                return False, length

            is_standard, item_length = _is_standard_types_deep(
                item, check_dict_values, deep
            )
            if not is_standard:
                return False, length
            length += item_length

        return True, length

    return False, 0
class _E(enum.Enum):
    # Empty sample enum: exists only so that the type of an enum class
    # can be collected into safe_name_types below.
    pass
class _C:
    # Sample class providing instance/class/static method stubs; only the
    # *types* of these callables matter (collected into safe_name_types).
    def foo(self): pass  # pragma: nocover

    def bar(self): pass  # pragma: nocover

    @classmethod
    def cm(cls): pass  # pragma: nocover

    @staticmethod
    def sm(): pass  # pragma: nocover
# Sample objects whose types are known to expose a trustworthy __name__
# attribute (builtins, methods, descriptors, modules, classes, enums).
# Only the types of the values are used, via safe_name_types below.
safe_name_samples = {
    "len": len,
    "append": list.append,
    "__add__": list.__add__,
    "insert": [].insert,
    "__mul__": [].__mul__,
    "fromkeys": dict.__dict__['fromkeys'],
    "is_any": is_any,
    "__repr__": CannotEval.__repr__,
    "foo": _C().foo,
    "bar": _C.bar,
    "cm": _C.cm,
    "sm": _C.sm,
    "ast": ast,
    "CannotEval": CannotEval,
    "_E": _E,
}
# Representative typing constructs, used below to collect the types of
# typing annotations so safe_name can recognise them.
typing_annotation_samples = {
    name: getattr(typing, name)
    for name in ("List", "Dict", "Tuple", "Set", "Callable", "Mapping")
}
# Deduplicated type whitelists derived from the sample dicts above.
safe_name_types = tuple({type(sample) for sample in safe_name_samples.values()})

typing_annotation_types = tuple({
    type(sample) for sample in typing_annotation_samples.values()
})
def eq_checking_types(a, b):
    """Equality that additionally requires identical concrete types
    (so e.g. True != 1 and 1 != 1.0 here)."""
    if type(a) is not type(b):
        return False
    return a == b
def ast_name(node):
    """Return the identifier an expression node spells out:
    the id of a Name, the attr of an Attribute, else None."""
    if isinstance(node, ast.Attribute):
        return node.attr
    if isinstance(node, ast.Name):
        return node.id
    return None
def safe_name(value):
    """
    Return a name for value that can be read without running user code,
    or None if no trustworthy name is available.
    """
    typ = type(value)
    if is_any(typ, *safe_name_types):
        return value.__name__
    if value is typing.Optional:
        return "Optional"
    if value is typing.Union:
        return "Union"
    if is_any(typ, *typing_annotation_types):
        # Different typing constructs store their name in different attributes.
        return getattr(value, "__name__", None) or getattr(value, "_name", None)
    return None
def has_ast_name(value, node):
    """Return True if node textually names value (per safe_name),
    e.g. the Name `foo` for a function whose __name__ is 'foo'."""
    name = safe_name(value)
    if type(name) is not str:
        return False
    return eq_checking_types(ast_name(node), name)
def copy_ast_without_context(x):
    """
    Recursively copy an AST (or list / plain value inside one), dropping
    every `ctx` field so that Load/Store/Delete variants compare equal.
    Location attributes (lineno etc.) are not copied either, since only
    _fields are visited.
    """
    if isinstance(x, list):
        return [copy_ast_without_context(item) for item in x]
    if not isinstance(x, ast.AST):
        return x

    kwargs = {}
    for field in x._fields:
        if field == 'ctx':
            continue
        if hasattr(x, field):
            kwargs[field] = copy_ast_without_context(getattr(x, field))
    node = type(x)(**kwargs)
    if hasattr(node, 'ctx'):
        # Python 3.13.0b2+ defaults to Load when we don't pass ctx
        # https://github.com/python/cpython/pull/118871
        del node.ctx
    return node
def ensure_dict(x):
    """
    Coerce x to a plain dict, returning {} for anything dict() rejects.

    Handles invalid non-dict inputs (e.g. a frame whose f_locals proxy
    misbehaves) by falling back to an empty mapping instead of raising.
    """
    try:
        result = dict(x)
    except Exception:
        result = {}
    return result

View File

@@ -0,0 +1 @@
__version__ = '0.2.3'