fixed subscription table

2025-02-02 00:02:31 -05:00
parent a1ab31acfe
commit ef5f57e678
5389 changed files with 686710 additions and 28 deletions

View File

@@ -0,0 +1,24 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module enhances the Python AST tree with token and source code information, sufficient to
detect the source text of each AST node. This is helpful for tools that make source code
transformations.
"""
from .line_numbers import LineNumbers
from .asttokens import ASTText, ASTTokens, supports_tokenless
__all__ = ['ASTText', 'ASTTokens', 'LineNumbers', 'supports_tokenless']
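
For orientation, a minimal usage sketch of the public names exported by the module above (not part of this commit; it assumes the package is installed under the name "asttokens", which the diff itself does not state):

import ast
from asttokens import ASTTokens

source = "xs = [x * 2 for x in range(10)]\nprint(xs)"
atok = ASTTokens(source, parse=True)

# After construction, every node in atok.tree carries .first_token / .last_token,
# so the exact source text of any node can be recovered.
for node in ast.walk(atok.tree):
    if isinstance(node, ast.ListComp):
        print(atok.get_text(node))  # -> [x * 2 for x in range(10)]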

View File

@@ -0,0 +1,18 @@
try:
from astroid import nodes as astroid_node_classes
# astroid_node_classes should be whichever module has the NodeNG class
from astroid.nodes import NodeNG
from astroid.nodes import BaseContainer
except Exception:
try:
from astroid import node_classes as astroid_node_classes
from astroid.node_classes import NodeNG
from astroid.node_classes import _BaseContainer as BaseContainer
except Exception: # pragma: no cover
astroid_node_classes = None
NodeNG = None
BaseContainer = None
__all__ = ["astroid_node_classes", "NodeNG", "BaseContainer"]

View File

@@ -0,0 +1,450 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import ast
import bisect
import sys
import token
from ast import Module
from typing import Iterable, Iterator, List, Optional, Tuple, Any, cast, TYPE_CHECKING
from .line_numbers import LineNumbers
from .util import (
Token, match_token, is_non_coding_token, patched_generate_tokens, last_stmt,
annotate_fstring_nodes, generate_tokens, is_module, is_stmt
)
if TYPE_CHECKING: # pragma: no cover
from .util import AstNode, TokenInfo
class ASTTextBase(metaclass=abc.ABCMeta):
def __init__(self, source_text: str, filename: str) -> None:
self._filename = filename
# Decode source after parsing to let Python 2 handle coding declarations.
# (If the encoding was not utf-8 compatible, then even if it parses correctly,
# we'll fail with a unicode error here.)
source_text = str(source_text)
self._text = source_text
self._line_numbers = LineNumbers(source_text)
@abc.abstractmethod
def get_text_positions(self, node, padded):
# type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
"""
Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
If the positions can't be determined, or the nodes don't correspond to any particular text,
returns ``(1, 0)`` for both.
``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
This means that if ``padded`` is True, the start position will be adjusted to include
leading whitespace if ``node`` is a multiline statement.
"""
raise NotImplementedError # pragma: no cover
def get_text_range(self, node, padded=True):
# type: (AstNode, bool) -> Tuple[int, int]
"""
Returns the (startpos, endpos) positions in source text corresponding to the given node.
Returns (0, 0) for nodes (like `Load`) that don't correspond to any particular text.
See ``get_text_positions()`` for details on the ``padded`` argument.
"""
start, end = self.get_text_positions(node, padded)
return (
self._line_numbers.line_to_offset(*start),
self._line_numbers.line_to_offset(*end),
)
def get_text(self, node, padded=True):
# type: (AstNode, bool) -> str
"""
Returns the text corresponding to the given node.
Returns '' for nodes (like `Load`) that don't correspond to any particular text.
See ``get_text_positions()`` for details on the ``padded`` argument.
"""
start, end = self.get_text_range(node, padded)
return self._text[start: end]
class ASTTokens(ASTTextBase):
"""
ASTTokens maintains the text of Python code in several forms: as a string, as line numbers, and
as tokens, and is used to mark and access token and position information.
``source_text`` must be a unicode or UTF8-encoded string. If you pass in UTF8 bytes, remember
that all offsets you'll get are to the unicode text, which is available as the ``.text``
property.
If ``parse`` is set, the ``source_text`` will be parsed with ``ast.parse()``, and the resulting
tree marked with token info and made available as the ``.tree`` property.
If ``tree`` is given, it will be marked and made available as the ``.tree`` property. In
addition to the trees produced by the ``ast`` module, ASTTokens will also mark trees produced
using the ``astroid`` library <https://www.astroid.org>.
If only ``source_text`` is given, you may use ``.mark_tokens(tree)`` to mark the nodes of an AST
tree created separately.
"""
def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
# type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
super(ASTTokens, self).__init__(source_text, filename)
self._tree = ast.parse(source_text, filename) if parse else tree
# Tokenize the code.
if tokens is None:
tokens = generate_tokens(self._text)
self._tokens = list(self._translate_tokens(tokens))
# Extract the start positions of all tokens, so that we can quickly map positions to tokens.
self._token_offsets = [tok.startpos for tok in self._tokens]
if self._tree:
self.mark_tokens(self._tree)
def mark_tokens(self, root_node):
# type: (Module) -> None
"""
Given the root of the AST or Astroid tree produced from source_text, visits all nodes marking
them with token and position information by adding ``.first_token`` and
``.last_token`` attributes. This is done automatically in the constructor when ``parse`` or
``tree`` arguments are set, but may be used manually with a separate AST or Astroid tree.
"""
# The hard work of this class is done by MarkTokens
from .mark_tokens import MarkTokens # to avoid import loops
MarkTokens(self).visit_tree(root_node)
def _translate_tokens(self, original_tokens):
# type: (Iterable[TokenInfo]) -> Iterator[Token]
"""
Translates the given standard library tokens into our own representation.
"""
for index, tok in enumerate(patched_generate_tokens(original_tokens)):
tok_type, tok_str, start, end, line = tok
yield Token(tok_type, tok_str, start, end, line, index,
self._line_numbers.line_to_offset(start[0], start[1]),
self._line_numbers.line_to_offset(end[0], end[1]))
@property
def text(self):
# type: () -> str
"""The source code passed into the constructor."""
return self._text
@property
def tokens(self):
# type: () -> List[Token]
"""The list of tokens corresponding to the source code from the constructor."""
return self._tokens
@property
def tree(self):
# type: () -> Optional[Module]
"""The root of the AST tree passed into the constructor or parsed from the source code."""
return self._tree
@property
def filename(self):
# type: () -> str
"""The filename that was parsed"""
return self._filename
def get_token_from_offset(self, offset):
# type: (int) -> Token
"""
Returns the token containing the given character offset (0-based position in source text),
or the preceding token if the position is between tokens.
"""
return self._tokens[bisect.bisect(self._token_offsets, offset) - 1]
def get_token(self, lineno, col_offset):
# type: (int, int) -> Token
"""
Returns the token containing the given (lineno, col_offset) position, or the preceding token
if the position is between tokens.
"""
# TODO: add test for multibyte unicode. We need to translate offsets from ast module (which
# are in utf8) to offsets into the unicode text. tokenize module seems to use unicode offsets
# but isn't explicit.
return self.get_token_from_offset(self._line_numbers.line_to_offset(lineno, col_offset))
def get_token_from_utf8(self, lineno, col_offset):
# type: (int, int) -> Token
"""
Same as get_token(), but interprets col_offset as a UTF8 offset, which is what `ast` uses.
"""
return self.get_token(lineno, self._line_numbers.from_utf8_col(lineno, col_offset))
def next_token(self, tok, include_extra=False):
# type: (Token, bool) -> Token
"""
Returns the next token after the given one. If include_extra is True, includes non-coding
tokens from the tokenize module, such as NL and COMMENT.
"""
i = tok.index + 1
if not include_extra:
while is_non_coding_token(self._tokens[i].type):
i += 1
return self._tokens[i]
def prev_token(self, tok, include_extra=False):
# type: (Token, bool) -> Token
"""
Returns the previous token before the given one. If include_extra is True, includes non-coding
tokens from the tokenize module, such as NL and COMMENT.
"""
i = tok.index - 1
if not include_extra:
while is_non_coding_token(self._tokens[i].type):
i -= 1
return self._tokens[i]
def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
# type: (Token, int, Optional[str], bool) -> Token
"""
Looks for the first token, starting at start_token, that matches tok_type and, if given, the
token string. Searches backwards if reverse is True. Returns ENDMARKER token if not found (you
can check it with `token.ISEOF(t.type)`).
"""
t = start_token
advance = self.prev_token if reverse else self.next_token
while not match_token(t, tok_type, tok_str) and not token.ISEOF(t.type):
t = advance(t, include_extra=True)
return t
def token_range(self,
first_token, # type: Token
last_token, # type: Token
include_extra=False, # type: bool
):
# type: (...) -> Iterator[Token]
"""
Yields all tokens in order from first_token through and including last_token. If
include_extra is True, includes non-coding tokens such as tokenize.NL and .COMMENT.
"""
for i in range(first_token.index, last_token.index + 1):
if include_extra or not is_non_coding_token(self._tokens[i].type):
yield self._tokens[i]
def get_tokens(self, node, include_extra=False):
# type: (AstNode, bool) -> Iterator[Token]
"""
Yields all tokens making up the given node. If include_extra is True, includes non-coding
tokens such as tokenize.NL and .COMMENT.
"""
return self.token_range(node.first_token, node.last_token, include_extra=include_extra)
def get_text_positions(self, node, padded):
# type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
"""
Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
If the positions can't be determined, or the nodes don't correspond to any particular text,
returns ``(1, 0)`` for both.
``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
This means that if ``padded`` is True, the start position will be adjusted to include
leading whitespace if ``node`` is a multiline statement.
"""
if not hasattr(node, 'first_token'):
return (1, 0), (1, 0)
start = node.first_token.start
end = node.last_token.end
if padded and any(match_token(t, token.NEWLINE) for t in self.get_tokens(node)):
# Set col_offset to 0 to include leading indentation for multiline statements.
start = (start[0], 0)
return start, end
class ASTText(ASTTextBase):
"""
Supports the same ``get_text*`` methods as ``ASTTokens``,
but uses the AST to determine the text positions instead of tokens.
This is faster than ``ASTTokens`` as it requires less setup work.
It also (sometimes) supports nodes inside f-strings, which ``ASTTokens`` doesn't.
Some node types and/or Python versions are not supported.
In these cases the ``get_text*`` methods will fall back to using ``ASTTokens``
which incurs the usual setup cost the first time.
If you want to avoid this, check ``supports_tokenless(node)`` before calling ``get_text*`` methods.
"""
def __init__(self, source_text, tree=None, filename='<unknown>'):
# type: (Any, Optional[Module], str) -> None
super(ASTText, self).__init__(source_text, filename)
self._tree = tree
if self._tree is not None:
annotate_fstring_nodes(self._tree)
self._asttokens = None # type: Optional[ASTTokens]
@property
def tree(self):
# type: () -> Module
if self._tree is None:
self._tree = ast.parse(self._text, self._filename)
annotate_fstring_nodes(self._tree)
return self._tree
@property
def asttokens(self):
# type: () -> ASTTokens
if self._asttokens is None:
self._asttokens = ASTTokens(
self._text,
tree=self.tree,
filename=self._filename,
)
return self._asttokens
def _get_text_positions_tokenless(self, node, padded):
# type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
"""
Version of ``get_text_positions()`` that doesn't use tokens.
"""
if is_module(node):
# Modules don't have position info, so just return the range of the whole text.
# The token-using method does something different, but its behavior seems weird and inconsistent.
# For example, in a file with only comments, it only returns the first line.
# It's hard to imagine a case when this matters.
return (1, 0), self._line_numbers.offset_to_line(len(self._text))
if getattr(node, 'lineno', None) is None:
return (1, 0), (1, 0)
assert node # tell mypy that node is not None, which we allowed up to here for compatibility
decorators = getattr(node, 'decorator_list', [])
if not decorators:
# Astroid uses node.decorators.nodes instead of node.decorator_list.
decorators_node = getattr(node, 'decorators', None)
decorators = getattr(decorators_node, 'nodes', [])
if decorators:
# Function/Class definition nodes are marked by AST as starting at def/class,
# not the first decorator. This doesn't match the token-using behavior,
# or inspect.getsource(), and just seems weird.
start_node = decorators[0]
else:
start_node = node
start_lineno = start_node.lineno
end_node = last_stmt(node)
# Include leading indentation for multiline statements.
# This doesn't mean simple statements that happen to be on multiple lines,
# but compound statements where inner indentation matters.
# So we don't just compare node.lineno and node.end_lineno,
# we check for a contained statement starting on a different line.
if padded and (
start_lineno != end_node.lineno
or (
# Astroid docstrings aren't treated as separate statements.
# So to handle function/class definitions with a docstring but no other body,
# we just check that the node is a statement with a docstring
# and spanning multiple lines in the simple, literal sense.
start_lineno != node.end_lineno
and getattr(node, "doc_node", None)
and is_stmt(node)
)
):
start_col_offset = 0
else:
start_col_offset = self._line_numbers.from_utf8_col(start_lineno, start_node.col_offset)
start = (start_lineno, start_col_offset)
# To match the token-using behaviour, we exclude trailing semicolons and comments.
# This means that for blocks containing multiple statements, we have to use the last one
# instead of the actual node for end_lineno and end_col_offset.
end_lineno = cast(int, end_node.end_lineno)
end_col_offset = cast(int, end_node.end_col_offset)
end_col_offset = self._line_numbers.from_utf8_col(end_lineno, end_col_offset)
end = (end_lineno, end_col_offset)
return start, end
def get_text_positions(self, node, padded):
# type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
"""
Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
If the positions can't be determined, or the nodes don't correspond to any particular text,
returns ``(1, 0)`` for both.
``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
This means that if ``padded`` is True, the start position will be adjusted to include
leading whitespace if ``node`` is a multiline statement.
"""
if getattr(node, "_broken_positions", None):
# This node was marked in util.annotate_fstring_nodes as having untrustworthy lineno/col_offset.
return (1, 0), (1, 0)
if supports_tokenless(node):
return self._get_text_positions_tokenless(node, padded)
return self.asttokens.get_text_positions(node, padded)
# Node types that _get_text_positions_tokenless doesn't support.
# These initial values are missing lineno.
_unsupported_tokenless_types = ("arguments", "Arguments", "withitem") # type: Tuple[str, ...]
if sys.version_info[:2] == (3, 8):
# _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
_unsupported_tokenless_types += ("arg", "Starred")
# no lineno in 3.8
_unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")
def supports_tokenless(node=None):
# type: (Any) -> bool
"""
Returns True if the Python version and the node (if given) are supported by
the ``get_text*`` methods of ``ASTText`` without falling back to ``ASTTokens``.
See ``ASTText`` for why this matters.
The following cases are not supported:
- PyPy
- ``ast.arguments`` / ``astroid.Arguments``
- ``ast.withitem``
- ``astroid.Comprehension``
- ``astroid.AssignName`` inside ``astroid.Arguments`` or ``astroid.ExceptHandler``
- The following nodes in Python 3.8 only:
- ``ast.arg``
- ``ast.Starred``
- ``ast.Slice``
- ``ast.ExtSlice``
- ``ast.Index``
- ``ast.keyword``
"""
return (
type(node).__name__ not in _unsupported_tokenless_types
and not (
# astroid nodes
not isinstance(node, ast.AST) and node is not None and (
(
type(node).__name__ == "AssignName"
and type(node.parent).__name__ in ("Arguments", "ExceptHandler")
)
)
)
and 'pypy' not in sys.version.lower()
)
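
A short sketch (not part of this commit) exercising the two classes defined above: ASTTokens for token-level navigation and ASTText for the tokenless fast path. It again assumes the package is importable as "asttokens":

import token
from asttokens import ASTTokens, ASTText, supports_tokenless

source = "if x:\n    y = (1,\n         2)\n"
atok = ASTTokens(source, parse=True)
assign = atok.tree.body[0].body[0]  # the nested assignment statement

# Exact source span of the assignment, including the multiline tuple.
print(atok.get_text(assign))
print(atok.get_text_range(assign))  # character offsets into `source`

# Token navigation: find the '(' that opens the tuple.
open_paren = atok.find_token(assign.first_token, token.OP, '(')
print(open_paren.string, open_paren.start)

# ASTText computes positions from the AST alone where supported, and otherwise
# falls back to a lazily constructed ASTTokens.
atext = ASTText(source)
print(supports_tokenless(assign), atext.get_text(assign))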

View File

@@ -0,0 +1,76 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bisect
import re
from typing import Dict, List, Tuple
_line_start_re = re.compile(r'^', re.M)
class LineNumbers:
"""
Class to convert between character offsets in a text string, and pairs (line, column) of 1-based
line and 0-based column numbers, as used by tokens and AST nodes.
This class expects unicode for input and stores positions in unicode. But it supports
translating to and from utf8 offsets, which are used by ast parsing.
"""
def __init__(self, text):
# type: (str) -> None
# A list of character offsets of each line's first character.
self._line_offsets = [m.start(0) for m in _line_start_re.finditer(text)]
self._text = text
self._text_len = len(text)
self._utf8_offset_cache = {} # type: Dict[int, List[int]] # maps line num to list of char offset for each byte in line
def from_utf8_col(self, line, utf8_column):
# type: (int, int) -> int
"""
Given a 1-based line number and 0-based utf8 column, returns a 0-based unicode column.
"""
offsets = self._utf8_offset_cache.get(line)
if offsets is None:
end_offset = self._line_offsets[line] if line < len(self._line_offsets) else self._text_len
line_text = self._text[self._line_offsets[line - 1] : end_offset]
offsets = [i for i,c in enumerate(line_text) for byte in c.encode('utf8')]
offsets.append(len(line_text))
self._utf8_offset_cache[line] = offsets
return offsets[max(0, min(len(offsets)-1, utf8_column))]
def line_to_offset(self, line, column):
# type: (int, int) -> int
"""
Converts 1-based line number and 0-based column to 0-based character offset into text.
"""
line -= 1
if line >= len(self._line_offsets):
return self._text_len
elif line < 0:
return 0
else:
return min(self._line_offsets[line] + max(0, column), self._text_len)
def offset_to_line(self, offset):
# type: (int) -> Tuple[int, int]
"""
Converts 0-based character offset to pair (line, col) of 1-based line and 0-based column
numbers.
"""
offset = max(0, min(self._text_len, offset))
line_index = bisect.bisect_right(self._line_offsets, offset) - 1
return (line_index + 1, offset - self._line_offsets[line_index])
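
A small sketch (not part of this commit) of the conversions LineNumbers provides, including the UTF-8 column translation that ast offsets require; it assumes the "asttokens" package name:

from asttokens import LineNumbers

text = "alpha = 1\nbeta = 'héllo'\n"
ln = LineNumbers(text)

print(ln.line_to_offset(2, 0))   # -> 10, the character offset where line 2 starts
print(ln.offset_to_line(10))     # -> (2, 0)

# `ast` reports columns as UTF-8 byte offsets; "é" is two bytes but one character,
# so byte column 14 (the closing quote) maps back to character column 13.
print(ln.from_utf8_col(2, 14))   # -> 13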

View File

@@ -0,0 +1,467 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
import numbers
import sys
import token
from ast import Module
from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING
from . import util
from .asttokens import ASTTokens
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer
if TYPE_CHECKING:
from .util import AstNode
# Mapping of matching braces. To find a token here, look up token[:2].
_matching_pairs_left = {
(token.OP, '('): (token.OP, ')'),
(token.OP, '['): (token.OP, ']'),
(token.OP, '{'): (token.OP, '}'),
}
_matching_pairs_right = {
(token.OP, ')'): (token.OP, '('),
(token.OP, ']'): (token.OP, '['),
(token.OP, '}'): (token.OP, '{'),
}
class MarkTokens:
"""
Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes
to each of them. This is the heart of the token-marking logic.
"""
def __init__(self, code):
# type: (ASTTokens) -> None
self._code = code
self._methods = util.NodeMethods()
self._iter_children = None # type: Optional[Callable]
def visit_tree(self, node):
# type: (Module) -> None
self._iter_children = util.iter_children_func(node)
util.visit_tree(node, self._visit_before_children, self._visit_after_children)
def _visit_before_children(self, node, parent_token):
# type: (AstNode, Optional[util.Token]) -> Tuple[Optional[util.Token], Optional[util.Token]]
col = getattr(node, 'col_offset', None)
token = self._code.get_token_from_utf8(node.lineno, col) if col is not None else None
if not token and util.is_module(node):
# We'll assume that a Module node starts at the start of the source code.
token = self._code.get_token(1, 0)
# Use our own token, or our parent's if we don't have one, to pass to child calls as
# parent_token argument. The second value becomes the token argument of _visit_after_children.
return (token or parent_token, token)
def _visit_after_children(self, node, parent_token, token):
# type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None
# This processes the node generically first, after all children have been processed.
# Get the first and last tokens that belong to children. Note how this doesn't assume that we
# iterate through children in order that corresponds to occurrence in source code. This
# assumption can fail (e.g. with return annotations).
first = token
last = None
for child in cast(Callable, self._iter_children)(node):
# astroid slices have especially wrong positions, we don't want them to corrupt their parents.
if util.is_empty_astroid_slice(child):
continue
if not first or child.first_token.index < first.index:
first = child.first_token
if not last or child.last_token.index > last.index:
last = child.last_token
# If we don't have a first token from _visit_before_children, and there were no children, then
# use the parent's token as the first token.
first = first or parent_token
# If no children, set last token to the first one.
last = last or first
# Statements continue to before NEWLINE. This helps cover a few different cases at once.
if util.is_stmt(node):
last = self._find_last_in_stmt(cast(util.Token, last))
# Capture any unmatched brackets.
first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node)
# Give a chance to node-specific methods to adjust.
nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last)
if (nfirst, nlast) != (first, last):
# If anything changed, expand again to capture any unmatched brackets.
nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node)
node.first_token = nfirst
node.last_token = nlast
def _find_last_in_stmt(self, start_token):
# type: (util.Token) -> util.Token
t = start_token
while (not util.match_token(t, token.NEWLINE) and
not util.match_token(t, token.OP, ';') and
not token.ISEOF(t.type)):
t = self._code.next_token(t, include_extra=True)
return self._code.prev_token(t)
def _expand_to_matching_pairs(self, first_token, last_token, node):
# type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token]
"""
Scan tokens in [first_token, last_token] range that are between node's children, and for any
unmatched brackets, adjust first/last tokens to include the closing pair.
"""
# We look for opening parens/braces among non-child tokens (i.e. tokens between our actual
# child nodes). If we find any closing ones, we match them to the opens.
to_match_right = [] # type: List[Tuple[int, str]]
to_match_left = []
for tok in self._code.token_range(first_token, last_token):
tok_info = tok[:2]
if to_match_right and tok_info == to_match_right[-1]:
to_match_right.pop()
elif tok_info in _matching_pairs_left:
to_match_right.append(_matching_pairs_left[tok_info])
elif tok_info in _matching_pairs_right:
to_match_left.append(_matching_pairs_right[tok_info])
# Once done, extend `last_token` to match any unclosed parens/braces.
for match in reversed(to_match_right):
last = self._code.next_token(last_token)
# Allow for trailing commas or colons (allowed in subscripts) before the closing delimiter
while any(util.match_token(last, token.OP, x) for x in (',', ':')):
last = self._code.next_token(last)
# Now check for the actual closing delimiter.
if util.match_token(last, *match):
last_token = last
# And extend `first_token` to match any unclosed opening parens/braces.
for match in to_match_left:
first = self._code.prev_token(first_token)
if util.match_token(first, *match):
first_token = first
return (first_token, last_token)
#----------------------------------------------------------------------
# Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
# that will actually be assigned.
def visit_default(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# pylint: disable=no-self-use
# By default, we don't need to adjust the token we computed earlier.
return (first_token, last_token)
def handle_comp(self, open_brace, node, first_token, last_token):
# type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# For list/set/dict comprehensions, we only get the token of the first child, so adjust it to
# include the opening brace (the closing brace will be matched automatically).
before = self._code.prev_token(first_token)
util.expect_token(before, token.OP, open_brace)
return (before, last_token)
def visit_comprehension(self,
node, # type: AstNode
first_token, # type: util.Token
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
# The 'comprehension' node starts with 'for' but we only get first child; we search backwards
# to find the 'for' keyword.
first = self._code.find_token(first_token, token.NAME, 'for', reverse=True)
return (first, last_token)
def visit_if(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
while first_token.string not in ('if', 'elif'):
first_token = self._code.prev_token(first_token)
return first_token, last_token
def handle_attr(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# Attribute node has ".attr" (2 tokens) after the last child.
dot = self._code.find_token(last_token, token.OP, '.')
name = self._code.next_token(dot)
util.expect_token(name, token.NAME)
return (first_token, name)
visit_attribute = handle_attr
visit_assignattr = handle_attr
visit_delattr = handle_attr
def handle_def(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# With astroid, nodes that start with a doc-string can have an empty body, in which case we
# need to adjust the last token to include the doc string.
if not node.body and (getattr(node, 'doc_node', None) or getattr(node, 'doc', None)): # type: ignore[union-attr]
last_token = self._code.find_token(last_token, token.STRING)
# Include @ from decorator
if first_token.index > 0:
prev = self._code.prev_token(first_token)
if util.match_token(prev, token.OP, '@'):
first_token = prev
return (first_token, last_token)
visit_classdef = handle_def
visit_functiondef = handle_def
def handle_following_brackets(self, node, last_token, opening_bracket):
# type: (AstNode, util.Token, str) -> util.Token
# This is for calls and subscripts, which have a pair of brackets
# at the end which may contain no nodes, e.g. foo() or bar[:].
# We look for the opening bracket and then let the matching pair be found automatically
# Remember that last_token is at the end of all children,
# so we are not worried about encountering a bracket that belongs to a child.
first_child = next(cast(Callable, self._iter_children)(node))
call_start = self._code.find_token(first_child.last_token, token.OP, opening_bracket)
if call_start.index > last_token.index:
last_token = call_start
return last_token
def visit_call(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
last_token = self.handle_following_brackets(node, last_token, '(')
# Handling a python bug with decorators with empty parens, e.g.
# @deco()
# def ...
if util.match_token(first_token, token.OP, '@'):
first_token = self._code.next_token(first_token)
return (first_token, last_token)
def visit_matchclass(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
last_token = self.handle_following_brackets(node, last_token, '(')
return (first_token, last_token)
def visit_subscript(self,
node, # type: AstNode
first_token, # type: util.Token
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
last_token = self.handle_following_brackets(node, last_token, '[')
return (first_token, last_token)
def visit_slice(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# consume `:` tokens to the left and right. In Python 3.9, Slice nodes are
# given a col_offset, (and end_col_offset), so this will always start inside
# the slice, even if it is the empty slice. However, in 3.8 and below, this
# will only expand to the full slice if the slice contains a node with a
# col_offset. So x[:] will only get the correct tokens in 3.9, but x[1:] and
# x[:1] will get them even on earlier versions of Python.
while True:
prev = self._code.prev_token(first_token)
if prev.string != ':':
break
first_token = prev
while True:
next_ = self._code.next_token(last_token)
if next_.string != ':':
break
last_token = next_
return (first_token, last_token)
def handle_bare_tuple(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# A bare tuple doesn't include parens; if there is a trailing comma, make it part of the tuple.
maybe_comma = self._code.next_token(last_token)
if util.match_token(maybe_comma, token.OP, ','):
last_token = maybe_comma
return (first_token, last_token)
# In Python 3.8+, parsed tuples include parentheses when present.
def handle_tuple_nonempty(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
# It's a bare tuple if the first token belongs to the first child. The first child may
# include extraneous parentheses (which don't create new nodes), so account for those too.
child = node.elts[0]
if TYPE_CHECKING:
child = cast(AstNode, child)
child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
if first_token == child_first:
return self.handle_bare_tuple(node, first_token, last_token)
return (first_token, last_token)
def visit_tuple(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
if not node.elts:
# An empty tuple is just "()", and we need no further info.
return (first_token, last_token)
return self.handle_tuple_nonempty(node, first_token, last_token)
def _gobble_parens(self, first_token, last_token, include_all=False):
# type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token]
# Expands a range of tokens to include one or all pairs of surrounding parentheses, and
# returns (first, last) tokens that include these parens.
while first_token.index > 0:
prev = self._code.prev_token(first_token)
next = self._code.next_token(last_token)
if util.match_token(prev, token.OP, '(') and util.match_token(next, token.OP, ')'):
first_token, last_token = prev, next
if include_all:
continue
break
return (first_token, last_token)
def visit_str(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_str(first_token, last_token)
def visit_joinedstr(self,
node, # type: AstNode
first_token, # type: util.Token
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
if sys.version_info < (3, 12):
# Older versions don't tokenize the contents of f-strings
return self.handle_str(first_token, last_token)
last = first_token
while True:
if util.match_token(last, getattr(token, "FSTRING_START")):
# Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
# of the f-string. We can't just look for the next FSTRING_END
# because f-strings can be nested, e.g. f"{f'{x}'}", so we need
# to treat this like matching balanced parentheses.
count = 1
while count > 0:
last = self._code.next_token(last)
# mypy complains about token.FSTRING_START and token.FSTRING_END.
if util.match_token(last, getattr(token, "FSTRING_START")):
count += 1
elif util.match_token(last, getattr(token, "FSTRING_END")):
count -= 1
last_token = last
last = self._code.next_token(last_token)
elif util.match_token(last, token.STRING):
# Similar to handle_str, we also need to handle adjacent strings.
last_token = last
last = self._code.next_token(last_token)
else:
break
return (first_token, last_token)
def visit_bytes(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_str(first_token, last_token)
def handle_str(self, first_token, last_token):
# type: (util.Token, util.Token) -> Tuple[util.Token, util.Token]
# Multiple adjacent STRING tokens form a single string.
last = self._code.next_token(last_token)
while util.match_token(last, token.STRING):
last_token = last
last = self._code.next_token(last_token)
return (first_token, last_token)
def handle_num(self,
node, # type: AstNode
value, # type: Union[complex, int, numbers.Number]
first_token, # type: util.Token
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
# A constant like '-1' gets turned into two tokens; this will skip the '-'.
while util.match_token(last_token, token.OP):
last_token = self._code.next_token(last_token)
if isinstance(value, complex):
# A complex number like -2j cannot be compared directly to 0
# A complex number like 1-2j is expressed as a binary operation
# so we don't need to worry about it
value = value.imag
# This makes sure that the - is included
if value < 0 and first_token.type == token.NUMBER: # type: ignore[operator]
first_token = self._code.prev_token(first_token)
return (first_token, last_token)
def visit_num(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)
def visit_const(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
if isinstance(node.value, numbers.Number):
return self.handle_num(node, node.value, first_token, last_token)
elif isinstance(node.value, (str, bytes)):
return self.visit_str(node, first_token, last_token)
return (first_token, last_token)
visit_constant = visit_const
def visit_keyword(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# Until python 3.9 (https://bugs.python.org/issue40141),
# ast.keyword nodes didn't have line info. Astroid has lineno None.
assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword)
if node.arg is not None and getattr(node, 'lineno', None) is None:
equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
name = self._code.prev_token(equals)
util.expect_token(name, token.NAME, node.arg)
first_token = name
return (first_token, last_token)
def visit_starred(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted.
if not util.match_token(first_token, token.OP, '*'):
star = self._code.prev_token(first_token)
if util.match_token(star, token.OP, '*'):
first_token = star
return (first_token, last_token)
def visit_assignname(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
# Astroid may turn 'except' clause into AssignName, but we need to adjust it.
if util.match_token(first_token, token.NAME, 'except'):
colon = self._code.find_token(last_token, token.OP, ':')
first_token = last_token = self._code.prev_token(colon)
return (first_token, last_token)
# Async nodes should typically start with the word 'async'
# but Python < 3.7 doesn't put the col_offset there
# AsyncFunctionDef is slightly different because it might have
# decorators before that, which visit_functiondef handles
def handle_async(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
if not first_token.string == 'async':
first_token = self._code.prev_token(first_token)
return (first_token, last_token)
visit_asyncfor = handle_async
visit_asyncwith = handle_async
def visit_asyncfunctiondef(self,
node, # type: AstNode
first_token, # type: util.Token
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
if util.match_token(first_token, token.NAME, 'def'):
# Include the 'async' token
first_token = self._code.prev_token(first_token)
return self.visit_functiondef(node, first_token, last_token)
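
MarkTokens is normally driven through ASTTokens.mark_tokens() rather than used directly. A sketch (not part of this commit) of marking a tree that was parsed separately, again assuming the "asttokens" package name:

import ast
from asttokens import ASTTokens

source = "@decorator\ndef f(a, b=2):\n    return a + b\n"
tree = ast.parse(source)

atok = ASTTokens(source)   # no parse/tree argument, so nothing is marked yet
atok.mark_tokens(tree)     # MarkTokens assigns .first_token / .last_token to every node

func = tree.body[0]
print(func.first_token.string)              # '@' -- the decorator is included
print(atok.get_text(func).splitlines()[0])  # '@decorator'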

View File

@@ -0,0 +1,485 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
import collections
import io
import sys
import token
import tokenize
from abc import ABCMeta
from ast import Module, expr, AST
from functools import lru_cache
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
Any,
TYPE_CHECKING,
Type,
)
if TYPE_CHECKING: # pragma: no cover
from .astroid_compat import NodeNG
# Type class used to expand out the definition of AST to include fields added by this library
# It's not actually used for anything other than type checking though!
class EnhancedAST(AST):
# Additional attributes set by mark_tokens
first_token = None # type: Token
last_token = None # type: Token
lineno = 0 # type: int
AstNode = Union[EnhancedAST, NodeNG]
TokenInfo = tokenize.TokenInfo
def token_repr(tok_type, string):
# type: (int, Optional[str]) -> str
"""Returns a human-friendly representation of a token with the given type and string."""
# repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u'))
class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
"""
Token is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
module, and 3 additional ones useful for this module:
- [0] .type Token type (see token.py)
- [1] .string Token (a string)
- [2] .start Starting (row, column) indices of the token (a 2-tuple of ints)
- [3] .end Ending (row, column) indices of the token (a 2-tuple of ints)
- [4] .line Original line (string)
- [5] .index Index of the token in the list of tokens that it belongs to.
- [6] .startpos Starting character offset into the input text.
- [7] .endpos Ending character offset into the input text.
"""
def __str__(self):
# type: () -> str
return token_repr(self.type, self.string)
def match_token(token, tok_type, tok_str=None):
# type: (Token, int, Optional[str]) -> bool
"""Returns true if token is of the given type and, if a string is given, has that string."""
return token.type == tok_type and (tok_str is None or token.string == tok_str)
def expect_token(token, tok_type, tok_str=None):
# type: (Token, int, Optional[str]) -> None
"""
Verifies that the given token is of the expected type. If tok_str is given, the token string
is verified too. If the token doesn't match, raises an informative ValueError.
"""
if not match_token(token, tok_type, tok_str):
raise ValueError("Expected token %s, got %s on line %s col %s" % (
token_repr(tok_type, tok_str), str(token),
token.start[0], token.start[1] + 1))
def is_non_coding_token(token_type):
# type: (int) -> bool
"""
These are considered non-coding tokens, as they don't affect the syntax tree.
"""
return token_type in (token.NL, token.COMMENT, token.ENCODING)
def generate_tokens(text):
# type: (str) -> Iterator[TokenInfo]
"""
Generates standard library tokens for the given code.
"""
# tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for
# Python2. See http://stackoverflow.com/a/4952291/328565.
# FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline))
def iter_children_func(node):
# type: (AST) -> Callable
"""
Returns a function which yields all direct children of an AST node,
skipping children that are singleton nodes.
The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
"""
return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast
def iter_children_astroid(node, include_joined_str=False):
# type: (NodeNG, bool) -> Union[Iterator, List]
if not include_joined_str and is_joined_str(node):
return []
return node.get_children()
SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and
issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}
def iter_children_ast(node, include_joined_str=False):
# type: (AST, bool) -> Iterator[Union[AST, expr]]
if not include_joined_str and is_joined_str(node):
return
if isinstance(node, ast.Dict):
# override the iteration order: instead of <all keys>, <all values>,
# yield keys and values in source order (key1, value1, key2, value2, ...)
for (key, value) in zip(node.keys, node.values):
if key is not None:
yield key
yield value
return
for child in ast.iter_child_nodes(node):
# Skip singleton children; they don't reflect particular positions in the code and break the
# assumptions about the tree consisting of distinct nodes. Note that collecting classes
# beforehand and checking them in a set is faster than using isinstance each time.
if child.__class__ not in SINGLETONS:
yield child
stmt_class_names = {n for n, c in ast.__dict__.items()
if isinstance(c, type) and issubclass(c, ast.stmt)}
expr_class_names = ({n for n, c in ast.__dict__.items()
if isinstance(c, type) and issubclass(c, ast.expr)} |
{'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})
# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
# in the same way, and without even importing astroid.
def is_expr(node):
# type: (AstNode) -> bool
"""Returns whether node is an expression node."""
return node.__class__.__name__ in expr_class_names
def is_stmt(node):
# type: (AstNode) -> bool
"""Returns whether node is a statement node."""
return node.__class__.__name__ in stmt_class_names
def is_module(node):
# type: (AstNode) -> bool
"""Returns whether node is a module node."""
return node.__class__.__name__ == 'Module'
def is_joined_str(node):
# type: (AstNode) -> bool
"""Returns whether node is a JoinedStr node, used to represent f-strings."""
# At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
# leads to errors.
return node.__class__.__name__ == 'JoinedStr'
def is_expr_stmt(node):
# type: (AstNode) -> bool
"""Returns whether node is an `Expr` node, which is a statement that is an expression."""
return node.__class__.__name__ == 'Expr'
CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
try:
from astroid import Const
CONSTANT_CLASSES += (Const,)
except ImportError: # pragma: no cover
# astroid is not available
pass
def is_constant(node):
# type: (AstNode) -> bool
"""Returns whether node is a Constant node."""
return isinstance(node, CONSTANT_CLASSES)
def is_ellipsis(node):
# type: (AstNode) -> bool
"""Returns whether node is an Ellipsis node."""
return is_constant(node) and node.value is Ellipsis # type: ignore
def is_starred(node):
# type: (AstNode) -> bool
"""Returns whether node is a starred expression node."""
return node.__class__.__name__ == 'Starred'
def is_slice(node):
# type: (AstNode) -> bool
"""Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
# Before 3.9, a tuple containing a slice is an ExtSlice,
# but this was removed in https://bugs.python.org/issue34822
return (
node.__class__.__name__ in ('Slice', 'ExtSlice')
or (
node.__class__.__name__ == 'Tuple'
and any(map(is_slice, cast(ast.Tuple, node).elts))
)
)
def is_empty_astroid_slice(node):
# type: (AstNode) -> bool
return (
node.__class__.__name__ == "Slice"
and not isinstance(node, ast.AST)
and node.lower is node.upper is node.step is None
)
# Sentinel value used by visit_tree().
_PREVISIT = object()
def visit_tree(node, previsit, postvisit):
# type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
"""
Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
via the function call stack to avoid hitting 'maximum recursion depth exceeded' error.
It calls ``previsit()`` and ``postvisit()`` as follows:
* ``previsit(node, par_value)`` - should return ``(par_value, value)``
``par_value`` is as returned from ``previsit()`` of the parent.
* ``postvisit(node, par_value, value)`` - should return ``value``
``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
the one for the root node, which is returned from the overall ``visit_tree()`` call.
For the initial node, ``par_value`` is None. ``postvisit`` may be None.
"""
if not postvisit:
postvisit = lambda node, pvalue, value: None
iter_children = iter_children_func(node)
done = set()
ret = None
stack = [(node, None, _PREVISIT)] # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
while stack:
current, par_value, value = stack.pop()
if value is _PREVISIT:
assert current not in done # protect against an infinite loop in case of a bad tree.
done.add(current)
pvalue, post_value = previsit(current, par_value)
stack.append((current, par_value, post_value))
# Insert all children in reverse order (so that first child ends up on top of the stack).
ins = len(stack)
for n in iter_children(current):
stack.insert(ins, (n, pvalue, _PREVISIT))
else:
ret = postvisit(current, par_value, cast(Optional[Token], value))
return ret
def walk(node, include_joined_str=False):
# type: (AST, bool) -> Iterator[Union[Module, AstNode]]
"""
Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
itself), using depth-first pre-order traversal (yielding parents before their children).
This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
``astroid`` trees. Also, like ``iter_children()``, it skips singleton nodes generated by ``ast``.
By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
because they previously couldn't be handled. Set ``include_joined_str`` to True to include them.
"""
iter_children = iter_children_func(node)
done = set()
stack = [node]
while stack:
current = stack.pop()
assert current not in done # protect against an infinite loop in case of a bad tree.
done.add(current)
yield current
# Insert all children in reverse order (so that first child ends up on top of the stack).
# This is faster than building a list and reversing it.
ins = len(stack)
for c in iter_children(current, include_joined_str):
stack.insert(ins, c)
def replace(text, replacements):
# type: (str, List[Tuple[int, int, str]]) -> str
"""
Replaces multiple slices of text with new values. This is a convenience method for making code
modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
an iterable of ``(start, end, new_text)`` tuples.
For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
``"X is THE test"``.
"""
p = 0
parts = []
for (start, end, new_text) in sorted(replacements):
parts.append(text[p:start])
parts.append(new_text)
p = end
parts.append(text[p:])
return ''.join(parts)
class NodeMethods:
"""
Helper to get `visit_{node_type}` methods given a node's class and cache the results.
"""
def __init__(self):
# type: () -> None
self._cache = {} # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]
def get(self, obj, cls):
# type: (Any, Union[ABCMeta, type]) -> Callable
"""
Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
or `obj.visit_default` if the type-specific method is not found.
"""
method = self._cache.get(cls)
if not method:
name = "visit_" + cls.__name__.lower()
method = getattr(obj, name, obj.visit_default)
self._cache[cls] = method
return method
def patched_generate_tokens(original_tokens):
# type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
"""
Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
Workaround for https://github.com/python/cpython/issues/68382.
Should only be used when tokenizing a string that is known to be valid syntax,
because it assumes that error tokens are not actually errors.
Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
"""
group = [] # type: List[tokenize.TokenInfo]
for tok in original_tokens:
if (
tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
# Only combine tokens if they have no whitespace in between
and (not group or group[-1].end == tok.start)
):
group.append(tok)
else:
for combined_token in combine_tokens(group):
yield combined_token
group = []
yield tok
for combined_token in combine_tokens(group):
yield combined_token
def combine_tokens(group):
# type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
return group
return [
tokenize.TokenInfo(
type=tokenize.NAME,
string="".join(t.string for t in group),
start=group[0].start,
end=group[-1].end,
line=group[0].line,
)
]
def last_stmt(node):
# type: (ast.AST) -> ast.AST
"""
If the given AST node contains multiple statements, return the last one.
Otherwise, just return the node.
"""
child_stmts = [
child for child in iter_children_func(node)(node)
if is_stmt(child) or type(child).__name__ in (
"excepthandler",
"ExceptHandler",
"match_case",
"MatchCase",
"TryExcept",
"TryFinally",
)
]
if child_stmts:
return last_stmt(child_stmts[-1])
return node
@lru_cache(maxsize=None)
def fstring_positions_work():
# type: () -> bool
"""
The positions attached to nodes inside f-string FormattedValues have some bugs
that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
This checks for those bugs more concretely without relying on the Python version.
Specifically this checks:
- Values with a format spec or conversion
- Repeated (i.e. identical-looking) expressions
- f-strings implicitly concatenated over multiple lines.
- Multiline, triple-quoted f-strings.
"""
source = """(
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
f'''
{s} {t}
{u} {v}
'''
)"""
tree = ast.parse(source)
name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
positions_are_unique = len(set(name_positions)) == len(name_positions)
correct_source_segments = all(
ast.get_source_segment(source, node) == node.id
for node in name_nodes
)
return positions_are_unique and correct_source_segments
def annotate_fstring_nodes(tree):
# type: (ast.AST) -> None
"""
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree, include_joined_str=True):
if not isinstance(joinedstr, ast.JoinedStr):
continue
for part in joinedstr.values:
# The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
setattr(part, '_broken_positions', True) # use setattr for mypy
if isinstance(part, ast.FormattedValue):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)
if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
setattr(part.format_spec, '_broken_positions', True)
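
A sketch (not part of this commit) combining two helpers defined above, walk() to locate a node and replace() to rewrite its source range, in the spirit of the replace() docstring; the import paths assume the "asttokens" package name implied by the relative imports:

import ast
from asttokens import ASTTokens
from asttokens.util import replace, walk

source = "total = price * quantity\n"
atok = ASTTokens(source, parse=True)

# walk() yields nodes depth-first, parents before children, for ast and astroid trees alike.
binop = next(n for n in walk(atok.tree) if isinstance(n, ast.BinOp))

start, end = atok.get_text_range(binop)
print(replace(source, [(start, end, "compute_total(price, quantity)")]))
# -> total = compute_total(price, quantity)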

View File

@@ -0,0 +1 @@
__version__ = "3.0.0"