fixed subscription table
24
.venv/lib/python3.12/site-packages/asttokens/__init__.py
Normal file
@@ -0,0 +1,24 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module enhances the Python AST tree with token and source code information, sufficient to
detect the source text of each AST node. This is helpful for tools that make source code
transformations.
"""

from .line_numbers import LineNumbers
from .asttokens import ASTText, ASTTokens, supports_tokenless

__all__ = ['ASTText', 'ASTTokens', 'LineNumbers', 'supports_tokenless']
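Editor's note: the package entry point above re-exports ASTTokens and ASTText; a minimal usage sketch (not part of the committed file, assuming this asttokens package is importable from the environment shown):

import ast
import asttokens

source = "xs = [1, 2, 3]\ntotal = sum(xs)\n"
atok = asttokens.ASTTokens(source, parse=True)  # parses the source and marks the tree

# Every node now carries .first_token / .last_token, so the exact source text
# of any node can be recovered.
call = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.Call))
print(atok.get_text(call))        # sum(xs)
print(atok.get_text_range(call))  # (23, 30)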
18
.venv/lib/python3.12/site-packages/asttokens/astroid_compat.py
Normal file
@@ -0,0 +1,18 @@
try:
  from astroid import nodes as astroid_node_classes

  # astroid_node_classes should be whichever module has the NodeNG class
  from astroid.nodes import NodeNG
  from astroid.nodes import BaseContainer
except Exception:
  try:
    from astroid import node_classes as astroid_node_classes
    from astroid.node_classes import NodeNG
    from astroid.node_classes import _BaseContainer as BaseContainer
  except Exception:  # pragma: no cover
    astroid_node_classes = None
    NodeNG = None
    BaseContainer = None


__all__ = ["astroid_node_classes", "NodeNG", "BaseContainer"]
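Editor's note: because all three names fall back to None when astroid is not installed, callers have to guard before using them; a small hypothetical sketch of that pattern (not part of the commit):

from asttokens.astroid_compat import NodeNG

def is_astroid_node(obj):
  # NodeNG is None when astroid is missing, so check before isinstance().
  return NodeNG is not None and isinstance(obj, NodeNG)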
450
.venv/lib/python3.12/site-packages/asttokens/asttokens.py
Normal file
@@ -0,0 +1,450 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
import ast
import bisect
import sys
import token
from ast import Module
from typing import Iterable, Iterator, List, Optional, Tuple, Any, cast, TYPE_CHECKING

from .line_numbers import LineNumbers
from .util import (
  Token, match_token, is_non_coding_token, patched_generate_tokens, last_stmt,
  annotate_fstring_nodes, generate_tokens, is_module, is_stmt
)

if TYPE_CHECKING:  # pragma: no cover
  from .util import AstNode, TokenInfo


class ASTTextBase(metaclass=abc.ABCMeta):
  def __init__(self, source_text: str, filename: str) -> None:
    self._filename = filename

    # Decode source after parsing to let Python 2 handle coding declarations.
    # (If the encoding was not utf-8 compatible, then even if it parses correctly,
    # we'll fail with a unicode error here.)
    source_text = str(source_text)

    self._text = source_text
    self._line_numbers = LineNumbers(source_text)

  @abc.abstractmethod
  def get_text_positions(self, node, padded):
    # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
    """
    Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
    If the positions can't be determined, or the nodes don't correspond to any particular text,
    returns ``(1, 0)`` for both.

    ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
    This means that if ``padded`` is True, the start position will be adjusted to include
    leading whitespace if ``node`` is a multiline statement.
    """
    raise NotImplementedError  # pragma: no cover

  def get_text_range(self, node, padded=True):
    # type: (AstNode, bool) -> Tuple[int, int]
    """
    Returns the (startpos, endpos) positions in source text corresponding to the given node.
    Returns (0, 0) for nodes (like `Load`) that don't correspond to any particular text.

    See ``get_text_positions()`` for details on the ``padded`` argument.
    """
    start, end = self.get_text_positions(node, padded)
    return (
      self._line_numbers.line_to_offset(*start),
      self._line_numbers.line_to_offset(*end),
    )

  def get_text(self, node, padded=True):
    # type: (AstNode, bool) -> str
    """
    Returns the text corresponding to the given node.
    Returns '' for nodes (like `Load`) that don't correspond to any particular text.

    See ``get_text_positions()`` for details on the ``padded`` argument.
    """
    start, end = self.get_text_range(node, padded)
    return self._text[start: end]


class ASTTokens(ASTTextBase):
  """
  ASTTokens maintains the text of Python code in several forms: as a string, as line numbers, and
  as tokens, and is used to mark and access token and position information.

  ``source_text`` must be a unicode or UTF8-encoded string. If you pass in UTF8 bytes, remember
  that all offsets you'll get are to the unicode text, which is available as the ``.text``
  property.

  If ``parse`` is set, the ``source_text`` will be parsed with ``ast.parse()``, and the resulting
  tree marked with token info and made available as the ``.tree`` property.

  If ``tree`` is given, it will be marked and made available as the ``.tree`` property. In
  addition to the trees produced by the ``ast`` module, ASTTokens will also mark trees produced
  using the ``astroid`` library <https://www.astroid.org>.

  If only ``source_text`` is given, you may use ``.mark_tokens(tree)`` to mark the nodes of an AST
  tree created separately.
  """

  def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
    # type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
    super(ASTTokens, self).__init__(source_text, filename)

    self._tree = ast.parse(source_text, filename) if parse else tree

    # Tokenize the code.
    if tokens is None:
      tokens = generate_tokens(self._text)
    self._tokens = list(self._translate_tokens(tokens))

    # Extract the start positions of all tokens, so that we can quickly map positions to tokens.
    self._token_offsets = [tok.startpos for tok in self._tokens]

    if self._tree:
      self.mark_tokens(self._tree)

  def mark_tokens(self, root_node):
    # type: (Module) -> None
    """
    Given the root of the AST or Astroid tree produced from source_text, visits all nodes marking
    them with token and position information by adding ``.first_token`` and
    ``.last_token`` attributes. This is done automatically in the constructor when ``parse`` or
    ``tree`` arguments are set, but may be used manually with a separate AST or Astroid tree.
    """
    # The hard work of this class is done by MarkTokens
    from .mark_tokens import MarkTokens  # to avoid import loops
    MarkTokens(self).visit_tree(root_node)

  def _translate_tokens(self, original_tokens):
    # type: (Iterable[TokenInfo]) -> Iterator[Token]
    """
    Translates the given standard library tokens into our own representation.
    """
    for index, tok in enumerate(patched_generate_tokens(original_tokens)):
      tok_type, tok_str, start, end, line = tok
      yield Token(tok_type, tok_str, start, end, line, index,
                  self._line_numbers.line_to_offset(start[0], start[1]),
                  self._line_numbers.line_to_offset(end[0], end[1]))

  @property
  def text(self):
    # type: () -> str
    """The source code passed into the constructor."""
    return self._text

  @property
  def tokens(self):
    # type: () -> List[Token]
    """The list of tokens corresponding to the source code from the constructor."""
    return self._tokens

  @property
  def tree(self):
    # type: () -> Optional[Module]
    """The root of the AST tree passed into the constructor or parsed from the source code."""
    return self._tree

  @property
  def filename(self):
    # type: () -> str
    """The filename that was parsed"""
    return self._filename

  def get_token_from_offset(self, offset):
    # type: (int) -> Token
    """
    Returns the token containing the given character offset (0-based position in source text),
    or the preceding token if the position is between tokens.
    """
    return self._tokens[bisect.bisect(self._token_offsets, offset) - 1]

  def get_token(self, lineno, col_offset):
    # type: (int, int) -> Token
    """
    Returns the token containing the given (lineno, col_offset) position, or the preceding token
    if the position is between tokens.
    """
    # TODO: add test for multibyte unicode. We need to translate offsets from ast module (which
    # are in utf8) to offsets into the unicode text. tokenize module seems to use unicode offsets
    # but isn't explicit.
    return self.get_token_from_offset(self._line_numbers.line_to_offset(lineno, col_offset))

  def get_token_from_utf8(self, lineno, col_offset):
    # type: (int, int) -> Token
    """
    Same as get_token(), but interprets col_offset as a UTF8 offset, which is what `ast` uses.
    """
    return self.get_token(lineno, self._line_numbers.from_utf8_col(lineno, col_offset))

  def next_token(self, tok, include_extra=False):
    # type: (Token, bool) -> Token
    """
    Returns the next token after the given one. If include_extra is True, includes non-coding
    tokens from the tokenize module, such as NL and COMMENT.
    """
    i = tok.index + 1
    if not include_extra:
      while is_non_coding_token(self._tokens[i].type):
        i += 1
    return self._tokens[i]

  def prev_token(self, tok, include_extra=False):
    # type: (Token, bool) -> Token
    """
    Returns the previous token before the given one. If include_extra is True, includes non-coding
    tokens from the tokenize module, such as NL and COMMENT.
    """
    i = tok.index - 1
    if not include_extra:
      while is_non_coding_token(self._tokens[i].type):
        i -= 1
    return self._tokens[i]

  def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
    # type: (Token, int, Optional[str], bool) -> Token
    """
    Looks for the first token, starting at start_token, that matches tok_type and, if given, the
    token string. Searches backwards if reverse is True. Returns ENDMARKER token if not found (you
    can check it with `token.ISEOF(t.type)`).
    """
    t = start_token
    advance = self.prev_token if reverse else self.next_token
    while not match_token(t, tok_type, tok_str) and not token.ISEOF(t.type):
      t = advance(t, include_extra=True)
    return t

  def token_range(self,
                  first_token,  # type: Token
                  last_token,  # type: Token
                  include_extra=False,  # type: bool
                  ):
    # type: (...) -> Iterator[Token]
    """
    Yields all tokens in order from first_token through and including last_token. If
    include_extra is True, includes non-coding tokens such as tokenize.NL and .COMMENT.
    """
    for i in range(first_token.index, last_token.index + 1):
      if include_extra or not is_non_coding_token(self._tokens[i].type):
        yield self._tokens[i]

  def get_tokens(self, node, include_extra=False):
    # type: (AstNode, bool) -> Iterator[Token]
    """
    Yields all tokens making up the given node. If include_extra is True, includes non-coding
    tokens such as tokenize.NL and .COMMENT.
    """
    return self.token_range(node.first_token, node.last_token, include_extra=include_extra)

  def get_text_positions(self, node, padded):
    # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
    """
    Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
    If the positions can't be determined, or the nodes don't correspond to any particular text,
    returns ``(1, 0)`` for both.

    ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
    This means that if ``padded`` is True, the start position will be adjusted to include
    leading whitespace if ``node`` is a multiline statement.
    """
    if not hasattr(node, 'first_token'):
      return (1, 0), (1, 0)

    start = node.first_token.start
    end = node.last_token.end
    if padded and any(match_token(t, token.NEWLINE) for t in self.get_tokens(node)):
      # Set col_offset to 0 to include leading indentation for multiline statements.
      start = (start[0], 0)

    return start, end


class ASTText(ASTTextBase):
  """
  Supports the same ``get_text*`` methods as ``ASTTokens``,
  but uses the AST to determine the text positions instead of tokens.
  This is faster than ``ASTTokens`` as it requires less setup work.

  It also (sometimes) supports nodes inside f-strings, which ``ASTTokens`` doesn't.

  Some node types and/or Python versions are not supported.
  In these cases the ``get_text*`` methods will fall back to using ``ASTTokens``
  which incurs the usual setup cost the first time.
  If you want to avoid this, check ``supports_tokenless(node)`` before calling ``get_text*`` methods.
  """
  def __init__(self, source_text, tree=None, filename='<unknown>'):
    # type: (Any, Optional[Module], str) -> None
    super(ASTText, self).__init__(source_text, filename)

    self._tree = tree
    if self._tree is not None:
      annotate_fstring_nodes(self._tree)

    self._asttokens = None  # type: Optional[ASTTokens]

  @property
  def tree(self):
    # type: () -> Module
    if self._tree is None:
      self._tree = ast.parse(self._text, self._filename)
      annotate_fstring_nodes(self._tree)
    return self._tree

  @property
  def asttokens(self):
    # type: () -> ASTTokens
    if self._asttokens is None:
      self._asttokens = ASTTokens(
        self._text,
        tree=self.tree,
        filename=self._filename,
      )
    return self._asttokens

  def _get_text_positions_tokenless(self, node, padded):
    # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
    """
    Version of ``get_text_positions()`` that doesn't use tokens.
    """
    if is_module(node):
      # Modules don't have position info, so just return the range of the whole text.
      # The token-using method does something different, but its behavior seems weird and inconsistent.
      # For example, in a file with only comments, it only returns the first line.
      # It's hard to imagine a case when this matters.
      return (1, 0), self._line_numbers.offset_to_line(len(self._text))

    if getattr(node, 'lineno', None) is None:
      return (1, 0), (1, 0)

    assert node  # tell mypy that node is not None, which we allowed up to here for compatibility

    decorators = getattr(node, 'decorator_list', [])
    if not decorators:
      # Astroid uses node.decorators.nodes instead of node.decorator_list.
      decorators_node = getattr(node, 'decorators', None)
      decorators = getattr(decorators_node, 'nodes', [])
    if decorators:
      # Function/Class definition nodes are marked by AST as starting at def/class,
      # not the first decorator. This doesn't match the token-using behavior,
      # or inspect.getsource(), and just seems weird.
      start_node = decorators[0]
    else:
      start_node = node

    start_lineno = start_node.lineno
    end_node = last_stmt(node)

    # Include leading indentation for multiline statements.
    # This doesn't mean simple statements that happen to be on multiple lines,
    # but compound statements where inner indentation matters.
    # So we don't just compare node.lineno and node.end_lineno,
    # we check for a contained statement starting on a different line.
    if padded and (
        start_lineno != end_node.lineno
        or (
            # Astroid docstrings aren't treated as separate statements.
            # So to handle function/class definitions with a docstring but no other body,
            # we just check that the node is a statement with a docstring
            # and spanning multiple lines in the simple, literal sense.
            start_lineno != node.end_lineno
            and getattr(node, "doc_node", None)
            and is_stmt(node)
        )
    ):
      start_col_offset = 0
    else:
      start_col_offset = self._line_numbers.from_utf8_col(start_lineno, start_node.col_offset)

    start = (start_lineno, start_col_offset)

    # To match the token-using behaviour, we exclude trailing semicolons and comments.
    # This means that for blocks containing multiple statements, we have to use the last one
    # instead of the actual node for end_lineno and end_col_offset.
    end_lineno = cast(int, end_node.end_lineno)
    end_col_offset = cast(int, end_node.end_col_offset)
    end_col_offset = self._line_numbers.from_utf8_col(end_lineno, end_col_offset)
    end = (end_lineno, end_col_offset)

    return start, end

  def get_text_positions(self, node, padded):
    # type: (AstNode, bool) -> Tuple[Tuple[int, int], Tuple[int, int]]
    """
    Returns two ``(lineno, col_offset)`` tuples for the start and end of the given node.
    If the positions can't be determined, or the nodes don't correspond to any particular text,
    returns ``(1, 0)`` for both.

    ``padded`` corresponds to the ``padded`` argument to ``ast.get_source_segment()``.
    This means that if ``padded`` is True, the start position will be adjusted to include
    leading whitespace if ``node`` is a multiline statement.
    """
    if getattr(node, "_broken_positions", None):
      # This node was marked in util.annotate_fstring_nodes as having untrustworthy lineno/col_offset.
      return (1, 0), (1, 0)

    if supports_tokenless(node):
      return self._get_text_positions_tokenless(node, padded)

    return self.asttokens.get_text_positions(node, padded)


# Node types that _get_text_positions_tokenless doesn't support.
# These initial values are missing lineno.
_unsupported_tokenless_types = ("arguments", "Arguments", "withitem")  # type: Tuple[str, ...]
if sys.version_info[:2] == (3, 8):
  # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
  _unsupported_tokenless_types += ("arg", "Starred")
  # no lineno in 3.8
  _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")


def supports_tokenless(node=None):
  # type: (Any) -> bool
  """
  Returns True if the Python version and the node (if given) are supported by
  the ``get_text*`` methods of ``ASTText`` without falling back to ``ASTTokens``.
  See ``ASTText`` for why this matters.

  The following cases are not supported:

  - PyPy
  - ``ast.arguments`` / ``astroid.Arguments``
  - ``ast.withitem``
  - ``astroid.Comprehension``
  - ``astroid.AssignName`` inside ``astroid.Arguments`` or ``astroid.ExceptHandler``
  - The following nodes in Python 3.8 only:
    - ``ast.arg``
    - ``ast.Starred``
    - ``ast.Slice``
    - ``ast.ExtSlice``
    - ``ast.Index``
    - ``ast.keyword``
  """
  return (
    type(node).__name__ not in _unsupported_tokenless_types
    and not (
      # astroid nodes
      not isinstance(node, ast.AST) and node is not None and (
        (
          type(node).__name__ == "AssignName"
          and type(node.parent).__name__ in ("Arguments", "ExceptHandler")
        )
      )
    )
    and 'pypy' not in sys.version.lower()
  )
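Editor's note: the ASTText class above defers tokenization until a node actually needs it; a small sketch of the fallback check it documents (illustrative only, assuming CPython so the PyPy check passes):

import ast
from asttokens import ASTText, supports_tokenless

source = "def f(x):\n    return x + 1\n"
atext = ASTText(source)
ret = atext.tree.body[0].body[0]   # the `return` statement

print(supports_tokenless(ret))     # True: position info alone is enough, no tokenizing
print(atext.get_text(ret))         # return x + 1

args = atext.tree.body[0].args
print(supports_tokenless(args))    # False: ast.arguments needs the ASTTokens fallback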
76
.venv/lib/python3.12/site-packages/asttokens/line_numbers.py
Normal file
@@ -0,0 +1,76 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import re
from typing import Dict, List, Tuple

_line_start_re = re.compile(r'^', re.M)


class LineNumbers:
  """
  Class to convert between character offsets in a text string, and pairs (line, column) of 1-based
  line and 0-based column numbers, as used by tokens and AST nodes.

  This class expects unicode for input and stores positions in unicode. But it supports
  translating to and from utf8 offsets, which are used by ast parsing.
  """
  def __init__(self, text):
    # type: (str) -> None
    # A list of character offsets of each line's first character.
    self._line_offsets = [m.start(0) for m in _line_start_re.finditer(text)]
    self._text = text
    self._text_len = len(text)
    self._utf8_offset_cache = {}  # type: Dict[int, List[int]]  # maps line num to list of char offset for each byte in line

  def from_utf8_col(self, line, utf8_column):
    # type: (int, int) -> int
    """
    Given a 1-based line number and 0-based utf8 column, returns a 0-based unicode column.
    """
    offsets = self._utf8_offset_cache.get(line)
    if offsets is None:
      end_offset = self._line_offsets[line] if line < len(self._line_offsets) else self._text_len
      line_text = self._text[self._line_offsets[line - 1] : end_offset]

      offsets = [i for i, c in enumerate(line_text) for byte in c.encode('utf8')]
      offsets.append(len(line_text))
      self._utf8_offset_cache[line] = offsets

    return offsets[max(0, min(len(offsets) - 1, utf8_column))]

  def line_to_offset(self, line, column):
    # type: (int, int) -> int
    """
    Converts 1-based line number and 0-based column to 0-based character offset into text.
    """
    line -= 1
    if line >= len(self._line_offsets):
      return self._text_len
    elif line < 0:
      return 0
    else:
      return min(self._line_offsets[line] + max(0, column), self._text_len)

  def offset_to_line(self, offset):
    # type: (int) -> Tuple[int, int]
    """
    Converts 0-based character offset to pair (line, col) of 1-based line and 0-based column
    numbers.
    """
    offset = max(0, min(self._text_len, offset))
    line_index = bisect.bisect_right(self._line_offsets, offset) - 1
    return (line_index + 1, offset - self._line_offsets[line_index])
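Editor's note: a quick sketch of the conversions LineNumbers performs, including the UTF-8 column translation used for `ast` offsets (illustrative, not part of the file):

from asttokens import LineNumbers

ln = LineNumbers("héllo = 1\nworld = 2\n")

print(ln.line_to_offset(2, 0))  # 10: line 2 starts after the 10-character first line
print(ln.offset_to_line(10))    # (2, 0)
# "é" is two bytes in UTF-8, so ast reports the "1" literal at byte column 9;
# from_utf8_col maps that back to unicode column 8.
print(ln.from_utf8_col(1, 9))   # 8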
467
.venv/lib/python3.12/site-packages/asttokens/mark_tokens.py
Normal file
@@ -0,0 +1,467 @@
# Copyright 2016 Grist Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ast
import numbers
import sys
import token
from ast import Module
from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING

from . import util
from .asttokens import ASTTokens
from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer

if TYPE_CHECKING:
  from .util import AstNode


# Mapping of matching braces. To find a token here, look up token[:2].
_matching_pairs_left = {
  (token.OP, '('): (token.OP, ')'),
  (token.OP, '['): (token.OP, ']'),
  (token.OP, '{'): (token.OP, '}'),
}

_matching_pairs_right = {
  (token.OP, ')'): (token.OP, '('),
  (token.OP, ']'): (token.OP, '['),
  (token.OP, '}'): (token.OP, '{'),
}


class MarkTokens:
  """
  Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes
  to each of them. This is the heart of the token-marking logic.
  """
  def __init__(self, code):
    # type: (ASTTokens) -> None
    self._code = code
    self._methods = util.NodeMethods()
    self._iter_children = None  # type: Optional[Callable]

  def visit_tree(self, node):
    # type: (Module) -> None
    self._iter_children = util.iter_children_func(node)
    util.visit_tree(node, self._visit_before_children, self._visit_after_children)

  def _visit_before_children(self, node, parent_token):
    # type: (AstNode, Optional[util.Token]) -> Tuple[Optional[util.Token], Optional[util.Token]]
    col = getattr(node, 'col_offset', None)
    token = self._code.get_token_from_utf8(node.lineno, col) if col is not None else None

    if not token and util.is_module(node):
      # We'll assume that a Module node starts at the start of the source code.
      token = self._code.get_token(1, 0)

    # Use our own token, or our parent's if we don't have one, to pass to child calls as
    # parent_token argument. The second value becomes the token argument of _visit_after_children.
    return (token or parent_token, token)

  def _visit_after_children(self, node, parent_token, token):
    # type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None
    # This processes the node generically first, after all children have been processed.

    # Get the first and last tokens that belong to children. Note how this doesn't assume that we
    # iterate through children in order that corresponds to occurrence in source code. This
    # assumption can fail (e.g. with return annotations).
    first = token
    last = None
    for child in cast(Callable, self._iter_children)(node):
      # astroid slices have especially wrong positions, we don't want them to corrupt their parents.
      if util.is_empty_astroid_slice(child):
        continue
      if not first or child.first_token.index < first.index:
        first = child.first_token
      if not last or child.last_token.index > last.index:
        last = child.last_token

    # If we don't have a first token from _visit_before_children, and there were no children, then
    # use the parent's token as the first token.
    first = first or parent_token

    # If no children, set last token to the first one.
    last = last or first

    # Statements continue to before NEWLINE. This helps cover a few different cases at once.
    if util.is_stmt(node):
      last = self._find_last_in_stmt(cast(util.Token, last))

    # Capture any unmatched brackets.
    first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node)

    # Give a chance to node-specific methods to adjust.
    nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last)

    if (nfirst, nlast) != (first, last):
      # If anything changed, expand again to capture any unmatched brackets.
      nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node)

    node.first_token = nfirst
    node.last_token = nlast

  def _find_last_in_stmt(self, start_token):
    # type: (util.Token) -> util.Token
    t = start_token
    while (not util.match_token(t, token.NEWLINE) and
           not util.match_token(t, token.OP, ';') and
           not token.ISEOF(t.type)):
      t = self._code.next_token(t, include_extra=True)
    return self._code.prev_token(t)

  def _expand_to_matching_pairs(self, first_token, last_token, node):
    # type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token]
    """
    Scan tokens in [first_token, last_token] range that are between node's children, and for any
    unmatched brackets, adjust first/last tokens to include the closing pair.
    """
    # We look for opening parens/braces among non-child tokens (i.e. tokens between our actual
    # child nodes). If we find any closing ones, we match them to the opens.
    to_match_right = []  # type: List[Tuple[int, str]]
    to_match_left = []
    for tok in self._code.token_range(first_token, last_token):
      tok_info = tok[:2]
      if to_match_right and tok_info == to_match_right[-1]:
        to_match_right.pop()
      elif tok_info in _matching_pairs_left:
        to_match_right.append(_matching_pairs_left[tok_info])
      elif tok_info in _matching_pairs_right:
        to_match_left.append(_matching_pairs_right[tok_info])

    # Once done, extend `last_token` to match any unclosed parens/braces.
    for match in reversed(to_match_right):
      last = self._code.next_token(last_token)
      # Allow for trailing commas or colons (allowed in subscripts) before the closing delimiter
      while any(util.match_token(last, token.OP, x) for x in (',', ':')):
        last = self._code.next_token(last)
      # Now check for the actual closing delimiter.
      if util.match_token(last, *match):
        last_token = last

    # And extend `first_token` to match any unclosed opening parens/braces.
    for match in to_match_left:
      first = self._code.prev_token(first_token)
      if util.match_token(first, *match):
        first_token = first

    return (first_token, last_token)

  #----------------------------------------------------------------------
  # Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
  # that will actually be assigned.

  def visit_default(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # pylint: disable=no-self-use
    # By default, we don't need to adjust the token we computed earlier.
    return (first_token, last_token)

  def handle_comp(self, open_brace, node, first_token, last_token):
    # type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # For list/set/dict comprehensions, we only get the token of the first child, so adjust it to
    # include the opening brace (the closing brace will be matched automatically).
    before = self._code.prev_token(first_token)
    util.expect_token(before, token.OP, open_brace)
    return (before, last_token)

  def visit_comprehension(self,
                          node,  # type: AstNode
                          first_token,  # type: util.Token
                          last_token,  # type: util.Token
                          ):
    # type: (...) -> Tuple[util.Token, util.Token]
    # The 'comprehension' node starts with 'for' but we only get first child; we search backwards
    # to find the 'for' keyword.
    first = self._code.find_token(first_token, token.NAME, 'for', reverse=True)
    return (first, last_token)

  def visit_if(self, node, first_token, last_token):
    # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    while first_token.string not in ('if', 'elif'):
      first_token = self._code.prev_token(first_token)
    return first_token, last_token

  def handle_attr(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Attribute node has ".attr" (2 tokens) after the last child.
    dot = self._code.find_token(last_token, token.OP, '.')
    name = self._code.next_token(dot)
    util.expect_token(name, token.NAME)
    return (first_token, name)

  visit_attribute = handle_attr
  visit_assignattr = handle_attr
  visit_delattr = handle_attr

  def handle_def(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # With astroid, nodes that start with a doc-string can have an empty body, in which case we
    # need to adjust the last token to include the doc string.
    if not node.body and (getattr(node, 'doc_node', None) or getattr(node, 'doc', None)):  # type: ignore[union-attr]
      last_token = self._code.find_token(last_token, token.STRING)

    # Include @ from decorator
    if first_token.index > 0:
      prev = self._code.prev_token(first_token)
      if util.match_token(prev, token.OP, '@'):
        first_token = prev
    return (first_token, last_token)

  visit_classdef = handle_def
  visit_functiondef = handle_def

  def handle_following_brackets(self, node, last_token, opening_bracket):
    # type: (AstNode, util.Token, str) -> util.Token
    # This is for calls and subscripts, which have a pair of brackets
    # at the end which may contain no nodes, e.g. foo() or bar[:].
    # We look for the opening bracket and then let the matching pair be found automatically
    # Remember that last_token is at the end of all children,
    # so we are not worried about encountering a bracket that belongs to a child.
    first_child = next(cast(Callable, self._iter_children)(node))
    call_start = self._code.find_token(first_child.last_token, token.OP, opening_bracket)
    if call_start.index > last_token.index:
      last_token = call_start
    return last_token

  def visit_call(self, node, first_token, last_token):
    # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '(')

    # Handling a python bug with decorators with empty parens, e.g.
    # @deco()
    # def ...
    if util.match_token(first_token, token.OP, '@'):
      first_token = self._code.next_token(first_token)
    return (first_token, last_token)

  def visit_matchclass(self, node, first_token, last_token):
    # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '(')
    return (first_token, last_token)

  def visit_subscript(self,
                      node,  # type: AstNode
                      first_token,  # type: util.Token
                      last_token,  # type: util.Token
                      ):
    # type: (...) -> Tuple[util.Token, util.Token]
    last_token = self.handle_following_brackets(node, last_token, '[')
    return (first_token, last_token)

  def visit_slice(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # consume `:` tokens to the left and right. In Python 3.9, Slice nodes are
    # given a col_offset, (and end_col_offset), so this will always start inside
    # the slice, even if it is the empty slice. However, in 3.8 and below, this
    # will only expand to the full slice if the slice contains a node with a
    # col_offset. So x[:] will only get the correct tokens in 3.9, but x[1:] and
    # x[:1] will even on earlier versions of Python.
    while True:
      prev = self._code.prev_token(first_token)
      if prev.string != ':':
        break
      first_token = prev
    while True:
      next_ = self._code.next_token(last_token)
      if next_.string != ':':
        break
      last_token = next_
    return (first_token, last_token)

  def handle_bare_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # A bare tuple doesn't include parens; if there is a trailing comma, make it part of the tuple.
    maybe_comma = self._code.next_token(last_token)
    if util.match_token(maybe_comma, token.OP, ','):
      last_token = maybe_comma
    return (first_token, last_token)

  # In Python3.8 parsed tuples include parentheses when present.
  def handle_tuple_nonempty(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
    # It's a bare tuple if the first token belongs to the first child. The first child may
    # include extraneous parentheses (which don't create new nodes), so account for those too.
    child = node.elts[0]
    if TYPE_CHECKING:
      child = cast(AstNode, child)
    child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
    if first_token == child_first:
      return self.handle_bare_tuple(node, first_token, last_token)
    return (first_token, last_token)

  def visit_tuple(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
    if not node.elts:
      # An empty tuple is just "()", and we need no further info.
      return (first_token, last_token)
    return self.handle_tuple_nonempty(node, first_token, last_token)

  def _gobble_parens(self, first_token, last_token, include_all=False):
    # type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token]
    # Expands a range of tokens to include one or all pairs of surrounding parentheses, and
    # returns (first, last) tokens that include these parens.
    while first_token.index > 0:
      prev = self._code.prev_token(first_token)
      next = self._code.next_token(last_token)
      if util.match_token(prev, token.OP, '(') and util.match_token(next, token.OP, ')'):
        first_token, last_token = prev, next
        if include_all:
          continue
      break
    return (first_token, last_token)

  def visit_str(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_str(first_token, last_token)

  def visit_joinedstr(self,
                      node,  # type: AstNode
                      first_token,  # type: util.Token
                      last_token,  # type: util.Token
                      ):
    # type: (...) -> Tuple[util.Token, util.Token]
    if sys.version_info < (3, 12):
      # Older versions don't tokenize the contents of f-strings
      return self.handle_str(first_token, last_token)

    last = first_token
    while True:
      if util.match_token(last, getattr(token, "FSTRING_START")):
        # Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
        # of the f-string. We can't just look for the next FSTRING_END
        # because f-strings can be nested, e.g. f"{f'{x}'}", so we need
        # to treat this like matching balanced parentheses.
        count = 1
        while count > 0:
          last = self._code.next_token(last)
          # mypy complains about token.FSTRING_START and token.FSTRING_END.
          if util.match_token(last, getattr(token, "FSTRING_START")):
            count += 1
          elif util.match_token(last, getattr(token, "FSTRING_END")):
            count -= 1
        last_token = last
        last = self._code.next_token(last_token)
      elif util.match_token(last, token.STRING):
        # Similar to handle_str, we also need to handle adjacent strings.
        last_token = last
        last = self._code.next_token(last_token)
      else:
        break
    return (first_token, last_token)

  def visit_bytes(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_str(first_token, last_token)

  def handle_str(self, first_token, last_token):
    # type: (util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Multiple adjacent STRING tokens form a single string.
    last = self._code.next_token(last_token)
    while util.match_token(last, token.STRING):
      last_token = last
      last = self._code.next_token(last_token)
    return (first_token, last_token)

  def handle_num(self,
                 node,  # type: AstNode
                 value,  # type: Union[complex, int, numbers.Number]
                 first_token,  # type: util.Token
                 last_token,  # type: util.Token
                 ):
    # type: (...) -> Tuple[util.Token, util.Token]
    # A constant like '-1' gets turned into two tokens; this will skip the '-'.
    while util.match_token(last_token, token.OP):
      last_token = self._code.next_token(last_token)

    if isinstance(value, complex):
      # A complex number like -2j cannot be compared directly to 0
      # A complex number like 1-2j is expressed as a binary operation
      # so we don't need to worry about it
      value = value.imag

    # This makes sure that the - is included
    if value < 0 and first_token.type == token.NUMBER:  # type: ignore[operator]
      first_token = self._code.prev_token(first_token)
    return (first_token, last_token)

  def visit_num(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)

  def visit_const(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
    if isinstance(node.value, numbers.Number):
      return self.handle_num(node, node.value, first_token, last_token)
    elif isinstance(node.value, (str, bytes)):
      return self.visit_str(node, first_token, last_token)
    return (first_token, last_token)

  visit_constant = visit_const

  def visit_keyword(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Until python 3.9 (https://bugs.python.org/issue40141),
    # ast.keyword nodes didn't have line info. Astroid has lineno None.
    assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword)
    if node.arg is not None and getattr(node, 'lineno', None) is None:
      equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
      name = self._code.prev_token(equals)
      util.expect_token(name, token.NAME, node.arg)
      first_token = name
    return (first_token, last_token)

  def visit_starred(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted.
    if not util.match_token(first_token, token.OP, '*'):
      star = self._code.prev_token(first_token)
      if util.match_token(star, token.OP, '*'):
        first_token = star
    return (first_token, last_token)

  def visit_assignname(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    # Astroid may turn 'except' clause into AssignName, but we need to adjust it.
    if util.match_token(first_token, token.NAME, 'except'):
      colon = self._code.find_token(last_token, token.OP, ':')
      first_token = last_token = self._code.prev_token(colon)
    return (first_token, last_token)

  # Async nodes should typically start with the word 'async'
  # but Python < 3.7 doesn't put the col_offset there
  # AsyncFunctionDef is slightly different because it might have
  # decorators before that, which visit_functiondef handles
  def handle_async(self, node, first_token, last_token):
    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
    if not first_token.string == 'async':
      first_token = self._code.prev_token(first_token)
    return (first_token, last_token)

  visit_asyncfor = handle_async
  visit_asyncwith = handle_async

  def visit_asyncfunctiondef(self,
                             node,  # type: AstNode
                             first_token,  # type: util.Token
                             last_token,  # type: util.Token
                             ):
    # type: (...) -> Tuple[util.Token, util.Token]
    if util.match_token(first_token, token.NAME, 'def'):
      # Include the 'async' token
      first_token = self._code.prev_token(first_token)
    return self.visit_functiondef(node, first_token, last_token)
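Editor's note: MarkTokens is normally driven by ASTTokens.mark_tokens(); a small sketch of the attributes it leaves behind on each node, including the "statement continues to before NEWLINE" behaviour shown above (illustrative only):

from asttokens import ASTTokens

source = "if x:\n    y = [1,\n         2]\n"
atok = ASTTokens(source, parse=True)      # runs MarkTokens over the parsed tree

assign = atok.tree.body[0].body[0]        # the `y = [...]` statement
print(assign.first_token.string)          # y
print(assign.last_token.string)           # ]
print(atok.get_text(assign))              # 'y = [1,\n         2]'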
485
.venv/lib/python3.12/site-packages/asttokens/util.py
Normal file
@@ -0,0 +1,485 @@
|
||||
# Copyright 2016 Grist Labs, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import ast
|
||||
import collections
|
||||
import io
|
||||
import sys
|
||||
import token
|
||||
import tokenize
|
||||
from abc import ABCMeta
|
||||
from ast import Module, expr, AST
|
||||
from functools import lru_cache
|
||||
from typing import (
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
Any,
|
||||
TYPE_CHECKING,
|
||||
Type,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from .astroid_compat import NodeNG
|
||||
|
||||
# Type class used to expand out the definition of AST to include fields added by this library
|
||||
# It's not actually used for anything other than type checking though!
|
||||
class EnhancedAST(AST):
|
||||
# Additional attributes set by mark_tokens
|
||||
first_token = None # type: Token
|
||||
last_token = None # type: Token
|
||||
lineno = 0 # type: int
|
||||
|
||||
AstNode = Union[EnhancedAST, NodeNG]
|
||||
|
||||
TokenInfo = tokenize.TokenInfo
|
||||
|
||||
|
||||
def token_repr(tok_type, string):
|
||||
# type: (int, Optional[str]) -> str
|
||||
"""Returns a human-friendly representation of a token with the given type and string."""
|
||||
# repr() prefixes unicode with 'u' on Python2 but not Python3; strip it out for consistency.
|
||||
return '%s:%s' % (token.tok_name[tok_type], repr(string).lstrip('u'))
|
||||
|
||||
|
||||
class Token(collections.namedtuple('Token', 'type string start end line index startpos endpos')):
|
||||
"""
|
||||
TokenInfo is an 8-tuple containing the same 5 fields as the tokens produced by the tokenize
|
||||
module, and 3 additional ones useful for this module:
|
||||
|
||||
- [0] .type Token type (see token.py)
|
||||
- [1] .string Token (a string)
|
||||
- [2] .start Starting (row, column) indices of the token (a 2-tuple of ints)
|
||||
- [3] .end Ending (row, column) indices of the token (a 2-tuple of ints)
|
||||
- [4] .line Original line (string)
|
||||
- [5] .index Index of the token in the list of tokens that it belongs to.
|
||||
- [6] .startpos Starting character offset into the input text.
|
||||
- [7] .endpos Ending character offset into the input text.
|
||||
"""
|
||||
def __str__(self):
|
||||
# type: () -> str
|
||||
return token_repr(self.type, self.string)
|
||||
|
||||
|
||||
def match_token(token, tok_type, tok_str=None):
|
||||
# type: (Token, int, Optional[str]) -> bool
|
||||
"""Returns true if token is of the given type and, if a string is given, has that string."""
|
||||
return token.type == tok_type and (tok_str is None or token.string == tok_str)
|
||||
|
||||
|
||||
def expect_token(token, tok_type, tok_str=None):
|
||||
# type: (Token, int, Optional[str]) -> None
|
||||
"""
|
||||
Verifies that the given token is of the expected type. If tok_str is given, the token string
|
||||
is verified too. If the token doesn't match, raises an informative ValueError.
|
||||
"""
|
||||
if not match_token(token, tok_type, tok_str):
|
||||
raise ValueError("Expected token %s, got %s on line %s col %s" % (
|
||||
token_repr(tok_type, tok_str), str(token),
|
||||
token.start[0], token.start[1] + 1))
|
||||
|
||||
|
||||
def is_non_coding_token(token_type):
|
||||
# type: (int) -> bool
|
||||
"""
|
||||
These are considered non-coding tokens, as they don't affect the syntax tree.
|
||||
"""
|
||||
return token_type in (token.NL, token.COMMENT, token.ENCODING)
|
||||
|
||||
|
||||
def generate_tokens(text):
|
||||
# type: (str) -> Iterator[TokenInfo]
|
||||
"""
|
||||
Generates standard library tokens for the given code.
|
||||
"""
|
||||
# tokenize.generate_tokens is technically an undocumented API for Python3, but allows us to use the same API as for
|
||||
# Python2. See http://stackoverflow.com/a/4952291/328565.
|
||||
# FIXME: Remove cast once https://github.com/python/typeshed/issues/7003 gets fixed
|
||||
return tokenize.generate_tokens(cast(Callable[[], str], io.StringIO(text).readline))
|
||||
|
||||
|
||||
def iter_children_func(node):
|
||||
# type: (AST) -> Callable
|
||||
"""
|
||||
Returns a function which yields all direct children of a AST node,
|
||||
skipping children that are singleton nodes.
|
||||
The function depends on whether ``node`` is from ``ast`` or from the ``astroid`` module.
|
||||
"""
|
||||
return iter_children_astroid if hasattr(node, 'get_children') else iter_children_ast
|
||||
|
||||
|
||||
def iter_children_astroid(node, include_joined_str=False):
|
||||
# type: (NodeNG, bool) -> Union[Iterator, List]
|
||||
if not include_joined_str and is_joined_str(node):
|
||||
return []
|
||||
|
||||
return node.get_children()
|
||||
|
||||
|
||||
SINGLETONS = {c for n, c in ast.__dict__.items() if isinstance(c, type) and
|
||||
issubclass(c, (ast.expr_context, ast.boolop, ast.operator, ast.unaryop, ast.cmpop))}
|
||||
|
||||
|
||||
def iter_children_ast(node, include_joined_str=False):
|
||||
# type: (AST, bool) -> Iterator[Union[AST, expr]]
|
||||
if not include_joined_str and is_joined_str(node):
|
||||
return
|
||||
|
||||
if isinstance(node, ast.Dict):
|
||||
# override the iteration order: instead of <all keys>, <all values>,
|
||||
# yield keys and values in source order (key1, value1, key2, value2, ...)
|
||||
for (key, value) in zip(node.keys, node.values):
|
||||
if key is not None:
|
||||
yield key
|
||||
yield value
|
||||
return
|
||||
|
||||
for child in ast.iter_child_nodes(node):
|
||||
# Skip singleton children; they don't reflect particular positions in the code and break the
|
||||
# assumptions about the tree consisting of distinct nodes. Note that collecting classes
|
||||
# beforehand and checking them in a set is faster than using isinstance each time.
|
||||
if child.__class__ not in SINGLETONS:
|
||||
yield child
|
||||
|
||||
|
||||
stmt_class_names = {n for n, c in ast.__dict__.items()
|
||||
if isinstance(c, type) and issubclass(c, ast.stmt)}
|
||||
expr_class_names = ({n for n, c in ast.__dict__.items()
|
||||
if isinstance(c, type) and issubclass(c, ast.expr)} |
|
||||
{'AssignName', 'DelName', 'Const', 'AssignAttr', 'DelAttr'})
|
||||
|
||||
# These feel hacky compared to isinstance() but allow us to work with both ast and astroid nodes
|
||||
# in the same way, and without even importing astroid.
|
||||
def is_expr(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is an expression node."""
|
||||
return node.__class__.__name__ in expr_class_names
|
||||
|
||||
def is_stmt(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is a statement node."""
|
||||
return node.__class__.__name__ in stmt_class_names
|
||||
|
||||
def is_module(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is a module node."""
|
||||
return node.__class__.__name__ == 'Module'
|
||||
|
||||
def is_joined_str(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is a JoinedStr node, used to represent f-strings."""
|
||||
# At the moment, nodes below JoinedStr have wrong line/col info, and trying to process them only
|
||||
# leads to errors.
|
||||
return node.__class__.__name__ == 'JoinedStr'
|
||||
|
||||
|
||||
def is_expr_stmt(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is an `Expr` node, which is a statement that is an expression."""
|
||||
return node.__class__.__name__ == 'Expr'
|
||||
|
||||
|
||||
|
||||
CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
|
||||
try:
|
||||
from astroid import Const
|
||||
CONSTANT_CLASSES += (Const,)
|
||||
except ImportError: # pragma: no cover
|
||||
# astroid is not available
|
||||
pass
|
||||
|
||||
def is_constant(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is a Constant node."""
|
||||
return isinstance(node, CONSTANT_CLASSES)
|
||||
|
||||
|
||||
def is_ellipsis(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is an Ellipsis node."""
|
||||
return is_constant(node) and node.value is Ellipsis # type: ignore
|
||||
|
||||
|
||||
def is_starred(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node is a starred expression node."""
|
||||
return node.__class__.__name__ == 'Starred'
|
||||
|
||||
|
||||
def is_slice(node):
|
||||
# type: (AstNode) -> bool
|
||||
"""Returns whether node represents a slice, e.g. `1:2` in `x[1:2]`"""
|
||||
# Before 3.9, a tuple containing a slice is an ExtSlice,
|
||||
# but this was removed in https://bugs.python.org/issue34822
|
||||
return (
|
||||
node.__class__.__name__ in ('Slice', 'ExtSlice')
|
||||
or (
|
||||
node.__class__.__name__ == 'Tuple'
|
||||
and any(map(is_slice, cast(ast.Tuple, node).elts))
|
||||
)
|
||||
)

def is_empty_astroid_slice(node):
  # type: (AstNode) -> bool
  return (
    node.__class__.__name__ == "Slice"
    and not isinstance(node, ast.AST)
    and node.lower is node.upper is node.step is None
  )


# Sentinel value used by visit_tree().
_PREVISIT = object()

def visit_tree(node, previsit, postvisit):
  # type: (Module, Callable[[AstNode, Optional[Token]], Tuple[Optional[Token], Optional[Token]]], Optional[Callable[[AstNode, Optional[Token], Optional[Token]], None]]) -> None
  """
  Scans the tree under the node depth-first using an explicit stack. It avoids implicit recursion
  via the function call stack to avoid hitting 'maximum recursion depth exceeded' error.

  It calls ``previsit()`` and ``postvisit()`` as follows:

  * ``previsit(node, par_value)`` - should return ``(par_value, value)``
    ``par_value`` is as returned from ``previsit()`` of the parent.

  * ``postvisit(node, par_value, value)`` - should return ``value``
    ``par_value`` is as returned from ``previsit()`` of the parent, and ``value`` is as
    returned from ``previsit()`` of this node itself. The return ``value`` is ignored except
    the one for the root node, which is returned from the overall ``visit_tree()`` call.

  For the initial node, ``par_value`` is None. ``postvisit`` may be None.
  """
  if not postvisit:
    postvisit = lambda node, pvalue, value: None

  iter_children = iter_children_func(node)
  done = set()
  ret = None
  stack = [(node, None, _PREVISIT)]  # type: List[Tuple[AstNode, Optional[Token], Union[Optional[Token], object]]]
  while stack:
    current, par_value, value = stack.pop()
    if value is _PREVISIT:
      assert current not in done    # protect against infinite loop in case of a bad tree.
      done.add(current)

      pvalue, post_value = previsit(current, par_value)
      stack.append((current, par_value, post_value))

      # Insert all children in reverse order (so that first child ends up on top of the stack).
      ins = len(stack)
      for n in iter_children(current):
        stack.insert(ins, (n, pvalue, _PREVISIT))
    else:
      ret = postvisit(current, par_value, cast(Optional[Token], value))
  return ret
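
# Illustrative usage sketch (not part of the original module): previsit passes a value down to
# the children (here the depth), and postvisit sees that value again once the subtree is done.
# The names `previsit`, `postvisit`, and `seen` are local to this example.
#
#   import ast
#   tree = ast.parse("a = b + 1")
#   seen = []
#   def previsit(node, par_depth):
#     depth = 0 if par_depth is None else par_depth + 1
#     return depth, depth        # (value passed to children, value echoed to postvisit)
#   def postvisit(node, par_depth, depth):
#     seen.append((depth, type(node).__name__))
#   visit_tree(tree, previsit, postvisit)
#   # seen is filled in post-order, ending with (1, 'Assign') and (0, 'Module').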

def walk(node, include_joined_str=False):
  # type: (AST, bool) -> Iterator[Union[Module, AstNode]]
  """
  Recursively yield all descendant nodes in the tree starting at ``node`` (including ``node``
  itself), using depth-first pre-order traversal (yielding parents before their children).

  This is similar to ``ast.walk()``, but with a different order, and it works for both ``ast`` and
  ``astroid`` trees. Also, as ``iter_children()``, it skips singleton nodes generated by ``ast``.

  By default, ``JoinedStr`` (f-string) nodes and their contents are skipped
  because they previously couldn't be handled. Set ``include_joined_str`` to True to include them.
  """
  iter_children = iter_children_func(node)
  done = set()
  stack = [node]
  while stack:
    current = stack.pop()
    assert current not in done    # protect against infinite loop in case of a bad tree.
    done.add(current)

    yield current

    # Insert all children in reverse order (so that first child ends up on top of the stack).
    # This is faster than building a list and reversing it.
    ins = len(stack)
    for c in iter_children(current, include_joined_str):
      stack.insert(ins, c)
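
# Illustrative sketch (not part of the original module): pre-order traversal, with singleton
# nodes such as Load/Store/Add skipped.
#
#   import ast
#   tree = ast.parse("x = y + 1")
#   [type(n).__name__ for n in walk(tree)]
#   # -> ['Module', 'Assign', 'Name', 'BinOp', 'Name', 'Constant']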

def replace(text, replacements):
  # type: (str, List[Tuple[int, int, str]]) -> str
  """
  Replaces multiple slices of text with new values. This is a convenience method for making code
  modifications of ranges e.g. as identified by ``ASTTokens.get_text_range(node)``. Replacements is
  an iterable of ``(start, end, new_text)`` tuples.

  For example, ``replace("this is a test", [(0, 4, "X"), (8, 9, "THE")])`` produces
  ``"X is THE test"``.
  """
  p = 0
  parts = []
  for (start, end, new_text) in sorted(replacements):
    parts.append(text[p:start])
    parts.append(new_text)
    p = end
  parts.append(text[p:])
  return ''.join(parts)
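
# Illustrative sketch (not part of the original module): combining replace() with the offsets
# reported by ASTTokens.get_text_range().
#
#   import ast, asttokens
#   source = "print(a + b)"
#   atok = asttokens.ASTTokens(source, parse=True)
#   binop = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.BinOp))
#   start, end = atok.get_text_range(binop)
#   replace(source, [(start, end, "a * b")])   # -> 'print(a * b)'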


class NodeMethods:
  """
  Helper to get `visit_{node_type}` methods given a node's class and cache the results.
  """
  def __init__(self):
    # type: () -> None
    self._cache = {}  # type: Dict[Union[ABCMeta, type], Callable[[AstNode, Token, Token], Tuple[Token, Token]]]

  def get(self, obj, cls):
    # type: (Any, Union[ABCMeta, type]) -> Callable
    """
    Using the lowercase name of the class as node_type, returns `obj.visit_{node_type}`,
    or `obj.visit_default` if the type-specific method is not found.
    """
    method = self._cache.get(cls)
    if not method:
      name = "visit_" + cls.__name__.lower()
      method = getattr(obj, name, obj.visit_default)
      self._cache[cls] = method
    return method
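
# Illustrative sketch (not part of the original module): the `Visitor` class below is made up
# purely to show the visit_{node_type} / visit_default dispatch.
#
#   import ast
#   class Visitor:
#     def visit_name(self, node, first_token, last_token):
#       return first_token, last_token
#     def visit_default(self, node, first_token, last_token):
#       return first_token, last_token
#   methods = NodeMethods()
#   methods.get(Visitor(), ast.Name)   # -> bound visit_name
#   methods.get(Visitor(), ast.Call)   # -> bound visit_default (no visit_call defined)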


def patched_generate_tokens(original_tokens):
  # type: (Iterable[TokenInfo]) -> Iterator[TokenInfo]
  """
  Fixes tokens yielded by `tokenize.generate_tokens` to handle more non-ASCII characters in identifiers.
  Workaround for https://github.com/python/cpython/issues/68382.
  Should only be used when tokenizing a string that is known to be valid syntax,
  because it assumes that error tokens are not actually errors.
  Combines groups of consecutive NAME, NUMBER, and/or ERRORTOKEN tokens into a single NAME token.
  """
  group = []  # type: List[tokenize.TokenInfo]
  for tok in original_tokens:
    if (
        tok.type in (tokenize.NAME, tokenize.ERRORTOKEN, tokenize.NUMBER)
        # Only combine tokens if they have no whitespace in between
        and (not group or group[-1].end == tok.start)
    ):
      group.append(tok)
    else:
      for combined_token in combine_tokens(group):
        yield combined_token
      group = []
      yield tok
  for combined_token in combine_tokens(group):
    yield combined_token


def combine_tokens(group):
  # type: (List[tokenize.TokenInfo]) -> List[tokenize.TokenInfo]
  if not any(tok.type == tokenize.ERRORTOKEN for tok in group) or len({tok.line for tok in group}) != 1:
    return group
  return [
    tokenize.TokenInfo(
      type=tokenize.NAME,
      string="".join(t.string for t in group),
      start=group[0].start,
      end=group[-1].end,
      line=group[0].line,
    )
  ]
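
# Illustrative sketch (not part of the original module): for ordinary ASCII source there are no
# ERRORTOKENs, so the patched generator yields exactly the stdlib tokens; it only differs when
# the stdlib tokenizer splits a valid identifier into ERRORTOKEN pieces.
#
#   import io, tokenize
#   src = "x = 1\n"
#   plain = list(tokenize.generate_tokens(io.StringIO(src).readline))
#   patched = list(patched_generate_tokens(tokenize.generate_tokens(io.StringIO(src).readline)))
#   assert plain == patched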


def last_stmt(node):
  # type: (ast.AST) -> ast.AST
  """
  If the given AST node contains multiple statements, return the last one.
  Otherwise, just return the node.
  """
  child_stmts = [
    child for child in iter_children_func(node)(node)
    if is_stmt(child) or type(child).__name__ in (
      "excepthandler",
      "ExceptHandler",
      "match_case",
      "MatchCase",
      "TryExcept",
      "TryFinally",
    )
  ]
  if child_stmts:
    return last_stmt(child_stmts[-1])
  return node
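
# Illustrative sketch (not part of the original module): last_stmt() descends into compound
# statements to find the final statement node.
#
#   import ast
#   mod = ast.parse("if a:\n    b = 1\n    c = 2\n")
#   last_stmt(mod)   # -> the ast.Assign node for `c = 2`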


@lru_cache(maxsize=None)
def fstring_positions_work():
  # type: () -> bool
  """
  The positions attached to nodes inside f-string FormattedValues have some bugs
  that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
  This checks for those bugs more concretely without relying on the Python version.
  Specifically this checks:
   - Values with a format spec or conversion
   - Repeated (i.e. identical-looking) expressions
   - f-strings implicitly concatenated over multiple lines.
   - Multiline, triple-quoted f-strings.
  """
  source = """(
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
    f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
    f'''
    {s} {t}
    {u} {v}
    '''
  )"""
  tree = ast.parse(source)
  name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
  name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
  positions_are_unique = len(set(name_positions)) == len(name_positions)
  correct_source_segments = all(
    ast.get_source_segment(source, node) == node.id
    for node in name_nodes
  )
  return positions_are_unique and correct_source_segments


def annotate_fstring_nodes(tree):
  # type: (ast.AST) -> None
  """
  Add a special attribute `_broken_positions` to nodes inside f-strings
  if the lineno/col_offset cannot be trusted.
  """
  if sys.version_info >= (3, 12):
    # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
    # In Python 3.12, inner nodes have sensible positions.
    return
  for joinedstr in walk(tree, include_joined_str=True):
    if not isinstance(joinedstr, ast.JoinedStr):
      continue
    for part in joinedstr.values:
      # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
      setattr(part, '_broken_positions', True)  # use setattr for mypy

      if isinstance(part, ast.FormattedValue):
        if not fstring_positions_work():
          for child in walk(part.value):
            setattr(child, '_broken_positions', True)

        if part.format_spec:  # this is another JoinedStr
          # Again, the standard positions span the full f-string.
          setattr(part.format_spec, '_broken_positions', True)
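
# Illustrative sketch (not part of the original module): on interpreters older than 3.12 the
# parts of an f-string get flagged; on 3.12+ nothing is flagged because positions are reliable.
#
#   import ast
#   tree = ast.parse('f"{x:{width}}"')
#   annotate_fstring_nodes(tree)
#   flagged = [type(n).__name__ for n in ast.walk(tree) if getattr(n, '_broken_positions', False)]
#   # [] on Python 3.12+; includes 'FormattedValue' (and more, if positions are buggy) before that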

1
.venv/lib/python3.12/site-packages/asttokens/version.py
Normal file
@@ -0,0 +1 @@
__version__ = "3.0.0"