# Licensed under the LGPL: https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
# For details: https://github.com/pylint-dev/astroid/blob/main/LICENSE
# Copyright (c) https://github.com/pylint-dev/astroid/blob/main/CONTRIBUTORS.txt
"""The AstroidBuilder makes astroid from living object and / or from _ast.
The builder is not thread safe and can't be used to parse different sources
at the same time.
"""
from __future__ import annotations
import ast
import os
import re
import textwrap
import types
import warnings
from collections.abc import Collection, Iterator, Sequence
from io import TextIOWrapper
from tokenize import detect_encoding
from typing import TYPE_CHECKING, cast
from astroid import bases, modutils, nodes, raw_building, rebuilder, util
from astroid._ast import ParserModule, get_parser_module
from astroid.const import PY312_PLUS, PY314_PLUS
from astroid.exceptions import AstroidBuildingError, AstroidSyntaxError, InferenceError
if TYPE_CHECKING:
from astroid.manager import AstroidManager
# The name of the transient function that is used to
# wrap expressions to be extracted when calling
# extract_node.
_TRANSIENT_FUNCTION = "__"

# The comment used to select a statement to be extracted
# when calling extract_node.
_STATEMENT_SELECTOR = "#@"

# Module-level side effect: install "ignore" filters for SyntaxWarnings whose
# message matches the given patterns. Each filter is only installed when the
# corresponding version flag is set (presumably Python >= 3.12 and >= 3.14
# respectively -- the flags come from astroid.const).
if PY312_PLUS:
    warnings.filterwarnings("ignore", ".*invalid escape sequence", SyntaxWarning)
if PY314_PLUS:
    warnings.filterwarnings(
        "ignore", "'(return|continue|break)' in a 'finally'", SyntaxWarning
    )
def open_source_file(filename: str) -> tuple[TextIOWrapper, str, str]:
    """Open a Python source file using the encoding it declares.

    The file is first opened in binary mode so that
    :func:`tokenize.detect_encoding` can determine the encoding, then
    reopened in text mode with that encoding and read completely.

    :param filename: Path of the source file to open.
    :returns: A ``(stream, encoding, data)`` tuple. *stream* is the still-open
        text stream (already consumed by the read); the caller is responsible
        for closing it.
    :raises OSError: If the file cannot be opened.
    :raises SyntaxError: If encoding detection rejects the file.
    :raises LookupError: If the detected encoding is unknown to ``open``.
    :raises UnicodeError: If the content cannot be decoded with the detected
        encoding. The text stream is closed before the error propagates.
    """
    with open(filename, "rb") as byte_stream:
        encoding = detect_encoding(byte_stream.readline)[0]
    # The stream is handed back to the caller on success, so it cannot be
    # managed by a ``with`` block here.
    # pylint: disable-next=consider-using-with
    stream = open(filename, newline=None, encoding=encoding)
    try:
        data = stream.read()
    except BaseException:
        # The caller never receives the stream on failure, so nobody else
        # could close it: release the file handle before re-raising.
        stream.close()
        raise
    return stream, encoding, data
def _can_assign_attr(node: nodes.ClassDef, attrname: str | None) -> bool:
    """Tell whether *attrname* may be recorded as an attribute of *node*.

    :param node: The class whose ``slots()`` and ``qname()`` are consulted.
    :param attrname: The name of the attribute being assigned.
    :returns: ``False`` when the class reports a non-empty set of slots that
        does not contain *attrname*, or when the class is ``builtins.object``;
        ``True`` otherwise.
    """
    try:
        declared_slots = node.slots()
    except NotImplementedError:
        # Slots could not be computed: treat the class as unrestricted.
        declared_slots = None
    if declared_slots:
        allowed_names = {slot.value for slot in declared_slots}
        if attrname not in allowed_names:
            return False
    return node.qname() != "builtins.object"
class AstroidBuilder(raw_building.InspectBuilder):
    """Build an astroid tree from source code or from a live module.

    :param manager: The manager which should be used by the builder.
    :param apply_transforms: Whether the transforms are applied once the tree
        has been built, be it from source or from a live object. Defaults to
        ``True``.
    """

    def __init__(self, manager: AstroidManager, apply_transforms: bool = True) -> None:
        super().__init__(manager)
        self._apply_transforms = apply_transforms
        # Ask the manager to bootstrap when InspectBuilder is not flagged as
        # bootstrapped yet.
        if not raw_building.InspectBuilder.bootstrapped:
            manager.bootstrap()

    def module_build(
        self, module: types.ModuleType, modname: str | None = None
    ) -> nodes.Module:
        """Build an astroid tree from a living module instance.

        Sources are tried in this order: the text returned by the module
        loader's ``get_source``, the ``.py`` file derived from
        ``module.__file__``, and finally introspection of the live object.

        :param module: The imported module to represent.
        :param modname: The module name; falls back to ``module.__name__``
            when the module has a loader.
        :returns: The module node that was built.
        """
        path = getattr(module, "__file__", None)
        loader = getattr(module, "__loader__", None)
        tree = None
        # Prefer the loader to get the source rather than assuming we have a
        # filesystem to read the source file from ourselves.
        if loader:
            modname = modname or module.__name__
            source = loader.get_source(modname)
            if source:
                tree = self.string_build(source, modname, path=path)
        if tree is None and path is not None:
            stem, ext = os.path.splitext(modutils._path_from_filename(path))
            source_path = stem + ".py"
            if ext in {".py", ".pyc", ".pyo"} and os.path.exists(source_path):
                tree = self.file_build(source_path, modname)
        if tree is None:
            # No source could be found (e.g. a built-in module): get a
            # partial representation by introspection.
            tree = self.inspect_build(module, modname=modname, path=path)
            if self._apply_transforms:
                # Transforms must be run here explicitly since the rebuilder
                # isn't called for builtin nodes.
                tree = self._manager.visit_transforms(tree)
        assert isinstance(tree, nodes.Module)
        return tree

    def file_build(self, path: str, modname: str | None = None) -> nodes.Module:
        """Build an astroid tree from a source code file (i.e. from an ast).

        :param path: Path of the file, expected to be a python source file.
        :param modname: The module name. When ``None`` it is derived from
            *path*.
        :returns: The module node that was built.
        :raises AstroidBuildingError: If the file cannot be read, or cannot
            be decoded with the detected encoding.
        :raises AstroidSyntaxError: If the encoding declaration is invalid or
            names an unknown encoding.
        """
        try:
            source_stream, source_encoding, source_text = open_source_file(path)
        except OSError as exc:
            raise AstroidBuildingError(
                "Unable to load file {path}:\n{error}",
                modname=modname,
                path=path,
                error=exc,
            ) from exc
        except (SyntaxError, LookupError) as exc:
            raise AstroidSyntaxError(
                "Python 3 encoding specification error or unknown encoding:\n"
                "{error}",
                modname=modname,
                path=path,
                error=exc,
            ) from exc
        except UnicodeError as exc:
            # Wrong encoding: detect_encoding returns utf-8 if no encoding
            # is specified.
            raise AstroidBuildingError(
                "Wrong or no encoding specified for {filename}.", filename=path
            ) from exc
        with source_stream:
            if modname is None:
                # No name given: compute the dotted module path, falling
                # back to the bare file name without its extension.
                try:
                    modname = ".".join(modutils.modpath_from_file(path))
                except ImportError:
                    file_stem, _ = os.path.splitext(os.path.basename(path))
                    modname = file_stem
            # Build the astroid representation.
            tree, tree_builder = self._data_build(source_text, modname, path)
            return self._post_build(tree, tree_builder, source_encoding)

    def string_build(
        self, data: str, modname: str = "", path: str | None = None
    ) -> nodes.Module:
        """Build an astroid tree from a source code string.

        The UTF-8 encoded source is stored on the module as ``file_bytes``.

        :param data: The source code.
        :param modname: The name of the module.
        :param path: The path of the module, if any.
        :returns: The module node that was built.
        """
        tree, tree_builder = self._data_build(data, modname, path)
        tree.file_bytes = data.encode("utf-8")
        return self._post_build(tree, tree_builder, "utf-8")

    def _post_build(
        self, module: nodes.Module, builder: rebuilder.TreeRebuilder, encoding: str
    ) -> nodes.Module:
        """Handle the encoding and the delayed nodes of a freshly built module.

        The module is cached in the manager first; then the ``ImportFrom``
        and ``AssignAttr`` nodes collected by *builder* are processed, and
        finally the transforms are run if they are enabled.
        """
        module.file_encoding = encoding
        self._manager.cache_module(module)
        # Post tree building steps, run after the module is stored in the cache.
        for import_node, global_names in builder._import_from_nodes:
            if import_node.modname == "__future__":
                for feature, _ in import_node.names:
                    module.future_imports.add(feature)
            self.add_from_names_to_locals(import_node, global_names)
        # Handle delayed assattr nodes.
        for assattr_node in builder._delayed_assattr:
            self.delayed_assattr(assattr_node)
        if not self._apply_transforms:
            return module
        # Visit the transforms.
        return self._manager.visit_transforms(module)

    def _data_build(
        self, data: str, modname: str, path: str | None
    ) -> tuple[nodes.Module, rebuilder.TreeRebuilder]:
        """Build the module node from *data* and attach file information.

        :param data: The source code.
        :param modname: The module name; a trailing ``.__init__`` is removed
            and marks the module as a package.
        :param path: The path of the source, or ``None``.
        :returns: The module node and the rebuilder that produced it.
        :raises AstroidSyntaxError: If parsing the source fails.
        """
        try:
            parsed, parser_module = _parse_string(
                data, type_comments=True, modname=modname
            )
        except (TypeError, ValueError, SyntaxError, MemoryError) as exc:
            raise AstroidSyntaxError(
                "Parsing Python code failed:\n{error}",
                source=data,
                modname=modname,
                path=path,
                error=exc,
            ) from exc
        node_file = os.path.abspath(path) if path is not None else "<?>"
        init_suffix = ".__init__"
        if modname.endswith(init_suffix):
            modname = modname[: -len(init_suffix)]
            is_package = True
        else:
            # Otherwise the module is a package when its file is __init__.
            is_package = (
                path is not None
                and os.path.splitext(os.path.basename(path))[0] == "__init__"
            )
        tree_builder = rebuilder.TreeRebuilder(self._manager, parser_module, data)
        tree = tree_builder.visit_module(parsed, modname, node_file, is_package)
        return tree, tree_builder

    def add_from_names_to_locals(
        self, node: nodes.ImportFrom, global_name: Collection[str]
    ) -> None:
        """Store the names imported by *node* into the locals.

        A name found in *global_name* is registered on the root of the tree,
        any other name on the parent of *node*. For a ``*`` import the public
        names of the imported module are registered; the import is skipped if
        the module cannot be built. The locals of each registered name are
        re-sorted by line number.
        """

        def register(owner: nodes.NodeNG, local_name: str) -> None:
            owner.set_local(local_name, node)
            bound_nodes = owner.scope().locals[local_name]
            if TYPE_CHECKING:
                bound_nodes = cast(list[nodes.NodeNG], bound_nodes)
            bound_nodes.sort(key=lambda n: n.fromlineno or 0)

        def bind(local_name: str) -> None:
            if local_name in global_name:
                register(root, local_name)
            else:
                register(node.parent, local_name)

        assert node.parent  # It should always default to the module
        root = node.root()
        for imported_name, alias in node.names:
            if imported_name != "*":
                bind(alias or imported_name)
                continue
            try:
                imported_module = node.do_import_module()
            except AstroidBuildingError:
                continue
            for public_name in imported_module.public_names():
                bind(public_name)

    def delayed_assattr(self, node: nodes.AssignAttr) -> None:
        """Visit an AssignAttr node.

        Each value inferred for the assignment target's expression gets
        *node* appended to the entry for ``node.attrname`` in its
        ``instance_attrs`` (or ``locals``), unless it is already there.
        Inference errors end the processing silently.
        """
        from astroid import objects  # pylint: disable=import-outside-toplevel

        try:
            for inferred in node.expr.infer():
                if isinstance(inferred, util.UninferableBase):
                    continue
                try:
                    # We want a narrow check on the parent type, not all of
                    # its subclasses.
                    if type(inferred) in {bases.Instance, objects.ExceptionInstance}:
                        inferred = inferred._proxied
                        attr_store = inferred.instance_attrs
                        if not _can_assign_attr(inferred, node.attrname):
                            continue
                    elif isinstance(
                        inferred, (bases.Instance, bases.Proxy, util.UninferableBase)
                    ):
                        # Const, Tuple or other containers that inherit from
                        # `Instance`, and proxies, are skipped.
                        continue
                    elif inferred.is_function:
                        attr_store = inferred.instance_attrs
                    else:
                        attr_store = inferred.locals
                except AttributeError:
                    # XXX log error
                    continue
                assignments = attr_store.setdefault(node.attrname, [])
                if node not in assignments:
                    assignments.append(node)
        except InferenceError:
            pass
def build_namespace_package_module(name: str, path: Sequence[str]) -> nodes.Module:
    """Create an empty module node flagged as a package.

    :param name: The name given to the module node.
    :param path: The path(s) given to the module node.
    :returns: A ``nodes.Module`` with an empty body and no doc node.
    """
    namespace_module = nodes.Module(name, path=path, package=True)
    namespace_module.postinit(body=[], doc_node=None)
    return namespace_module
def parse(
    code: str,
    module_name: str = "",
    path: str | None = None,
    apply_transforms: bool = True,
) -> nodes.Module:
    """Parse a source string in order to obtain an astroid AST from it.

    The code is dedented with :func:`textwrap.dedent` before it is built.

    :param str code: The code for the module.
    :param str module_name: The name for the module, if any.
    :param str path: The path for the module, if any.
    :param bool apply_transforms:
        Apply the transforms for the given code. Pass ``False`` if you
        don't want the default transforms to be applied.
    :returns: The module node built from *code*.
    """
    # Imported locally: at module level this name is only imported under
    # TYPE_CHECKING.
    # pylint: disable-next=import-outside-toplevel
    from astroid.manager import AstroidManager

    code = textwrap.dedent(code)
    builder = AstroidBuilder(AstroidManager(), apply_transforms=apply_transforms)
    return builder.string_build(code, modname=module_name, path=path)
def _extract_expressions(node: nodes.NodeNG) -> Iterator[nodes.NodeNG]:
    """Find the expressions wrapped in a call to _TRANSIENT_FUNCTION.

    The tree is walked recursively. Whenever a call to _TRANSIENT_FUNCTION
    with at least one argument is found, the call node is replaced inside its
    parent by the first argument of the call, and that argument is yielded.

    :param node: An astroid node.
    :type node: astroid.bases.NodeNG
    :yields: The wrapped expressions, now attached to the modified tree.
    """
    is_transient_call = (
        isinstance(node, nodes.Call)
        and isinstance(node.func, nodes.Name)
        and node.func.name == _TRANSIENT_FUNCTION
        and node.args
    )
    if not is_transient_call:
        # Not a wrapper call: keep looking further down the tree.
        for child in node.get_children():
            yield from _extract_expressions(child)
        return

    wrapped = node.args[0]
    assert node.parent
    parent = node.parent
    wrapped.parent = parent
    # Look for the call node in every field listed in the parent's
    # _astroid_fields. A field may hold the node directly or a list that
    # contains it. Once found, it is replaced by the wrapped expression so
    # that the tree looks like no call to _TRANSIENT_FUNCTION ever took
    # place.
    for field_name in parent._astroid_fields:
        field_value = getattr(parent, field_name)
        if isinstance(field_value, list):
            for position, element in enumerate(field_value):
                if element is node:
                    field_value[position] = wrapped
        elif field_value is node:
            setattr(parent, field_name, wrapped)
    yield wrapped
def _find_statement_by_line(node: nodes.NodeNG, line: int) -> nodes.NodeNG | None:
    """Extract the statement located on a specific line of an AST.

    *node* itself is returned when its line number equals *line*; otherwise
    its children are searched recursively, in order, and the first match is
    returned.

    :param node: An astroid node.
    :type node: astroid.bases.NodeNG
    :param line: The line number of the statement to extract.
    :type line: int
    :returns: The statement on the line, or None if no statement for the line
        can be found.
    :rtype: astroid.bases.NodeNG or None
    """
    # This is an inaccuracy in the AST: the nodes that can be decorated do
    # not carry explicit information on which line the actual definition
    # (class/def) is, but .fromlineno seems to be close enough.
    uses_fromlineno = isinstance(
        node, (nodes.ClassDef, nodes.FunctionDef, nodes.MatchCase)
    )
    node_line = node.fromlineno if uses_fromlineno else node.lineno
    if node_line == line:
        return node
    # Lazily search each child; stop at the first truthy result.
    candidates = (
        _find_statement_by_line(child, line) for child in node.get_children()
    )
    return next((found for found in candidates if found), None)
def _extract_single_node(code: str, module_name: str = "") -> nodes.NodeNG:
    """Call extract_node and return a single node.

    When extract_node gives back a list, only its first element is returned.
    """
    extracted = extract_node(code, module_name)
    return extracted[0] if isinstance(extracted, list) else extracted
def _parse_string(
    data: str, type_comments: bool = True, modname: str | None = None
) -> tuple[ast.Module, ParserModule]:
    """Parse *data* into an ``ast`` module, tolerating misplaced type comments.

    A trailing newline is appended to *data* before parsing. If parsing with
    type comments enabled fails on a line that looks like a type comment, the
    source is parsed a second time with type comment support disabled.

    :param data: The source code to parse.
    :param type_comments: Whether type comments should be parsed.
    :param modname: Passed to the parser as the ``filename`` of the source.
    :returns: The parsed module and the parser module that produced it.
    :raises SyntaxError: If the source cannot be parsed.
    """
    source = data + "\n"
    parser_module = get_parser_module(type_comments=type_comments)
    try:
        parsed = parser_module.parse(
            source, type_comments=type_comments, filename=modname
        )
    except SyntaxError as exc:
        # If the type annotations are misplaced for some reason, we do not want
        # to fail the entire parsing of the file, so we need to retry the
        # parsing without type comment support. We use a heuristic for
        # determining if the error is due to type annotations.
        type_annot_related = re.search(r"#\s+type:", exc.text or "")
        if not (type_annot_related and type_comments):
            raise
        parser_module = get_parser_module(type_comments=False)
        # Pass the same filename as the first attempt so that an error raised
        # by the retry is reported against the same name.
        parsed = parser_module.parse(source, type_comments=False, filename=modname)
    return parsed, parser_module