From 3d937f85841af6166f7b81e029f9d6dbbfc4970a Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 9 Nov 2024 15:58:16 +0100 Subject: [PATCH 01/14] Add data catalog name to nodes. --- docs/source/how_to_guides/writing_custom_nodes.md | 6 +++--- pyproject.toml | 6 ------ src/_pytask/console.py | 7 +++++++ src/_pytask/data_catalog.py | 2 ++ src/_pytask/node_protocols.py | 2 ++ src/_pytask/nodes.py | 4 ++++ uv.lock | 8 +++++--- 7 files changed, 23 insertions(+), 12 deletions(-) diff --git a/docs/source/how_to_guides/writing_custom_nodes.md b/docs/source/how_to_guides/writing_custom_nodes.md index 813f3d96..2204c0c9 100644 --- a/docs/source/how_to_guides/writing_custom_nodes.md +++ b/docs/source/how_to_guides/writing_custom_nodes.md @@ -142,9 +142,9 @@ databases. [^kedro] ## References -[^structural-subtyping]: Structural subtyping is similar to ABCs an approach in Python to enforce interfaces, but - it can be considered more pythonic since it is closer to duck typing. Hynek Schlawack - wrote a comprehensive +[^structural-subtyping]: Structural subtyping is similar to ABCs an approach in Python to enforce interfaces, + but it can be considered more pythonic since it is closer to duck typing. Hynek + Schlawack wrote a comprehensive [guide on subclassing](https://hynek.me/articles/python-subclassing-redux/) that features protocols under "Type 2". Glyph wrote an introduction to protocols called [I want a new duck](https://glyph.twistedmatrix.com/2020/07/new-duck.html). diff --git a/pyproject.toml b/pyproject.toml index 288c1ce7..36ab216f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,12 +84,6 @@ Tracker = "https://github.com/pytask-dev/pytask/issues" [project.scripts] pytask = "pytask:cli" -[tool.uv.sources] -pytask-parallel = { workspace = true } - -[tool.uv.workspace] -members = ["packages/*"] - [tool.uv] dev-dependencies = [ "tox-uv>=1.7.0", "pygraphviz" diff --git a/src/_pytask/console.py b/src/_pytask/console.py index e46838a4..507b4603 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -24,6 +24,7 @@ from rich.theme import Theme from rich.tree import Tree +from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD from _pytask.node_protocols import PNode from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode @@ -146,6 +147,12 @@ def format_node_name( """Format the name of a node.""" if isinstance(node, PPathNode): if node.name != node.path.as_posix(): + # Use getattr with default because on existing projects PNode.attribute does + # not exist. Remove with v0.6.0. + if data_catalog_name := getattr(node, "attributes", {}).get( + DATA_CATALOG_NAME_FIELD + ): + return Text(f"{data_catalog_name}::{node.name}") return Text(node.name) name = shorten_path(node.path, paths) return Text(name) diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 8a9a08cd..6e9761dd 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -17,6 +17,7 @@ from attrs import field from _pytask.config_utils import find_project_root_and_config +from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD from _pytask.exceptions import NodeNotCollectedError from _pytask.models import NodeInfo from _pytask.node_protocols import PNode @@ -133,3 +134,4 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None: msg = f"{node!r} cannot be parsed." raise NodeNotCollectedError(msg) self._entries[name] = collected_node + self._entries[name].attributes[DATA_CATALOG_NAME_FIELD] = self.name diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 6a1d8fc0..7916324d 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -21,6 +21,7 @@ class PNode(Protocol): """Protocol for nodes.""" name: str + attributes: dict[Any, Any] @property def signature(self) -> str: @@ -116,6 +117,7 @@ class PProvisionalNode(Protocol): """ name: str + attributes: dict[Any, Any] @property def signature(self) -> str: diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 4c678d9d..614f2799 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -167,6 +167,7 @@ class PathNode(PPathNode): path: Path name: str = "" + attributes: dict[Any, Any] = field(factory=dict) @property def signature(self) -> str: @@ -237,6 +238,7 @@ class PythonNode(PNode): value: Any | NoDefault = no_default hash: bool | Callable[[Any], bool] = False node_info: NodeInfo | None = None + attributes: dict[Any, Any] = field(factory=dict) @property def signature(self) -> str: @@ -306,6 +308,7 @@ class PickleNode(PPathNode): path: Path name: str = "" + attributes: dict[Any, Any] = field(factory=dict) @property def signature(self) -> str: @@ -355,6 +358,7 @@ class DirectoryNode(PProvisionalNode): name: str = "" pattern: str = "*" root_dir: Path | None = None + attributes: dict[Any, Any] = field(factory=dict) @property def signature(self) -> str: diff --git a/uv.lock b/uv.lock index 8f2822eb..75b198b9 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,8 @@ version = 1 requires-python = ">=3.9" resolution-markers = [ "python_full_version < '3.10'", - "python_full_version >= '3.10' and python_full_version < '3.12'", + "python_full_version == '3.10.*'", + "python_full_version == '3.11.*'", "python_full_version == '3.12.*'", "python_full_version >= '3.13'", ] @@ -2225,7 +2226,8 @@ version = "1.24.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.10'", - "python_full_version >= '3.10' and python_full_version < '3.12'", + "python_full_version == '3.10.*'", + "python_full_version == '3.11.*'", ] sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } wheels = [ @@ -2789,7 +2791,7 @@ wheels = [ [[package]] name = "pytask" -version = "0.5.2.dev37+g53eb5ce" +version = "0.5.2.dev13+g5804f52.d20241108" source = { editable = "." } dependencies = [ { name = "attrs" }, From 3bc4bd0fd46e0569751040499cda27612b0a90ed Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 9 Nov 2024 16:03:06 +0100 Subject: [PATCH 02/14] fix. --- src/_pytask/console.py | 6 +----- src/_pytask/data_catalog.py | 5 +++++ src/_pytask/data_catalog_utils.py | 6 ++++++ 3 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 src/_pytask/data_catalog_utils.py diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 507b4603..7082f314 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -147,11 +147,7 @@ def format_node_name( """Format the name of a node.""" if isinstance(node, PPathNode): if node.name != node.path.as_posix(): - # Use getattr with default because on existing projects PNode.attribute does - # not exist. Remove with v0.6.0. - if data_catalog_name := getattr(node, "attributes", {}).get( - DATA_CATALOG_NAME_FIELD - ): + if data_catalog_name := node.attributes.get(DATA_CATALOG_NAME_FIELD): return Text(f"{data_catalog_name}::{node.name}") return Text(node.name) name = shorten_path(node.path, paths) diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 6e9761dd..65f0e4ea 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -93,6 +93,11 @@ def __attrs_post_init__(self) -> None: # Initialize the data catalog with persisted nodes from previous runs. for path in self.path.glob("*-node.pkl"): node = pickle.loads(path.read_bytes()) # noqa: S301 + + # To ease transition from nodes with and without attributes and it if it + # does not exist. Necessary since #650. Remove in v0.6.0. + if not hasattr(node, "attributes"): + node.attributes = {DATA_CATALOG_NAME_FIELD: self.name} self._entries[node.name] = node def __getitem__(self, name: str) -> PNode | PProvisionalNode: diff --git a/src/_pytask/data_catalog_utils.py b/src/_pytask/data_catalog_utils.py new file mode 100644 index 00000000..1dabc26d --- /dev/null +++ b/src/_pytask/data_catalog_utils.py @@ -0,0 +1,6 @@ +"""Contains utilities for the data catalog.""" + +__all__ = ["DATA_CATALOG_NAME_FIELD"] + + +DATA_CATALOG_NAME_FIELD = "catalog_name" From 0fb00d039c2fcd63966caafa7ab9635429040868 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Nov 2024 15:05:17 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/source/how_to_guides/writing_custom_nodes.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/how_to_guides/writing_custom_nodes.md b/docs/source/how_to_guides/writing_custom_nodes.md index 2204c0c9..813f3d96 100644 --- a/docs/source/how_to_guides/writing_custom_nodes.md +++ b/docs/source/how_to_guides/writing_custom_nodes.md @@ -142,9 +142,9 @@ databases. [^kedro] ## References -[^structural-subtyping]: Structural subtyping is similar to ABCs an approach in Python to enforce interfaces, - but it can be considered more pythonic since it is closer to duck typing. Hynek - Schlawack wrote a comprehensive +[^structural-subtyping]: Structural subtyping is similar to ABCs an approach in Python to enforce interfaces, but + it can be considered more pythonic since it is closer to duck typing. Hynek Schlawack + wrote a comprehensive [guide on subclassing](https://hynek.me/articles/python-subclassing-redux/) that features protocols under "Type 2". Glyph wrote an introduction to protocols called [I want a new duck](https://glyph.twistedmatrix.com/2020/07/new-duck.html). From 5b91510b97fdae089f38d1b70fc71cd962069703 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 10:58:36 +0100 Subject: [PATCH 04/14] Implement smooth transition to attributes on nodes. --- src/_pytask/click.py | 5 +++-- src/_pytask/collect.py | 11 +++++++++ src/_pytask/console.py | 7 +++++- src/_pytask/node_protocols.py | 2 -- src/_pytask/nodes.py | 8 +++++++ src/_pytask/warnings.py | 5 ++++- tests/test_node_protocols.py | 42 +++++++++++++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/_pytask/click.py b/src/_pytask/click.py index 6daab734..9b19eb93 100644 --- a/src/_pytask/click.py +++ b/src/_pytask/click.py @@ -24,6 +24,7 @@ from _pytask import __version__ as version from _pytask.console import console +from _pytask.console import create_panel_title if TYPE_CHECKING: from collections.abc import Sequence @@ -109,7 +110,7 @@ def format_help( console.print( Panel( commands_table, - title="[bold #f2f2f2]Commands[/]", + title=create_panel_title("Commands"), title_align="left", border_style="grey37", ) @@ -244,7 +245,7 @@ def _print_options(group_or_command: Command | DefaultGroup, ctx: Context) -> No console.print( Panel( options_table, - title="[bold #f2f2f2]Options[/]", + title=create_panel_title("Options"), title_align="left", border_style="grey37", ) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index f9bc70fd..e7044185 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -7,6 +7,7 @@ import os import sys import time +import warnings from contextlib import suppress from pathlib import Path from typing import TYPE_CHECKING @@ -385,6 +386,16 @@ def pytask_collect_node( # noqa: C901, PLR0912 """ node = node_info.value + if isinstance(node, (PNode, PProvisionalNode)) and not hasattr(node, "attributes"): + warnings.warn( + "PNode and PProvisionalNode will require an 'attributes' field starting " + "with pytask v0.6.0. It is a dictionary with any type of key and values " + "similar to PTask. See https://tinyurl.com/pytask-custom-nodes for more " + "information about adjusting your custom nodes.", + stacklevel=1, + category=FutureWarning, + ) + if isinstance(node, DirectoryNode): if node.root_dir is None: node.root_dir = path diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 7082f314..d643dc09 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -296,10 +296,15 @@ def create_summary_panel( return Panel( grid, - title="[bold #f2f2f2]Summary[/]", + title=create_panel_title("Summary"), expand=False, style="none", border_style=outcome_enum.FAIL.style if counts[outcome_enum.FAIL] else outcome_enum.SUCCESS.style, ) + + +def create_panel_title(title: str) -> Text: + """Create a title for a panel.""" + return Text(title, style="bold #f2f2f2") diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 7916324d..6a1d8fc0 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -21,7 +21,6 @@ class PNode(Protocol): """Protocol for nodes.""" name: str - attributes: dict[Any, Any] @property def signature(self) -> str: @@ -117,7 +116,6 @@ class PProvisionalNode(Protocol): """ name: str - attributes: dict[Any, Any] @property def signature(self) -> str: diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 4c93dde1..75b9a0ee 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -162,6 +162,8 @@ class PathNode(PPathNode): Name of the node which makes it identifiable in the DAG. path The path to the file. + attributes: dict[Any, Any] + A dictionary to store additional information of the task. """ @@ -220,6 +222,8 @@ class PythonNode(PNode): objects. The function should return either an integer or a string. node_info The infos acquired while collecting the node. + attributes: dict[Any, Any] + A dictionary to store additional information of the task. Examples -------- @@ -304,6 +308,8 @@ class PickleNode(PPathNode): Name of the node which makes it identifiable in the DAG. path The path to the file. + attributes: dict[Any, Any] + A dictionary to store additional information of the task. """ @@ -353,6 +359,8 @@ class DirectoryNode(PProvisionalNode): root_dir The pattern is interpreted relative to the path given by ``root_dir``. If ``root_dir = None``, it is the directory where the path is defined. + attributes: dict[Any, Any] + A dictionary to store additional information of the task. """ diff --git a/src/_pytask/warnings.py b/src/_pytask/warnings.py index 6701daee..3b2325e8 100644 --- a/src/_pytask/warnings.py +++ b/src/_pytask/warnings.py @@ -12,6 +12,7 @@ from rich.panel import Panel from _pytask.console import console +from _pytask.console import create_panel_title from _pytask.pluginmanager import hookimpl from _pytask.warnings_utils import WarningReport from _pytask.warnings_utils import catch_warnings_for_item @@ -82,7 +83,9 @@ def pytask_log_session_footer(session: Session) -> None: """Log warnings at the end of a session.""" if session.warnings: renderable = _WarningsRenderable(session.warnings) - panel = Panel(renderable, title="Warnings", style="warning") + panel = Panel( + renderable, title=create_panel_title("Warnings"), style="warning" + ) console.print(panel) diff --git a/tests/test_node_protocols.py b/tests/test_node_protocols.py index dbf627aa..d35fbb3e 100644 --- a/tests/test_node_protocols.py +++ b/tests/test_node_protocols.py @@ -13,6 +13,7 @@ def test_node_protocol_for_custom_nodes(runner, tmp_path): source = """ from typing import Annotated + from typing import Any from pytask import Product from attrs import define from pathlib import Path @@ -22,6 +23,7 @@ class CustomNode: name: str value: str signature: str = "id" + attributes: dict[Any, Any] = {} def state(self): return self.value @@ -43,12 +45,14 @@ def task_example( result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK assert tmp_path.joinpath("out.txt").read_text() == "text" + assert "FutureWarning" not in result.output @pytest.mark.end_to_end def test_node_protocol_for_custom_nodes_with_paths(runner, tmp_path): source = """ from typing import Annotated + from typing import Any from pytask import Product from pathlib import Path from attrs import define @@ -60,6 +64,7 @@ class PickleFile: path: Path value: Path signature: str = "id" + attributes: dict[Any, Any] = {} def state(self): return str(self.path.stat().st_mtime) @@ -87,3 +92,40 @@ def task_example( result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK assert tmp_path.joinpath("out.txt").read_text() == "text" + + +@pytest.mark.end_to_end +def test_node_protocol_for_custom_nodes_adding_attributes(runner, tmp_path): + source = """ + from typing import Annotated + from pytask import Product + from attrs import define + from pathlib import Path + + @define + class CustomNode: + name: str + value: str + signature: str = "id" + + def state(self): + return self.value + + def load(self, is_product): + return self.value + + def save(self, value): + self.value = value + + def task_example( + data = CustomNode("custom", "text"), + out: Annotated[Path, Product] = Path("out.txt"), + ) -> None: + out.write_text(data) + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert tmp_path.joinpath("out.txt").read_text() == "text" + assert "FutureWarning" in result.output From e0578647dbf6cd3039ed157cdda1ac33316f605c Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 11:05:27 +0100 Subject: [PATCH 05/14] Adjust docs. --- .../writing_custom_nodes_example_3_py310.py | 10 +++++++++- .../writing_custom_nodes_example_3_py38.py | 10 +++++++++- tests/test_collect_command.py | 2 ++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py b/docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py index ff01f495..e4d00b2e 100644 --- a/docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py +++ b/docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py @@ -15,12 +15,20 @@ class PickleNode: Name of the node which makes it identifiable in the DAG. path The path to the file. + attributes + Additional attributes that are stored in the node. """ - def __init__(self, name: str = "", path: Path | None = None) -> None: + def __init__( + self, + name: str = "", + path: Path | None = None, + attributes: dict[Any, Any] | None = None, + ) -> None: self.name = name self.path = path + self.attributes = attributes or {} @property def signature(self) -> str: diff --git a/docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py b/docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py index 98281374..d6499a64 100644 --- a/docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py +++ b/docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py @@ -16,12 +16,20 @@ class PickleNode: Name of the node which makes it identifiable in the DAG. path The path to the file. + attributes + Additional attributes that are stored in the node. """ - def __init__(self, name: str = "", path: Optional[Path] = None) -> None: + def __init__( + self, + name: str = "", + path: Optional[Path] = None, + attributes: Optional[dict[Any, Any]] = None, + ) -> None: self.name = name self.path = path + self.attributes = attributes or {} @property def signature(self) -> str: diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 0183b1be..1f6a14b9 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -517,6 +517,7 @@ def task_example( def test_node_protocol_for_custom_nodes_with_paths(runner, tmp_path): source = """ from typing import Annotated + from typing import Any from pytask import Product from pathlib import Path from attrs import define @@ -527,6 +528,7 @@ class PickleFile: name: str path: Path signature: str = "id" + attributes: dict[Any, Any] = {} def state(self): return str(self.path.stat().st_mtime) From 73e1d6a6cb1678ceae79e40d7b75bd18d2088b27 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 11:06:46 +0100 Subject: [PATCH 06/14] fix. --- src/_pytask/console.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/_pytask/console.py b/src/_pytask/console.py index d643dc09..87931983 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -43,6 +43,7 @@ __all__ = [ "console", + "create_panel_title", "create_summary_panel", "create_url_style_for_path", "create_url_style_for_task", From 95a9657f613d039da803806c6a4e7e7989fbe060 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 11:09:00 +0100 Subject: [PATCH 07/14] Add to changes. --- docs/source/changes.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/changes.md b/docs/source/changes.md index ec245536..97008dec 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -5,6 +5,12 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask) and [Anaconda.org](https://anaconda.org/conda-forge/pytask). +## 0.5.3 - 2025-xx-xx + +- {pull}`650` allows to identify from which data catalog a node is coming from. The + feature is enabled by adding an `attributes` field on `PNode` and `PProvisionalNode` + that will be mandatory on custom nodes in v0.6.0. + ## 0.5.2 - 2024-12-19 - {pull}`633` adds support for Python 3.13 and drops support for 3.8. From 14d82c5b2452be921c17fcc4e76051e04f190894 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 11:23:13 +0100 Subject: [PATCH 08/14] Add more warnings: --- src/_pytask/collect.py | 11 ++--------- src/_pytask/console.py | 6 ++++-- src/_pytask/data_catalog.py | 9 +++++++-- src/_pytask/node_protocols.py | 12 ++++++++++++ 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index e7044185..c9676f79 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -30,7 +30,7 @@ from _pytask.mark import MarkGenerator from _pytask.mark_utils import get_all_marks from _pytask.mark_utils import has_mark -from _pytask.node_protocols import PNode +from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTask @@ -387,14 +387,7 @@ def pytask_collect_node( # noqa: C901, PLR0912 node = node_info.value if isinstance(node, (PNode, PProvisionalNode)) and not hasattr(node, "attributes"): - warnings.warn( - "PNode and PProvisionalNode will require an 'attributes' field starting " - "with pytask v0.6.0. It is a dictionary with any type of key and values " - "similar to PTask. See https://tinyurl.com/pytask-custom-nodes for more " - "information about adjusting your custom nodes.", - stacklevel=1, - category=FutureWarning, - ) + warn_about_upcoming_attributes_field_on_nodes() if isinstance(node, DirectoryNode): if node.root_dir is None: diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 87931983..991b7a45 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -12,6 +12,7 @@ from typing import Any from typing import Callable from typing import Literal +import warnings from rich.console import Console from rich.console import RenderableType @@ -25,7 +26,7 @@ from rich.tree import Tree from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD -from _pytask.node_protocols import PNode +from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTaskWithPath @@ -148,8 +149,9 @@ def format_node_name( """Format the name of a node.""" if isinstance(node, PPathNode): if node.name != node.path.as_posix(): - if data_catalog_name := node.attributes.get(DATA_CATALOG_NAME_FIELD): + if data_catalog_name := getattr(node, "attributes", {}).get(DATA_CATALOG_NAME_FIELD): return Text(f"{data_catalog_name}::{node.name}") + warn_about_upcoming_attributes_field_on_nodes() return Text(node.name) name = shorten_path(node.path, paths) return Text(name) diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 65f0e4ea..36c2fbfb 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -20,7 +20,7 @@ from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD from _pytask.exceptions import NodeNotCollectedError from _pytask.models import NodeInfo -from _pytask.node_protocols import PNode +from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.nodes import PickleNode @@ -139,4 +139,9 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None: msg = f"{node!r} cannot be parsed." raise NodeNotCollectedError(msg) self._entries[name] = collected_node - self._entries[name].attributes[DATA_CATALOG_NAME_FIELD] = self.name + + node = self._entries[name] + if hasattr(node, "attributes"): + node.attributes[DATA_CATALOG_NAME_FIELD] = self.name + else: + warn_about_upcoming_attributes_field_on_nodes() diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 6a1d8fc0..1e1d8f9d 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -5,6 +5,7 @@ from typing import Callable from typing import Protocol from typing import runtime_checkable +import warnings if TYPE_CHECKING: from pathlib import Path @@ -138,3 +139,14 @@ def load(self, is_product: bool = False) -> Any: # pragma: no cover def collect(self) -> list[Any]: """Collect the objects that are defined by the provisional nodes.""" + + +def warn_about_upcoming_attributes_field_on_nodes() -> None: + warnings.warn( + "PNode and PProvisionalNode will require an 'attributes' field starting " + "with pytask v0.6.0. It is a dictionary with any type of key and values " + "similar to PTask. See https://tinyurl.com/pytask-custom-nodes for more " + "information about adjusting your custom nodes.", + stacklevel=1, + category=FutureWarning, + ) From 7f004e7ae5c63c947c3398d94e7e8f2ea826e8b2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Dec 2024 10:23:29 +0000 Subject: [PATCH 09/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/_pytask/collect.py | 4 ++-- src/_pytask/console.py | 8 +++++--- src/_pytask/data_catalog.py | 3 ++- src/_pytask/node_protocols.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index c9676f79..0df50442 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -7,7 +7,6 @@ import os import sys import time -import warnings from contextlib import suppress from pathlib import Path from typing import TYPE_CHECKING @@ -30,10 +29,11 @@ from _pytask.mark import MarkGenerator from _pytask.mark_utils import get_all_marks from _pytask.mark_utils import has_mark -from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes +from _pytask.node_protocols import PNode from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTask +from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes from _pytask.nodes import DirectoryNode from _pytask.nodes import PathNode from _pytask.nodes import PythonNode diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 991b7a45..f18544cc 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -12,7 +12,6 @@ from typing import Any from typing import Callable from typing import Literal -import warnings from rich.console import Console from rich.console import RenderableType @@ -26,10 +25,11 @@ from rich.tree import Tree from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD -from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes +from _pytask.node_protocols import PNode from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTaskWithPath +from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes from _pytask.path import shorten_path if TYPE_CHECKING: @@ -149,7 +149,9 @@ def format_node_name( """Format the name of a node.""" if isinstance(node, PPathNode): if node.name != node.path.as_posix(): - if data_catalog_name := getattr(node, "attributes", {}).get(DATA_CATALOG_NAME_FIELD): + if data_catalog_name := getattr(node, "attributes", {}).get( + DATA_CATALOG_NAME_FIELD + ): return Text(f"{data_catalog_name}::{node.name}") warn_about_upcoming_attributes_field_on_nodes() return Text(node.name) diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 36c2fbfb..d23460c0 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -20,9 +20,10 @@ from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD from _pytask.exceptions import NodeNotCollectedError from _pytask.models import NodeInfo -from _pytask.node_protocols import PNode, warn_about_upcoming_attributes_field_on_nodes +from _pytask.node_protocols import PNode from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode +from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes from _pytask.nodes import PickleNode from _pytask.pluginmanager import storage from _pytask.session import Session diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 1e1d8f9d..56b3ab8f 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -1,11 +1,11 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING from typing import Any from typing import Callable from typing import Protocol from typing import runtime_checkable -import warnings if TYPE_CHECKING: from pathlib import Path From cbf8711f89e9e67e8658e31affe4b6fda7fc0b39 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 11:25:44 +0100 Subject: [PATCH 10/14] fix. --- src/_pytask/data_catalog.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 36c2fbfb..ce72626a 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -93,10 +93,9 @@ def __attrs_post_init__(self) -> None: # Initialize the data catalog with persisted nodes from previous runs. for path in self.path.glob("*-node.pkl"): node = pickle.loads(path.read_bytes()) # noqa: S301 - - # To ease transition from nodes with and without attributes and it if it - # does not exist. Necessary since #650. Remove in v0.6.0. if not hasattr(node, "attributes"): + warn_about_upcoming_attributes_field_on_nodes() + else: node.attributes = {DATA_CATALOG_NAME_FIELD: self.name} self._entries[node.name] = node From 7241bca6e6f2b5e2448a6a9857761fa1f0cccd7f Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 15:54:58 +0100 Subject: [PATCH 11/14] Don't warn while formatting. --- src/_pytask/console.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/_pytask/console.py b/src/_pytask/console.py index f18544cc..b0954ec6 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -153,7 +153,6 @@ def format_node_name( DATA_CATALOG_NAME_FIELD ): return Text(f"{data_catalog_name}::{node.name}") - warn_about_upcoming_attributes_field_on_nodes() return Text(node.name) name = shorten_path(node.path, paths) return Text(name) From 1513f72bce537808ee0027a43fd56ea616073e7f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Dec 2024 14:55:18 +0000 Subject: [PATCH 12/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/_pytask/console.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/_pytask/console.py b/src/_pytask/console.py index b0954ec6..a7e9d5ac 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -29,7 +29,6 @@ from _pytask.node_protocols import PPathNode from _pytask.node_protocols import PProvisionalNode from _pytask.node_protocols import PTaskWithPath -from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes from _pytask.path import shorten_path if TYPE_CHECKING: From c056b6b5558371342db5151c07fa11ebe401bf7b Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 16:12:40 +0100 Subject: [PATCH 13/14] Also format any other node if it has a data catalog name. --- src/_pytask/console.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/_pytask/console.py b/src/_pytask/console.py index a7e9d5ac..ed932d3f 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -148,6 +148,7 @@ def format_node_name( """Format the name of a node.""" if isinstance(node, PPathNode): if node.name != node.path.as_posix(): + # For example, any node added to a data catalog has its name set to the key. if data_catalog_name := getattr(node, "attributes", {}).get( DATA_CATALOG_NAME_FIELD ): @@ -162,6 +163,11 @@ def format_node_name( reduced_name = shorten_path(Path(path), paths) return Text(f"{reduced_name}::{rest}") + # Python or other custom nodes that are not PathNodes. + if data_catalog_name := getattr(node, "attributes", {}).get( + DATA_CATALOG_NAME_FIELD + ): + return Text(f"{data_catalog_name}::{node.name}") return Text(node.name) From 91a58087139e6757c4c67631dc06c8311db94ea2 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 24 Dec 2024 16:32:04 +0100 Subject: [PATCH 14/14] add thank you. --- docs/source/changes.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/changes.md b/docs/source/changes.md index 97008dec..94cdc796 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -7,9 +7,10 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## 0.5.3 - 2025-xx-xx -- {pull}`650` allows to identify from which data catalog a node is coming from. The - feature is enabled by adding an `attributes` field on `PNode` and `PProvisionalNode` - that will be mandatory on custom nodes in v0.6.0. +- {pull}`650` allows to identify from which data catalog a node is coming from. Thanks + to {user}`felixschmitz` for the report! The feature is enabled by adding an + `attributes` field on `PNode` and `PProvisionalNode` that will be mandatory on custom + nodes in v0.6.0. ## 0.5.2 - 2024-12-19