diff --git a/docs/conf.py b/docs/conf.py
index 6d044def4..13e77547b 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,7 +15,11 @@
 import inspect
 import os
 import sys
+import tomllib
 import warnings
+from pathlib import Path
+
+PROJECT_ROOT = (Path(__file__).parent / "..").resolve()
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -180,15 +184,20 @@
 # ----------------
 numpydoc_class_members_toctree = False  # https://stackoverflow.com/a/73294408
 
-# full list of numpydoc error codes: https://numpydoc.readthedocs.io/en/latest/validation.html
-numpydoc_validation_checks = {
-    "GL05",
-    "GL06",
-    "GL07",
-    "GL10",
-    "PR05",
-    "PR10",
-    "RT02",
+with open(PROJECT_ROOT / "tools/tool-data.toml", "rb") as f:
+    numpydoc_skip_errors = tomllib.load(f)["numpydoc_skip_errors"]
+
+numpydoc_validation_checks = {"all"} | set(numpydoc_skip_errors)
+numpydoc_validation_exclude = {  # regex to ignore during docstring check
+    r"\.__getitem__",
+    r"\.__contains__",
+    r"\.__hash__",
+    r"\.__mul__",
+    r"\.__sub__",
+    r"\.__add__",
+    r"\.__iter__",
+    r"\.__div__",
+    r"\.__neg__",
 }
 
 # Add any paths that contain custom static files (such as style sheets) here,
diff --git a/pixi.toml b/pixi.toml
index f67712291..3c040ceb7 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -75,7 +75,7 @@ gsw = "*"
 
 [feature.docs.dependencies]
 parcels = { path = "." }
-numpydoc = "!=1.9.0"
+numpydoc = "*"
 myst-nb = "*"
 ipython = "*"
 sphinx = "*"
@@ -97,6 +97,12 @@ pre_commit = "*"
 [feature.pre-commit.tasks]
 lint = "pre-commit run --all-files"
 
+[feature.numpydoc.dependencies]
+numpydoc = "*"
+
+[feature.numpydoc.tasks]
+numpydoc-lint = "python tools/numpydoc-public-api.py"
+
 [feature.typing.dependencies]
 mypy = "*"
 lxml = "*" # in CI
@@ -112,6 +118,7 @@ default = { features = [
   "notebooks",
   "typing",
   "pre-commit",
+  "numpydoc",
 ], solve-group = "main" }
 test = { features = ["test"], solve-group = "main" }
 test-minimum = { features = ["test", "minimum"] }
diff --git a/pyproject.toml b/pyproject.toml
index 29e6cb191..ec5508606 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,7 +80,7 @@ select = [
     "UP",  # pyupgrade
     "LOG",  # logging
     "ICN",  # import conventions
-    "G",  # logging-format
+    # "G",  # logging-format
     "RUF",  # ruff
     "ISC001",  # single-line-implicit-string-concatenation
     "TID",  # flake8-tidy-imports
diff --git a/tools/numpydoc-public-api.py b/tools/numpydoc-public-api.py
new file mode 100644
index 000000000..b7aef228d
--- /dev/null
+++ b/tools/numpydoc-public-api.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+"""A script that can be quickly run that explores the public API of Parcels
+and validates docstrings along the way according to the numpydoc conventions.
+
+This script is a best attempt, and is meant as a first line of defence (compared
+to the sphinx numpydoc integration which is the ground truth - as those are the
+docstrings that end up in the documentation).
+"""
+
+import functools
+import importlib
+import logging
+import sys
+import tomllib
+import types
+from pathlib import Path
+
+from numpydoc.validate import validate
+
+logger = logging.getLogger("numpydoc-public-api")
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
+logger.addHandler(handler)
+
+PROJECT_ROOT = (Path(__file__).parent / "..").resolve()
+PUBLIC_MODULES = ["parcels", "parcels.interpolators"]
+ROOT_PACKAGE = "parcels"
+
+
+def is_built_in(type_or_instance: type | object) -> bool:
+    """Return whether the type (or the instance's type) is defined in ``builtins``."""
+    if isinstance(type_or_instance, type):
+        return type_or_instance.__module__ == "builtins"
+    return type_or_instance.__class__.__module__ == "builtins"
+
+
+def walk_module(module_str: str, public_api: list[str] | None = None) -> list[str]:
+    """Collect the dotted names exported through ``__all__`` of a module.
+
+    Submodules are walked recursively, functions and classes are recorded
+    (classes together with the public attributes they introduce), and
+    builtin objects are reported but skipped.
+
+    Parameters
+    ----------
+    module_str : str
+        Importable dotted name of the module to walk.
+    public_api : list of str, optional
+        List to append to (mutated in place). A new list is created if omitted.
+
+    Returns
+    -------
+    list of str
+        ``public_api`` with the names found in this module appended.
+    """
+    if public_api is None:
+        public_api = []
+
+    module = importlib.import_module(module_str)
+    try:
+        all_ = module.__all__
+    except AttributeError:
+        print(f"No __all__ variable found in public module {module_str!r}")
+        return public_api
+
+    if module_str not in public_api:
+        public_api.append(module_str)
+    for item_str in all_:
+        item = getattr(module, item_str)
+        # A module object's own type lives in ``builtins``, so the branches must be
+        # mutually exclusive or every walked submodule is also reported as a builtin.
+        if isinstance(item, types.ModuleType):
+            walk_module(f"{module_str}.{item_str}", public_api)
+        elif isinstance(item, types.FunctionType):
+            public_api.append(f"{module_str}.{item_str}")
+        elif is_built_in(item):
+            print(f"Found builtin at '{module_str}.{item_str}' of type {type(item)}")
+        elif isinstance(item, type):
+            public_api.append(f"{module_str}.{item_str}")
+            walk_class(module_str, item, public_api)
+        else:
+            logger.info(
+                f"Encountered unexpected public object at '{module_str}.{item_str}' of {item!r} in public API. Don't know how to handle with numpydoc - ignoring."
+            )
+
+    return public_api
+
+
+def get_public_class_attrs(class_: type) -> set[str]:
+    """Return the names of all attributes of ``class_`` that do not start with an underscore."""
+    return {a for a in dir(class_) if not a.startswith("_")}
+
+
+def walk_class(module_str: str, class_: type, public_api: list[str]) -> list[str]:
+    """Append the public attributes introduced by ``class_`` itself to ``public_api``.
+
+    Attributes that are already public on any direct base class are left out.
+    The names are appended in sorted order so that the output is reproducible.
+    """
+    class_str = class_.__name__
+
+    # attributes that were introduced by this class specifically - not from inheritance.
+    # ``set.union`` with an empty initial value merges any number of bases
+    # (``set.add`` raised TypeError as soon as a class had more than one base).
+    inherited = functools.reduce(
+        set.union, (get_public_class_attrs(base) for base in class_.__bases__), set()
+    )
+    attrs = sorted(get_public_class_attrs(class_) - inherited)
+
+    public_api.extend([f"{module_str}.{class_str}.{attr_str}" for attr_str in attrs])
+    return public_api
+
+
+def main():
+    """Validate the docstrings of the public API; exit non-zero if any check fails."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Validate numpydoc docstrings in the public API")
+    parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity (can be repeated)")
+    args = parser.parse_args()
+
+    # Set logging level based on verbosity: 0=WARNING, 1=INFO, 2+=DEBUG
+    if args.verbose == 0:
+        log_level = logging.WARNING
+    elif args.verbose == 1:
+        log_level = logging.INFO
+    else:
+        log_level = logging.DEBUG
+
+    logger.setLevel(log_level)
+
+    with open(PROJECT_ROOT / "tools/tool-data.toml", "rb") as f:
+        skip_errors = set(tomllib.load(f)["numpydoc_skip_errors"])
+
+    public_api: list[str] = []
+    for module in PUBLIC_MODULES:
+        walk_module(module, public_api)
+    # A module may be listed in PUBLIC_MODULES and also be reachable through a
+    # parent's ``__all__``; validate each name once so errors are not double counted.
+    public_api = list(dict.fromkeys(public_api))
+
+    errors = 0
+    for item in public_api:
+        logger.info(f"Validating {item}")
+        try:
+            res = validate(item)
+        except (AttributeError, StopIteration) as e:
+            logger.warning(f"Could not process {item!r}. Encountered error. {e!r}")
+            continue
+        if res["type"] in ("module", "float", "int", "dict"):
+            continue
+        for err in res["errors"]:
+            if err[0] not in skip_errors:
+                print(f"{item}: {err}")
+                errors += 1
+    # Exit statuses are truncated to 8 bits on POSIX: a raw count of 256 would be
+    # reported as success. Cap it so any error yields a non-zero status.
+    sys.exit(min(errors, 255))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/tool-data.toml b/tools/tool-data.toml
new file mode 100644
index 000000000..b0345d144
--- /dev/null
+++ b/tools/tool-data.toml
@@ -0,0 +1,46 @@
+# full list of numpydoc error codes: https://numpydoc.readthedocs.io/en/latest/validation.html
+numpydoc_skip_errors = [
+    "SA01",  # Parcels doesn't require the "See also" section
+    "SA04",  # Missing description for See Also "{reference_name}" reference
+    "ES01",  # We don't require the extended summary for all docstrings
+    "EX01",  # We don't require the "Examples" section for all docstrings
+    "SS06",  # Not possible to make all summaries one line
+
+    # To be fixed up
+    "GL03",  # Double line break found; please use only one blank line to separate sections or paragraphs, and do not leave blank lines at the end of docstrings
+    "GL05",  # Tabs found at the start of line "{line_with_tabs}", please use whitespace only
+    "GL06",  # Found unknown section "{section}". Allowed sections are: {allowed_sections}
+    "GL07",  # Sections are in the wrong order. Correct order is: {correct_sections}
+    "GL08",  # The object does not have a docstring
+    "SS01",  # No summary found (a short summary in a single line should be present at the beginning of the docstring)
+    "SS02",  # Summary does not start with a capital letter
+    "SS03",  # Summary does not end with a period
+    "SS04",  # Summary contains heading whitespaces
+    "SS05",  # Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
+    "PR01",  # Parameters {missing_params} not documented
+    "PR02",  # Unknown parameters {unknown_params}
+    "PR03",  # Wrong parameters order. Actual: {actual_params}. Documented: {documented_params}
+    "SA02",  # Missing period at end of description for See Also "{reference_name}" reference
+    "SA03",  # Description should be capitalized for See Also
+
+    #? Might conflict with Ruff rules. Needs more testing... Enable ignore if they conflict
+    # "GL01",  # Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
+    # "GL02",  # Closing quotes should be placed in the line after the last text in the docstring (do not close the quotes in the same line as the text, or leave a blank line between the last text and the quotes)
+
+    # TODO consider whether to continue ignoring the following
+    "GL09",  # Deprecation warning should precede extended summary
+    "GL10",  # reST directives {directives} must be followed by two colons
+    "PR04",  # Parameter "{param_name}" has no type
+    "PR05",  # Parameter "{param_name}" type should not finish with "."
+    "PR06",  # Parameter "{param_name}" type should use "{right_type}" instead of "{wrong_type}"
+    "PR07",  # Parameter "{param_name}" has no description
+    "PR08",  # Parameter "{param_name}" description should start with a capital letter
+    "PR09",  # Parameter "{param_name}" description should finish with "."
+    "PR10",  # Parameter "{param_name}" requires a space before the colon separating the parameter name and type
+    "RT01",  # No Returns section found
+    "RT02",  # The first line of the Returns section should contain only the type, unless multiple values are being returned
+    "RT03",  # Return value has no description
+    "RT04",  # Return value description should start with a capital letter
+    "RT05",  # Return value description should finish with "."
+    "YD01",  # No Yields section found
+]