From 357af0005a33bb1f435f32837e477c79a1d96ad2 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 7 Jan 2026 17:07:19 +0100 Subject: [PATCH 1/3] add public api Dataset.build_from_files method --- src/osekit/public_api/dataset.py | 42 +++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/osekit/public_api/dataset.py b/src/osekit/public_api/dataset.py index e284350d..ec992650 100644 --- a/src/osekit/public_api/dataset.py +++ b/src/osekit/public_api/dataset.py @@ -10,11 +10,10 @@ import logging import shutil +from collections.abc import Iterable from pathlib import Path from typing import TYPE_CHECKING, TypeVar -from pandas import Timestamp - from osekit import config from osekit.config import DPDEFAULT, resample_quality_settings from osekit.core_api import audio_file_manager as afm @@ -32,6 +31,10 @@ from osekit.utils.path_utils import move_tree if TYPE_CHECKING: + from os import PathLike + + from pandas import Timestamp + from osekit.core_api.audio_file import AudioFile from osekit.utils.job import JobBuilder @@ -124,7 +127,9 @@ def analyses(self) -> list[str]: """Return the list of the names of the analyses ran with this Dataset.""" return list({dataset["analysis"] for dataset in self.datasets.values()}) - def build(self) -> None: + def build( + self, + ) -> None: """Build the Dataset. Building a dataset moves the original audio files to a specific folder @@ -170,6 +175,37 @@ def build(self) -> None: self.logger.info("Build done!") + def build_from_files( + self, + files: Iterable[PathLike | str], + *, + move_files: bool = False, + ) -> None: + """Build the dataset from the specified files. + + The files will be copied (or moved) to the dataset.folder folder. + + Parameters + ---------- + files: Iterable[PathLike|str] + Files that are included in the dataset. + move_files: bool + If set to True, the files will be moved (rather than copied) in the dataset + folder. + + """ + if not self.folder.exists(): + self.folder.mkdir(mode=DPDEFAULT) + + for file in map(Path, files): + destination = self.folder / file.name + if move_files: + file.replace(destination) + else: + shutil.copyfile(file, destination) + + self.build() + def _create_logger(self) -> None: if not logging.getLogger("dataset").handlers: message = ( From 1e250cd1ffd103e4e17cdb37e02089c169c73994 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 7 Jan 2026 17:13:54 +0100 Subject: [PATCH 2/3] setup dataset logger before file manipulation --- src/osekit/public_api/dataset.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/osekit/public_api/dataset.py b/src/osekit/public_api/dataset.py index ec992650..f9ce0d95 100644 --- a/src/osekit/public_api/dataset.py +++ b/src/osekit/public_api/dataset.py @@ -10,7 +10,6 @@ import logging import shutil -from collections.abc import Iterable from pathlib import Path from typing import TYPE_CHECKING, TypeVar @@ -31,6 +30,7 @@ from osekit.utils.path_utils import move_tree if TYPE_CHECKING: + from collections.abc import Iterable from os import PathLike from pandas import Timestamp @@ -107,6 +107,7 @@ def __init__( # noqa: PLR0913 self.job_builder = job_builder self.instrument = instrument self.first_file_begin = first_file_begin + self.logger = None @property def origin_files(self) -> list[AudioFile] | None: @@ -194,6 +195,11 @@ def build_from_files( folder. """ + self._create_logger() + + msg = f"{'Moving' if move_files else 'Copying'} files to the dataset folder." + self.logger.info(msg) + if not self.folder.exists(): self.folder.mkdir(mode=DPDEFAULT) @@ -207,6 +213,8 @@ def build_from_files( self.build() def _create_logger(self) -> None: + if self.logger: + return if not logging.getLogger("dataset").handlers: message = ( "Logging has not been configured. " @@ -219,7 +227,7 @@ def _create_logger(self) -> None: logs_directory = self.folder / "log" if not logs_directory.exists(): - logs_directory.mkdir(mode=DPDEFAULT) + logs_directory.mkdir(mode=DPDEFAULT, parents=True) self.logger = logging.getLogger("dataset").getChild(self.folder.name) file_handler = logging.FileHandler(logs_directory / "logs.log", mode="w") file_handler.setFormatter( From 30e5839076fd1ee2a0c901c724b0902d5c2d983f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 7 Jan 2026 17:39:37 +0100 Subject: [PATCH 3/3] remove dataset logger on Dataset.reset() call --- src/osekit/public_api/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/osekit/public_api/dataset.py b/src/osekit/public_api/dataset.py index f9ce0d95..790ac2aa 100644 --- a/src/osekit/public_api/dataset.py +++ b/src/osekit/public_api/dataset.py @@ -261,6 +261,7 @@ def reset(self) -> None: file.unlink() self.datasets = {} + self.logger = None def get_analysis_audiodataset(self, analysis: Analysis) -> AudioDataset: """Return an AudioDataset created from the analysis parameters.