Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions src/osekit/public_api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from pathlib import Path
from typing import TYPE_CHECKING, TypeVar

from pandas import Timestamp

from osekit import config
from osekit.config import DPDEFAULT, resample_quality_settings
from osekit.core_api import audio_file_manager as afm
Expand All @@ -32,6 +30,11 @@
from osekit.utils.path_utils import move_tree

if TYPE_CHECKING:
from collections.abc import Iterable
from os import PathLike

from pandas import Timestamp

from osekit.core_api.audio_file import AudioFile
from osekit.utils.job import JobBuilder

Expand Down Expand Up @@ -104,6 +107,7 @@ def __init__( # noqa: PLR0913
self.job_builder = job_builder
self.instrument = instrument
self.first_file_begin = first_file_begin
self.logger = None

@property
def origin_files(self) -> list[AudioFile] | None:
Expand All @@ -124,7 +128,9 @@ def analyses(self) -> list[str]:
"""Return the list of the names of the analyses ran with this Dataset."""
return list({dataset["analysis"] for dataset in self.datasets.values()})

def build(self) -> None:
def build(
self,
) -> None:
"""Build the Dataset.

Building a dataset moves the original audio files to a specific folder
Expand Down Expand Up @@ -170,7 +176,45 @@ def build(self) -> None:

self.logger.info("Build done!")

def build_from_files(
self,
files: Iterable[PathLike | str],
*,
move_files: bool = False,
) -> None:
"""Build the dataset from the specified files.

The files will be copied (or moved) to the dataset.folder folder.

Parameters
----------
files: Iterable[PathLike|str]
Files that are included in the dataset.
move_files: bool
If set to True, the files will be moved (rather than copied) in the dataset
folder.

"""
self._create_logger()

msg = f"{'Moving' if move_files else 'Copying'} files to the dataset folder."
self.logger.info(msg)

if not self.folder.exists():
self.folder.mkdir(mode=DPDEFAULT)

for file in map(Path, files):
destination = self.folder / file.name
if move_files:
file.replace(destination)
else:
shutil.copyfile(file, destination)

self.build()

def _create_logger(self) -> None:
if self.logger:
return
if not logging.getLogger("dataset").handlers:
message = (
"Logging has not been configured. "
Expand All @@ -183,7 +227,7 @@ def _create_logger(self) -> None:

logs_directory = self.folder / "log"
if not logs_directory.exists():
logs_directory.mkdir(mode=DPDEFAULT)
logs_directory.mkdir(mode=DPDEFAULT, parents=True)
self.logger = logging.getLogger("dataset").getChild(self.folder.name)
file_handler = logging.FileHandler(logs_directory / "logs.log", mode="w")
file_handler.setFormatter(
Expand Down Expand Up @@ -217,6 +261,7 @@ def reset(self) -> None:
file.unlink()

self.datasets = {}
self.logger = None

def get_analysis_audiodataset(self, analysis: Analysis) -> AudioDataset:
"""Return an AudioDataset created from the analysis parameters.
Expand Down