1 change: 1 addition & 0 deletions CHANGES.md
@@ -1,6 +1,7 @@
## Changes in 0.0.3 (under development)

- Add the command-line conversion tool convert-enmap (#9)
- Add support for backend methods `open_datatree` and `open_groups_as_dict` (#2); see the usage sketch below
- Scale reflectance and handle nodata in reader (#8)
- Include band center wavelengths in data and/or metadata (#6)
- Include quicklook data (#7)
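A minimal usage sketch of the new backend methods (assumptions: the backend is registered under the engine name `enmap`, and the archive path is a hypothetical example):

    import xarray as xr

    # Open an EnMAP product archive as a datatree, one group per product.
    dt = xr.open_datatree("ENMAP_PRODUCT.ZIP", engine="enmap")

    # Or obtain the groups as a plain dict of datasets.
    groups = xr.open_groups("ENMAP_PRODUCT.ZIP", engine="enmap")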
71 changes: 62 additions & 9 deletions xarray_enmap/cli.py
@@ -10,6 +10,10 @@
import shutil
import sys
import tempfile
from collections.abc import Iterable

import xarray

from . import xarray_enmap

LOGGER = logging.getLogger(__name__)
@@ -55,6 +59,13 @@ def main():
help="Higher Zarr output compression. ~25%% smaller than default compression. "
"Compression process (but not decompression) is much slower.",
)
parser.add_argument(
"--datatree",
"-d",
action="store_true",
help="Whether to write the data as datatree. This parameter is only considered when "
"the parameter zarr-output is given.",
)
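# Hypothetical example invocation (flag spellings assumed from the help
# texts above, not verified against the full parser definition):
#
#     convert-enmap --zarr-output ./out --datatree ENMAP_PRODUCT.ZIP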
parser.add_argument("--verbose", "-v", action="count", default=0)
args = parser.parse_args()
scale_reflectance = not args.raw_reflectance
@@ -79,6 +90,7 @@ def loglevel(verbosity):
args.tiff_output,
temp_dir,
args.compress,
args.datatree,
scale_reflectance,
)
else:
@@ -91,6 +103,7 @@ def loglevel(verbosity):
temp_dir,
args.compress,
args.extract_only,
args.datatree,
scale_reflectance,
)

@@ -101,6 +114,7 @@ def process(
output_dir_tiff: str,
temp_dir: str,
compress: bool = False,
open_as_datatree: bool = False,
scale_reflectance: bool = True,
):
if output_dir_zarr is output_dir_tiff is None:
@@ -130,13 +144,16 @@ def process(
raise ValueError(
f"{input_filename} is neither a file nor a directory."
)
for data_dir in data_dirs:
if output_dir_tiff is not None:
shutil.copytree(
data_dir, pathlib.Path(output_dir_tiff) / data_dir.name
)
if output_dir_zarr is not None:
write_zarr(data_dir, output_dir_zarr, compress, scale_reflectance)
for data_dir in data_dirs:
if output_dir_tiff is not None:
shutil.copytree(
data_dir, pathlib.Path(output_dir_tiff) / data_dir.name
)
if output_dir_zarr is not None and not open_as_datatree:
write_zarr(data_dir, output_dir_zarr, compress, scale_reflectance)
if output_dir_zarr is not None and open_as_datatree:
# Write all products as groups of a single datatree store; GeoTIFF
# output, if requested, is unaffected by this option.
write_datatree_as_zarr(
input_path, data_dirs, output_dir_zarr, compress, scale_reflectance
)


def write_zarr(
@@ -154,9 +171,45 @@ def write_zarr(
ds = xarray_enmap.read_dataset_from_inner_directory(
data_dir, scale_reflectance
)
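# One Zarr store per product, named after the product directory.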
store_path = pathlib.Path(output_dir) / (data_dir.name + ".zarr")
zarr_args = _get_zarr_args(compress, store_path)
ds.to_zarr(**zarr_args)


def write_datatree_as_zarr(
input_path: pathlib.Path,
data_dirs: Iterable[pathlib.Path | str],
output_dir: str,
compress: bool = False,
scale_reflectance: bool = True,
):
name = input_path.name
LOGGER.info(f"Writing {name} to a Zarr archive...")
suffixes = input_path.suffixes
suffixes.reverse()
for suffix in suffixes:
name = name.removesuffix(suffix)
ensure_module_importable("zarr")
LOGGER.info(
f"Using {'scaled' if scale_reflectance else 'unscaled'} "
f"reflectance."
)
groups = {}
for data_dir in data_dirs:
group_name = pathlib.Path(data_dir).name
groups[group_name] = xarray_enmap.read_dataset_from_inner_directory(
data_dir, scale_reflectance
)
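# Assemble a single datatree with one group per extracted product.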
dt = xarray.DataTree.from_dict(groups)
store_path = pathlib.Path(output_dir) / (name + ".zarr")
zarr_args = _get_zarr_args(compress, store_path)
dt.to_zarr(**zarr_args)


def _get_zarr_args(compress: bool, store_path: pathlib.Path):
zarr_args = {
"zarr_format": 2,
"store": pathlib.Path(output_dir) / (data_dir.name + ".zarr"),
"store": store_path
}
if compress:
ensure_module_importable("numcodecs")
@@ -169,7 +222,7 @@ def write_zarr(
)
}
}
ds.to_zarr(**zarr_args)
return zarr_args


def ensure_module_importable(module_name: str):
62 changes: 61 additions & 1 deletion xarray_enmap/xarray_enmap.py
@@ -18,8 +18,10 @@
import zipfile

import shapely
import xarray as xr

from xarray import DataTree, Dataset
from xarray.backends import AbstractDataStore

LOGGER = logging.getLogger(__name__)

@@ -42,6 +44,7 @@

class EnmapEntrypoint(xr.backends.BackendEntrypoint):

supports_groups = True
temp_dir = None

def open_dataset(
@@ -72,6 +75,34 @@ def open_dataset(
ds.set_close(self.close)
return ds

def open_datatree(
self,
filename_or_obj: str | os.PathLike[Any],
*,
drop_variables: str | Iterable[str] | None = None,
) -> DataTree:
groups = self.open_groups_as_dict(
filename_or_obj, drop_variables=drop_variables
)
return DataTree.from_dict(groups)

def open_groups_as_dict(
self,
filename_or_obj: str | os.PathLike[Any],
*,
drop_variables: str | Iterable[str] | None = None,
) -> dict[str, Dataset]:
self.temp_dir = tempfile.mkdtemp(prefix="xarray-enmap-")
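# The extracted files live in this temporary directory until close()
# removes it, so the returned datasets are only readable until then.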
path = pathlib.Path(filename_or_obj)
if path.is_file():
groups = read_groups_from_archive(filename_or_obj, self.temp_dir)
elif path.is_dir():
groups = read_groups_from_unknown_directory(path, self.temp_dir)
else:
raise ValueError(
f"{filename_or_obj} is neither a path nor a directory."
)
return groups

def close(self):
if self.temp_dir:
shutil.rmtree(self.temp_dir)
@@ -137,6 +168,35 @@ def read_dataset_from_inner_directory(
return ds


def read_groups_from_archive(
input_filename: str | os.PathLike[Any], temp_dir: str
) -> dict[str, Dataset]:
data_dirs = list(extract_archives(input_filename, temp_dir))
groups = {}
for data_dir in data_dirs:
group_name = pathlib.Path(data_dir).name
groups[group_name] = read_dataset_from_inner_directory(data_dir)
return groups


def read_groups_from_unknown_directory(
data_dir: str | os.PathLike[Any], temp_dir: str
) -> dict[str, Dataset]:
data_path = pathlib.Path(data_dir)
metadata_files = list(data_path.glob("*METADATA.XML"))
match len(metadata_files):
case 0:
# assume outer directory
return read_groups_from_archive(data_path, temp_dir)
case 1:
# assume inner directory
return {
data_path.name: read_dataset_from_inner_directory(data_path)
}
case _:
raise RuntimeError("Too many METADATA.XML files")


def find_datafiles(data_path: pathlib.Path) -> Mapping[str, pathlib.Path]:
assert data_path.is_dir()
tiffs = list(data_path.glob("*.TIF"))