1 change: 1 addition & 0 deletions CHANGES.md
@@ -1,6 +1,7 @@
## Changes in 0.0.3 (under development)

- Add the command-line conversion tool convert-enmap (#9)
- Add support for backend methods `open_datatree` and `open_groups_as_dict` (#2); see the usage sketch below
- Scale reflectance and handle nodata in reader (#8)
- Include band center wavelengths in data and/or metadata (#6)
- Include quicklook data (#7)
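A minimal usage sketch of the new backend methods (assumptions: the backend is registered under the engine name `enmap`, and the archive path is a hypothetical example):

    import xarray as xr

    # Open an EnMAP product archive as a datatree, one group per product.
    dt = xr.open_datatree("ENMAP_PRODUCT.ZIP", engine="enmap")

    # Or obtain the groups as a plain dict of datasets.
    groups = xr.open_groups("ENMAP_PRODUCT.ZIP", engine="enmap")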
71 changes: 62 additions & 9 deletions xarray_enmap/cli.py
@@ -10,6 +10,10 @@
import shutil
import sys
import tempfile
from collections.abc import Iterable

import xarray

from . import xarray_enmap

LOGGER = logging.getLogger(__name__)
@@ -55,6 +59,13 @@ def main():
help="Higher Zarr output compression. ~25%% smaller than default compression. "
"Compression process (but not decompression) is much slower.",
)
parser.add_argument(
"--datatree",
"-d",
action="store_true",
help="Whether to write the data as datatree. This parameter is only considered when "
"the parameter zarr-output is given.",
)
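# Hypothetical example invocation (flag spellings assumed from the help
# texts above, not verified against the full parser definition):
#
#     convert-enmap --zarr-output ./out --datatree ENMAP_PRODUCT.ZIP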
parser.add_argument("--verbose", "-v", action="count", default=0)
args = parser.parse_args()
scale_reflectance = not args.raw_reflectance
@@ -79,6 +90,7 @@ def loglevel(verbosity):
args.tiff_output,
temp_dir,
args.compress,
args.datatree,
scale_reflectance,
)
else:
@@ -91,6 +103,7 @@ def loglevel(verbosity):
temp_dir,
args.compress,
args.extract_only,
args.datatree,
scale_reflectance,
)

@@ -101,6 +114,7 @@ def process(
output_dir_tiff: str,
temp_dir: str,
compress: bool = False,
open_as_datatree: bool = False,
scale_reflectance: bool = True,
):
if output_dir_zarr is output_dir_tiff is None:
@@ -130,13 +144,16 @@ def process(
raise ValueError(
f"{input_filename} is neither a file nor a directory."
)
for data_dir in data_dirs:
if output_dir_tiff is not None:
shutil.copytree(
data_dir, pathlib.Path(output_dir_tiff) / data_dir.name
)
if output_dir_zarr is not None:
write_zarr(data_dir, output_dir_zarr, compress, scale_reflectance)
for data_dir in data_dirs:
if output_dir_tiff is not None:
shutil.copytree(
data_dir, pathlib.Path(output_dir_tiff) / data_dir.name
)
if output_dir_zarr is not None and not open_as_datatree:
write_zarr(data_dir, output_dir_zarr, compress, scale_reflectance)
if output_dir_zarr is not None and open_as_datatree:
# Write all products as groups of a single datatree store; GeoTIFF
# output, if requested, is unaffected by this option.
write_datatree_as_zarr(
input_path, data_dirs, output_dir_zarr, compress, scale_reflectance
)


def write_zarr(
@@ -154,9 +171,45 @@ def write_zarr(
ds = xarray_enmap.read_dataset_from_inner_directory(
data_dir, scale_reflectance
)
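# One Zarr store per product, named after the product directory.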
store_path = pathlib.Path(output_dir) / (data_dir.name + ".zarr")
zarr_args = _get_zarr_args(compress, store_path)
ds.to_zarr(**zarr_args)


def write_datatree_as_zarr(
input_path: pathlib.Path,
data_dirs: Iterable[pathlib.Path | str],
output_dir: str,
compress: bool = False,
scale_reflectance: bool = True,
):
name = input_path.name
LOGGER.info(f"Writing {name} to a Zarr archive...")
suffixes = input_path.suffixes
suffixes.reverse()
for suffix in suffixes:
name = name.removesuffix(suffix)
ensure_module_importable("zarr")
LOGGER.info(
f"Using {'scaled' if scale_reflectance else 'unscaled'} "
f"reflectance."
)
groups = {}
for data_dir in data_dirs:
group_name = pathlib.Path(data_dir).name
groups[group_name] = xarray_enmap.read_dataset_from_inner_directory(
data_dir, scale_reflectance
)
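# Assemble a single datatree with one group per extracted product.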
dt = xarray.DataTree.from_dict(groups)
store_path = pathlib.Path(output_dir) / (name + ".zarr")
zarr_args = _get_zarr_args(compress, store_path)
dt.to_zarr(**zarr_args)


def _get_zarr_args(compress: bool, store_path: pathlib.Path):
zarr_args = {
"zarr_format": 2,
"store": pathlib.Path(output_dir) / (data_dir.name + ".zarr"),
"store": store_path
}
if compress:
ensure_module_importable("numcodecs")
@@ -169,7 +222,7 @@ def write_zarr(
)
}
}
ds.to_zarr(**zarr_args)
return zarr_args


def ensure_module_importable(module_name: str):
62 changes: 61 additions & 1 deletion xarray_enmap/xarray_enmap.py
@@ -18,8 +18,10 @@
import zipfile

import shapely
import xarray as xr

from xarray import DataTree, Dataset
from xarray.backends import AbstractDataStore

LOGGER = logging.getLogger(__name__)

@@ -42,6 +44,7 @@

class EnmapEntrypoint(xr.backends.BackendEntrypoint):

supports_groups = True
temp_dir = None

def open_dataset(
@@ -72,6 +75,34 @@ def open_dataset(
ds.set_close(self.close)
return ds

def open_datatree(
self,
filename_or_obj: str | os.PathLike[Any],
*,
drop_variables: str | Iterable[str] | None = None,
) -> DataTree:
groups = self.open_groups_as_dict(
filename_or_obj, drop_variables=drop_variables
)
return DataTree.from_dict(groups)

def open_groups_as_dict(
self,
filename_or_obj: str | os.PathLike[Any],
*,
drop_variables: str | Iterable[str] | None = None,
) -> dict[str, Dataset]:
self.temp_dir = tempfile.mkdtemp(prefix="xarray-enmap-")
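# The extracted files live in this temporary directory until close()
# removes it, so the returned datasets are only readable until then.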
path = pathlib.Path(filename_or_obj)
if path.is_file():
groups = read_groups_from_archive(filename_or_obj, self.temp_dir)
elif path.is_dir():
groups = read_groups_from_unknown_directory(path, self.temp_dir)
else:
raise ValueError(
f"{filename_or_obj} is neither a path nor a directory."
)
return groups

def close(self):
if self.temp_dir:
shutil.rmtree(self.temp_dir)
@@ -137,6 +168,35 @@ def read_dataset_from_inner_directory(
return ds


def read_groups_from_archive(
input_filename: str | os.PathLike[Any], temp_dir: str
) -> dict[str, Dataset]:
data_dirs = list(extract_archives(input_filename, temp_dir))
groups = {}
for data_dir in data_dirs:
group_name = pathlib.Path(data_dir).name
groups[group_name] = read_dataset_from_inner_directory(data_dir)
return groups


def read_groups_from_unknown_directory(
data_dir: str | os.PathLike[Any], temp_dir: str
) -> dict[str, Dataset]:
data_path = pathlib.Path(data_dir)
metadata_files = list(data_path.glob("*METADATA.XML"))
match len(metadata_files):
case 0:
# assume outer directory
return read_groups_from_archive(data_path, temp_dir)
case 1:
# assume inner directory
return {
data_path.name: read_dataset_from_inner_directory(data_path)
}
case _:
raise RuntimeError("Too many METADATA.XML files")


def find_datafiles(data_path: pathlib.Path) -> Mapping[str, pathlib.Path]:
assert data_path.is_dir()
tiffs = list(data_path.glob("*.TIF"))