Copy tests

ome-zarr-models · Jan 7, 2025 · 08a330a · 08a330a
1 parent 66f8a83
commit 08a330a
Show file tree

Hide file tree

Showing 28 changed files with 1,824 additions and 0 deletions.
diff --git a/tests/v05/conftest.py b/tests/v05/conftest.py
@@ -0,0 +1,258 @@
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any, Literal, TypeVar
+
+import numcodecs
+import numpy as np
+import numpy.typing as npt
+from numcodecs.abc import Codec
+from pydantic_zarr.v2 import ArraySpec, GroupSpec
+from zarr.util import guess_chunks
+
+from ome_zarr_models.base import BaseAttrs
+from ome_zarr_models.v04.axes import Axis
+from ome_zarr_models.v04.image import Image, ImageAttrs
+from ome_zarr_models.v04.multiscales import (
+    Dataset,
+    Multiscale,
+)
+
+T = TypeVar("T", bound=BaseAttrs)
+
+
+def read_in_json(*, json_fname: str, model_cls: type[T]) -> T:
+    with open(Path(__file__).parent / "data" / json_fname) as f:
+        return model_cls.model_validate_json(f.read())
+
+
+def normalize_chunks(
+    chunks: Any,
+    shapes: tuple[tuple[int, ...], ...],
+    typesizes: tuple[int, ...],
+) -> tuple[tuple[int, ...], ...]:
+    """
+    If chunks is "auto", then use zarr default chunking based on the
+    largest array for all the arrays.
+    If chunks is a sequence of ints, then use those chunks for all arrays.
+    If chunks is a sequence of sequences of ints, then use those chunks for each array.
+    """
+    if chunks == "auto":
+        # sort shapes by descending size
+        params_sorted_descending = sorted(
+            zip(shapes, typesizes, strict=False),
+            key=lambda v: np.prod(v[0]),  # type: ignore[return-value, arg-type]
+            reverse=True,
+        )
+        return (guess_chunks(*params_sorted_descending[0]),) * len(shapes)
+    if isinstance(chunks, Sequence):
+        if all(isinstance(element, int) for element in chunks):
+            return (tuple(chunks),) * len(shapes)
+        if all(isinstance(element, Sequence) for element in chunks):
+            if all(all(isinstance(k, int) for k in v) for v in chunks):
+                return tuple(map(tuple, chunks))
+            else:
+                msg = f"Expected a sequence of sequences of ints. Got {chunks} instead."
+                raise ValueError(msg)
+    msg = f'Input must be a sequence or the string "auto". Got {type(chunks)}'
+    raise TypeError(msg)
+
+
+def from_arrays(
+    arrays: Sequence[npt.NDArray[Any]],
+    *,
+    paths: Sequence[str],
+    axes: Sequence[Axis],
+    scales: Sequence[tuple[int | float, ...]],
+    translations: Sequence[tuple[int | float, ...]],
+    name: str | None = None,
+    type: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
+    compressor: Codec | Literal["auto"] = "auto",
+    fill_value: Any = 0,
+    order: Literal["C", "F", "auto"] = "auto",
+) -> Image:
+    """
+    Create a `Image` from a sequence of multiscale arrays
+    and spatial metadata.
+
+    The arrays are used as templates for corresponding `ArraySpec` instances,
+    which model the Zarr arrays that would be created if the `Image`
+    was stored.
+
+    Parameters
+    ----------
+    paths: Sequence[str]
+        The paths to the arrays.
+    axes: Sequence[Axis]
+        `Axis` objects describing the dimensions of the arrays.
+    arrays: Sequence[ArrayLike] | Sequence[ChunkedArrayLike]
+        A sequence of array-like objects that collectively represent the same image
+        at multiple levels of detail.
+        The attributes of these arrays are used to create `ArraySpec` objects
+        that model Zarr arrays stored in the Zarr group.
+    scales: Sequence[Sequence[int | float]]
+        A scale value for each axis of the array, for each array in `arrays`.
+    translations: Sequence[Sequence[int | float]]
+        A translation value for each axis the array, for each array in `arrays`.
+    name: str | None, default = None
+        A name for the multiscale collection. Optional.
+    type: str | None, default = None
+        A description of the type of multiscale image represented by this group.
+        Optional.
+    metadata: Dict[str, Any] | None, default = None
+        Arbitrary metadata associated with this multiscale collection. Optional.
+    chunks: tuple[int] | tuple[tuple[int, ...]] | Literal["auto"], default = "auto"
+        The chunks for the arrays in this multiscale group.
+        If the string "auto" is provided, each array will have chunks set to the
+        zarr-python default value, which depends on the shape and dtype of the array.
+        If a single sequence of ints is provided, then this defines the
+        chunks for all arrays. If a sequence of sequences of ints is provided,
+        then this defines the chunks for each array.
+    fill_value: Any, default = 0
+        The fill value for the Zarr arrays.
+    compressor: `Codec` | "auto", default = `numcodecs.ZStd`
+        The compressor to use for the arrays. Default is `numcodecs.ZStd`.
+    order: "auto" | "C" | "F"
+        The memory layout used for chunks of Zarr arrays.
+        The default is "auto", which will infer the order from the input arrays,
+        and fall back to "C" if that inference fails.
+    """
+
+    chunks_normalized = normalize_chunks(
+        chunks,
+        shapes=tuple(s.shape for s in arrays),
+        typesizes=tuple(s.dtype.itemsize for s in arrays),
+    )
+
+    members_flat = {
+        "/" + key.lstrip("/"): ArraySpec.from_array(
+            array=arr,
+            chunks=cnks,
+            attributes={},
+            compressor=compressor,
+            filters=None,
+            fill_value=fill_value,
+            order=order,
+        )
+        for key, arr, cnks in zip(paths, arrays, chunks_normalized, strict=False)
+    }
+
+    multimeta = Multiscale(
+        name=name,
+        type=type,
+        metadata=metadata,
+        axes=tuple(axes),
+        datasets=tuple(
+            Dataset.build(path=path, scale=scale, translation=translation)
+            for path, scale, translation in zip(
+                paths, scales, translations, strict=False
+            )
+        ),
+        coordinateTransformations=None,
+    )
+    return Image(
+        members=GroupSpec.from_flat(members_flat).members,
+        attributes=ImageAttrs(multiscales=(multimeta,)),
+    )
+
+
+def from_array_props(
+    dtype: npt.DTypeLike,
+    shapes: Sequence[Sequence[int]],
+    paths: Sequence[str],
+    axes: Sequence[Axis],
+    scales: Sequence[tuple[int | float, ...]],
+    translations: Sequence[tuple[int | float, ...]],
+    name: str | None = None,
+    type: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
+    compressor: Codec | Literal["auto"] = "auto",
+    fill_value: Any = 0,
+    order: Literal["C", "F"] = "C",
+) -> Image:
+    """
+    Create a `Image` from a dtype and a sequence of shapes.
+
+    The dtype and shapes are used to parametrize `ArraySpec` instances which model the
+    Zarr arrays that would be created if the `Image` was stored.
+
+    Parameters
+    ----------
+    dtype: np.dtype[Any]
+        The data type of the arrays.
+    shapes: Sequence[Sequence[str]]
+        The shapes of the arrays.
+    paths: Sequence[str]
+        The paths to the arrays.
+    axes: Sequence[Axis]
+        `Axis` objects describing the dimensions of the arrays.
+    scales: Sequence[Sequence[int | float]]
+        A scale value for each axis of the array, for each shape in `shapes`.
+    translations: Sequence[Sequence[int | float]]
+        A translation value for each axis the array, for each shape in `shapes`.
+    name: str | None, default = None
+        A name for the multiscale collection. Optional.
+    type: str | None, default = None
+        A description of the type of multiscale image represented by this group.
+        Optional.
+    metadata: Dict[str, Any] | None, default = None
+        Arbitrary metadata associated with this multiscale collection. Optional.
+    chunks: tuple[int] | tuple[tuple[int, ...]] | Literal["auto"], default = "auto"
+        The chunks for the arrays in this multiscale group.
+        If the string "auto" is provided, each array will have chunks set to the
+        zarr-python default value, which depends on the shape and dtype of the array.
+        If a single sequence of ints is provided, then this defines the chunks for
+        all arrays. If a sequence of sequences of ints is provided, then this defines
+        the chunks for each array.
+    fill_value: Any, default = 0
+        The fill value for the Zarr arrays.
+    compressor: `Codec`
+        The compressor to use for the arrays. Default is `numcodecs.ZStd`.
+    order: "C" | "F", default = "C"
+        The memory layout used for chunks of Zarr arrays. The default is "C".
+    """
+
+    dtype_normalized = np.dtype(dtype)
+    if compressor == "auto":
+        compressor_parsed = numcodecs.Zstd(level=3)
+    else:
+        compressor_parsed = compressor
+    chunks_normalized = normalize_chunks(
+        chunks,
+        shapes=tuple(tuple(s) for s in shapes),
+        typesizes=tuple(dtype_normalized.itemsize for s in shapes),
+    )
+
+    members_flat = {
+        "/" + key.lstrip("/"): ArraySpec(
+            dtype=dtype,
+            shape=shape,
+            chunks=cnks,
+            attributes={},
+            compressor=compressor_parsed,
+            filters=None,
+            fill_value=fill_value,
+            order=order,
+        )
+        for key, shape, cnks in zip(paths, shapes, chunks_normalized, strict=False)
+    }
+
+    multimeta = Multiscale(
+        name=name,
+        type=type,
+        metadata=metadata,
+        axes=tuple(axes),
+        datasets=tuple(
+            Dataset.build(path=path, scale=scale, translation=translation)
+            for path, scale, translation in zip(
+                paths, scales, translations, strict=False
+            )
+        ),
+        coordinateTransformations=None,
+    )
+    return Image(
+        members=GroupSpec.from_flat(members_flat).members,
+        attributes=ImageAttrs(multiscales=(multimeta,)),
+    )
diff --git a/tests/v05/data/bioformats2raw_example.json b/tests/v05/data/bioformats2raw_example.json
@@ -0,0 +1,30 @@
+{
+  "bioformats2raw.layout": 3,
+  "plate": {
+    "columns": [
+      {
+        "name": "1"
+      }
+    ],
+    "name": "Plate Name 0",
+    "wells": [
+      {
+        "path": "A/1",
+        "rowIndex": 0,
+        "columnIndex": 0
+      }
+    ],
+    "field_count": 1,
+    "rows": [
+      {
+        "name": "A"
+      }
+    ],
+    "acquisitions": [
+      {
+        "id": 0
+      }
+    ],
+    "version": "0.4"
+  }
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/.zattrs b/tests/v05/data/hcs_example.ome.zarr/.zattrs
@@ -0,0 +1,28 @@
+{
+    "plate": {
+        "acquisitions": [
+            {
+                "id": 0,
+                "name": "20200812-CardiomyocyteDifferentiation14-Cycle1"
+            }
+        ],
+        "columns": [
+            {
+                "name": "03"
+            }
+        ],
+        "rows": [
+            {
+                "name": "B"
+            }
+        ],
+        "version": "0.4",
+        "wells": [
+            {
+                "columnIndex": 0,
+                "path": "B/03",
+                "rowIndex": 0
+            }
+        ]
+    }
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/.zgroup b/tests/v05/data/hcs_example.ome.zarr/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/.zgroup b/tests/v05/data/hcs_example.ome.zarr/B/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/.zattrs b/tests/v05/data/hcs_example.ome.zarr/B/03/.zattrs
@@ -0,0 +1,10 @@
+{
+    "well": {
+        "images": [
+            {
+                "path": "0"
+            }
+        ],
+        "version": "0.4"
+    }
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/.zgroup b/tests/v05/data/hcs_example.ome.zarr/B/03/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}