diff --git a/tests/v05/conftest.py b/tests/v05/conftest.py
new file mode 100644
index 0000000..05ccbcf
--- /dev/null
+++ b/tests/v05/conftest.py
@@ -0,0 +1,258 @@
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any, Literal, TypeVar
+
+import numcodecs
+import numpy as np
+import numpy.typing as npt
+from numcodecs.abc import Codec
+from pydantic_zarr.v2 import ArraySpec, GroupSpec
+from zarr.util import guess_chunks
+
+from ome_zarr_models.base import BaseAttrs
+from ome_zarr_models.v04.axes import Axis
+from ome_zarr_models.v04.image import Image, ImageAttrs
+from ome_zarr_models.v04.multiscales import (
+    Dataset,
+    Multiscale,
+)
+
+T = TypeVar("T", bound=BaseAttrs)
+
+
+def read_in_json(*, json_fname: str, model_cls: type[T]) -> T:
+    with open(Path(__file__).parent / "data" / json_fname) as f:
+        return model_cls.model_validate_json(f.read())
+
+
+def normalize_chunks(
+    chunks: Any,
+    shapes: tuple[tuple[int, ...], ...],
+    typesizes: tuple[int, ...],
+) -> tuple[tuple[int, ...], ...]:
+    """
+    If chunks is "auto", apply the zarr default chunking heuristic to the
+    largest array and use the resulting chunk shape for all the arrays.
+    If chunks is a sequence of ints, use those chunks for all arrays.
+    If chunks is a sequence of sequences of ints, use those chunks for each array.
+    """
+    if chunks == "auto":
+        # sort shapes by descending size
+        params_sorted_descending = sorted(
+            zip(shapes, typesizes, strict=False),
+            key=lambda v: np.prod(v[0]),  # type: ignore[return-value, arg-type]
+            reverse=True,
+        )
+        return (guess_chunks(*params_sorted_descending[0]),) * len(shapes)
+    if isinstance(chunks, Sequence):
+        if all(isinstance(element, int) for element in chunks):
+            return (tuple(chunks),) * len(shapes)
+        if all(isinstance(element, Sequence) for element in chunks):
+            if all(all(isinstance(k, int) for k in v) for v in chunks):
+                return tuple(map(tuple, chunks))
+            else:
+                msg = f"Expected a sequence of sequences of ints. Got {chunks} instead."
+                raise ValueError(msg)
+    msg = f'Input must be a sequence or the string "auto". Got {type(chunks)}'
+    raise TypeError(msg)
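A quick sketch of how `normalize_chunks` resolves each accepted form of `chunks` may help reviewers; the import path and the toy shapes are assumptions for illustration, not part of the patch:

```python
# assumed import path: the helper defined in the conftest above
from tests.v05.conftest import normalize_chunks

shapes = ((100, 100), (50, 50))
typesizes = (1, 1)

# "auto": one chunk shape, guessed from the largest array, reused for all arrays
auto_chunks = normalize_chunks("auto", shapes=shapes, typesizes=typesizes)
assert len(auto_chunks) == 2 and auto_chunks[0] == auto_chunks[1]

# a flat sequence of ints is broadcast to every array
assert normalize_chunks((10, 10), shapes=shapes, typesizes=typesizes) == (
    (10, 10),
    (10, 10),
)

# a sequence of sequences gives per-array chunks, passed through unchanged
assert normalize_chunks(((10, 10), (5, 5)), shapes=shapes, typesizes=typesizes) == (
    (10, 10),
    (5, 5),
)
```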
+
+
+def from_arrays(
+    arrays: Sequence[npt.NDArray[Any]],
+    *,
+    paths: Sequence[str],
+    axes: Sequence[Axis],
+    scales: Sequence[tuple[int | float, ...]],
+    translations: Sequence[tuple[int | float, ...]],
+    name: str | None = None,
+    type: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
+    compressor: Codec | Literal["auto"] = "auto",
+    fill_value: Any = 0,
+    order: Literal["C", "F", "auto"] = "auto",
+) -> Image:
+    """
+    Create an `Image` from a sequence of multiscale arrays
+    and spatial metadata.
+
+    The arrays are used as templates for corresponding `ArraySpec` instances,
+    which model the Zarr arrays that would be created if the `Image`
+    was stored.
+
+    Parameters
+    ----------
+    arrays: Sequence[npt.NDArray[Any]]
+        A sequence of array-like objects that collectively represent the same image
+        at multiple levels of detail.
+        The attributes of these arrays are used to create `ArraySpec` objects
+        that model Zarr arrays stored in the Zarr group.
+    paths: Sequence[str]
+        The paths to the arrays.
+    axes: Sequence[Axis]
+        `Axis` objects describing the dimensions of the arrays.
+    scales: Sequence[tuple[int | float, ...]]
+        A scale value for each axis of the array, for each array in `arrays`.
+    translations: Sequence[tuple[int | float, ...]]
+        A translation value for each axis of the array, for each array in `arrays`.
+    name: str | None, default = None
+        A name for the multiscale collection. Optional.
+    type: str | None, default = None
+        A description of the type of multiscale image represented by this group.
+        Optional.
+    metadata: dict[str, Any] | None, default = None
+        Arbitrary metadata associated with this multiscale collection. Optional.
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | "auto", default = "auto"
+        The chunks for the arrays in this multiscale group.
+        If the string "auto" is provided, each array will have chunks set to the
+        zarr-python default value, which depends on the shape and dtype of the array.
+        If a single sequence of ints is provided, then this defines the
+        chunks for all arrays. If a sequence of sequences of ints is provided,
+        then this defines the chunks for each array.
+    compressor: Codec | "auto", default = "auto"
+        The compressor to use for the arrays.
+    fill_value: Any, default = 0
+        The fill value for the Zarr arrays.
+    order: "auto" | "C" | "F", default = "auto"
+        The memory layout used for chunks of Zarr arrays.
+        The default is "auto", which will infer the order from the input arrays,
+        and fall back to "C" if that inference fails.
+    """
+
+    chunks_normalized = normalize_chunks(
+        chunks,
+        shapes=tuple(s.shape for s in arrays),
+        typesizes=tuple(s.dtype.itemsize for s in arrays),
+    )
+
+    members_flat = {
+        "/" + key.lstrip("/"): ArraySpec.from_array(
+            array=arr,
+            chunks=cnks,
+            attributes={},
+            compressor=compressor,
+            filters=None,
+            fill_value=fill_value,
+            order=order,
+        )
+        for key, arr, cnks in zip(paths, arrays, chunks_normalized, strict=False)
+    }
+
+    multimeta = Multiscale(
+        name=name,
+        type=type,
+        metadata=metadata,
+        axes=tuple(axes),
+        datasets=tuple(
+            Dataset.build(path=path, scale=scale, translation=translation)
+            for path, scale, translation in zip(
+                paths, scales, translations, strict=False
+            )
+        ),
+        coordinateTransformations=None,
+    )
+    return Image(
+        members=GroupSpec.from_flat(members_flat).members,
+        attributes=ImageAttrs(multiscales=(multimeta,)),
+    )
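Typical usage, mirroring the calls in `test_multiscales.py` further down (a sketch; nothing is written to disk, the returned `Image` only models the hierarchy, and the import path is an assumption):

```python
import numpy as np

from ome_zarr_models.v04.axes import Axis
from tests.v05.conftest import from_arrays  # assumed import path

image = from_arrays(
    arrays=(np.zeros((10, 10)), np.zeros((5, 5))),  # two pyramid levels
    paths=("s0", "s1"),
    axes=(Axis(name="y", type="space"), Axis(name="x", type="space")),
    scales=((1, 1), (2, 2)),            # per-axis scale for each level
    translations=((0, 0), (0.5, 0.5)),  # half-pixel shift of the downsampled level
)
# `image.attributes.multiscales` now carries the OME-Zarr metadata,
# and `image.members` the ArraySpec models for "s0" and "s1".
```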
+
+
+def from_array_props(
+    dtype: npt.DTypeLike,
+    shapes: Sequence[Sequence[int]],
+    paths: Sequence[str],
+    axes: Sequence[Axis],
+    scales: Sequence[tuple[int | float, ...]],
+    translations: Sequence[tuple[int | float, ...]],
+    name: str | None = None,
+    type: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | Literal["auto"] = "auto",
+    compressor: Codec | Literal["auto"] = "auto",
+    fill_value: Any = 0,
+    order: Literal["C", "F"] = "C",
+) -> Image:
+    """
+    Create an `Image` from a dtype and a sequence of shapes.
+
+    The dtype and shapes are used to parametrize `ArraySpec` instances which model the
+    Zarr arrays that would be created if the `Image` was stored.
+
+    Parameters
+    ----------
+    dtype: np.dtype[Any]
+        The data type of the arrays.
+    shapes: Sequence[Sequence[int]]
+        The shapes of the arrays.
+    paths: Sequence[str]
+        The paths to the arrays.
+    axes: Sequence[Axis]
+        `Axis` objects describing the dimensions of the arrays.
+    scales: Sequence[tuple[int | float, ...]]
+        A scale value for each axis of the array, for each shape in `shapes`.
+    translations: Sequence[tuple[int | float, ...]]
+        A translation value for each axis of the array, for each shape in `shapes`.
+    name: str | None, default = None
+        A name for the multiscale collection. Optional.
+    type: str | None, default = None
+        A description of the type of multiscale image represented by this group.
+        Optional.
+    metadata: dict[str, Any] | None, default = None
+        Arbitrary metadata associated with this multiscale collection. Optional.
+    chunks: tuple[int, ...] | tuple[tuple[int, ...], ...] | "auto", default = "auto"
+        The chunks for the arrays in this multiscale group.
+        If the string "auto" is provided, each array will have chunks set to the
+        zarr-python default value, which depends on the shape and dtype of the array.
+        If a single sequence of ints is provided, then this defines the chunks for
+        all arrays. If a sequence of sequences of ints is provided, then this defines
+        the chunks for each array.
+    compressor: Codec | "auto", default = "auto"
+        The compressor to use for the arrays. The default "auto" resolves to
+        `numcodecs.Zstd(level=3)`.
+    fill_value: Any, default = 0
+        The fill value for the Zarr arrays.
+    order: "C" | "F", default = "C"
+        The memory layout used for chunks of Zarr arrays. The default is "C".
+    """
+
+    dtype_normalized = np.dtype(dtype)
+    if compressor == "auto":
+        compressor_parsed = numcodecs.Zstd(level=3)
+    else:
+        compressor_parsed = compressor
+    chunks_normalized = normalize_chunks(
+        chunks,
+        shapes=tuple(tuple(s) for s in shapes),
+        typesizes=tuple(dtype_normalized.itemsize for s in shapes),
+    )
+
+    members_flat = {
+        "/" + key.lstrip("/"): ArraySpec(
+            dtype=dtype,
+            shape=shape,
+            chunks=cnks,
+            attributes={},
+            compressor=compressor_parsed,
+            filters=None,
+            fill_value=fill_value,
+            order=order,
+        )
+        for key, shape, cnks in zip(paths, shapes, chunks_normalized, strict=False)
+    }
+
+    multimeta = Multiscale(
+        name=name,
+        type=type,
+        metadata=metadata,
+        axes=tuple(axes),
+        datasets=tuple(
+            Dataset.build(path=path, scale=scale, translation=translation)
+            for path, scale, translation in zip(
+                paths, scales, translations, strict=False
+            )
+        ),
+        coordinateTransformations=None,
+    )
+    return Image(
+        members=GroupSpec.from_flat(members_flat).members,
+        attributes=ImageAttrs(multiscales=(multimeta,)),
+    )
diff --git a/tests/v05/data/bioformats2raw_example.json b/tests/v05/data/bioformats2raw_example.json
new file mode 100644
index 0000000..33fb6b6
--- /dev/null
+++ b/tests/v05/data/bioformats2raw_example.json
@@ -0,0 +1,30 @@
+{
+    "bioformats2raw.layout": 3,
+    "plate": {
+        "columns": [
+            {
+                "name": "1"
+            }
+        ],
+        "name": "Plate Name 0",
+        "wells": [
+            {
+                "path": "A/1",
+                "rowIndex": 0,
+                "columnIndex": 0
+            }
+        ],
+        "field_count": 1,
+        "rows": [
+            {
+                "name": "A"
+            }
+        ],
+        "acquisitions": [
+            {
+                "id": 0
+            }
+        ],
+        "version": "0.4"
+    }
+}
diff --git a/tests/v05/data/hcs_example.ome.zarr/.zattrs b/tests/v05/data/hcs_example.ome.zarr/.zattrs
new file mode 100644
index 0000000..810890a
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/.zattrs
@@ -0,0 +1,28 @@
+{
+    "plate": {
+        "acquisitions": [
+            {
+                "id": 0,
+                "name": "20200812-CardiomyocyteDifferentiation14-Cycle1"
+            }
+        ],
+        "columns": [
+            {
+                "name": "03"
+            }
+        ],
+        "rows": [
+            {
+                "name": "B"
+            }
+        ],
+        "version": "0.4",
+        "wells": [
+            {
+                "columnIndex": 0,
+                "path": "B/03",
+                "rowIndex": 0
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/.zgroup b/tests/v05/data/hcs_example.ome.zarr/.zgroup
new file mode 100644
index 0000000..3b7daf2
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/.zgroup b/tests/v05/data/hcs_example.ome.zarr/B/.zgroup
new file mode 100644
index 0000000..3b7daf2
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/.zattrs b/tests/v05/data/hcs_example.ome.zarr/B/03/.zattrs
new file mode 100644
index 0000000..752d788
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/03/.zattrs
@@ -0,0 +1,10 @@
+{
+    "well": {
+        "images": [
+            {
+                "path": "0"
+            }
+        ],
+        "version": "0.4"
+    }
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/.zgroup b/tests/v05/data/hcs_example.ome.zarr/B/03/.zgroup
new file mode 100644
index 0000000..3b7daf2
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/03/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zattrs b/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zattrs
new file mode 100644
index 0000000..f954045
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zattrs
@@ -0,0 +1,118 @@
+{
+    "multiscales": [
+        {
+            "axes": [
+                {
+                    "name": "c",
+                    "type": "channel"
+                },
+                {
+                    "name": "z",
+                    "type": "space",
+                    "unit": "micrometer"
+                },
+                {
+                    "name": "y",
+                    "type": "space",
+                    "unit": "micrometer"
+                },
+                {
+                    "name": "x",
+                    "type": "space",
+                    "unit": "micrometer"
+                }
+            ],
+            "datasets": [
+                {
+                    "coordinateTransformations": [
+                        {
+                            "scale": [
+                                1,
+                                1.0,
+                                0.1625,
+                                0.1625
+                            ],
+                            "type": "scale"
+                        }
+                    ],
+                    "path": "0"
+                },
+                {
+                    "coordinateTransformations": [
+                        {
+                            "scale": [
+                                1,
+                                1.0,
+                                0.325,
+                                0.325
+                            ],
+                            "type": "scale"
+                        }
+                    ],
+                    "path": "1"
+                },
+                {
+                    "coordinateTransformations": [
+                        {
+                            "scale": [
+                                1,
+                                1.0,
+                                0.65,
+                                0.65
+                            ],
+                            "type": "scale"
+                        }
+                    ],
+                    "path": "2"
+                },
+                {
+                    "coordinateTransformations": [
+                        {
+                            "scale": [
+                                1,
+                                1.0,
+                                1.3,
+                                1.3
+                            ],
+                            "type": "scale"
+                        }
+                    ],
+                    "path": "3"
+                },
+                {
+                    "coordinateTransformations": [
+                        {
+                            "scale": [
+                                1,
+                                1.0,
+                                2.6,
+                                2.6
+                            ],
+                            "type": "scale"
+                        }
+                    ],
+                    "path": "4"
+                }
+            ],
+            "version": "0.4"
+        }
+    ],
+    "omero": {
+        "channels": [
+            {
+                "color": "00FFFF",
+                "label": "DAPI",
+                "wavelength_id": "A01_C01",
+                "window": {
+                    "end": 800,
+                    "max": 65535,
+                    "min": 0,
+                    "start": 110
+                }
+            }
+        ],
+        "id": 1,
+        "name": "TBD",
+        "version": "0.4"
+    }
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zgroup b/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zgroup
new file mode 100644
index 0000000..3b7daf2
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/03/0/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/tests/v05/data/hcs_example.ome.zarr/B/03/0/0/.zarray b/tests/v05/data/hcs_example.ome.zarr/B/03/0/0/.zarray
new file mode 100644
index 0000000..d270a33
--- /dev/null
+++ b/tests/v05/data/hcs_example.ome.zarr/B/03/0/0/.zarray
@@ -0,0 +1,27 @@
+{
+    "chunks": [
+        1,
+        1,
+        2160,
+        2560
+    ],
+    "compressor": {
+        "blocksize": 0,
+        "clevel": 5,
+        "cname": "lz4",
+        "id": "blosc",
+        "shuffle": 1
+    },
+    "dimension_separator": "/",
+    "dtype": " None:
+    model = read_in_json(
+        json_fname="bioformats2raw_example.json", model_cls=BioFormats2RawAttrs
+    )
+
+    assert model == BioFormats2RawAttrs(
+        bioformats2raw_layout=3,
+        plate=Plate(
+            acquisitions=[
+                Acquisition(id=0, maximumfieldcount=None, name=None, description=None)
+            ],
+            columns=[Column(name="1")],
+            field_count=1,
+            name="Plate Name 0",
+            rows=[Row(name="A")],
+            version="0.4",
+            wells=[WellInPlate(path="A/1", rowIndex=0, columnIndex=0)],
+        ),
+        series=None,
+    )
diff --git a/tests/v05/test_hcs.py b/tests/v05/test_hcs.py
new file mode 100644
index 0000000..c7b8f2f
--- /dev/null
+++ b/tests/v05/test_hcs.py
@@ -0,0 +1,110 @@
+from pathlib import Path
+
+import zarr
+
+from ome_zarr_models.v04.axes import Axis
+from ome_zarr_models.v04.coordinate_transformations import VectorScale
+from ome_zarr_models.v04.hcs import HCS, HCSAttrs
+from ome_zarr_models.v04.image import ImageAttrs
+from ome_zarr_models.v04.multiscales import Dataset, Multiscale
+from ome_zarr_models.v04.omero import Channel, Omero, Window
+from ome_zarr_models.v04.plate import Acquisition, Column, Plate, Row, WellInPlate
+from ome_zarr_models.v04.well_types import WellImage, WellMeta
+
+
+def test_example_hcs() -> None:
+    group = zarr.open(Path(__file__).parent / "data" / "hcs_example.ome.zarr", mode="r")
+    hcs = HCS.from_zarr(group)
+    assert hcs.attributes == HCSAttrs(
+        plate=Plate(
+            acquisitions=[
+                Acquisition(
+                    id=0,
+                    name="20200812-CardiomyocyteDifferentiation14-Cycle1",
+                    maximumfieldcount=None,
+                    description=None,
+                    starttime=None,
+                    endtime=None,
+                )
+            ],
+            columns=[Column(name="03")],
+            field_count=None,
+            name=None,
+            rows=[Row(name="B")],
+            version="0.4",
+            wells=[WellInPlate(path="B/03", rowIndex=0, columnIndex=0)],
+        )
+    )
+
+    well_groups = list(hcs.well_groups)
+    assert len(well_groups) == 1
+    well_group = well_groups[0]
+    assert well_group.attributes.well == WellMeta(
+        images=[WellImage(path="0", acquisition=None)], version="0.4"
+    )
+
+    images = list(well_group.images)
+    assert len(images) == 1
+
+    assert images[0].attributes == ImageAttrs(
+        multiscales=[
+            Multiscale(
+                axes=[
+                    Axis(name="c", type="channel", unit=None),
+                    Axis(name="z", type="space", unit="micrometer"),
+                    Axis(name="y", type="space", unit="micrometer"),
+                    Axis(name="x", type="space", unit="micrometer"),
+                ],
+                datasets=[
+                    Dataset(
+                        path="0",
+                        coordinateTransformations=(
+                            VectorScale(type="scale", scale=[1.0, 1.0, 0.1625, 0.1625]),
+                        ),
+                    ),
+                    Dataset(
+                        path="1",
+                        coordinateTransformations=(
+                            VectorScale(type="scale", scale=[1.0, 1.0, 0.325, 0.325]),
+                        ),
+                    ),
+                    Dataset(
+                        path="2",
+                        coordinateTransformations=(
+                            VectorScale(type="scale", scale=[1.0, 1.0, 0.65, 0.65]),
+                        ),
+                    ),
+                    Dataset(
+                        path="3",
+                        coordinateTransformations=(
+                            VectorScale(type="scale", scale=[1.0, 1.0, 1.3, 1.3]),
+                        ),
+                    ),
+                    Dataset(
+                        path="4",
+                        coordinateTransformations=(
+                            VectorScale(type="scale", scale=[1.0, 1.0, 2.6, 2.6]),
+                        ),
+                    ),
+                ],
+                version="0.4",
+                coordinateTransformations=None,
+                metadata=None,
+                name=None,
+                type=None,
+            )
+        ],
+        omero=Omero(
+            channels=[
+                Channel(
+                    color="00FFFF",
+                    window=Window(max=65535.0, min=0.0, start=110.0, end=800.0),
+                    label="DAPI",
+                    wavelength_id="A01_C01",
+                )
+            ],
+            id=1,
+            name="TBD",
+            version="0.4",
+        ),
+    )
diff --git a/tests/v05/test_image_label.py b/tests/v05/test_image_label.py
new file mode 100644
index 0000000..f70ff31
--- /dev/null
+++ b/tests/v05/test_image_label.py
@@ -0,0 +1,47 @@
+import pytest
+from pydantic import ValidationError
+
+from ome_zarr_models.v04.image_label_types import (
+    Color,
+    Label,
+    Property,
+    Source,
+)
+from tests.v04.conftest import read_in_json
+
+
+def test_image_label_example_json() -> None:
+    model = read_in_json(json_fname="image_label_example.json", model_cls=Label)
+
+    assert model == Label(
+        colors=(
+            Color(label_value=1, rgba=(255, 255, 255, 255)),
+            Color(label_value=4, rgba=(0, 255, 255, 128)),
+        ),
+        properties=(
+            Property(label_value=1, area=1200, cls="foo"),
+            Property(label_value=4, area=1650),
+        ),
source=Source(image="../../"), + version="0.4", + ) + + +def test_invalid_label() -> None: + """ + > Each color object MUST contain the label-value key whose value MUST be an integer + > specifying the pixel value for that label + """ + with pytest.raises(ValidationError, match="Input should be a valid integer"): + Color(label_value="abc", rgba=(255, 255, 255, 255)) + + +def test_invalid_rgba() -> None: + """ + > MUST be an array of four integers between 0 and 255 [uint8, uint8, uint8, uint8] + > specifying the label color as RGBA + """ + with pytest.raises( + ValidationError, match="Input should be less than or equal to 255" + ): + Color(label_value=1, rgba=(255, 255, 3412, 255)) diff --git a/tests/v05/test_labels.py b/tests/v05/test_labels.py new file mode 100644 index 0000000..f67bd2b --- /dev/null +++ b/tests/v05/test_labels.py @@ -0,0 +1,15 @@ +import zarr + +from ome_zarr_models.v04.image import Image +from ome_zarr_models.v04.labels import Labels, LabelsAttrs + + +def test_labels() -> None: + # TODO: turn this into a local test + group = zarr.open_group( + "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr", mode="r" + ) + image = Image.from_zarr(group) + assert image.labels == Labels( + zarr_version=2, attributes=LabelsAttrs(labels=["0"]), members={} + ) diff --git a/tests/v05/test_multiscales.py b/tests/v05/test_multiscales.py new file mode 100644 index 0000000..5a1948b --- /dev/null +++ b/tests/v05/test_multiscales.py @@ -0,0 +1,567 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest +from pydantic import ValidationError +from pydantic_zarr.v2 import ArraySpec, GroupSpec + +from ome_zarr_models.v04.axes import Axis +from ome_zarr_models.v04.coordinate_transformations import ( + VectorScale, + VectorTranslation, + _build_transforms, +) +from ome_zarr_models.v04.image import Image, ImageAttrs +from ome_zarr_models.v04.multiscales import ( + Dataset, + Multiscale, +) +from tests.v04.conftest import from_array_props, from_arrays + +if TYPE_CHECKING: + from typing import Literal + +DEFAULT_UNITS_MAP = {"space": "meter", "time": "second"} + + +@pytest.fixture +def default_multiscale() -> Multiscale: + """ + Return a valid Multiscale object. + """ + axes = ( + Axis(name="c", type="channel", unit=None), + Axis(name="z", type="space", unit="meter"), + Axis(name="x", type="space", unit="meter"), + Axis(name="y", type="space", unit="meter"), + ) + rank = len(axes) + transforms_top = _build_transforms(scale=(1,) * rank, translation=None) + transforms_dset = _build_transforms(scale=(1,) * rank, translation=(0,) * rank) + num_datasets = 3 + datasets = tuple( + Dataset(path=f"path{idx}", coordinateTransformations=transforms_dset) + for idx in range(num_datasets) + ) + + multi = Multiscale( + axes=axes, + datasets=datasets, + coordinateTransformations=transforms_top, + ) + return multi + + +def test_immutable(default_multiscale: Multiscale) -> None: + """ + Check that models are immutable. + """ + with pytest.raises(ValidationError, match="Instance is frozen"): + default_multiscale.axes[0].name = "new_name" # type: ignore[misc] + + +def test_multiscale_unique_axis_names() -> None: + # TODO: is unique names actually part of the spec??? 
+    axes = (
+        Axis(name="x", type="space", unit="meter"),
+        Axis(name="x", type="space", unit="meter"),
+    )
+    rank = len(axes)
+    datasets = (Dataset.build(path="path", scale=(1,) * rank, translation=(0,) * rank),)
+
+    with pytest.raises(ValidationError, match="Axis names must be unique."):
+        Multiscale(
+            axes=axes,
+            datasets=datasets,
+            coordinateTransformations=_build_transforms(scale=(1, 1), translation=None),
+        )
+
+
+@pytest.mark.parametrize(
+    "axis_types",
+    [
+        ("space", "space", "channel"),
+        ("space", "channel", "space", "channel"),
+    ],
+)
+def test_multiscale_space_axes_last(axis_types: list[str]) -> None:
+    """
+    Error if the axes of type 'space' are not last.
+
+    > ... the entries MUST be ordered by "type" where the
+    > "time" axis must come first (if present), followed by the "channel" or
+    > custom axis (if present) and the axes of type "space".
+    """
+    axes = tuple(
+        Axis(name=str(idx), type=t, unit=DEFAULT_UNITS_MAP.get(t))
+        for idx, t in enumerate(axis_types)
+    )
+    rank = len(axes)
+    datasets = (Dataset.build(path="path", scale=(1,) * rank, translation=(0,) * rank),)
+    # TODO: make some axis-specific exceptions
+    with pytest.raises(
+        ValidationError, match="All space axes must be at the end of the axes list."
+    ):
+        Multiscale(
+            axes=axes,
+            datasets=datasets,
+            coordinateTransformations=_build_transforms(
+                scale=(1,) * rank, translation=None
+            ),
+        )
+
+
+@pytest.mark.parametrize(
+    "axis_types",
+    [
+        ("channel", "time", "space", "space"),
+    ],
+)
+def test_axes_order(axis_types: list[str]) -> None:
+    """
+    If 'time' is present, it must be first.
+
+    > ... the entries MUST be ordered by "type" where the
+    > "time" axis must come first (if present), followed by the "channel" or
+    > custom axis (if present) and the axes of type "space".
+    """
+    axes = tuple(
+        Axis(name=str(idx), type=t, unit=DEFAULT_UNITS_MAP.get(t))
+        for idx, t in enumerate(axis_types)
+    )
+    rank = len(axes)
+    datasets = (Dataset.build(path="path", scale=(1,) * rank, translation=(0,) * rank),)
+    with pytest.raises(
+        ValidationError, match="Time axis must be at the beginning of axis list"
+    ):
+        Multiscale(
+            axes=axes,
+            datasets=datasets,
+            coordinateTransformations=_build_transforms(
+                scale=(1,) * rank, translation=None
+            ),
+        )
+
+
+@pytest.mark.parametrize("num_axes", [0, 1, 6, 7])
+def test_multiscale_axis_length(num_axes: int) -> None:
+    """
+    > The length of "axes" must be between 2 and 5...
+    """
+    rank = num_axes
+    axes = tuple(
+        Axis(name=str(idx), type="space", unit="meter") for idx in range(num_axes)
+    )
+    datasets = (Dataset.build(path="path", scale=(1,) * rank, translation=(0,) * rank),)
+    with pytest.raises(ValidationError, match="Incorrect number of axes provided"):
+        Multiscale(
+            axes=axes,
+            datasets=datasets,
+            coordinateTransformations=_build_transforms(
+                scale=(1,) * rank, translation=None
+            ),
+        )
+
+
+def test_invalid_dataset_dimensions() -> None:
+    """
+    > Each "datasets" dictionary MUST have the same number of dimensions...
+ """ + datasets = [ + Dataset.build(path="path", scale=(1,) * rank, translation=(0,) * rank) + for rank in [2, 3] + ] + axes = tuple(Axis(name=str(idx), type="space", unit="meter") for idx in range(3)) + with pytest.raises( + ValidationError, + match=( + "The length of axes does not match the dimensionality " + "of the scale transform" + ), + ): + Multiscale( + axes=axes, + datasets=datasets, + ) + + +@pytest.mark.parametrize( + "scale, translation", [((1, 1), (1, 1, 1)), ((1, 1, 1), (1, 1))] +) +def test_transform_invalid_ndims( + scale: tuple[int, ...], translation: tuple[int, ...] +) -> None: + """ + Make sure dimensions of scale/translation transforms match. + """ + with pytest.raises( + ValidationError, + match="The transforms have inconsistent dimensionality.", + ): + Dataset.build(path="foo", scale=scale, translation=translation) + + +@pytest.mark.parametrize( + "transforms", + [ + ( + VectorScale.build((1, 1, 1)), + VectorTranslation.build((1, 1, 1)), + VectorTranslation.build((1, 1, 1)), + ), + (VectorScale.build((1, 1, 1)),) * 5, + ], +) +def test_transform_invalid_length( + transforms: tuple[Any, ...], +) -> None: + """ + Error if there's the wrong number of transforms. + """ + with pytest.raises( + ValidationError, + match=f"Invalid number of transforms: got {len(transforms)}, expected 1 or 2", + ): + Dataset(path="foo", coordinateTransformations=transforms) + + +@pytest.mark.parametrize( + "transforms", + [ + (VectorTranslation.build((1, 1, 1)),) * 2, + ( + VectorTranslation.build((1, 1, 1)), + VectorScale.build((1, 1, 1)), + ), + ], +) +def test_transform_invalid_first_element( + transforms: tuple[Any, Any], +) -> None: + """ + Make sure first transform element is a scale. + """ + with pytest.raises( + ValidationError, + match="The first element of `coordinateTransformations` " + "must be a scale transform", + ): + Dataset(path="foo", coordinateTransformations=transforms) + + +@pytest.mark.parametrize( + "transforms", + ( + ( + VectorScale.build((1, 1, 1)), + VectorScale.build((1, 1, 1)), + ), + ), +) +def test_transform_invalid_second_element( + transforms: tuple[VectorScale, VectorScale], +) -> None: + """ + Make sure second transform is a translation. + """ + with pytest.raises( + ValidationError, + match="The second element of `coordinateTransformations` " + "must be a translation transform", + ): + Dataset(path="foo", coordinateTransformations=transforms) + + +def test_validate_axes_top_transforms() -> None: + """ + Test that the number of axes must match the dimensionality of the + top-level coordinateTransformations. + """ + axes_rank = 3 + tforms_rank = 2 + msg_expect = ( + f"The length of axes does not match the dimensionality of " + f"the scale transform in coordinateTransformations. 
" + f"Got {axes_rank} axes, but the scale transform has " + f"dimensionality {tforms_rank}" + ) + with pytest.raises( + ValidationError, + match=msg_expect, + ): + Multiscale( + name="foo", + axes=[Axis(name=str(idx), type="space") for idx in range(axes_rank)], + datasets=( + Dataset.build( + path="foo", scale=(1,) * axes_rank, translation=(0,) * axes_rank + ), + ), + coordinateTransformations=_build_transforms( + scale=(1,) * tforms_rank, translation=None + ), + ) + + +def test_validate_axes_dset_transforms() -> None: + """ + Test that the number of axes must match the dimensionality of the + per-dataset coordinateTransformations + """ + axes_rank = 3 + tforms_rank = 2 + axes = [Axis(name=str(idx), type="space") for idx in range(axes_rank)] + + msg_expect = ( + f"The length of axes does not match the dimensionality of " + f"the scale transform in datasets[0].coordinateTransformations. " + f"Got {axes_rank} axes, but the scale transform has " + f"dimensionality {tforms_rank}" + ) + + with pytest.raises( + ValidationError, + match=re.escape(msg_expect), + ): + Multiscale( + name="foo", + axes=axes, + datasets=[ + Dataset.build( + path="foo", scale=(1,) * tforms_rank, translation=(0,) * tforms_rank + ) + ], + coordinateTransformations=_build_transforms( + scale=(1,) * axes_rank, translation=None + ), + ) + + +def test_ordred_multiscales() -> None: + """ + > The "path"s MUST be ordered from largest (i.e. highest resolution) to smallest. + """ + axes = ( + Axis(name="c", type="channel", unit=None), + Axis(name="z", type="space", unit="meter"), + Axis(name="x", type="space", unit="meter"), + Axis(name="y", type="space", unit="meter"), + ) + datasets = ( + Dataset( + path="0", + coordinateTransformations=(VectorScale(type="scale", scale=(2, 2, 2, 2)),), + ), + Dataset( + path="1", + coordinateTransformations=(VectorScale(type="scale", scale=(2, 2, 1, 2)),), + ), + ) + with pytest.raises( + ValidationError, + match=re.escape( + "Dataset 0 has a lower resolution (scales = [2.0, 2.0, 2.0, 2.0]) " + "than dataset 1 (scales = [2.0, 2.0, 1.0, 2.0])" + ), + ): + Multiscale( + axes=axes, + datasets=datasets, + ) + + +@pytest.mark.skip +def test_multiscale_group_datasets_exist( + default_multiscale: Multiscale, +) -> None: + group_attrs = ImageAttrs(multiscales=(default_multiscale,)) + good_items = { + d.path: ArraySpec( + shape=(1, 1, 1, 1), + dtype="uint8", + chunks=(1, 1, 1, 1), + ) + for d in default_multiscale.datasets + } + Image(attributes=group_attrs, members=good_items) + + bad_items = { + d.path + "x": ArraySpec( + shape=(1, 1, 1, 1), + dtype="uint8", + chunks=(1, 1, 1, 1), + ) + for d in default_multiscale.datasets + } + + with pytest.raises( + ValidationError, + match="array with that name was found in the hierarchy", + ): + Image(attributes=group_attrs, members=bad_items) + + +def test_multiscale_group_datasets_ndim() -> None: + """ + Test that creating a Image with arrays with mismatched shapes raises + an exception + + > The length of "axes" ... MUST be equal to the dimensionality of the zarr arrays + > storing the image data + """ + true_ndim = 2 + bad_ndim = 3 + match = ( + f"The multiscale metadata has {true_ndim} axes " + "which does not match the dimensionality of the array " + f"found in this group at {bad_ndim} ({bad_ndim}). " + "The number of axes must match the array dimensionality." 
+
+
+def test_multiscale_group_datasets_ndim() -> None:
+    """
+    Test that creating an Image from arrays with mismatched dimensionality
+    raises an exception
+
+    > The length of "axes" ... MUST be equal to the dimensionality of the zarr arrays
+    > storing the image data
+    """
+    true_ndim = 2
+    bad_ndim = 3
+    match = (
+        f"The multiscale metadata has {true_ndim} axes "
+        "which does not match the dimensionality of the array "
+        f"found in this group at {bad_ndim} ({bad_ndim}). "
+        "The number of axes must match the array dimensionality."
+    )
+    with pytest.raises(ValidationError, match=re.escape(match)):
+        _ = from_array_props(
+            shapes=((10,) * true_ndim, (10,) * bad_ndim),
+            chunks=((1,) * true_ndim, (1,) * bad_ndim),
+            dtype="uint8",
+            paths=(str(true_ndim), str(bad_ndim)),
+            axes=(Axis(name="x", type="space"), Axis(name="y", type="space")),
+            scales=((1, 1), (2, 2)),
+            translations=((0, 0), (0.5, 0.5)),
+        )
+
+
+def test_multiscale_group_missing_arrays() -> None:
+    """
+    Test that creating a multiscale group fails when an expected Zarr array is missing
+    """
+    arrays = np.zeros((10, 10)), np.zeros((5, 5))
+    array_names = ("s0", "s1")
+    group_model = from_arrays(
+        arrays=arrays,
+        axes=(Axis(name="x", type="space"), Axis(name="y", type="space")),
+        paths=array_names,
+        scales=((1, 1), (2, 2)),
+        translations=((0, 0), (0.5, 0.5)),
+    )
+    # remove an array, then re-create the model
+    group_model_broken = group_model.model_copy(
+        update={"members": {array_names[0]: group_model.members[array_names[0]]}}
+    )
+    with pytest.raises(
+        ValidationError,
+        match=(
+            "The multiscale metadata references an array that does "
+            "not exist in this "
+        ),
+    ):
+        Image(**group_model_broken.model_dump())
+
+
+def test_multiscale_group_ectopic_group() -> None:
+    """
+    Test that creating a multiscale group fails when an expected Zarr array
+    is actually a group
+    """
+    arrays = np.zeros((10, 10)), np.zeros((5, 5))
+    array_names = ("s0", "s1")
+    group_model = from_arrays(
+        arrays=arrays,
+        axes=(Axis(name="x", type="space"), Axis(name="y", type="space")),
+        paths=array_names,
+        scales=((1, 1), (2, 2)),
+        translations=((0, 0), (0.5, 0.5)),
+    )
+    # replace an array with a group, then re-create the model
+    group_model_broken = group_model.model_copy(
+        update={"members": {array_names[0]: GroupSpec()}}
+    )
+    with pytest.raises(
+        ValidationError,
+        match=re.escape(f"The node at {array_names[0]} is a group, not an array."),
+    ):
+        Image(**group_model_broken.model_dump())
+
+
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_from_zarr_missing_metadata(
+    store: Literal["memory"],
+) -> None:
+    group_model = GroupSpec()
+    group = group_model.to_zarr(store, path="test")
+    match = "multiscales\n Field required"
+    with pytest.raises(ValidationError, match=match):
+        Image.from_zarr(group)
+
+
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_from_zarr_missing_array(store: Literal["memory"]) -> None:
+    """
+    Test that creating a multiscale Group fails when an expected Zarr array
+    is missing
+    """
+    arrays = np.zeros((10, 10)), np.zeros((5, 5))
+    group_path = "broken"
+    arrays_names = ("s0", "s1")
+    group_model = from_arrays(
+        arrays=arrays,
+        axes=(Axis(name="x", type="space"), Axis(name="y", type="space")),
+        paths=arrays_names,
+        scales=((1, 1), (2, 2)),
+        translations=((0, 0), (0.5, 0.5)),
+    )
+
+    # make an untyped model, and remove an array before serializing
+    removed_array_path = arrays_names[0]
+    model_dict = group_model.model_dump(exclude={"members": {removed_array_path: True}})
+    broken_group = GroupSpec(**model_dict).to_zarr(store=store, path=group_path)
+    match = (
+        f"Expected to find an array at {group_path}/{removed_array_path}, "
+        "but no array was found there."
+    )
+    with pytest.raises(ValueError, match=match):
+        Image.from_zarr(broken_group)
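The happy path that these storage tests break on purpose is the `to_zarr`/`from_zarr` round trip; a sketch follows (the in-memory store choice and the import path are assumptions, any writable Zarr store should work):

```python
import numpy as np
import zarr

from ome_zarr_models.v04.axes import Axis
from ome_zarr_models.v04.image import Image
from tests.v05.conftest import from_arrays  # assumed import path

model = from_arrays(
    arrays=(np.zeros((10, 10)), np.zeros((5, 5))),
    axes=(Axis(name="y", type="space"), Axis(name="x", type="space")),
    paths=("s0", "s1"),
    scales=((1, 1), (2, 2)),
    translations=((0, 0), (0.5, 0.5)),
)
store = zarr.storage.MemoryStore()
group = model.to_zarr(store, path="image")  # writes the modelled hierarchy
rehydrated = Image.from_zarr(group)         # re-validates it from storage
```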
+
+
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_from_zarr_ectopic_group(store: Literal["memory"]) -> None:
+    """
+    Test that creating a multiscale Group fails when an expected Zarr array
+    is a group instead of an array
+    """
+    arrays = np.zeros((10, 10)), np.zeros((5, 5))
+    group_path = "broken"
+    arrays_names = ("s0", "s1")
+    group_model = from_arrays(
+        arrays=arrays,
+        axes=(Axis(name="x", type="space"), Axis(name="y", type="space")),
+        paths=arrays_names,
+        scales=((1, 1), (2, 2)),
+        translations=((0, 0), (0.5, 0.5)),
+    )
+
+    # make an untyped model, and remove an array before serializing
+    removed_array_path = arrays_names[0]
+    model_dict = group_model.model_dump(exclude={"members": {removed_array_path: True}})
+    broken_group = GroupSpec(**model_dict).to_zarr(store=store, path=group_path)
+
+    # put a group where the array should be
+    broken_group.create_group(removed_array_path)
+    match = (
+        f"Expected to find an array at {group_path}/{removed_array_path}, "
+        "but a group was found there instead."
+    )
+    with pytest.raises(ValueError, match=match):
+        Image.from_zarr(broken_group)
+
+
+@pytest.mark.skip
+def test_hashable(default_multiscale: Multiscale) -> None:
+    """
+    Test that `Multiscale` can be hashed
+    """
+    assert {default_multiscale} == {default_multiscale}
diff --git a/tests/v05/test_omero.py b/tests/v05/test_omero.py
new file mode 100644
index 0000000..403b28a
--- /dev/null
+++ b/tests/v05/test_omero.py
@@ -0,0 +1,24 @@
+from ome_zarr_models.v04.omero import Channel, Omero, Window
+from tests.v04.conftest import read_in_json
+
+
+def test_load_example_json() -> None:
+    model = read_in_json(json_fname="omero_example.json", model_cls=Omero)
+
+    assert model == Omero(
+        channels=[
+            Channel(
+                color="0000FF",
+                window=Window(max=65535.0, min=0.0, start=0.0, end=1500.0),
+                active=True,
+                coefficient=1,
+                family="linear",
+                inverted=False,
+                label="LaminB1",
+            )
+        ],
+        id=1,
+        name="example.tif",
+        version="0.4",
+        rdefs={"defaultT": 0, "defaultZ": 118, "model": "color"},
+    )
diff --git a/tests/v05/test_plate.py b/tests/v05/test_plate.py
new file mode 100644
index 0000000..6c4d618
--- /dev/null
+++ b/tests/v05/test_plate.py
@@ -0,0 +1,143 @@
+import re
+
+import pytest
+from pydantic import ValidationError
+
+from ome_zarr_models.v04.plate import Acquisition, Column, Plate, Row, WellInPlate
+from tests.v04.conftest import read_in_json
+
+
+def test_example_plate_json() -> None:
+    plate = read_in_json(json_fname="plate_example_1.json", model_cls=Plate)
+    assert plate == Plate(
+        acquisitions=[
+            Acquisition(
+                id=1,
+                name="Meas_01(2012-07-31_10-41-12)",
+                maximumfieldcount=2,
+                description=None,
+                starttime=1343731272000,
+                endtime=None,
+            ),
+            Acquisition(
+                id=2,
+                name="Meas_02(201207-31_11-56-41)",
+                maximumfieldcount=2,
+                description=None,
+                starttime=1343735801000,
+                endtime=None,
+            ),
+        ],
+        columns=[Column(name="1"), Column(name="2"), Column(name="3")],
+        field_count=4,
+        name="test",
+        rows=[Row(name="A"), Row(name="B")],
+        version="0.4",
+        wells=[
+            WellInPlate(path="A/1", rowIndex=0, columnIndex=0),
+            WellInPlate(path="A/2", rowIndex=0, columnIndex=1),
+            WellInPlate(path="A/3", rowIndex=0, columnIndex=2),
+            WellInPlate(path="B/1", rowIndex=1, columnIndex=0),
+            WellInPlate(path="B/2", rowIndex=1, columnIndex=1),
+            WellInPlate(path="B/3", rowIndex=1, columnIndex=2),
+        ],
+    )
read_in_json(json_fname="plate_example_2.json", model_cls=Plate) + assert plate == Plate( + acquisitions=[ + Acquisition( + id=1, + name="single acquisition", + maximumfieldcount=1, + description=None, + starttime=1343731272000, + endtime=None, + ) + ], + columns=[ + Column(name="1"), + Column(name="2"), + Column(name="3"), + Column(name="4"), + Column(name="5"), + Column(name="6"), + Column(name="7"), + Column(name="8"), + Column(name="9"), + Column(name="10"), + Column(name="11"), + Column(name="12"), + ], + field_count=1, + name="sparse test", + rows=[ + Row(name="A"), + Row(name="B"), + Row(name="C"), + Row(name="D"), + Row(name="E"), + Row(name="F"), + Row(name="G"), + Row(name="H"), + ], + version="0.4", + wells=[ + WellInPlate(path="C/5", rowIndex=2, columnIndex=4), + WellInPlate(path="D/7", rowIndex=3, columnIndex=6), + ], + ) + + +def test_unique_column_names() -> None: + with pytest.raises(ValidationError, match="Duplicate values found in"): + Plate( + columns=[Column(name="col1"), Column(name="col1")], + rows=[Row(name="row1")], + version="0.4", + wells=[WellInPlate(path="path1", rowIndex=1, columnIndex=1)], + ) + + +def test_unique_row_names() -> None: + with pytest.raises(ValidationError, match="Duplicate values found in"): + Plate( + columns=[Column(name="col1")], + rows=[Row(name="row1"), Row(name="row1")], + version="0.4", + wells=[WellInPlate(path="path1", rowIndex=1, columnIndex=1)], + ) + + +@pytest.mark.parametrize("cls", [Row, Column]) +def test_alphanumeric_column_names(cls: type[Row | Column]) -> None: + with pytest.raises(ValidationError, match="String should match pattern "): + cls(name="col-1") + + +@pytest.mark.parametrize( + ("well_path", "msg"), + [ + ("path1", "well path 'path1' does not contain a single '/'"), + ("row1/col1/", "well path 'row1/col1/' does not contain a single '/'"), + ( + "row1/col2", + "column 'col2' in well path 'row1/col2' is not in list of columns", + ), + ( + "row2/col1", + "row 'row2' in well path 'row2/col1' is not in list of rows", + ), + ], +) +def test_well_paths(well_path: str, msg: str) -> None: + # No separator + with pytest.raises(ValidationError, match=re.escape(msg)): + Plate( + columns=[Column(name="col1")], + rows=[Row(name="row1")], + version="0.4", + wells=[WellInPlate(path=well_path, rowIndex=1, columnIndex=1)], + ) diff --git a/tests/v05/test_well.py b/tests/v05/test_well.py new file mode 100644 index 0000000..e89840f --- /dev/null +++ b/tests/v05/test_well.py @@ -0,0 +1,50 @@ +import pytest + +from ome_zarr_models.v04.well_types import WellImage, WellMeta +from tests.v04.conftest import read_in_json + + +@pytest.mark.parametrize( + ("filename", "model_expected"), + [ + ( + "well_example_1.json", + WellMeta( + images=[ + WellImage(path="0", acquisition=1), + WellImage(path="1", acquisition=1), + WellImage(path="2", acquisition=2), + WellImage(path="3", acquisition=2), + ], + version="0.4", + ), + ), + ( + "well_example_2.json", + WellMeta( + images=[ + WellImage(path="0", acquisition=0), + WellImage(path="1", acquisition=3), + ], + version="0.4", + ), + ), + ], +) +def test_examples_valid(filename: str, model_expected: WellMeta) -> None: + model = read_in_json(json_fname=filename, model_cls=WellMeta) + assert model == model_expected + + +def test_get_paths() -> None: + well = WellMeta( + images=[ + WellImage(path="0", acquisition=1), + WellImage(path="1", acquisition=1), + WellImage(path="2", acquisition=2), + WellImage(path="3", acquisition=2), + ], + version="0.4", + ) + + assert well.get_acquisition_paths() == {1: 
["0", "1"], 2: ["2", "3"]}