diff --git a/impy/__init__.py b/impy/__init__.py index 3b8e29c..5ce53fc 100644 --- a/impy/__init__.py +++ b/impy/__init__.py @@ -1,19 +1,19 @@ -__version__ = "2.3.3" +__version__ = "2.4.0" __author__ = "Hanjin Liu" __email__ = "liuhanjin-sc@g.ecc.u-tokyo.ac.jp" import logging -from ._const import Const, SetConst, use +from ._const import Const, SetConst, use # noqa -from .collections import DataList, DataDict -from .core import * -from .binder import bind -from .viewer import gui -from .correlation import * -from .arrays import ImgArray, LazyImgArray, BigImgArray, Label # for typing -from . import random, io, lazy -from .axes import slicer +from .collections import DataList, DataDict # noqa +from .core import * # noqa +from .binder import bind # noqa +from .viewer import gui # noqa +from .correlation import * # noqa +from .arrays import ImgArray, LazyImgArray, BigImgArray, Label # noqa +from . import random, io, lazy # noqa +from .axes import slicer # noqa # Inheritance # ----------- @@ -34,7 +34,7 @@ del logging # dtypes -from numpy import ( +from numpy import ( # noqa uint8, uint16, uint32, uint64, int8, int16, int32, int64, float16, float32, float64, diff --git a/impy/io.py b/impy/io.py index 1c5f2f9..afd9579 100644 --- a/impy/io.py +++ b/impy/io.py @@ -1,5 +1,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, NamedTuple, Callable, TypeVar, Union, Protocol + +from typing import TYPE_CHECKING, Any, NamedTuple, Callable, Sequence, TypeVar, Union, Protocol +from pathlib import Path import json import re import warnings @@ -50,6 +52,7 @@ class ImageData(NamedTuple): from .arrays.bases import MetaArray from .arrays import LazyImgArray from .axes import Axes + from dask.array.core import Array ImpyArray = Union[MetaArray, LazyImgArray] Reader = Callable[[str, bool], ImageData] @@ -123,7 +126,7 @@ def _register(f: _R): return _register - def imread(self, path: str, memmap: bool = False) -> ImageData: + def imread(self, path: str | Path, memmap: bool = False) -> ImageData: """ Read an image file. @@ -141,11 +144,15 @@ def imread(self, path: str, memmap: bool = False) -> ImageData: ImageData Image data tuple. """ - _, ext = os.path.splitext(path) + ext = Path(path).suffix reader = self._reader.get(ext, self._default_reader) - return reader(path, memmap) + return reader(str(path), memmap) - def imread_dask(self, path: str, chunks: Any) -> ImageData: + def _imread_slice(self, path, sl: tuple[slice, ...]) -> np.memmap: + mmap = self.imread(path, memmap=True).image + return np.asarray(mmap[sl], dtype=mmap.dtype) + + def imread_dask(self, path: str | Path, chunks: Any) -> ImageData: """ Read an image file as a dask array. @@ -165,21 +172,39 @@ def imread_dask(self, path: str, chunks: Any) -> ImageData: Image data tuple. 
         """
         from .array_api import xp
+
+        path = Path(path)
         image_data = self.imread(path, memmap=True)
         img = image_data.image
-        if img.dtype == ">u2":
-            img = img.astype(np.uint16)
+        from dask import array as da, delayed
+        from dask.array.core import normalize_chunks
 
-        from dask import array as da
-        if str(type(img)) == "<class 'zarr.core.Array'>":
+        if path.suffix == ".zarr":
+            if img.dtype == ">u2":
+                img = img.astype(np.uint16)
             dask = da.from_zarr(img, chunks=chunks).map_blocks(
                 xp.asarray, dtype=img.dtype
             )
         else:
-            dask = da.from_array(img, chunks=chunks, meta=xp.array([])).map_blocks(
-                xp.asarray, dtype=img.dtype
+            chunks_: tuple[tuple[int, ...]] = normalize_chunks(
+                chunks,
+                shape=img.shape,
+                dtype=img.dtype,
             )
+            chunk_slices = [_chunk_to_slice(c) for c in chunks_]
+            block_shape = tuple(len(c) for c in chunks_)
+            delayed_imread = delayed(self._imread_slice)
+            arr_blocks = np.empty(block_shape, dtype=object)
+            for ind, _ in np.ndenumerate(arr_blocks):
+                sl = tuple(sls[i] for i, sls in zip(ind, chunk_slices))
+                cur_shape = tuple(_sl.stop - _sl.start for _sl in sl)
+                arr_blocks[ind] = da.from_delayed(
+                    delayed_imread(path, sl), shape=cur_shape, dtype=img.dtype,
+                    meta=xp.array([]),
+                )
+            dask = da.block(arr_blocks.tolist())
+
         return ImageData(
             image=dask,
             axes=image_data.axes,
@@ -199,6 +224,15 @@ def imsave(
         writer = self._writer.get(ext, self._default_writer)
         return writer(path, img, lazy)
 
+def _chunk_to_slice(chunk: Sequence[int]) -> list[slice]:
+    # _chunk_to_slice([5, 15, 30]) --> [0:5, 5:20, 20:50]
+    start = 0
+    out: list[slice] = []
+    for c in chunk:
+        _next = start + c
+        out.append(slice(start, _next))
+        start = _next
+    return out
 
 IO = ImageIO()
 
@@ -336,7 +370,9 @@ def _(path: str, img: ImpyArray, lazy: bool = False):
     from dask import array as da
     kwargs = _get_ijmeta_from_img(img, update_lut=False)
     mmap = memmap(str(path), shape=img.shape, dtype=img.dtype, **kwargs)
-    da.store(img.value, mmap, compute=True)
+    img_dask = _rechunk_to_ones(img.value)
+    writer = _MemmapArrayWriter(path, mmap.offset, img.shape, img_dask.chunksize)
+    da.store(img_dask, writer)
     mmap.flush()
     return
 
@@ -359,7 +395,7 @@ def _(path: str, img: ImpyArray, lazy: bool = False):
         img_new.set_scale(img)
         img = img_new
-        warnings.warn("Image axes changed", UserWarning)
+        warnings.warn("Image axes changed", UserWarning, stacklevel=2)
 
     img = img.sort_axes()
 
     if img.dtype == "bool":
@@ -431,7 +467,10 @@ def _(path: str, img: ImpyArray, lazy: bool = False):
     mode = _MRC_MODE[img.dtype]
     mrc_mmap = mrcfile.new_mmap(path, img.shape, mrc_mode=mode, overwrite=True)
     mrc_mmap.voxel_size = voxel_size
-    da.store(img.value, mrc_mmap.data)
+
+    img_dask = _rechunk_to_ones(img.value)
+    writer = _MemmapArrayWriter(path, mrc_mmap.data.offset, img.shape, img_dask.chunksize)
+    da.store(img_dask, writer)
     mrc_mmap.flush()
     return None
 
@@ -656,3 +695,56 @@ def _scalar(x: Any) -> np.ndarray:
     ar = np.array(None, dtype=object)
     ar[()] = x
     return ar
+
+def _rechunk_to_ones(arr: Array):
+    """Rechunk the array to (1, 1, ..., 1, n, Ny, Nx)"""
+    size = np.prod(arr.chunksize)
+    shape = arr.shape
+    cur_prod = 1
+    max_i = arr.ndim
+    for i in reversed(range(arr.ndim)):
+        cur_prod *= shape[i]
+        if cur_prod > size:
+            break
+        max_i = i
+    nslices = max(int(size / np.prod(shape[max_i:])), 1)
+    if max_i == 0:
+        return arr
+    else:
+        return arr.rechunk((1,) * (max_i - 1) + (nslices,) + shape[max_i:])
+
+class _MemmapArrayWriter:
+    def __init__(
+        self,
+        path: str,
+        offset: int,
+        shape: tuple[int, ...],
+        chunksize: tuple[int, ...],
+    ):
+        self._path = path
+        self._offset = offset
self._shape = shape # original shape + self._chunksize = chunksize # chunk size + # shape = (33, 160, 1000, 1000) + # chunksize = (1, 16, 1000, 1000) + border = 0 + for i, c in enumerate(chunksize): + if c != 1: + border = i + break + self._border = border + + def __setitem__(self, sl: tuple[slice, ...], arr: np.ndarray): + # efficient: shape = (10, 100, 150) and sl = (3:5, 0:100, 0:150) + # sl = (0:1, 16:32, 0:1000, 0:1000) + + offset = np.sum([sl[i].start * arr.strides[i] for i in range(self._border + 1)]) + mmap = np.memmap( + self._path, + mode="r+", + offset=self._offset + offset, + dtype=arr.dtype, + ) + arr_ravel = arr.ravel() + mmap[:arr_ravel.size] = arr_ravel + mmap.flush() diff --git a/impy/lazy/__init__.py b/impy/lazy/__init__.py index 429074a..0db2a01 100644 --- a/impy/lazy/__init__.py +++ b/impy/lazy/__init__.py @@ -1,2 +1,2 @@ -from .core import * -from . import random +from .core import * # noqa +from . import random # noqa diff --git a/impy/lazy/core.py b/impy/lazy/core.py index f88c5c7..cfdbe88 100644 --- a/impy/lazy/core.py +++ b/impy/lazy/core.py @@ -41,8 +41,8 @@ Name of image. axes : str, optional Image axes. - """ - + """ + def _write_docs(func): """Add doc for numpy function.""" func.__doc__ = re.sub(r"{}", shared_docs, func.__doc__) @@ -60,12 +60,12 @@ def _func(*args, **kwargs): name = kwargs.pop("name", None) dafunc: Callable = getattr(da, func.__name__) return asarray(dafunc(*args, **kwargs), name=name, axes=axes, like=like) - + _func.__doc__ = ( f""" - impy.lazy version of numpy.{func.__name__}. This function has additional parameters + impy.lazy version of numpy.{func.__name__}. This function has additional parameters ``axes`` and ``name``. Original docstring follows. - + Additional Parameters --------------------- axes : str, optional @@ -83,7 +83,7 @@ def _func(*args, **kwargs): @_inject_numpy_function def zeros(shape: ShapeLike, dtype: DTypeLike = np.uint16, *, name: str | None = None, axes: AxesLike | None = None, like: MetaArray | None = None): ... - + @_inject_numpy_function def empty(shape: ShapeLike, dtype: DTypeLike = np.uint16, *, name: str | None = None, axes: AxesLike | None = None, like: MetaArray | None = None): ... @@ -100,7 +100,7 @@ def arange(stop: int, dtype: DTypeLike = None): ... def array( arr: ArrayLike, /, - dtype: DTypeLike = None, + dtype: DTypeLike = None, *, copy: bool = True, chunks="auto", @@ -110,7 +110,7 @@ def array( ) -> LazyImgArray: """ make an LazyImgArray object, like ``np.array(x)`` - + Parameters ---------- arr : array-like @@ -118,7 +118,7 @@ def array( copy : bool, default is True If True, a copy of the original array is made. 
{} - + Returns ------- LazyImgArray @@ -127,14 +127,22 @@ def array( from dask.array.core import Array as DaskArray if isinstance(arr, MetaArray): + if axes is None: + axes = arr.axes + if name is None: + name = arr.name arr = da.from_array(arr.value, chunks=chunks) elif isinstance(arr, (np.ndarray, np.memmap)): arr = da.from_array(arr, chunks=chunks) elif isinstance(arr, LazyImgArray): + if axes is None: + axes = arr.axes + if name is None: + name = arr.name arr = arr.value elif not isinstance(arr, DaskArray): arr = da.asarray(arr) - + if isinstance(arr, np.ndarray) and dtype is None: if arr.dtype in (np.uint8, np.uint16, np.float32): dtype = arr.dtype @@ -142,7 +150,7 @@ def array( dtype = np.float32 else: dtype = arr.dtype - + axes, name = _normalize_params(axes, name, like) # Automatically determine axes @@ -151,14 +159,14 @@ def array( if copy: arr = arr.copy() self = LazyImgArray(arr, name=name, axes=axes) - + return self @_write_docs def asarray( arr: ArrayLike, dtype: DTypeLike | None = None, - *, + *, name: str | None = None, axes: str | None = None, like: MetaArray | None = None, @@ -166,7 +174,7 @@ def asarray( ) -> LazyImgArray: """ make an LazyImgArray object, like ``np.asarray(x)`` - + Parameters ---------- arr : array-like @@ -174,25 +182,25 @@ def asarray( {} copy : bool, default is True If True, a copy of the original array is made. - + Returns ------- ImgArray - + """ return array(arr, dtype=dtype, name=name, axes=axes, copy=False, chunks=chunks, like=like) def indices( dimensions: ShapeLike, dtype: DTypeLike = np.uint16, - *, + *, name: str | None = None, axes: AxesLike | None = None, like: MetaArray | None = None, ) -> AxesTuple[LazyImgArray]: """ Copy of ``numpy.indices``. - + Parameters ---------- dimensions : shape-like @@ -217,9 +225,9 @@ def indices( return out[0].axes.tuple(out) def imread( - path: str, + path: str, chunks="auto", - *, + *, name: str | None = None, squeeze: bool = False, ) -> LazyImgArray: @@ -238,17 +246,17 @@ def imread( Name of array. squeeze : bool, default is False If True and there is one-sized axis, then call `np.squeeze`. - + Returns ------- LazyImgArray - """ + """ path = str(path) if "*" in path: return _imread_glob(path, chunks=chunks, squeeze=squeeze) if not os.path.exists(path): raise ValueError(f"Path does not exist: {path}.") - + # read as a dask array image_data = io.imread_dask(path, chunks) img = image_data.image @@ -257,11 +265,11 @@ def imread( spatial_scale_unit = image_data.unit metadata = image_data.metadata labels = image_data.labels - + if squeeze: axes = "".join(a for i, a in enumerate(axes) if img.shape[i] > 1) img = np.squeeze(img) - + self = LazyImgArray(img, name=name, axes=axes, source=path, metadata=metadata) # read scale if possible @@ -269,13 +277,13 @@ def imread( if "c" in self.axes and labels is not None: self.set_axis_label(c=labels) - + for k, v in scale.items(): if k in self.axes: self.set_scale({k: v}) if k in "zyx": self.axes[k].unit = spatial_scale_unit.get(k) - + return self.sort_axes() @@ -287,24 +295,24 @@ def _imread_glob(path: str, squeeze: bool = False, **kwargs) -> LazyImgArray: ---------- path : str Path with wildcard. 
- - """ + + """ path = str(path) paths = glob.glob(path, recursive=True) - + imgs: list[LazyImgArray] = [] for path in paths: imgl = imread(path, **kwargs) imgs.append(imgl) - + if len(imgs) == 0: raise RuntimeError("Could not read any images.") - + from dask import array as da out = da.stack([i.value for i in imgs], axis=0) out = LazyImgArray(out) out._set_info(imgs[0], new_axes="p"+str(imgs[0].axes)) - + if squeeze: axes = "".join(a for i, a in enumerate(out.axes) if out.shape[i] > 1) img = da.squeeze(out.value) @@ -314,7 +322,7 @@ def _imread_glob(path: str, squeeze: bool = False, **kwargs) -> LazyImgArray: out.source = os.path.split(path.split("*")[0])[0] except Exception: pass - + return out def _normalize_params( diff --git a/tests/test_lazy.py b/tests/test_lazy.py index e1f426f..c9df39b 100644 --- a/tests/test_lazy.py +++ b/tests/test_lazy.py @@ -1,3 +1,4 @@ +import tempfile import impy as ip from pathlib import Path import numpy as np @@ -28,12 +29,12 @@ def test_filters(fn, resource): with ip.SetConst(RESOURCE=resource): path = Path(__file__).parent / "_test_images" / "image_tzcyx.tif" img = ip.lazy.imread(path, chunks=(4, 5, 2, 32, 32)) - + assert_allclose( getattr(img, fn)().compute(), getattr(img.compute(), fn)() ) - + def test_numpy_function(): from dask.array.core import Array as DaskArray @@ -57,3 +58,18 @@ def test_operator(opname): op(img1, img2).compute(), op(img1.compute(), img2.compute()), ) + +@pytest.mark.parametrize("ext", [".tif", ".mrc"]) +def test_lazy_imsave(ext: str): + rng = ip.random.default_rng(1234) + img = rng.random_uint16((4, 8, 80), axes="zyx") + img_lazy = ip.lazy.asarray(img, chunks=(1, 2, 80)) + img_lazy.scale_unit = "nm" + with tempfile.TemporaryDirectory() as path: + fp = Path(path) / f"test{ext}" + img_lazy.imsave(fp) + img0 = ip.imread(fp) + assert img_lazy.shape == img0.shape + assert img_lazy.dtype == img0.dtype + assert img_lazy.axes == img0.axes + assert_allclose(img, img0)
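Note for reviewers: the block-wise reading introduced in ``imread_dask`` above can be hard to follow from the diff alone. Below is a minimal, self-contained sketch of the same idea — normalize the requested chunks, read each chunk in its own delayed task (analogous to ``ImageIO._imread_slice``), and stitch the blocks back together with ``da.block``. The helper names (``chunk_to_slice``, ``load_block``, ``lazy_read_npy``) and the throw-away ``.npy`` file are illustrative assumptions, not part of impy's API:

import numpy as np
from dask import array as da, delayed
from dask.array.core import normalize_chunks

def chunk_to_slice(chunk):
    # cumulative chunk sizes -> half-open slices, e.g. (5, 15, 30) -> [0:5, 5:20, 20:50]
    start, out = 0, []
    for c in chunk:
        out.append(slice(start, start + c))
        start += c
    return out

def load_block(path, sl, dtype):
    # open the file lazily inside the task and copy only the requested window
    mmap = np.lib.format.open_memmap(path, mode="r")
    return np.asarray(mmap[sl], dtype=dtype)

def lazy_read_npy(path, shape, dtype, chunks="auto"):
    chunks_ = normalize_chunks(chunks, shape=shape, dtype=dtype)
    chunk_slices = [chunk_to_slice(c) for c in chunks_]
    blocks = np.empty(tuple(len(c) for c in chunks_), dtype=object)
    read = delayed(load_block)
    for ind, _ in np.ndenumerate(blocks):
        sl = tuple(sls[i] for i, sls in zip(ind, chunk_slices))
        block_shape = tuple(s.stop - s.start for s in sl)
        blocks[ind] = da.from_delayed(read(path, sl, dtype), shape=block_shape, dtype=dtype)
    return da.block(blocks.tolist())

# usage: each chunk is read by its own task, so nothing is loaded until .compute()
arr = np.arange(24, dtype=np.uint16).reshape(4, 6)
np.save("tmp.npy", arr)
lazy = lazy_read_npy("tmp.npy", arr.shape, arr.dtype, chunks=(2, 3))
assert np.array_equal(lazy.compute(), arr)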
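A companion sketch of the write path used by the ``.tif`` and ``.mrc`` savers: ``_rechunk_to_ones`` makes every block contiguous on disk (chunk size 1 along the leading axes, full extent along the trailing ones), and ``da.store`` then hands each block to a small writer object that memory-maps the output file at the matching byte offset. ``RawBlockWriter`` below is a simplified, hypothetical stand-in for ``_MemmapArrayWriter``: it writes to a headerless C-order binary file, whereas the real writer additionally shifts by the TIFF/MRC data offset reported by the memmap.

import numpy as np
from dask import array as da

class RawBlockWriter:
    def __init__(self, path, shape, dtype):
        self._path = path
        self._dtype = np.dtype(dtype)
        # element strides of the full output array in C order
        self._strides = [int(np.prod(shape[i + 1:], dtype=np.int64)) for i in range(len(shape))]

    def __setitem__(self, sl, arr):
        # assumes each block is contiguous on disk, i.e. it spans the full extent of every
        # trailing axis -- exactly what rechunking to (1, ..., 1, n, *rest) guarantees
        start = sum(s.start * st for s, st in zip(sl, self._strides))
        mmap = np.memmap(self._path, mode="r+", dtype=self._dtype,
                         offset=start * self._dtype.itemsize)
        flat = arr.ravel()
        mmap[:flat.size] = flat
        mmap.flush()

# usage: pre-allocate the file, rechunk so the leading axes have chunk size 1, then stream
shape, dtype = (4, 8, 16), np.uint16
np.memmap("out.raw", mode="w+", shape=shape, dtype=dtype).flush()
data = da.arange(np.prod(shape), dtype=dtype).reshape(shape).rechunk((1, 8, 16))
da.store(data, RawBlockWriter("out.raw", shape, dtype))
back = np.memmap("out.raw", mode="r", shape=shape, dtype=dtype)
assert np.array_equal(back, data.compute())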