Skip to content

Commit

Permalink
Merge pull request #92 from sameeul/biowriter_perf
Browse files Browse the repository at this point in the history
Update BioWriter append functionality
  • Loading branch information
sameeul authored Aug 1, 2024
2 parents adfb491 + 09ceb57 commit fde781e
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 25 deletions.
86 changes: 61 additions & 25 deletions src/bfio/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,15 @@ def _init_writer(self):
f"Image:{Path(self.frontend._file_path).name}"
)

if self.frontend.X * self.frontend.Y * self.frontend.bpp > 2**31:
if (
self.frontend.X
* self.frontend.Y
* self.frontend.Z
* self.frontend.C
* self.frontend.T
* self.frontend.bpp
> 2**31
):
big_tiff = True
else:
big_tiff = False
Expand Down Expand Up @@ -1405,8 +1413,9 @@ def _init_writer(self):
In the future, it may be reasonable to not enforce read-only
"""
if self.frontend._file_path.exists():
shutil.rmtree(self.frontend._file_path)
if self.frontend.append is False:
if self.frontend._file_path.exists():
shutil.rmtree(self.frontend._file_path)

shape = (
self.frontend.T,
Expand All @@ -1417,39 +1426,66 @@ def _init_writer(self):
)

compressor = Blosc(cname="zstd", clevel=1, shuffle=Blosc.SHUFFLE)

self._root = zarr.group(store=str(self.frontend._file_path.resolve()))
mode = "w"
if self.frontend.append is True:
mode = "a"
self._root = zarr.open_group(
store=str(self.frontend._file_path.resolve()), mode=mode
)

# Create the metadata
metadata_path = (
Path(self.frontend._file_path)
.joinpath("OME")
.joinpath("METADATA.ome.xml")
)
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, "w") as fw:
fw.write(str(self.frontend._metadata.to_xml()))

self._root.attrs["multiscales"] = [
{
"version": "0.1",
"name": self.frontend._file_path.name,
"datasets": [{"path": "0"}],
"metadata": {"method": "mean"},
}
]

writer = self._root.zeros(
"0",
shape=shape,
chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE),
dtype=self.frontend.dtype,
compressor=compressor,
)
if self.frontend.append is False or (
self.frontend.append is True and metadata_path.exists() is False
):
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, "w") as fw:
fw.write(str(self.frontend._metadata.to_xml()))

self._root.attrs["multiscales"] = [
{
"version": "0.1",
"name": self.frontend._file_path.name,
"datasets": [{"path": "0"}],
"metadata": {"method": "mean"},
}
]
if (
self.frontend.append is True
and len(sorted(self._root.array_keys())) > 0
):
writer = self._root["0"]
else:
writer = self._root.zeros(
"0",
shape=shape,
chunks=(
1,
1,
1,
self.frontend._TILE_SIZE,
self.frontend._TILE_SIZE,
),
dtype=self.frontend.dtype,
compressor=compressor,
dimension_separator="/",
)

# This is recommended to do for cloud storage to increase read/write
# speed, but it also increases write speed locally when threading.
zarr.consolidate_metadata(str(self.frontend._file_path.resolve()))
consolidated_metadata_file = Path(self.frontend._file_path).joinpath(
".zmetadata"
)
if self.frontend.append is False or (
self.frontend.append is True
and consolidated_metadata_file.exists() is False
):
zarr.consolidate_metadata(str(self.frontend._file_path.resolve()))

self._writer = writer

Expand Down
6 changes: 6 additions & 0 deletions src/bfio/bfio.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class BioReader(BioBase):
"_read_only",
"_backend",
"level",
"append",
]

def __init__(
Expand Down Expand Up @@ -1070,6 +1071,11 @@ class if specified. *Defaults to None.*
setattr(self, k, v)

self.set_backend(backend)
self.append = False
if kwargs and "append" in kwargs:
if kwargs["append"] is True:
self.append = True

# Ensure backend is supported
if self._backend_name == "python":
self._backend = backends.PythonWriter(self)
Expand Down
62 changes: 62 additions & 0 deletions tests/test_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,68 @@ def test_write_java(self):
assert np.array_equal(image[:], br[:])


class TestPythonZarrWriter(unittest.TestCase):
    """Round-trip tests for ``BioWriter(..., backend="zarr", append=True)``."""

    @staticmethod
    def _load_reference():
        """Read the reference zarr image used by every test in this class.

        Returns:
            A ``(shape, dtype, pixels, metadata)`` tuple taken from the
            ``5025551.zarr`` fixture in ``TEST_DIR``.
        """
        with bfio.BioReader(str(TEST_DIR.joinpath("5025551.zarr"))) as br:
            return br.shape, br.dtype, br[:], br.metadata

    def test_write_zarr_append_no_file(self):
        """Write a whole image with ``append=True`` to a path that does not exist."""
        actual_shape, actual_dtype, actual_image, actual_mdata = (
            self._load_reference()
        )

        # The writer is indexed with five axes while the array read back from
        # the reader has one fewer, so add a trailing axis before writing.
        expanded = np.expand_dims(actual_image, axis=-1)

        with tempfile.TemporaryDirectory() as tmp_dir:
            # The temporary directory is fresh, so nothing exists at this path.
            test_file_path = os.path.join(tmp_dir, "out/test.ome.zarr")

            with bfio.BioWriter(
                test_file_path, metadata=actual_mdata, backend="zarr", append=True
            ) as bw:
                bw[:, :, :, :, :] = expanded[:, :, :, :, :]

            with bfio.BioReader(test_file_path) as br:
                assert br.shape == actual_shape
                assert br.dtype == actual_dtype
                assert br[:].sum() == actual_image.sum()

    def test_write_zarr_append_file_exist(self):
        """Write an image in two passes, the second one with ``append=True``."""
        actual_shape, actual_dtype, actual_image, actual_mdata = (
            self._load_reference()
        )

        # Same trailing-axis expansion as above; computed once for both passes.
        expanded = np.expand_dims(actual_image, axis=-1)

        with tempfile.TemporaryDirectory() as tmp_dir:
            test_file_path = os.path.join(tmp_dir, "out/test.ome.zarr")

            # First pass: default (non-append) writer fills indices 0..12 of
            # the fourth axis.
            with bfio.BioWriter(
                test_file_path, metadata=actual_mdata, backend="zarr"
            ) as bw:
                bw[:, :, :, 0:13, :] = expanded[:, :, :, 0:13, :]

            # Second pass: reopen the same path with append=True and fill the
            # remaining indices of the fourth axis.
            with bfio.BioWriter(
                test_file_path, metadata=actual_mdata, backend="zarr", append=True
            ) as bw:
                bw[:, :, :, 13:, :] = expanded[:, :, :, 13:, :]

            # The totals only match if the data from the first pass is still
            # present after the second writer was opened.
            with bfio.BioReader(test_file_path) as br:
                assert br.shape == actual_shape
                assert br.dtype == actual_dtype
                assert br[:].sum() == actual_image.sum()


class TestOmeZarrWriter(unittest.TestCase):

def test_write_zarr_tensorstore(self):
Expand Down

0 comments on commit fde781e

Please sign in to comment.