From 4268ee599e4f33b2ee514d9c4899adc285ee4f36 Mon Sep 17 00:00:00 2001 From: sameeul Date: Mon, 29 Jul 2024 10:21:01 -0400 Subject: [PATCH 1/6] attempt to disable overwrite an existing zarr array --- src/bfio/backends.py | 37 +++++++++++++++++++++++++------------ src/bfio/bfio.py | 6 ++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index c379e44..280dc4b 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -1405,8 +1405,9 @@ def _init_writer(self): In the future, it may be reasonable to not enforce read-only """ - if self.frontend._file_path.exists(): - shutil.rmtree(self.frontend._file_path) + if self.frontend.append == False: + if self.frontend._file_path.exists(): + shutil.rmtree(self.frontend._file_path) shape = ( self.frontend.T, @@ -1417,8 +1418,10 @@ def _init_writer(self): ) compressor = Blosc(cname="zstd", clevel=1, shuffle=Blosc.SHUFFLE) - - self._root = zarr.group(store=str(self.frontend._file_path.resolve())) + mode = "w" + if self.frontend.append == True: + mode = "a" + self._root = zarr.open_group(store=str(self.frontend._file_path.resolve()), mode=mode) # Create the metadata metadata_path = ( @@ -1438,14 +1441,24 @@ def _init_writer(self): "metadata": {"method": "mean"}, } ] - - writer = self._root.zeros( - "0", - shape=shape, - chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), - dtype=self.frontend.dtype, - compressor=compressor, - ) + if self.frontend.append == True: + print("Append option selected") + writer = self._root.create( + "0", + shape=shape, + chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), + dtype=self.frontend.dtype, + compressor=compressor, + overwrite=False, + ) + else: + writer = self._root.zeros( + "0", + shape=shape, + chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), + dtype=self.frontend.dtype, + compressor=compressor, + ) # This is recommended to do for cloud storage to increase read/write # speed, but it also increases write speed locally when threading. diff --git a/src/bfio/bfio.py b/src/bfio/bfio.py index 41965f2..c5557b7 100644 --- a/src/bfio/bfio.py +++ b/src/bfio/bfio.py @@ -52,6 +52,7 @@ class BioReader(BioBase): "_read_only", "_backend", "level", + "append", ] def __init__( @@ -1070,6 +1071,11 @@ class if specified. *Defaults to None.* setattr(self, k, v) self.set_backend(backend) + self.append = False + if kwargs and "append" in kwargs: + if kwargs["append"] == True: + self.append = True + # Ensure backend is supported if self._backend_name == "python": self._backend = backends.PythonWriter(self) From e97e22fcbea95fcdb5f9e0030bd3eec34a1ef14c Mon Sep 17 00:00:00 2001 From: sameeul Date: Mon, 29 Jul 2024 10:37:43 -0400 Subject: [PATCH 2/6] PIck up the existing zarr array instead of creating it --- src/bfio/backends.py | 48 +++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 280dc4b..cb17543 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -1424,33 +1424,26 @@ def _init_writer(self): self._root = zarr.open_group(store=str(self.frontend._file_path.resolve()), mode=mode) # Create the metadata - metadata_path = ( - Path(self.frontend._file_path) - .joinpath("OME") - .joinpath("METADATA.ome.xml") - ) - metadata_path.parent.mkdir(parents=True, exist_ok=True) - with open(metadata_path, "w") as fw: - fw.write(str(self.frontend._metadata.to_xml())) - - self._root.attrs["multiscales"] = [ - { - "version": "0.1", - "name": self.frontend._file_path.name, - "datasets": [{"path": "0"}], - "metadata": {"method": "mean"}, - } - ] + if self.frontend.append != True: + metadata_path = ( + Path(self.frontend._file_path) + .joinpath("OME") + .joinpath("METADATA.ome.xml") + ) + metadata_path.parent.mkdir(parents=True, exist_ok=True) + with open(metadata_path, "w") as fw: + fw.write(str(self.frontend._metadata.to_xml())) + + self._root.attrs["multiscales"] = [ + { + "version": "0.1", + "name": self.frontend._file_path.name, + "datasets": [{"path": "0"}], + "metadata": {"method": "mean"}, + } + ] if self.frontend.append == True: - print("Append option selected") - writer = self._root.create( - "0", - shape=shape, - chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), - dtype=self.frontend.dtype, - compressor=compressor, - overwrite=False, - ) + writer = self._root["0"] else: writer = self._root.zeros( "0", @@ -1462,7 +1455,8 @@ def _init_writer(self): # This is recommended to do for cloud storage to increase read/write # speed, but it also increases write speed locally when threading. - zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) + if self.frontend.append != True: + zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) self._writer = writer From caffd0ef239ed21a34c815fa44f872159a9262ee Mon Sep 17 00:00:00 2001 From: sameeul Date: Wed, 31 Jul 2024 15:55:37 -0400 Subject: [PATCH 3/6] Improve append behavior, add test, change dimension separator to '/' --- src/bfio/backends.py | 45 ++++++++++++++++++++++--------- tests/test_write.py | 64 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 12 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index cb17543..5b85c99 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -1421,15 +1421,20 @@ def _init_writer(self): mode = "w" if self.frontend.append == True: mode = "a" - self._root = zarr.open_group(store=str(self.frontend._file_path.resolve()), mode=mode) + self._root = zarr.open_group( + store=str(self.frontend._file_path.resolve()), mode=mode + ) # Create the metadata - if self.frontend.append != True: - metadata_path = ( - Path(self.frontend._file_path) - .joinpath("OME") - .joinpath("METADATA.ome.xml") - ) + metadata_path = ( + Path(self.frontend._file_path) + .joinpath("OME") + .joinpath("METADATA.ome.xml") + ) + + if self.frontend.append == False or ( + self.frontend.append == True and metadata_path.exists() == False + ): metadata_path.parent.mkdir(parents=True, exist_ok=True) with open(metadata_path, "w") as fw: fw.write(str(self.frontend._metadata.to_xml())) @@ -1442,20 +1447,36 @@ def _init_writer(self): "metadata": {"method": "mean"}, } ] - if self.frontend.append == True: - writer = self._root["0"] - else: + if ( + self.frontend.append == True + and len(sorted(self._root.array_keys())) > 0 + ): + writer = self._root["0"] + else: writer = self._root.zeros( "0", shape=shape, - chunks=(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), + chunks=( + 1, + 1, + 1, + self.frontend._TILE_SIZE, + self.frontend._TILE_SIZE, + ), dtype=self.frontend.dtype, compressor=compressor, + dimension_separator="/", ) # This is recommended to do for cloud storage to increase read/write # speed, but it also increases write speed locally when threading. - if self.frontend.append != True: + consolidated_metadata_file = Path(self.frontend._file_path).joinpath( + ".zmetadata" + ) + if self.frontend.append == False or ( + self.frontend.append == True + and consolidated_metadata_file.exists() == False + ): zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) self._writer = writer diff --git a/tests/test_write.py b/tests/test_write.py index d9a173e..3b5d4bf 100644 --- a/tests/test_write.py +++ b/tests/test_write.py @@ -105,6 +105,70 @@ def test_write_java(self): assert np.array_equal(image[:], br[:]) +class TestPythonZarrWriter(unittest.TestCase): + + def test_write_zarr_append_no_file(self): + + with bfio.BioReader(str(TEST_DIR.joinpath("5025551.zarr"))) as br: + + actual_shape = br.shape + actual_dtype = br.dtype + actual_image = br[:] + actual_mdata = br.metadata + print(br.shape) + with tempfile.TemporaryDirectory() as dir: + + # Use the temporary directory + test_file_path = str(TEST_DIR.joinpath("out/test.ome.zarr")) + + with bfio.BioWriter( + test_file_path, metadata=actual_mdata, backend="zarr", append=True + ) as bw: + + expanded = np.expand_dims(actual_image, axis=-1) + bw[:, :, :, :, :] = expanded[:, :, :, :, :] + + with bfio.BioReader(test_file_path) as br: + + assert br.shape == actual_shape + assert br.dtype == actual_dtype + + assert br[:].sum() == actual_image.sum() + + def test_write_zarr_append_file_exist(self): + + with bfio.BioReader(str(TEST_DIR.joinpath("5025551.zarr"))) as br: + + actual_shape = br.shape + actual_dtype = br.dtype + actual_image = br[:] + actual_mdata = br.metadata + with tempfile.TemporaryDirectory() as dir: + + # Use the temporary directory + test_file_path = str(TEST_DIR.joinpath("out/test.ome.zarr")) + + with bfio.BioWriter( + test_file_path, metadata=actual_mdata, backend="zarr" + ) as bw: + + expanded = np.expand_dims(actual_image, axis=-1) + bw[:, :, :, 0:13, :] = expanded[:, :, :, 0:13, :] + + with bfio.BioWriter( + test_file_path, metadata=actual_mdata, backend="zarr", append=True + ) as bw: + + expanded = np.expand_dims(actual_image, axis=-1) + bw[:, :, :, 13:, :] = expanded[:, :, :, 13:, :] + with bfio.BioReader(test_file_path) as br: + + assert br.shape == actual_shape + assert br.dtype == actual_dtype + + assert br[:].sum() == actual_image.sum() + + class TestOmeZarrWriter(unittest.TestCase): def test_write_zarr_tensorstore(self): From 130dae54bbefed4b0b7b0bc05dcbaeed81e70bdb Mon Sep 17 00:00:00 2001 From: sameeul Date: Wed, 31 Jul 2024 16:21:05 -0400 Subject: [PATCH 4/6] update test path --- tests/test_write.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_write.py b/tests/test_write.py index 3b5d4bf..52e0546 100644 --- a/tests/test_write.py +++ b/tests/test_write.py @@ -118,8 +118,7 @@ def test_write_zarr_append_no_file(self): print(br.shape) with tempfile.TemporaryDirectory() as dir: - # Use the temporary directory - test_file_path = str(TEST_DIR.joinpath("out/test.ome.zarr")) + test_file_path = os.path.join(dir, "out/test.ome.zarr") with bfio.BioWriter( test_file_path, metadata=actual_mdata, backend="zarr", append=True @@ -145,8 +144,7 @@ def test_write_zarr_append_file_exist(self): actual_mdata = br.metadata with tempfile.TemporaryDirectory() as dir: - # Use the temporary directory - test_file_path = str(TEST_DIR.joinpath("out/test.ome.zarr")) + test_file_path = os.path.join(dir, "out/test.ome.zarr") with bfio.BioWriter( test_file_path, metadata=actual_mdata, backend="zarr" From 8c9560968da5adceab17a6629fdb293b0dae7a1c Mon Sep 17 00:00:00 2001 From: sameeul Date: Wed, 31 Jul 2024 16:33:46 -0400 Subject: [PATCH 5/6] Fix formatting --- src/bfio/backends.py | 16 ++++++++-------- src/bfio/bfio.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 5b85c99..9acf63d 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -1405,7 +1405,7 @@ def _init_writer(self): In the future, it may be reasonable to not enforce read-only """ - if self.frontend.append == False: + if self.frontend.append is False: if self.frontend._file_path.exists(): shutil.rmtree(self.frontend._file_path) @@ -1419,7 +1419,7 @@ def _init_writer(self): compressor = Blosc(cname="zstd", clevel=1, shuffle=Blosc.SHUFFLE) mode = "w" - if self.frontend.append == True: + if self.frontend.append is True: mode = "a" self._root = zarr.open_group( store=str(self.frontend._file_path.resolve()), mode=mode @@ -1432,8 +1432,8 @@ def _init_writer(self): .joinpath("METADATA.ome.xml") ) - if self.frontend.append == False or ( - self.frontend.append == True and metadata_path.exists() == False + if self.frontend.append is False or ( + self.frontend.append is True and metadata_path.exists() is False ): metadata_path.parent.mkdir(parents=True, exist_ok=True) with open(metadata_path, "w") as fw: @@ -1448,7 +1448,7 @@ def _init_writer(self): } ] if ( - self.frontend.append == True + self.frontend.append is True and len(sorted(self._root.array_keys())) > 0 ): writer = self._root["0"] @@ -1473,9 +1473,9 @@ def _init_writer(self): consolidated_metadata_file = Path(self.frontend._file_path).joinpath( ".zmetadata" ) - if self.frontend.append == False or ( - self.frontend.append == True - and consolidated_metadata_file.exists() == False + if self.frontend.append is False or ( + self.frontend.append is True + and consolidated_metadata_file.exists() is False ): zarr.consolidate_metadata(str(self.frontend._file_path.resolve())) diff --git a/src/bfio/bfio.py b/src/bfio/bfio.py index c5557b7..4651618 100644 --- a/src/bfio/bfio.py +++ b/src/bfio/bfio.py @@ -1073,7 +1073,7 @@ class if specified. *Defaults to None.* self.set_backend(backend) self.append = False if kwargs and "append" in kwargs: - if kwargs["append"] == True: + if kwargs["append"] is True: self.append = True # Ensure backend is supported From 09ceb5733540302b1cfe906872db601fe12dfa4e Mon Sep 17 00:00:00 2001 From: sameeul Date: Wed, 31 Jul 2024 17:15:34 -0400 Subject: [PATCH 6/6] update big tiff logic --- src/bfio/backends.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/bfio/backends.py b/src/bfio/backends.py index 9acf63d..a82f062 100644 --- a/src/bfio/backends.py +++ b/src/bfio/backends.py @@ -617,7 +617,15 @@ def _init_writer(self): f"Image:{Path(self.frontend._file_path).name}" ) - if self.frontend.X * self.frontend.Y * self.frontend.bpp > 2**31: + if ( + self.frontend.X + * self.frontend.Y + * self.frontend.Z + * self.frontend.C + * self.frontend.T + * self.frontend.bpp + > 2**31 + ): big_tiff = True else: big_tiff = False