From ba7fe9c420d9cfd9bef8a928f78a39bc474f6212 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:27:29 +0000 Subject: [PATCH 1/8] build(deps): bump codecov/codecov-action in the actions group Bumps the actions group with 1 update: [codecov/codecov-action](https://github.com/codecov/codecov-action). Updates `codecov/codecov-action` from 4.3.0 to 4.5.0 - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4.3.0...v4.5.0) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c619985..965e85a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,6 @@ jobs: --durations=20 - name: Upload coverage report - uses: codecov/codecov-action@v4.3.0 + uses: codecov/codecov-action@v4.5.0 with: token: ${{ secrets.CODECOV_TOKEN }} From c91968b22dd2bc975b1ca1aa107aa75da79424bd Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 15:35:29 +0200 Subject: [PATCH 2/8] Ignore dev directories in ruff --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cbe9162..a4d8d24 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: rev: "v0.4.1" hooks: - id: ruff - args: ["--fix", "--show-fixes"] + args: ["--fix", "--show-fixes", "--exclude=dev{1,2,3}"] - id: ruff-format # - repo: https://github.com/pre-commit/mirrors-mypy From 6675c490e59406fe5cd8584764e310e1631f472d Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 15:38:16 +0200 Subject: [PATCH 3/8] Run 'pre-commit run -a' --- .copier-answers.yml | 5 +- .pre-commit-config.yaml | 2 +- .../convert_zarr_v2_to_v3.py | 76 +++++----- .../convert_zarr_v2_to_v3/environmentv2_3.yml | 20 +-- dev1/convert_zarr_v2_to_v3/util.py | 16 ++- dev1/environment.yaml | 4 +- dev1/environment_windows.yml | 2 +- dev1/resave.py | 56 ++++---- dev2/.pre-commit-config.yaml | 19 ++- dev2/environment.yaml | 6 +- dev2/resave.py | 4 +- .../min-specimen-biosample.json | 132 ++++++++---------- dev3/2024-07-02/example-metadata/minimal.json | 114 +++++++-------- .../images/graph-min-specimen-biosample.svg | 2 +- dev3/2024-07-02/images/graph-minimal.svg | 2 +- dev3/2024-07-02/ro-crate-metadata-proposal.md | 47 ++++--- dev3/zarr-crate/README.md | 7 +- .../create_fly_ro_crate_metadata.py | 8 +- .../example_usage/ro-crate-metadata.json | 2 +- dev3/zarr-crate/pyproject.toml | 2 +- dev3/zarr-crate/zarr_crate/zarr_extension.py | 1 - 21 files changed, 273 insertions(+), 254 deletions(-) diff --git a/.copier-answers.yml b/.copier-answers.yml index 84a4c5d..35f58e3 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -7,7 +7,8 @@ full_name: Josh Moore license: BSD org: ome project_name: ome2024-ngff-challenge -project_short_description: Tools for converting OME-Zarr data within the ome2024-ngff-challenge - (see https://forum.image.sc/t/ome2024-ngff-challenge/97363) +project_short_description: + Tools for converting OME-Zarr data within the ome2024-ngff-challenge (see + https://forum.image.sc/t/ome2024-ngff-challenge/97363) url: https://github.com/ome/ome2024-ngff-challenge vcs: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4d8d24..e1c36c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: rev: "v0.4.1" hooks: - id: ruff - args: ["--fix", "--show-fixes", "--exclude=dev{1,2,3}"] + args: ["--fix", "--show-fixes", "--exclude=dev*"] - id: ruff-format # - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/dev1/convert_zarr_v2_to_v3/convert_zarr_v2_to_v3.py b/dev1/convert_zarr_v2_to_v3/convert_zarr_v2_to_v3.py index 54da558..3f8b91f 100644 --- a/dev1/convert_zarr_v2_to_v3/convert_zarr_v2_to_v3.py +++ b/dev1/convert_zarr_v2_to_v3/convert_zarr_v2_to_v3.py @@ -13,8 +13,8 @@ def convert_zarr_v2_to_v3(input_url, output_url, is_root=True): # Uses Zarr v2 for reading, Zarr(ita) v3 for writing # reader - input_root = zarr.open_group(store=input_url) # does not seem to support https URLs - print(f'Processing {input_root.store.path}') + input_root = zarr.open_group(store=input_url) # does not seem to support https URLs + print(f"Processing {input_root.store.path}") if is_root: if input_root: @@ -24,7 +24,7 @@ def convert_zarr_v2_to_v3(input_url, output_url, is_root=True): root = zarrita.Group.create(store=output_url) root.update_attributes(update_omezarr_attributes(input_root.attrs.asdict())) else: - raise FileNotFoundError(f'Error parsing {input_url}') + raise FileNotFoundError(f"Error parsing {input_url}") # writer for label in input_root: @@ -33,29 +33,33 @@ def convert_zarr_v2_to_v3(input_url, output_url, is_root=True): output_path = output_url + content.name if isinstance(content, zarr.Group): # create zarr group - zarrita.Group.create(store=output_path, - attributes=update_omezarr_attributes(content.attrs.asdict())) + zarrita.Group.create( + store=output_path, + attributes=update_omezarr_attributes(content.attrs.asdict()), + ) convert_zarr_v2_to_v3(input_path, output_path, is_root=False) elif isinstance(content, zarr.Array): codecs = [bytes_codec(), blosc_codec(typesize=4)] - output_array = zarrita.Array.create(output_path, - shape=content.shape, - chunk_shape=content.chunks, - dtype=content.dtype, - codecs=codecs, - attributes=update_omezarr_attributes(content.attrs.asdict())) + output_array = zarrita.Array.create( + output_path, + shape=content.shape, + chunk_shape=content.chunks, + dtype=content.dtype, + codecs=codecs, + attributes=update_omezarr_attributes(content.attrs.asdict()), + ) output_array[:] = content else: - print(f'Unsupported content {content}') + print(f"Unsupported content {content}") def convert_ome_zarr_v2_to_v3(input_url, output_url): # Uses Ome-Zarr v2 for reading, Zarr(ita) v3 for writing - location = parse_url(input_url) # supports https URLs* + location = parse_url(input_url) # supports https URLs* if location is None: # * under particular conditions the URL/zarr is not detected (internal .exists() returns False) # caused by OS / PyCharm / version / pytest? - raise FileNotFoundError(f'Error parsing {input_url}') + raise FileNotFoundError(f"Error parsing {input_url}") reader = Reader(location) input_root_path = os.path.normpath(reader.zarr.path) @@ -65,34 +69,42 @@ def convert_ome_zarr_v2_to_v3(input_url, output_url): shutil.rmtree(output_url) for image_node in reader(): - print(f'Processing {image_node}') + print(f"Processing {image_node}") metadata = image_node.metadata - axes = metadata.get('axes', []) - dimension_order = ''.join([axis.get('name') for axis in axes]) - output_path = os.path.normpath(image_node.zarr.path).replace(input_root_path, output_url) + axes = metadata.get("axes", []) + dimension_order = "".join([axis.get("name") for axis in axes]) + output_path = os.path.normpath(image_node.zarr.path).replace( + input_root_path, output_url + ) output_store_path = zarrita.store.make_store_path(output_path) # create zarr group - output_zarr = zarrita.Group.create(store=output_store_path, - attributes=update_omezarr_attributes(image_node.zarr.root_attrs)) + output_zarr = zarrita.Group.create( + store=output_store_path, + attributes=update_omezarr_attributes(image_node.zarr.root_attrs), + ) for level, data in enumerate(image_node.data): codecs = [bytes_codec(), blosc_codec(typesize=4)] # create zarr array - output_array = output_zarr.create_array(str(level), - shape=data.shape, - chunk_shape=data.chunksize, - dtype=data.dtype, - codecs=codecs) - output_array[:] = np.array(data) # set only supports ndarray; TODO: wrap inside (dask) chunking? + output_array = output_zarr.create_array( + str(level), + shape=data.shape, + chunk_shape=data.chunksize, + dtype=data.dtype, + codecs=codecs, + ) + output_array[:] = np.array( + data + ) # set only supports ndarray; TODO: wrap inside (dask) chunking? -if __name__ == '__main__': - #input_url = 'D:/slides/6001240.zarr' - input_url = 'https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr' - output_url = 'D:/slides/test/' + os.path.basename(input_url) +if __name__ == "__main__": + # input_url = 'D:/slides/6001240.zarr' + input_url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr" + output_url = "D:/slides/test/" + os.path.basename(input_url) - #convert_zarr_v2_to_v3(input_url, output_url) + # convert_zarr_v2_to_v3(input_url, output_url) convert_ome_zarr_v2_to_v3(input_url, output_url) - print('done') + print("done") diff --git a/dev1/convert_zarr_v2_to_v3/environmentv2_3.yml b/dev1/convert_zarr_v2_to_v3/environmentv2_3.yml index 4cf6631..e87932f 100644 --- a/dev1/convert_zarr_v2_to_v3/environmentv2_3.yml +++ b/dev1/convert_zarr_v2_to_v3/environmentv2_3.yml @@ -1,13 +1,13 @@ name: ome2024-ngff-challenge-v2_3 channels: - - ome - - conda-forge - - defaults + - ome + - conda-forge + - defaults dependencies: - - python=3.10 - - numpy - - zarr==2.18 - - ome-zarr==0.9 - - pip - - pip: - - zarrita + - python=3.10 + - numpy + - zarr==2.18 + - ome-zarr==0.9 + - pip + - pip: + - zarrita diff --git a/dev1/convert_zarr_v2_to_v3/util.py b/dev1/convert_zarr_v2_to_v3/util.py index 63290de..a229b5e 100644 --- a/dev1/convert_zarr_v2_to_v3/util.py +++ b/dev1/convert_zarr_v2_to_v3/util.py @@ -1,5 +1,7 @@ def update_omezarr_attributes(attributes): - new_attributes = replace_attributes_value(attributes, search_label='version', new_value='0.5-dev1') + new_attributes = replace_attributes_value( + attributes, search_label="version", new_value="0.5-dev1" + ) return new_attributes @@ -7,8 +9,10 @@ def replace_attributes_value(values, search_label, new_value): if isinstance(values, dict): new_values = {} for label, value in values.items(): - if isinstance(label, str) and not label.startswith('_'): - new_values[label] = replace_attributes_value(value, search_label=search_label, new_value=new_value) + if isinstance(label, str) and not label.startswith("_"): + new_values[label] = replace_attributes_value( + value, search_label=search_label, new_value=new_value + ) else: new_values[label] = value if search_label in values: @@ -16,7 +20,11 @@ def replace_attributes_value(values, search_label, new_value): elif isinstance(values, list): new_values = [] for item in values: - new_values.append(replace_attributes_value(item, search_label=search_label, new_value=new_value)) + new_values.append( + replace_attributes_value( + item, search_label=search_label, new_value=new_value + ) + ) else: new_values = values return new_values diff --git a/dev1/environment.yaml b/dev1/environment.yaml index b860a14..570c684 100644 --- a/dev1/environment.yaml +++ b/dev1/environment.yaml @@ -2,8 +2,8 @@ channels: - conda-forge dependencies: - tensorstore - - 'numpy<2' - - zarr # loads dependencies + - "numpy<2" + - zarr # loads dependencies - pip: - "--editable=git+https://github.com/will-moore/ome-zarr-py.git@zarr_v3_support#egg=ome-zarr" - "--editable=git+https://github.com/zarr-developers/zarr-python.git@v3#egg=zarr" diff --git a/dev1/environment_windows.yml b/dev1/environment_windows.yml index 557dc7c..d338735 100644 --- a/dev1/environment_windows.yml +++ b/dev1/environment_windows.yml @@ -4,7 +4,7 @@ channels: dependencies: - python=3.10 - numpy<2 - - zarr # loads dependencies + - zarr # loads dependencies - pip - pip: - tensorstore diff --git a/dev1/resave.py b/dev1/resave.py index c73e08f..2726966 100755 --- a/dev1/resave.py +++ b/dev1/resave.py @@ -7,6 +7,7 @@ import tensorstore as ts import argparse + parser = argparse.ArgumentParser() parser.add_argument("input_path") parser.add_argument("output_path") @@ -22,32 +23,38 @@ def convert_array(input_path, output_path): - read = ts.open({ - 'driver': 'zarr', - 'kvstore': { - 'driver': 'file', - 'path': input_path, - }, - }).result() + read = ts.open( + { + "driver": "zarr", + "kvstore": { + "driver": "file", + "path": input_path, + }, + } + ).result() shape = read.shape - chunks= read.schema.chunk_layout.read_chunk.shape + chunks = read.schema.chunk_layout.read_chunk.shape - write = ts.open({ - "driver": "zarr3", - "kvstore": { - "driver": "file", - "path": output_path - }, - "metadata": { - "shape": shape, - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": chunks}}, - "chunk_key_encoding": {"name": "default"}, - "codecs": [{"name": "blosc", "configuration": {"cname": "lz4", "clevel": 5}}], - "data_type": read.dtype, - }, - "create": True, - }).result() + write = ts.open( + { + "driver": "zarr3", + "kvstore": {"driver": "file", "path": output_path}, + "metadata": { + "shape": shape, + "chunk_grid": { + "name": "regular", + "configuration": {"chunk_shape": chunks}, + }, + "chunk_key_encoding": {"name": "default"}, + "codecs": [ + {"name": "blosc", "configuration": {"cname": "lz4", "clevel": 5}} + ], + "data_type": read.dtype, + }, + "create": True, + } + ).result() future = write.write(read) future.result() @@ -76,6 +83,5 @@ def convert_array(input_path, output_path): for ds in multiscales[0]["datasets"]: ds_path = ds["path"] convert_array( - os.path.join(ns.input_path, ds_path), - os.path.join(ns.output_path, ds_path) + os.path.join(ns.input_path, ds_path), os.path.join(ns.output_path, ds_path) ) diff --git a/dev2/.pre-commit-config.yaml b/dev2/.pre-commit-config.yaml index edb82ec..65e3289 100644 --- a/dev2/.pre-commit-config.yaml +++ b/dev2/.pre-commit-config.yaml @@ -1,12 +1,11 @@ --- repos: - -- repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.5.5 - hooks: - # Run the linter. - - id: ruff - args: [ "--fix" ] - # Run the formatter. - - id: ruff-format + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.5.5 + hooks: + # Run the linter. + - id: ruff + args: ["--fix"] + # Run the formatter. + - id: ruff-format diff --git a/dev2/environment.yaml b/dev2/environment.yaml index 6ebf891..d6b500e 100644 --- a/dev2/environment.yaml +++ b/dev2/environment.yaml @@ -3,13 +3,13 @@ channels: dependencies: - napari - pyqt - - 'numpy<2' + - "numpy<2" - tensorstore # loads dependencies - - zarr # loads dependencies + - zarr # loads dependencies - tqdm - pip - pip: - "--editable=git+https://github.com/will-moore/ome-zarr-py.git@zarr_v3_support#egg=ome-zarr" - "--editable=git+https://github.com/zarr-developers/zarr-python.git@v3#egg=zarr" - "--editable=git+https://github.com/ome/napari-ome-zarr.git@v3#egg=napari-ome-zarr" - - 'tensorstore>=0.1.63' + - "tensorstore>=0.1.63" diff --git a/dev2/resave.py b/dev2/resave.py index d20cacf..96dab63 100755 --- a/dev2/resave.py +++ b/dev2/resave.py @@ -52,7 +52,7 @@ class TSMetrics: Instances of this class capture the current tensorstore metrics. If an existing instance is passed in on creation, it will be stored - in order to deduct prevoius values from those measured by this instance. + in order to deduct previous values from those measured by this instance. """ CHUNK_CACHE_READS = "/tensorstore/cache/chunk_cache/reads" @@ -217,7 +217,7 @@ def convert_array( future.result() after = TSMetrics(read_config, write_config, before) - print(f"""Reencode (tensorstore) {input_path} to {output_path} + print(f"""Re-encode (tensorstore) {input_path} to {output_path} read: {after.read()} write: {after.written()} time: {after.elapsed()} diff --git a/dev3/2024-07-02/example-metadata/min-specimen-biosample.json b/dev3/2024-07-02/example-metadata/min-specimen-biosample.json index 93a0a08..374d7b7 100644 --- a/dev3/2024-07-02/example-metadata/min-specimen-biosample.json +++ b/dev3/2024-07-02/example-metadata/min-specimen-biosample.json @@ -1,73 +1,61 @@ { - "@context": [ - "https://w3id.org/ro/crate/1.1/context", - { - "organism_classification": "https://schema.org/taxonomicRange", - "BioChemEntity": "https://schema.org/BioChemEntity", - "obo": "http://purl.obolibrary.org/obo/", - "acquisiton_method": { - "@reverse": "https://schema.org/result", - "@type": "@id" - }, - "biological_entity": "https://schema.org/about", - "preparation_method": "https://www.wikidata.org/wiki/Property:P1537" - } - ], - "@graph": [ - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" - }, - "about": { - "@id": "./" - } - }, - { - "@id": "./", - "@type": "Dataset", - "name": "Light microscopy photo of a fly", - "description": "Light microscopy photo of a fruit fly.", - "licence": "https://creativecommons.org/licenses/by/4.0/", - "hasPart": { - "@id": "./dros-mel-image.zarr/" - } - }, - { - "@id": "./dros-mel-image.zarr/", - "@type": "Dataset", - "name": "OME-ZARR files", - "description": "the ome zarr files of the fly.", - "acquisition_method": [ - "_:b0" - ], - "preparation_method": [ - "_:b1" - ], - "biological_entity": [ - "_:b2" - ] - }, - { - "@id": "_:b0", - "@type": [ - "http://purl.obolibrary.org/obo/FBbi_00000243" - ] - }, - { - "@id": "_:b1", - "@type": [ - "obo:OBI_0000272" - ], - "description": "The fruit flies were individually encased in epoxy resin..." - }, - { - "@id": "_:b2", - "@type": [ - "BioChemEntity" - ], - "organism_classification": "NCBI:txid7227" - } - ] - } + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "organism_classification": "https://schema.org/taxonomicRange", + "BioChemEntity": "https://schema.org/BioChemEntity", + "obo": "http://purl.obolibrary.org/obo/", + "acquisiton_method": { + "@reverse": "https://schema.org/result", + "@type": "@id" + }, + "biological_entity": "https://schema.org/about", + "preparation_method": "https://www.wikidata.org/wiki/Property:P1537" + } + ], + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Light microscopy photo of a fly", + "description": "Light microscopy photo of a fruit fly.", + "licence": "https://creativecommons.org/licenses/by/4.0/", + "hasPart": { + "@id": "./dros-mel-image.zarr/" + } + }, + { + "@id": "./dros-mel-image.zarr/", + "@type": "Dataset", + "name": "OME-ZARR files", + "description": "the ome zarr files of the fly.", + "acquisition_method": ["_:b0"], + "preparation_method": ["_:b1"], + "biological_entity": ["_:b2"] + }, + { + "@id": "_:b0", + "@type": ["http://purl.obolibrary.org/obo/FBbi_00000243"] + }, + { + "@id": "_:b1", + "@type": ["obo:OBI_0000272"], + "description": "The fruit flies were individually encased in epoxy resin..." + }, + { + "@id": "_:b2", + "@type": ["BioChemEntity"], + "organism_classification": "NCBI:txid7227" + } + ] +} diff --git a/dev3/2024-07-02/example-metadata/minimal.json b/dev3/2024-07-02/example-metadata/minimal.json index 6398e4f..7eb4815 100644 --- a/dev3/2024-07-02/example-metadata/minimal.json +++ b/dev3/2024-07-02/example-metadata/minimal.json @@ -1,62 +1,54 @@ { - "@context": [ - "https://w3id.org/ro/crate/1.1/context", - { - "organism_classification": "https://schema.org/taxonomicRange", - "BioChemEntity": "https://schema.org/BioChemEntity", - "obo": "http://purl.obolibrary.org/obo/", - "acquisiton_method": { - "@reverse": "https://schema.org/result", - "@type": "@id" - }, - "biological_entity": "https://schema.org/about" - } - ], - "@graph": [ - { - "@type": "CreativeWork", - "@id": "ro-crate-metadata.json", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" - }, - "about": { - "@id": "./" - } - }, - { - "@id": "./", - "@type": "Dataset", - "name": "Light microscopy photo of a fly", - "description": "Light microscopy photo of a fruit fly.", - "licence": "https://creativecommons.org/licenses/by/4.0/", - "hasPart": { - "@id": "./dros-mel-image.zarr/" - } - }, - { - "@id": "./dros-mel-image.zarr/", - "@type": "Dataset", - "name": "OME-ZARR files", - "description": "the ome zarr files of the fly.", - "acquisiton_method": [ - "_:b0" - ], - "preparation_method": [ - "_:b1" - ] - }, - { - "@id": "_:b0", - "@type": [ - "http://purl.obolibrary.org/obo/FBbi_00000243" - ] - }, - { - "@id": "_:b21", - "@type": [ - "BioChemEntity" - ], - "organism_classification": "NCBI:txid7227" - } - ] - } \ No newline at end of file + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "organism_classification": "https://schema.org/taxonomicRange", + "BioChemEntity": "https://schema.org/BioChemEntity", + "obo": "http://purl.obolibrary.org/obo/", + "acquisiton_method": { + "@reverse": "https://schema.org/result", + "@type": "@id" + }, + "biological_entity": "https://schema.org/about" + } + ], + "@graph": [ + { + "@type": "CreativeWork", + "@id": "ro-crate-metadata.json", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": "Dataset", + "name": "Light microscopy photo of a fly", + "description": "Light microscopy photo of a fruit fly.", + "licence": "https://creativecommons.org/licenses/by/4.0/", + "hasPart": { + "@id": "./dros-mel-image.zarr/" + } + }, + { + "@id": "./dros-mel-image.zarr/", + "@type": "Dataset", + "name": "OME-ZARR files", + "description": "the ome zarr files of the fly.", + "acquisiton_method": ["_:b0"], + "preparation_method": ["_:b1"] + }, + { + "@id": "_:b0", + "@type": ["http://purl.obolibrary.org/obo/FBbi_00000243"] + }, + { + "@id": "_:b21", + "@type": ["BioChemEntity"], + "organism_classification": "NCBI:txid7227" + } + ] +} diff --git a/dev3/2024-07-02/images/graph-min-specimen-biosample.svg b/dev3/2024-07-02/images/graph-min-specimen-biosample.svg index eabe904..723dbb7 100644 --- a/dev3/2024-07-02/images/graph-min-specimen-biosample.svg +++ b/dev3/2024-07-02/images/graph-min-specimen-biosample.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/dev3/2024-07-02/images/graph-minimal.svg b/dev3/2024-07-02/images/graph-minimal.svg index 16a9526..402437d 100644 --- a/dev3/2024-07-02/images/graph-minimal.svg +++ b/dev3/2024-07-02/images/graph-minimal.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/dev3/2024-07-02/ro-crate-metadata-proposal.md b/dev3/2024-07-02/ro-crate-metadata-proposal.md index cee6606..5958cff 100644 --- a/dev3/2024-07-02/ro-crate-metadata-proposal.md +++ b/dev3/2024-07-02/ro-crate-metadata-proposal.md @@ -1,20 +1,26 @@ -RO-Crate metadata minimal design -===== +# RO-Crate metadata minimal design +RO-Crate provides a mechanism to package data together with metadata. In general +this looks like a directory of files (or sub-directories) with an +ro-crate-metadata.json to describe the files. We want to test mechanisms for +storing extensible metadata alongside the OME-Zarr images. This document +proposes a minimal schema for the ro-crate-metadata.json focused on describing +what was imaged and the imaging technique. -RO-Crate provides a mechanism to package data together with metadata. In general this looks like a directory of files (or sub-directories) with an ro-crate-metadata.json to describe the files. We want to test mechanisms for storing extensible metadata alongside the OME-Zarr images. This document proposes a minimal schema for the ro-crate-metadata.json focused on describing what was imaged and the imaging technique. +**_Relevant links_** -***Relevant links*** +RO crate: https://www.researchobject.org/ro-crate/ json-ld syntax: +https://www.w3.org/TR/json-ld/ json-ld playground: +https://json-ld.org/playground/ -RO crate: https://www.researchobject.org/ro-crate/ -json-ld syntax: https://www.w3.org/TR/json-ld/ -json-ld playground: https://json-ld.org/playground/ +**_Proposal for a minimal metadata structure_** -***Proposal for a minimal metadata structure*** +The json payload below follows the RO-Crate guidance. It contains an object per +'thing that was imaged' (instance of schema:BioChemEntity), linked to its NCBI +taxon\*, an object per 'imaging technique' which has a type of the relevant FBBI +imagining technique. -The json payload below follows the RO-Crate guidance. It contains an object per 'thing that was imaged' (instance of schema:BioChemEntity), linked to its NCBI taxon*, an object per 'imaging technique' which has a type of the relevant FBBI imagining technique. - -*note that the NCBI taxon may also be 'non organismal' for synthetic entities. +\*note that the NCBI taxon may also be 'non organismal' for synthetic entities. ``` { @@ -81,20 +87,23 @@ The json payload below follows the RO-Crate guidance. It contains an object per } ``` -The json-ld can be converted to a very basic RDF graph, but requires a '@base' to be added to the context to provide a root for the relative URIs for the files ( json-ld playground example: https://tinyurl.com/2jxlbtlj). The following image is a basic Entity-Relation style diagram: +The json-ld can be converted to a very basic RDF graph, but requires a '@base' +to be added to the context to provide a root for the relative URIs for the files +( json-ld playground example: https://tinyurl.com/2jxlbtlj). The following image +is a basic Entity-Relation style diagram: +**_Suggested extensions / less than minimal_** - -***Suggested extensions / less than minimal*** - -Following REMBI guidance, one of the key objects that contains data is the 'Specimen' - this contains information about how the Biosample was prepared for imaging. We can therefore extend the metadata model to have a new object to contain information on the preparation protocol (which together with the biosample forms the Specimen that was imaged). - +Following REMBI guidance, one of the key objects that contains data is the +'Specimen' - this contains information about how the Biosample was prepared for +imaging. We can therefore extend the metadata model to have a new object to +contain information on the preparation protocol (which together with the +biosample forms the Specimen that was imaged). - The metadata json file would look like: ``` @@ -173,5 +182,3 @@ The metadata json file would look like: } ``` - - diff --git a/dev3/zarr-crate/README.md b/dev3/zarr-crate/README.md index a400deb..fbf0914 100644 --- a/dev3/zarr-crate/README.md +++ b/dev3/zarr-crate/README.md @@ -1,3 +1,6 @@ -A small python package/library extending rocrate to make creating ro-crate-metadata.json for ome-zarr easier. See example_usage/create_fly_ro_crate_metadata.py for an example script that creates example_usage/ro-crate-metadata.json. +A small python package/library extending rocrate to make creating +ro-crate-metadata.json for ome-zarr easier. See +example_usage/create_fly_ro_crate_metadata.py for an example script that creates +example_usage/ro-crate-metadata.json. -Currently very much a 'work in progress' \ No newline at end of file +Currently very much a 'work in progress' diff --git a/dev3/zarr-crate/example_usage/create_fly_ro_crate_metadata.py b/dev3/zarr-crate/example_usage/create_fly_ro_crate_metadata.py index dd9c515..a74b673 100644 --- a/dev3/zarr-crate/example_usage/create_fly_ro_crate_metadata.py +++ b/dev3/zarr-crate/example_usage/create_fly_ro_crate_metadata.py @@ -15,11 +15,15 @@ }, ) biosample = crate.add( - Biosample(crate, properties={"organism_classification": {"@id": "NCBI:txid7227" }}) + Biosample( + crate, properties={"organism_classification": {"@id": "NCBI:txid7227"}} + ) ) specimen = crate.add(Specimen(crate, biosample)) image_acquisition = crate.add( - ImageAcquistion(crate, specimen, properties={"fbbi_id": {"@id": "obo:FBbi_00000243"}}) + ImageAcquistion( + crate, specimen, properties={"fbbi_id": {"@id": "obo:FBbi_00000243"}} + ) ) zarr_root["resultOf"] = image_acquisition diff --git a/dev3/zarr-crate/example_usage/ro-crate-metadata.json b/dev3/zarr-crate/example_usage/ro-crate-metadata.json index 24708b0..20db469 100644 --- a/dev3/zarr-crate/example_usage/ro-crate-metadata.json +++ b/dev3/zarr-crate/example_usage/ro-crate-metadata.json @@ -63,4 +63,4 @@ } } ] -} \ No newline at end of file +} diff --git a/dev3/zarr-crate/pyproject.toml b/dev3/zarr-crate/pyproject.toml index b683197..0a57406 100644 --- a/dev3/zarr-crate/pyproject.toml +++ b/dev3/zarr-crate/pyproject.toml @@ -18,4 +18,4 @@ pytest-mock = "^3.14.0" [build-system] requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" \ No newline at end of file +build-backend = "poetry.core.masonry.api" diff --git a/dev3/zarr-crate/zarr_crate/zarr_extension.py b/dev3/zarr-crate/zarr_crate/zarr_extension.py index 4c49d03..0369bd8 100644 --- a/dev3/zarr-crate/zarr_crate/zarr_extension.py +++ b/dev3/zarr-crate/zarr_crate/zarr_extension.py @@ -2,7 +2,6 @@ class ZarrCrate(ROCrate): - def __init__(self, source=None, gen_preview=False, init=False, exclude=None): super().__init__(source, gen_preview, init, exclude) self.metadata.extra_terms = { From 7e9aee20bb821fb6a84791ae248e70c7ea096549 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 16:03:27 +0200 Subject: [PATCH 4/8] More fixes for pylint --- pyproject.toml | 13 ++++++ src/ome2024_ngff_challenge/resave.py | 43 ++++++++++--------- .../zarr_crate/rembi_extension.py | 27 ++++++++++++ 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c0c2813..44644f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,8 +30,10 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.10" rocrate = "^0.10" +s3fs = ">=2024.6.1" tensorstore = ">=0.1.63" tqdm = "^4.66" +zarr = ">=3.0.0a0" furo = { version = ">=2023.08.17", optional = true } myst_parser = { version = ">=0.13", optional = true } @@ -152,6 +154,7 @@ isort.required-imports = ["from __future__ import annotations"] [tool.pylint] py-version = "3.10" +generated-members = "tensorstore.*" ignore-paths = [".*/_version.py"] reports.output-format = "colorized" similarities.ignore-imports = "yes" @@ -162,4 +165,14 @@ messages_control.disable = [ "missing-module-docstring", "missing-function-docstring", "wrong-import-position", + # Added to get release out + "broad-exception-caught", + "broad-exception-raised", + "consider-using-f-string", + "consider-using-generator", + "logging-fstring-interpolation", + "missing-class-docstring", + "no-member", + "no-value-for-parameter", + "unsupported-assignment-operation", ] diff --git a/src/ome2024_ngff_challenge/resave.py b/src/ome2024_ngff_challenge/resave.py index 76da469..919e424 100755 --- a/src/ome2024_ngff_challenge/resave.py +++ b/src/ome2024_ngff_challenge/resave.py @@ -38,7 +38,7 @@ def guess_shards(shape: list, chunks: list): # TODO: hard-coded to return the full size unless too large if math.prod(shape) < 100_000_000: return shape - raise Exception(f"no shard guess: shape={shape}, chunks={chunks}") + raise ValueError(f"no shard guess: shape={shape}, chunks={chunks}") def csv_int(vstr, sep=",") -> list: @@ -52,7 +52,7 @@ def csv_int(vstr, sep=",") -> list: values.append(v) except ValueError as ve: raise argparse.ArgumentError( - "Invalid value %s, values must be a number" % v0 + message="Invalid value %s, values must be a number" % v0 ) from ve return values @@ -71,6 +71,7 @@ class TextBuffer(Buffer): """ def __init__(self, text): + super().__init__(self) self.text = text self._data = list(text) @@ -168,7 +169,7 @@ def create_configs(ns): def convert_array( - CONFIGS: list, + configs: list, input_path: Path, output_path: Path, output_overwrite: bool, @@ -176,12 +177,12 @@ def convert_array( chunks: list, shards: list, ): - CONFIGS[0]["path"] = str(input_path) - CONFIGS[1]["path"] = str(output_path) + configs[0]["path"] = str(input_path) + configs[1]["path"] = str(output_path) read_config = { "driver": "zarr", - "kvstore": CONFIGS[0], + "kvstore": configs[0], } read = ts.open(read_config).result() @@ -217,7 +218,7 @@ def convert_array( base_config = { "driver": "zarr3", - "kvstore": CONFIGS[1], + "kvstore": configs[1], "metadata": { "shape": read.shape, "chunk_grid": chunk_grid, @@ -261,7 +262,7 @@ def convert_array( def convert_image( - CONFIGS: list, + configs: list, read_root, input_path: str, write_store, @@ -340,7 +341,7 @@ def convert_image( sync(write_store.set(str(filename), text)) else: convert_array( - CONFIGS, + configs, input_path / ds_path, output_path / ds_path, output_overwrite, @@ -381,12 +382,12 @@ def write_rocrate(write_store): def main(ns: argparse.Namespace): - CONFIGS = create_configs(ns) + configs = create_configs(ns) - STORES = [] + stores = [] for config, path, mode in ( - (CONFIGS[0], ns.input_path, "r"), - (CONFIGS[1], ns.output_path, "w"), + (configs[0], ns.input_path, "r"), + (configs[1], ns.output_path, "w"), ): if "bucket" in config: store_class = zarr.store.RemoteStore @@ -403,7 +404,7 @@ def main(ns: argparse.Namespace): # If more than one element, then we are configuring # the output path. If this is local, then delete. - if STORES and ns.output_path.exists(): + if stores and ns.output_path.exists(): # TODO: This should really be an option on zarr-python # as with tensorstore. if ns.output_overwrite: @@ -415,23 +416,23 @@ def main(ns: argparse.Namespace): LOGGER.error(f"{ns.output_path} exists. Exiting") sys.exit(1) - STORES.append(store) + stores.append(store) # Needs zarr_format=2 or we get ValueError("store mode does not support writing") - read_root = zarr.open_group(store=STORES[0], zarr_format=2) + read_root = zarr.open_group(store=stores[0], zarr_format=2) if ns.output_write_details: write_root = None write_store = None else: - write_store = STORES[1] + write_store = stores[1] write_root = zarr.Group.create(write_store) write_rocrate(write_store) # image... if read_root.attrs.get("multiscales"): convert_image( - CONFIGS, + configs, read_root, ns.input_path, write_store, # TODO: review @@ -464,7 +465,7 @@ def main(ns: argparse.Namespace): wells, position=0, desc="i", leave=False, colour="green", ncols=80 ): well_path = well["path"] - well_v2 = zarr.open_group(store=STORES[0], path=well_path, zarr_format=2) + well_v2 = zarr.open_group(store=stores[0], path=well_path, zarr_format=2) if write_root is not None: # otherwise dry-run well_group = write_root.create_group(well_path) @@ -483,7 +484,7 @@ def main(ns: argparse.Namespace): out_path = ns.output_path / img_path input_path = ns.input_path / img_path img_v2 = zarr.open_group( - store=STORES[0], path=str(img_path), zarr_format=2 + store=stores[0], path=str(img_path), zarr_format=2 ) if write_root is not None: # otherwise dry-run @@ -492,7 +493,7 @@ def main(ns: argparse.Namespace): image_group = None convert_image( - CONFIGS, + configs, img_v2, input_path, write_store, # TODO: review diff --git a/src/ome2024_ngff_challenge/zarr_crate/rembi_extension.py b/src/ome2024_ngff_challenge/zarr_crate/rembi_extension.py index 96012d4..025d399 100644 --- a/src/ome2024_ngff_challenge/zarr_crate/rembi_extension.py +++ b/src/ome2024_ngff_challenge/zarr_crate/rembi_extension.py @@ -25,6 +25,15 @@ def __init__(self, crate, identifier=None, properties=None): super().__init__(crate, identifier, biosample_properties) + def popitem(self): + raise NotImplementedError + + def clear(self): + raise NotImplementedError + + def update(self): + raise NotImplementedError + class Specimen(ContextEntity): def __init__(self, crate, biosample, identifier=None, properties=None): @@ -50,6 +59,15 @@ def __init__(self, crate, biosample, identifier=None, properties=None): self["specimen"] = biosample + def popitem(self): + raise NotImplementedError + + def clear(self): + raise NotImplementedError + + def update(self): + raise NotImplementedError + class ImageAcquistion(ContextEntity): def __init__(self, crate, specimen, identifier=None, properties=None): @@ -76,3 +94,12 @@ def __init__(self, crate, specimen, identifier=None, properties=None): super().__init__(crate, identifier, image_acquisition_properties) self["specimen"] = specimen + + def popitem(self): + raise NotImplementedError + + def clear(self): + raise NotImplementedError + + def update(self): + raise NotImplementedError From 2289928e9e6f0b17366b80b9b3597a1dfa7aa101 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 16:06:25 +0200 Subject: [PATCH 5/8] Bump action to 3.10 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 965e85a..09cc785 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.12"] + python-version: ["3.10", "3.12"] runs-on: [ubuntu-latest, macos-latest, windows-latest] include: From 93c5489c8ececf609fcd0f88f6c8ffbc65635efb Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 16:12:26 +0200 Subject: [PATCH 6/8] Revert buffer.__init__ call --- pyproject.toml | 1 + src/ome2024_ngff_challenge/resave.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44644f1..48c18ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,5 +174,6 @@ messages_control.disable = [ "missing-class-docstring", "no-member", "no-value-for-parameter", + "super-init-not-called", "unsupported-assignment-operation", ] diff --git a/src/ome2024_ngff_challenge/resave.py b/src/ome2024_ngff_challenge/resave.py index 919e424..aecfc44 100755 --- a/src/ome2024_ngff_challenge/resave.py +++ b/src/ome2024_ngff_challenge/resave.py @@ -71,7 +71,6 @@ class TextBuffer(Buffer): """ def __init__(self, text): - super().__init__(self) self.text = text self._data = list(text) From f0610c3b408163645f0de770f2f054b4a1c82acc Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 16:16:23 +0200 Subject: [PATCH 7/8] Drop macos and windows tests --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09cc785..ba993a5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,7 +43,7 @@ jobs: fail-fast: false matrix: python-version: ["3.10", "3.12"] - runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: [ubuntu-latest] include: - python-version: pypy-3.10 From d59787cf8c14af6ec2d3e2716dee659f8cfb89c9 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 31 Jul 2024 16:21:01 +0200 Subject: [PATCH 8/8] Disable pypy build --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba993a5..1680525 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,10 +45,6 @@ jobs: python-version: ["3.10", "3.12"] runs-on: [ubuntu-latest] - include: - - python-version: pypy-3.10 - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 with: