diff --git a/CHANGELOG.md b/CHANGELOG.md index 763a41d7..d9dc17d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [1.x.x] - 2024-xx-xx + +### Fix +- Support `baysor>=0.7.0` (#125, @lguerard). + - NB: For Snakemake, please remove the `new_component_*` arguments from the Baysor config. + ## [1.1.5] - 2024-09-17 ### Fix diff --git a/docs/tutorials/api_usage.ipynb b/docs/tutorials/api_usage.ipynb index 2dd0fc1c..fc985f74 100644 --- a/docs/tutorials/api_usage.ipynb +++ b/docs/tutorials/api_usage.ipynb @@ -78,8 +78,10 @@ "outputs": [], "source": [ "image_key = \"image\"\n", - "points_key = \"transcripts\" # (ignore this for multiplex imaging)\n", - "gene_column = \"genes\" # (optional) column of sdata[points_key] containing the gene names" + "points_key = \"transcripts\" # (ignore this for multiplex imaging)\n", + "gene_column = (\n", + " \"genes\" # (optional) column of sdata[points_key] containing the gene names\n", + ")" ] }, { @@ -117,7 +119,9 @@ } ], "source": [ - "patches = sopa.segmentation.Patches2D(sdata, image_key, patch_width=1200, patch_overlap=50)\n", + "patches = sopa.segmentation.Patches2D(\n", + " sdata, image_key, patch_width=1200, patch_overlap=50\n", + ")\n", "patches.write();" ] }, @@ -163,8 +167,12 @@ "source": [ "channels = [\"DAPI\"]\n", "\n", - "method = sopa.segmentation.methods.cellpose_patch(diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6)\n", - "segmentation = sopa.segmentation.StainingSegmentation(sdata, method, channels, min_area=2500)\n", + "method = sopa.segmentation.methods.cellpose_patch(\n", + " diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6\n", + ")\n", + "segmentation = sopa.segmentation.StainingSegmentation(\n", + " sdata, method, channels, min_area=2500\n", + ")\n", "\n", "# The cellpose boundaries will be temporary saved here. You can choose a different path\n", "cellpose_temp_dir = \"tuto.zarr/.sopa_cache/cellpose\"" @@ -231,7 +239,7 @@ ], "source": [ "# parallelize this for loop yourself (or use the Snakemake pipeline)\n", - "for patch_index in range(len(sdata['sopa_patches'])):\n", + "for patch_index in range(len(sdata[\"sopa_patches\"])):\n", " segmentation.write_patch_cells(cellpose_temp_dir, patch_index)" ] }, @@ -269,7 +277,7 @@ "cells = sopa.segmentation.StainingSegmentation.read_patches_cells(cellpose_temp_dir)\n", "cells = sopa.segmentation.shapes.solve_conflicts(cells)\n", "\n", - "shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n", + "shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n", "\n", "sopa.segmentation.StainingSegmentation.add_shapes(sdata, cells, image_key, shapes_key)" ] @@ -287,7 +295,7 @@ "metadata": {}, "outputs": [], "source": [ - "shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\"" + "shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\"" ] }, { @@ -317,7 +325,7 @@ " \"gene\": \"genes\",\n", " \"min_molecules_per_gene\": 0,\n", " \"min_molecules_per_segment\": 3,\n", - " \"confidence_nn_id\": 6\n", + " \"confidence_nn_id\": 6,\n", " },\n", " \"segmentation\": {\n", " \"scale\": 3, # Important parameter: typical cell diameter, in microns (see our configs)\n", @@ -329,9 +337,7 @@ " \"n_cells_init\": 0,\n", " \"nuclei_genes\": \"\",\n", " \"cyto_genes\": \"\",\n", - " \"new_component_weight\": 0.2,\n", - " \"new_component_fraction\": 0.3\n", - " }\n", + " },\n", "}" ] }, @@ -373,7 +379,9 @@ "# The cellpose boundaries will be temporary saved here. You can choose a different path\n", "baysor_temp_dir = \"tuto.zarr/.sopa_cache/baysor\"\n", "\n", - "patches = sopa.segmentation.Patches2D(sdata, points_key, patch_width=3000, patch_overlap=50)\n", + "patches = sopa.segmentation.Patches2D(\n", + " sdata, points_key, patch_width=3000, patch_overlap=50\n", + ")\n", "valid_indices = patches.patchify_transcripts(baysor_temp_dir, config=config)" ] }, @@ -409,7 +417,7 @@ "for patch_index in valid_indices:\n", " command = f\"\"\"\n", " cd {baysor_temp_dir}/{patch_index}\n", - " {baysor_executable_path} run --save-polygons GeoJSON -c config.toml transcripts.csv\n", + " {baysor_executable_path} run --polygon-format=GeometryCollection -c config.toml transcripts.csv\n", " \"\"\"\n", " subprocess.run(command, shell=True)" ] @@ -501,7 +509,9 @@ } ], "source": [ - "aggregator = sopa.segmentation.Aggregator(sdata, image_key=image_key, shapes_key=shapes_key)\n", + "aggregator = sopa.segmentation.Aggregator(\n", + " sdata, image_key=image_key, shapes_key=shapes_key\n", + ")\n", "\n", "aggregator.compute_table(gene_column=gene_column, average_intensities=True)" ] @@ -611,11 +621,7 @@ "source": [ "from sopa.annotation import higher_z_score\n", "\n", - "marker_cell_dict = {\n", - " \"CK\": \"Tumoral cell\",\n", - " \"CD20\": \"B cell\",\n", - " \"CD3\": \"T cell\"\n", - "}\n", + "marker_cell_dict = {\"CK\": \"Tumoral cell\", \"CD20\": \"B cell\", \"CD3\": \"T cell\"}\n", "\n", "higher_z_score(sdata.tables[\"table\"], marker_cell_dict)" ] @@ -698,7 +704,9 @@ } ], "source": [ - "sopa.io.write(\"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column)" + "sopa.io.write(\n", + " \"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column\n", + ")" ] }, { @@ -767,11 +775,11 @@ } ], "source": [ - "sdata\\\n", - " .pl.render_points(size=0.01, color=\"r\", alpha=0.5)\\\n", - " .pl.render_images()\\\n", - " .pl.render_shapes(shapes_key, outline=True, fill_alpha=0, outline_color=\"w\")\\\n", - " .pl.show(\"global\")" + "sdata.pl.render_points(\n", + " size=0.01, color=\"r\", alpha=0.5\n", + ").pl.render_images().pl.render_shapes(\n", + " shapes_key, outline=True, fill_alpha=0, outline_color=\"w\"\n", + ").pl.show(\"global\")" ] }, { diff --git a/docs/tutorials/cli_usage.md b/docs/tutorials/cli_usage.md index 6ff9b80c..a80b0df1 100644 --- a/docs/tutorials/cli_usage.md +++ b/docs/tutorials/cli_usage.md @@ -165,8 +165,6 @@ iters = 500 n_cells_init = 0 nuclei_genes = "" cyto_genes = "" -new_component_weight = 0.2 -new_component_fraction = 0.3 ``` Then, we generate the bounding boxes of the patches on which Baysor will be run. Here, the patches have a width and height of 1200 microns and an overlap of 50 microns. We advise bigger sizes for real datasets (see our default parameters in one of our [config files](https://github.com/gustaveroussy/sopa/tree/master/workflow/config)). On the toy dataset, this will generate **4** patches. diff --git a/sopa/io/reader/macsima.py b/sopa/io/reader/macsima.py index e13eac2f..748b350f 100644 --- a/sopa/io/reader/macsima.py +++ b/sopa/io/reader/macsima.py @@ -1,11 +1,12 @@ from __future__ import annotations import logging +import re from pathlib import Path from spatialdata import SpatialData -from .utils import _general_tif_directory_reader +from .utils import _deduplicate_names, _general_tif_directory_reader log = logging.getLogger(__name__) @@ -23,4 +24,22 @@ def macsima(path: Path, **kwargs: int) -> SpatialData: Returns: A `SpatialData` object with a 2D-image of shape `(C, Y, X)` """ + files = list(Path(path).glob("*.tif")) + + if any("A-" in file.name for file in files): # non-ome.tif format + return _general_tif_directory_reader(path, files_to_channels=_get_channel_names_macsima, **kwargs) + return _general_tif_directory_reader(path, **kwargs) + + +def _parse_name_macsima(file): + match = re.search(r"_A-(.*?)_C-", file.name) + if match: + antibody = match.group(1) + else: + antibody = re.search(r"_A-(.*?)\.tif", file.name).group(1) + return antibody + + +def _get_channel_names_macsima(files): + return _deduplicate_names([_parse_name_macsima(file) for file in files]) diff --git a/sopa/segmentation/transcripts.py b/sopa/segmentation/transcripts.py index 31cf85e7..8a309096 100644 --- a/sopa/segmentation/transcripts.py +++ b/sopa/segmentation/transcripts.py @@ -97,6 +97,7 @@ def _read_one_segmented_patch( directory: str, min_area: float = 0, min_vertices: int = 4 ) -> tuple[list[Polygon], AnnData]: directory: Path = Path(directory) + id_as_string, polygon_file = _find_polygon_file(directory) loom_file = directory / "segmentation_counts.loom" if loom_file.exists(): @@ -106,16 +107,19 @@ def _read_one_segmented_patch( adata.obs.rename(columns={"area": SopaKeys.ORIGINAL_AREA_OBS}, inplace=True) - cells_num = pd.Series(adata.obs["CellID"].astype(int), index=adata.obs_names) + cells_num = pd.Series(adata.obs_names if id_as_string else adata.obs["CellID"].astype(int), index=adata.obs_names) del adata.obs["CellID"] - with open(directory / "segmentation_polygons.json") as f: + with open(polygon_file) as f: polygons_dict = json.load(f) polygons_dict = {c["cell"]: c for c in polygons_dict["geometries"]} cells_num = cells_num[cells_num.map(lambda num: len(polygons_dict[num]["coordinates"][0]) >= min_vertices)] - gdf = gpd.GeoDataFrame(index=cells_num.index, geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num]) + gdf = gpd.GeoDataFrame( + index=cells_num.index, + geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num], + ) gdf.geometry = gdf.geometry.map(lambda cell: shapes._ensure_polygon(cell)) gdf = gdf[~gdf.geometry.isna()] @@ -129,6 +133,15 @@ def _read_one_segmented_patch( return gdf.geometry.values, adata[gdf.index].copy() +def _find_polygon_file(directory: Path) -> tuple[bool, Path]: + old_baysor_path = directory / "segmentation_polygons.json" + if old_baysor_path.exists(): + return False, old_baysor_path + new_baysor_path = directory / "segmentation_polygons_2d.json" + assert new_baysor_path.exists(), f"Could not find the segmentation polygons file in {directory}" + return True, new_baysor_path + + def _read_all_segmented_patches( temp_dir: str, min_area: float = 0, diff --git a/workflow/Snakefile b/workflow/Snakefile index f087bf22..74e2a5d7 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -113,7 +113,6 @@ rule patch_segmentation_baysor: patches_file = paths.smk_patches_file_baysor, baysor_patch = paths.smk_baysor_temp_dir / "{index}", output: - paths.smk_baysor_temp_dir / "{index}" / "segmentation_polygons.json", paths.smk_baysor_temp_dir / "{index}" / "segmentation_counts.loom", params: args_baysor_prior_seg = args.baysor_prior_seg, @@ -126,7 +125,13 @@ rule patch_segmentation_baysor: fi cd {input.baysor_patch} - {config[executables][baysor]} run --save-polygons GeoJSON -c config.toml transcripts.csv {params.args_baysor_prior_seg} + + help_output=$({config[executables][baysor]} run --help 2>&1) # check if the polygon-format option is available + if [[ $help_output == *"polygon-format"* ]]; then + {config[executables][baysor]} run --polygon-format GeometryCollection -c config.toml transcripts.csv {params.args_baysor_prior_seg} + else + {config[executables][baysor]} run --save-polygons GeoJSON -c config.toml transcripts.csv {params.args_baysor_prior_seg} + fi """ rule patch_segmentation_comseg: diff --git a/workflow/config/cosmx/baysor.yaml b/workflow/config/cosmx/baysor.yaml index a3d0fbc0..16731c09 100644 --- a/workflow/config/cosmx/baysor.yaml +++ b/workflow/config/cosmx/baysor.yaml @@ -34,8 +34,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/cosmx/cellpose_baysor.yaml b/workflow/config/cosmx/cellpose_baysor.yaml index 7705d439..e0accae4 100644 --- a/workflow/config/cosmx/cellpose_baysor.yaml +++ b/workflow/config/cosmx/cellpose_baysor.yaml @@ -41,8 +41,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/example_commented.yaml b/workflow/config/example_commented.yaml index f33a1206..fcc2199b 100644 --- a/workflow/config/example_commented.yaml +++ b/workflow/config/example_commented.yaml @@ -63,8 +63,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: diff --git a/workflow/config/merscope/baysor_cellpose.yaml b/workflow/config/merscope/baysor_cellpose.yaml index adbf7137..c987ff24 100644 --- a/workflow/config/merscope/baysor_cellpose.yaml +++ b/workflow/config/merscope/baysor_cellpose.yaml @@ -42,8 +42,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/merscope/baysor_vizgen.yaml b/workflow/config/merscope/baysor_vizgen.yaml index 10920027..53bc15b2 100644 --- a/workflow/config/merscope/baysor_vizgen.yaml +++ b/workflow/config/merscope/baysor_vizgen.yaml @@ -37,8 +37,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/toy/uniform_baysor.yaml b/workflow/config/toy/uniform_baysor.yaml index b628470d..ef656ab6 100644 --- a/workflow/config/toy/uniform_baysor.yaml +++ b/workflow/config/toy/uniform_baysor.yaml @@ -34,8 +34,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/toy/uniform_baysor_overlaps.yaml b/workflow/config/toy/uniform_baysor_overlaps.yaml index 3b08951f..f3097950 100644 --- a/workflow/config/toy/uniform_baysor_overlaps.yaml +++ b/workflow/config/toy/uniform_baysor_overlaps.yaml @@ -34,8 +34,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/toy/uniform_baysor_vizgen.yaml b/workflow/config/toy/uniform_baysor_vizgen.yaml index a6b9db07..7aac2fbc 100644 --- a/workflow/config/toy/uniform_baysor_vizgen.yaml +++ b/workflow/config/toy/uniform_baysor_vizgen.yaml @@ -38,8 +38,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/toy/uniform_cellpose_baysor.yaml b/workflow/config/toy/uniform_cellpose_baysor.yaml index bd3479c5..588496ad 100644 --- a/workflow/config/toy/uniform_cellpose_baysor.yaml +++ b/workflow/config/toy/uniform_cellpose_baysor.yaml @@ -41,8 +41,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/xenium/baysor.yaml b/workflow/config/xenium/baysor.yaml index e12b466e..1401b62f 100644 --- a/workflow/config/xenium/baysor.yaml +++ b/workflow/config/xenium/baysor.yaml @@ -32,8 +32,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/xenium/baysor_multimodal.yaml b/workflow/config/xenium/baysor_multimodal.yaml index 1e5eb53b..af4306f7 100644 --- a/workflow/config/xenium/baysor_multimodal.yaml +++ b/workflow/config/xenium/baysor_multimodal.yaml @@ -36,8 +36,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/config/xenium/cellpose_baysor.yaml b/workflow/config/xenium/cellpose_baysor.yaml index 0b5a8590..79df1286 100644 --- a/workflow/config/xenium/cellpose_baysor.yaml +++ b/workflow/config/xenium/cellpose_baysor.yaml @@ -41,8 +41,6 @@ segmentation: n_cells_init: 0 nuclei_genes: "" cyto_genes: "" - new_component_weight: 0.2 - new_component_fraction: 0.3 aggregate: average_intensities: true diff --git a/workflow/utils.py b/workflow/utils.py index dee924e4..0d81eea6 100644 --- a/workflow/utils.py +++ b/workflow/utils.py @@ -98,7 +98,7 @@ def cells_paths(self, file_content: str, name, dirs: bool = False): return [str(self.smk_cellpose_temp_dir / f"{i}.parquet") for i in range(int(file_content))] if name == "baysor": indices = map(int, file_content.split()) - BAYSOR_FILES = ["segmentation_polygons.json", "segmentation_counts.loom"] + BAYSOR_FILES = ["segmentation_counts.loom"] if dirs: return [str(self.smk_baysor_temp_dir / str(i)) for i in indices]