diff --git a/docs/tutorials/api_usage.ipynb b/docs/tutorials/api_usage.ipynb index 2dd0fc1..fc985f7 100644 --- a/docs/tutorials/api_usage.ipynb +++ b/docs/tutorials/api_usage.ipynb @@ -78,8 +78,10 @@ "outputs": [], "source": [ "image_key = \"image\"\n", - "points_key = \"transcripts\" # (ignore this for multiplex imaging)\n", - "gene_column = \"genes\" # (optional) column of sdata[points_key] containing the gene names" + "points_key = \"transcripts\" # (ignore this for multiplex imaging)\n", + "gene_column = (\n", + " \"genes\" # (optional) column of sdata[points_key] containing the gene names\n", + ")" ] }, { @@ -117,7 +119,9 @@ } ], "source": [ - "patches = sopa.segmentation.Patches2D(sdata, image_key, patch_width=1200, patch_overlap=50)\n", + "patches = sopa.segmentation.Patches2D(\n", + " sdata, image_key, patch_width=1200, patch_overlap=50\n", + ")\n", "patches.write();" ] }, @@ -163,8 +167,12 @@ "source": [ "channels = [\"DAPI\"]\n", "\n", - "method = sopa.segmentation.methods.cellpose_patch(diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6)\n", - "segmentation = sopa.segmentation.StainingSegmentation(sdata, method, channels, min_area=2500)\n", + "method = sopa.segmentation.methods.cellpose_patch(\n", + " diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6\n", + ")\n", + "segmentation = sopa.segmentation.StainingSegmentation(\n", + " sdata, method, channels, min_area=2500\n", + ")\n", "\n", "# The cellpose boundaries will be temporary saved here. You can choose a different path\n", "cellpose_temp_dir = \"tuto.zarr/.sopa_cache/cellpose\"" @@ -231,7 +239,7 @@ ], "source": [ "# parallelize this for loop yourself (or use the Snakemake pipeline)\n", - "for patch_index in range(len(sdata['sopa_patches'])):\n", + "for patch_index in range(len(sdata[\"sopa_patches\"])):\n", " segmentation.write_patch_cells(cellpose_temp_dir, patch_index)" ] }, @@ -269,7 +277,7 @@ "cells = sopa.segmentation.StainingSegmentation.read_patches_cells(cellpose_temp_dir)\n", "cells = sopa.segmentation.shapes.solve_conflicts(cells)\n", "\n", - "shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n", + "shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n", "\n", "sopa.segmentation.StainingSegmentation.add_shapes(sdata, cells, image_key, shapes_key)" ] @@ -287,7 +295,7 @@ "metadata": {}, "outputs": [], "source": [ - "shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\"" + "shapes_key = \"baysor_boundaries\" # the name that we will give to the baysor \"shapes\"" ] }, { @@ -317,7 +325,7 @@ " \"gene\": \"genes\",\n", " \"min_molecules_per_gene\": 0,\n", " \"min_molecules_per_segment\": 3,\n", - " \"confidence_nn_id\": 6\n", + " \"confidence_nn_id\": 6,\n", " },\n", " \"segmentation\": {\n", " \"scale\": 3, # Important parameter: typical cell diameter, in microns (see our configs)\n", @@ -329,9 +337,7 @@ " \"n_cells_init\": 0,\n", " \"nuclei_genes\": \"\",\n", " \"cyto_genes\": \"\",\n", - " \"new_component_weight\": 0.2,\n", - " \"new_component_fraction\": 0.3\n", - " }\n", + " },\n", "}" ] }, @@ -373,7 +379,9 @@ "# The cellpose boundaries will be temporary saved here. You can choose a different path\n", "baysor_temp_dir = \"tuto.zarr/.sopa_cache/baysor\"\n", "\n", - "patches = sopa.segmentation.Patches2D(sdata, points_key, patch_width=3000, patch_overlap=50)\n", + "patches = sopa.segmentation.Patches2D(\n", + " sdata, points_key, patch_width=3000, patch_overlap=50\n", + ")\n", "valid_indices = patches.patchify_transcripts(baysor_temp_dir, config=config)" ] }, @@ -409,7 +417,7 @@ "for patch_index in valid_indices:\n", " command = f\"\"\"\n", " cd {baysor_temp_dir}/{patch_index}\n", - " {baysor_executable_path} run --save-polygons GeoJSON -c config.toml transcripts.csv\n", + " {baysor_executable_path} run --polygon-format=GeometryCollection -c config.toml transcripts.csv\n", " \"\"\"\n", " subprocess.run(command, shell=True)" ] @@ -501,7 +509,9 @@ } ], "source": [ - "aggregator = sopa.segmentation.Aggregator(sdata, image_key=image_key, shapes_key=shapes_key)\n", + "aggregator = sopa.segmentation.Aggregator(\n", + " sdata, image_key=image_key, shapes_key=shapes_key\n", + ")\n", "\n", "aggregator.compute_table(gene_column=gene_column, average_intensities=True)" ] @@ -611,11 +621,7 @@ "source": [ "from sopa.annotation import higher_z_score\n", "\n", - "marker_cell_dict = {\n", - " \"CK\": \"Tumoral cell\",\n", - " \"CD20\": \"B cell\",\n", - " \"CD3\": \"T cell\"\n", - "}\n", + "marker_cell_dict = {\"CK\": \"Tumoral cell\", \"CD20\": \"B cell\", \"CD3\": \"T cell\"}\n", "\n", "higher_z_score(sdata.tables[\"table\"], marker_cell_dict)" ] @@ -698,7 +704,9 @@ } ], "source": [ - "sopa.io.write(\"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column)" + "sopa.io.write(\n", + " \"tuto.explorer\", sdata, image_key, points_key=points_key, gene_column=gene_column\n", + ")" ] }, { @@ -767,11 +775,11 @@ } ], "source": [ - "sdata\\\n", - " .pl.render_points(size=0.01, color=\"r\", alpha=0.5)\\\n", - " .pl.render_images()\\\n", - " .pl.render_shapes(shapes_key, outline=True, fill_alpha=0, outline_color=\"w\")\\\n", - " .pl.show(\"global\")" + "sdata.pl.render_points(\n", + " size=0.01, color=\"r\", alpha=0.5\n", + ").pl.render_images().pl.render_shapes(\n", + " shapes_key, outline=True, fill_alpha=0, outline_color=\"w\"\n", + ").pl.show(\"global\")" ] }, { diff --git a/sopa/segmentation/transcripts.py b/sopa/segmentation/transcripts.py index 31cf85e..8a30909 100644 --- a/sopa/segmentation/transcripts.py +++ b/sopa/segmentation/transcripts.py @@ -97,6 +97,7 @@ def _read_one_segmented_patch( directory: str, min_area: float = 0, min_vertices: int = 4 ) -> tuple[list[Polygon], AnnData]: directory: Path = Path(directory) + id_as_string, polygon_file = _find_polygon_file(directory) loom_file = directory / "segmentation_counts.loom" if loom_file.exists(): @@ -106,16 +107,19 @@ def _read_one_segmented_patch( adata.obs.rename(columns={"area": SopaKeys.ORIGINAL_AREA_OBS}, inplace=True) - cells_num = pd.Series(adata.obs["CellID"].astype(int), index=adata.obs_names) + cells_num = pd.Series(adata.obs_names if id_as_string else adata.obs["CellID"].astype(int), index=adata.obs_names) del adata.obs["CellID"] - with open(directory / "segmentation_polygons.json") as f: + with open(polygon_file) as f: polygons_dict = json.load(f) polygons_dict = {c["cell"]: c for c in polygons_dict["geometries"]} cells_num = cells_num[cells_num.map(lambda num: len(polygons_dict[num]["coordinates"][0]) >= min_vertices)] - gdf = gpd.GeoDataFrame(index=cells_num.index, geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num]) + gdf = gpd.GeoDataFrame( + index=cells_num.index, + geometry=[shape(polygons_dict[cell_num]) for cell_num in cells_num], + ) gdf.geometry = gdf.geometry.map(lambda cell: shapes._ensure_polygon(cell)) gdf = gdf[~gdf.geometry.isna()] @@ -129,6 +133,15 @@ def _read_one_segmented_patch( return gdf.geometry.values, adata[gdf.index].copy() +def _find_polygon_file(directory: Path) -> tuple[bool, Path]: + old_baysor_path = directory / "segmentation_polygons.json" + if old_baysor_path.exists(): + return False, old_baysor_path + new_baysor_path = directory / "segmentation_polygons_2d.json" + assert new_baysor_path.exists(), f"Could not find the segmentation polygons file in {directory}" + return True, new_baysor_path + + def _read_all_segmented_patches( temp_dir: str, min_area: float = 0,