icbi-lab · grst · Jan 14, 2022 · Jan 14, 2022 · Jan 14, 2022
diff --git a/docs/api.rst b/docs/api.rst
@@ -22,6 +22,7 @@ Input/Output: `io`
    :toctree: ./generated
 
    genomic_position_from_gtf
+   read_scevan
 
 
 Preprocessing: `pp`

diff --git a/docs/references.bib b/docs/references.bib
@@ -48,3 +48,14 @@ @article{Wu2021
   title     = {Single-cell profiling of tumor heterogeneity and the microenvironment in advanced non-small cell lung cancer},
   journal   = {Nature Communications}
 }
+
+@article{DeFalco2021,
+  doi       = {10.1101/2021.11.20.469390},
+  url       = {https://doi.org/10.1101/2021.11.20.469390},
+  year      = {2021},
+  month     = nov,
+  publisher = {Cold Spring Harbor Laboratory},
+  author    = {Antonio De Falco and Francesca Caruso and Xiao-Dong Su and Antonio Iavarone and Michele Ceccarelli},
+  title     = {A fast variational algorithm to detect the clonal copy number substructure of tumors from single-cell data},
+  journal   = {bioRxiv}
+}
diff --git a/infercnvpy/io/__init__.py b/infercnvpy/io/__init__.py
@@ -1 +1,2 @@
 from ._genepos import genomic_position_from_gtf
+from ._scevan import read_scevan
diff --git a/infercnvpy/io/_scevan.py b/infercnvpy/io/_scevan.py
@@ -0,0 +1,126 @@
+"""Read in result files from scevan"""
+
+from typing import Optional, Union
+from anndata import AnnData
+from pathlib import Path
+import pyreadr
+from scanpy import logging
+import pandas as pd
+import numpy as np
+
+
+def _get_chr_pos_from_array(chr_pos_array):
+    """Get starting position for each chromosome.
+
+    Assumes that chr_pos_array is an array with chromosomes ascendingly (like
+    in the scevan `count_mtx_annot` table).
+    """
+    chr_pos = {}
+    for i, sn in enumerate(chr_pos_array):
+        chr_name = f"chr{int(sn)}"
+        if chr_name not in chr_pos:
+            chr_pos[chr_name] = i
+    return chr_pos
+
+
+def read_scevan(
+    adata: AnnData,
+    scevan_res_dir: Union[str, Path],
+    scevan_res_table: Optional[Union[str, Path]] = None,
+    *,
+    subclones: bool = True,
+    inplace: bool = True,
+    subset: bool = True,
+    key_added: str = "scevan",
+) -> Optional[AnnData]:
+    """
+    Load results from SCEVAN :cite:`DeFalco2021` for downstream analysis with infercnvpy.
+
+    Requires that the cell barcodes used for SCEVAN and `adata.obs_names` match,
+    but the order is irrelevant.
+
+    Parameters
+    ----------
+    adata
+        adata object to which the SCEVAN results shall be added
+    scevan_res_table
+        The results of `SCEVAN::pipelineCNA` saved as CSV file. Will add
+        the columns `{key_added}_class`, `{key_added}_confident_normal` and, if SCEVAN was
+        ran with subclone calling, `{key_added}_subclone` to `adata.obs`. This parameter can
+        be omitted if you only want to load the CNV matrix.
+    scevan_res_dir
+        The output directory created by SCEVAN. Must only contain results for a single
+        sample. Will read the files `*_CNAmtx.RData`, `*_CNAmtxSubclones.RData` (if available),
+        and `*_count_mtx_annot.RData`.
+    subclones
+        Load the separate subclone segmentation, if available.
+    subset
+        If `True` (the default), subset anndata to the cells that have not been filtered
+        out by the SCEVAN analysis. Otherwise the CNV matrix may contain `nan`-values and
+        downstream analysis such as PCA will not work.
+    key_added
+        Used as prefix for the columns added to `adata.obs` and will add the CNV matrix
+        as `X_{key_added}` to `adata.obsm`, and chromosome indices to `adata.uns[key_added]`.
+    inplace
+        If `True`, modify anndata inplace. Otherwise, return a copy.
+
+    Returns
+    -------
+    Depending on the value of `inplace` returns a modified AnnData object or returns
+    `None` and modifies adata inplace.
+    """
+    scevan_res_dir = Path(scevan_res_dir)
+    scevan_res_file = list(scevan_res_dir.glob("*_CNAmtx.RData"))
+    scevan_subclones_file = list(scevan_res_dir.glob("*_CNAmtxSubclones.RData"))
+    scevan_anno_file = list(scevan_res_dir.glob("*_count_mtx_annot.RData"))
+
+    if (
+        len(scevan_res_file) != 1
+        or len(scevan_subclones_file) > 1
+        or len(scevan_anno_file) != 1
+    ):
+        raise ValueError(
+            "There must be exactely one CNA_mtx and count_mtx_annot file and at most one "
+            "CNAmtxSubclones file in the result directory!"
+        )
+
+    # read data
+    if scevan_res_table is not None:
+        tumor_normal_call = pd.read_csv(scevan_res_table, index_col=0)
+    else:
+        tumor_normal_call = None
+        logging.warning(
+            "No `scevan_res_table` specified. Will not add tumor/normal classification."
+        )
+    scevan_res = pyreadr.read_r(scevan_res_file[0])["CNA_mtx_relat"].T
+    scevan_anno = pyreadr.read_r(scevan_anno_file[0])["count_mtx_annot"]
+    scevan_subclone_res = None
+    if subclones and len(scevan_subclones_file):
+        scevan_subclone_res = pyreadr.read_r(scevan_subclones_file[0])["results.com"].T
+
+    if not inplace:
+        adata = adata.copy()
+
+    # add obs annotation
+    if tumor_normal_call is not None:
+        adata.obs[f"{key_added}_class"] = tumor_normal_call["class"]
+        adata.obs[f"{key_added}_confident_normal"] = tumor_normal_call[
+            "confidentNormal"
+        ]
+        if "subclone" in tumor_normal_call.columns:
+            adata.obs[f"{key_added}_subclone"] = tumor_normal_call["subclone"].apply(
+                lambda x: f"{int(x)}" if not pd.isnull(x) else np.nan
+            )
+
+    if subset:
+        adata._inplace_subset_obs(scevan_res.index.values)
+
+    adata.obsm[f"X_{key_added}"] = scevan_res.reindex(adata.obs_names)
+    if scevan_subclone_res is not None:
+        adata.obsm[f"X_{key_added}"].loc[
+            scevan_subclone_res.index, :
+        ] = scevan_subclone_res
+    adata.uns[key_added] = {"chr_pos": _get_chr_pos_from_array(scevan_anno["seqnames"])}
+
+    if not inplace:
+        return adata
diff --git a/infercnvpy/pl/_chromosome_heatmap.py b/infercnvpy/pl/_chromosome_heatmap.py
@@ -65,7 +65,8 @@ def chromosome_heatmap(
     chr_pos = list(chr_pos_dict.values())
 
     # center color map at 0
-    norm = TwoSlopeNorm(0, vmin=np.min(tmp_adata.X), vmax=np.max(tmp_adata.X))
+    tmp_data = tmp_adata.X.data if issparse(tmp_adata.X) else tmp_adata.X
+    norm = TwoSlopeNorm(0, vmin=np.nanmin(tmp_data), vmax=np.nanmax(tmp_data))
 
     # add chromosome annotations
     var_group_positions = list(zip(chr_pos, chr_pos[1:] + [tmp_adata.shape[1]]))

diff --git a/pyproject.toml b/pyproject.toml
@@ -33,6 +33,7 @@ requires = [
     'gtfparse>=1.2.1',
     'pycairo>=1.20; sys_platform == "win32"',
     'leidenalg',
+    'pyreadr'
 ]
 
 [tool.flit.metadata.requires-extra]
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		from ._genepos import genomic_position_from_gtf
		from ._scevan import read_scevan