Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scevan loader #32

Merged
merged 2 commits into from
Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Input/Output: `io`
:toctree: ./generated

genomic_position_from_gtf
read_scevan


Preprocessing: `pp`
Expand Down
11 changes: 11 additions & 0 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,14 @@ @article{Wu2021
title = {Single-cell profiling of tumor heterogeneity and the microenvironment in advanced non-small cell lung cancer},
journal = {Nature Communications}
}

@article{DeFalco2021,
doi = {10.1101/2021.11.20.469390},
url = {https://doi.org/10.1101/2021.11.20.469390},
year = {2021},
month = nov,
publisher = {Cold Spring Harbor Laboratory},
author = {Antonio De Falco and Francesca Caruso and Xiao-Dong Su and Antonio Iavarone and Michele Ceccarelli},
title = {A fast variational algorithm to detect the clonal copy number substructure of tumors from single-cell data},
journal = {bioRxiv}
}
1 change: 1 addition & 0 deletions infercnvpy/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from ._genepos import genomic_position_from_gtf
from ._scevan import read_scevan
126 changes: 126 additions & 0 deletions infercnvpy/io/_scevan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""Read in result files from scevan"""

from typing import Optional, Union
from anndata import AnnData
from pathlib import Path
import pyreadr
from scanpy import logging
import pandas as pd
import numpy as np


def _get_chr_pos_from_array(chr_pos_array):
"""Get starting position for each chromosome.

Assumes that chr_pos_array is an array with chromosomes ascendingly (like
in the scevan `count_mtx_annot` table).
"""
chr_pos = {}
for i, sn in enumerate(chr_pos_array):
chr_name = f"chr{int(sn)}"
if chr_name not in chr_pos:
chr_pos[chr_name] = i
return chr_pos


def read_scevan(
adata: AnnData,
scevan_res_dir: Union[str, Path],
scevan_res_table: Optional[Union[str, Path]] = None,
*,
subclones: bool = True,
inplace: bool = True,
subset: bool = True,
key_added: str = "scevan",
) -> Optional[AnnData]:
"""
Load results from SCEVAN :cite:`DeFalco2021` for downstream analysis with infercnvpy.

Requires that the cell barcodes used for SCEVAN and `adata.obs_names` match,
but the order is irrelevant.

Parameters
----------
adata
adata object to which the SCEVAN results shall be added
scevan_res_table
The results of `SCEVAN::pipelineCNA` saved as CSV file. Will add
the columns `{key_added}_class`, `{key_added}_confident_normal` and, if SCEVAN was
ran with subclone calling, `{key_added}_subclone` to `adata.obs`. This parameter can
be omitted if you only want to load the CNV matrix.
scevan_res_dir
The output directory created by SCEVAN. Must only contain results for a single
sample. Will read the files `*_CNAmtx.RData`, `*_CNAmtxSubclones.RData` (if available),
and `*_count_mtx_annot.RData`.
subclones
Load the separate subclone segmentation, if available.
subset
If `True` (the default), subset anndata to the cells that have not been filtered
out by the SCEVAN analysis. Otherwise the CNV matrix may contain `nan`-values and
downstream analysis such as PCA will not work.
key_added
Used as prefix for the columns added to `adata.obs` and will add the CNV matrix
as `X_{key_added}` to `adata.obsm`, and chromosome indices to `adata.uns[key_added]`.
inplace
If `True`, modify anndata inplace. Otherwise, return a copy.

Returns
-------
Depending on the value of `inplace` returns a modified AnnData object or returns
`None` and modifies adata inplace.
"""
scevan_res_dir = Path(scevan_res_dir)
scevan_res_file = list(scevan_res_dir.glob("*_CNAmtx.RData"))
scevan_subclones_file = list(scevan_res_dir.glob("*_CNAmtxSubclones.RData"))
scevan_anno_file = list(scevan_res_dir.glob("*_count_mtx_annot.RData"))

if (
len(scevan_res_file) != 1
or len(scevan_subclones_file) > 1
or len(scevan_anno_file) != 1
):
raise ValueError(
"There must be exactely one CNA_mtx and count_mtx_annot file and at most one "
"CNAmtxSubclones file in the result directory!"
)

# read data
if scevan_res_table is not None:
tumor_normal_call = pd.read_csv(scevan_res_table, index_col=0)
else:
tumor_normal_call = None
logging.warning(
"No `scevan_res_table` specified. Will not add tumor/normal classification."
)
scevan_res = pyreadr.read_r(scevan_res_file[0])["CNA_mtx_relat"].T
scevan_anno = pyreadr.read_r(scevan_anno_file[0])["count_mtx_annot"]
scevan_subclone_res = None
if subclones and len(scevan_subclones_file):
scevan_subclone_res = pyreadr.read_r(scevan_subclones_file[0])["results.com"].T

if not inplace:
adata = adata.copy()

# add obs annotation
if tumor_normal_call is not None:
adata.obs[f"{key_added}_class"] = tumor_normal_call["class"]
adata.obs[f"{key_added}_confident_normal"] = tumor_normal_call[
"confidentNormal"
]
if "subclone" in tumor_normal_call.columns:
adata.obs[f"{key_added}_subclone"] = tumor_normal_call["subclone"].apply(
lambda x: f"{int(x)}" if not pd.isnull(x) else np.nan
)

if subset:
adata._inplace_subset_obs(scevan_res.index.values)

adata.obsm[f"X_{key_added}"] = scevan_res.reindex(adata.obs_names)
if scevan_subclone_res is not None:
adata.obsm[f"X_{key_added}"].loc[
scevan_subclone_res.index, :
] = scevan_subclone_res
adata.uns[key_added] = {"chr_pos": _get_chr_pos_from_array(scevan_anno["seqnames"])}

if not inplace:
return adata
3 changes: 2 additions & 1 deletion infercnvpy/pl/_chromosome_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def chromosome_heatmap(
chr_pos = list(chr_pos_dict.values())

# center color map at 0
norm = TwoSlopeNorm(0, vmin=np.min(tmp_adata.X), vmax=np.max(tmp_adata.X))
tmp_data = tmp_adata.X.data if issparse(tmp_adata.X) else tmp_adata.X
norm = TwoSlopeNorm(0, vmin=np.nanmin(tmp_data), vmax=np.nanmax(tmp_data))

# add chromosome annotations
var_group_positions = list(zip(chr_pos, chr_pos[1:] + [tmp_adata.shape[1]]))
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ requires = [
'gtfparse>=1.2.1',
'pycairo>=1.20; sys_platform == "win32"',
'leidenalg',
'pyreadr'
]

[tool.flit.metadata.requires-extra]
Expand Down