Skip to content

Commit

Permalink
refactor: finemapping method enum (opentargets#897)
Browse files Browse the repository at this point in the history
Co-authored-by: Yakov <[email protected]>
  • Loading branch information
d0choa and addramir authored Nov 8, 2024
1 parent 93de448 commit b5b71f0
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 19 deletions.
4 changes: 4 additions & 0 deletions docs/python_api/datasets/study_locus.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ title: Study Locus

---

::: gentropy.dataset.study_locus.FinemappingMethod

---

::: gentropy.dataset.study_locus.StudyLocusQualityCheck

---
Expand Down
6 changes: 4 additions & 2 deletions src/gentropy/colocalisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pyspark.sql.functions import col

from gentropy.common.session import Session
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus
from gentropy.method.colocalisation import Coloc, ColocalisationMethodInterface


Expand Down Expand Up @@ -56,7 +56,9 @@ def __init__(
)
if colocalisation_method == Coloc.METHOD_NAME.lower():
credible_set = credible_set.filter(
col("finemappingMethod").isin("SuSie", "SuSiE-inf")
col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value
)
)

# Transform
Expand Down
53 changes: 45 additions & 8 deletions src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,20 @@ class CredibleInterval(Enum):
IS99 = "is99CredibleSet"


class FinemappingMethod(Enum):
    """Closed set of fine-mapping method labels used in the `finemappingMethod` column.

    Use the members' `.value` when writing or filtering the column, rather than
    repeating the raw string literals across the code base.

    Attributes:
        PICS (str): PICS
        SUSIE (str): SuSiE method
        SUSIE_INF (str): SuSiE-inf method implemented in `gentropy`
    """

    # NOTE: the values are the exact labels written to and matched against the
    # `finemappingMethod` column. The casing differs between "SuSie" and
    # "SuSiE-inf" on purpose: these are the historical literals, so normalising
    # them would stop filters from matching existing data.
    PICS = "pics"
    SUSIE = "SuSie"
    SUSIE_INF = "SuSiE-inf"


@dataclass
class StudyLocus(Dataset):
"""Study-Locus dataset.
Expand Down Expand Up @@ -1056,7 +1070,7 @@ def qc_redundant_top_hits_from_PICS(self: StudyLocus) -> StudyLocus:
StudyLocus: Updated study locus with redundant top hits flagged.
"""
studies_with_pics_sumstats = (
self.df.filter(f.col("finemappingMethod") == "pics")
self.df.filter(f.col("finemappingMethod") == FinemappingMethod.PICS.value)
# Returns True if the study contains any PICS associations from summary statistics
.withColumn(
"hasPicsSumstats",
Expand Down Expand Up @@ -1095,7 +1109,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus:
"""
# unique study-regions covered by SuSie credible sets
susie_study_regions = (
self.filter(f.col("finemappingMethod") == "SuSiE-inf")
self.filter(
f.col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value
)
)
.df.select(
"studyId",
"chromosome",
Expand All @@ -1108,7 +1126,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus:

# non SuSiE credible sets (studyLocusId) overlapping in any variant with SuSiE locus
redundant_study_locus = (
self.filter(f.col("finemappingMethod") != "SuSiE-inf")
self.filter(
~f.col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value
)
)
.df.withColumn("l", f.explode("locus"))
.select(
"studyLocusId",
Expand Down Expand Up @@ -1141,7 +1163,12 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus:
# credible set in SuSiE overlapping region
f.col("inSuSiE")
# credible set not based on SuSiE
& (f.col("finemappingMethod") != "SuSiE-inf"),
& (
~f.col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value,
FinemappingMethod.SUSIE_INF.value,
)
),
StudyLocusQualityCheck.EXPLAINED_BY_SUSIE,
),
)
Expand Down Expand Up @@ -1268,7 +1295,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus:
df = self.df.withColumn(
"confidence",
f.when(
(f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"]))
(
f.col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value,
FinemappingMethod.SUSIE_INF.value,
)
)
& (
~f.array_contains(
f.col("qualityControls"),
Expand All @@ -1278,7 +1310,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus:
CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value,
)
.when(
(f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"]))
(
f.col("finemappingMethod").isin(
FinemappingMethod.SUSIE.value,
FinemappingMethod.SUSIE_INF.value,
)
)
& (
f.array_contains(
f.col("qualityControls"),
Expand All @@ -1288,7 +1325,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus:
CredibleSetConfidenceClasses.FINEMAPPED_OUT_OF_SAMPLE_LD.value,
)
.when(
(f.col("finemappingMethod") == "pics")
(f.col("finemappingMethod") == FinemappingMethod.PICS.value)
& (
~f.array_contains(
f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value
Expand All @@ -1297,7 +1334,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus:
CredibleSetConfidenceClasses.PICSED_SUMMARY_STATS.value,
)
.when(
(f.col("finemappingMethod") == "pics")
(f.col("finemappingMethod") == FinemappingMethod.PICS.value)
& (
f.array_contains(
f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value
Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/datasource/eqtl_catalogue/finemapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from gentropy.common.session import Session
from gentropy.common.utils import parse_pvalue
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus
from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex

if TYPE_CHECKING:
Expand Down Expand Up @@ -166,7 +166,7 @@ def parse_susie_results(
f.col("se").alias("standardError"),
f.col("credibleSetIndex"),
f.col("logBF"),
f.lit("SuSie").alias("finemappingMethod"),
f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"),
# Study metadata
f.col("molecular_trait_id").alias("traitFromSource"),
f.col("gene_id").alias("geneId"),
Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/datasource/finngen/finemapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from gentropy.common.spark_helpers import get_top_ranked_in_window
from gentropy.common.utils import parse_pvalue
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus


@dataclass
Expand Down Expand Up @@ -319,7 +319,7 @@ def from_finngen_susie_finemapping(
# Add standard error, and allele frequency information.
f.col("se").cast("double").alias("standardError"),
f.col("maf").cast("float").alias("effectAlleleFrequencyFromSource"),
f.lit("SuSie").cast("string").alias("finemappingMethod"),
f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"),
*[
f.col(f"alpha{i}").cast(t.DoubleType()).alias(f"alpha_{i}")
for i in range(1, 11)
Expand Down
12 changes: 9 additions & 3 deletions src/gentropy/method/pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
import pyspark.sql.types as t
from scipy.stats import norm

from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck
from gentropy.dataset.study_locus import (
FinemappingMethod,
StudyLocus,
StudyLocusQualityCheck,
)

if TYPE_CHECKING:
from pyspark.sql import Row
Expand Down Expand Up @@ -213,9 +217,11 @@ def finemap(
"""
# Finemapping method is an optional column:
finemapping_method_expression = (
f.lit("pics")
f.lit(FinemappingMethod.PICS.value)
if "finemappingMethod" not in associations.df.columns
else f.coalesce(f.col("finemappingMethod"), f.lit("pics"))
else f.coalesce(
f.col("finemappingMethod"), f.lit(FinemappingMethod.PICS.value)
)
)

# Flagging expression for loci that do not qualify for PICS:
Expand Down
8 changes: 6 additions & 2 deletions src/gentropy/susie_finemapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
order_array_of_structs_by_field,
)
from gentropy.dataset.study_index import StudyIndex
from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck
from gentropy.dataset.study_locus import (
FinemappingMethod,
StudyLocus,
StudyLocusQualityCheck,
)
from gentropy.method.carma import CARMA
from gentropy.method.ld import LDAnnotator
from gentropy.method.ld_matrix_interface import LDMatrixInterface
Expand Down Expand Up @@ -290,7 +294,7 @@ def susie_inf_to_studylocus( # noqa: C901
"region": f.lit(region),
"credibleSetIndex": f.lit(counter),
"credibleSetlog10BF": f.lit(cs_lbf_value * 0.4342944819),
"finemappingMethod": f.lit("SuSiE-inf"),
"finemappingMethod": f.lit(FinemappingMethod.SUSIE_INF.value),
}
)
.withColumn(
Expand Down

0 comments on commit b5b71f0

Please sign in to comment.