From b5b71f0a288163845e26d9ba9c085120c3a9b6ca Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 8 Nov 2024 12:41:54 +0000 Subject: [PATCH] refactor: finemapping method enum (#897) Co-authored-by: Yakov --- docs/python_api/datasets/study_locus.md | 4 ++ src/gentropy/colocalisation.py | 6 ++- src/gentropy/dataset/study_locus.py | 53 ++++++++++++++++--- .../datasource/eqtl_catalogue/finemapping.py | 4 +- .../datasource/finngen/finemapping.py | 4 +- src/gentropy/method/pics.py | 12 +++-- src/gentropy/susie_finemapper.py | 8 ++- 7 files changed, 72 insertions(+), 19 deletions(-) diff --git a/docs/python_api/datasets/study_locus.md b/docs/python_api/datasets/study_locus.md index 6896db167..700e39944 100644 --- a/docs/python_api/datasets/study_locus.md +++ b/docs/python_api/datasets/study_locus.md @@ -6,6 +6,10 @@ title: Study Locus --- +::: gentropy.dataset.study_locus.FinemappingMethod + +--- + ::: gentropy.dataset.study_locus.StudyLocusQualityCheck --- diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index a45a9a6a1..9682a8ed9 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -8,7 +8,7 @@ from pyspark.sql.functions import col from gentropy.common.session import Session -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus from gentropy.method.colocalisation import Coloc, ColocalisationMethodInterface @@ -56,7 +56,9 @@ def __init__( ) if colocalisation_method == Coloc.METHOD_NAME.lower(): credible_set = credible_set.filter( - col("finemappingMethod").isin("SuSie", "SuSiE-inf") + col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) ) # Transform diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 908c093b6..1a2aa3697 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -139,6 +139,20 @@ class CredibleInterval(Enum): IS99 = "is99CredibleSet" +class FinemappingMethod(Enum): + """Finemapping method enum. + + Attributes: + PICS (str): PICS + SUSIE (str): SuSiE method + SUSIE_INF (str): SuSiE-inf method implemented in `gentropy` + """ + + PICS = "pics" + SUSIE = "SuSie" + SUSIE_INF = "SuSiE-inf" + + @dataclass class StudyLocus(Dataset): """Study-Locus dataset. @@ -1056,7 +1070,7 @@ def qc_redundant_top_hits_from_PICS(self: StudyLocus) -> StudyLocus: StudyLocus: Updated study locus with redundant top hits flagged. """ studies_with_pics_sumstats = ( - self.df.filter(f.col("finemappingMethod") == "pics") + self.df.filter(f.col("finemappingMethod") == FinemappingMethod.PICS.value) # Returns True if the study contains any PICS associations from summary statistics .withColumn( "hasPicsSumstats", @@ -1095,7 +1109,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: """ # unique study-regions covered by SuSie credible sets susie_study_regions = ( - self.filter(f.col("finemappingMethod") == "SuSiE-inf") + self.filter( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) + ) .df.select( "studyId", "chromosome", @@ -1108,7 +1126,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: # non SuSiE credible sets (studyLocusId) overlapping in any variant with SuSiE locus redundant_study_locus = ( - self.filter(f.col("finemappingMethod") != "SuSiE-inf") + self.filter( + ~f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) + ) .df.withColumn("l", f.explode("locus")) .select( "studyLocusId", @@ -1141,7 +1163,12 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: # credible set in SuSiE overlapping region f.col("inSuSiE") # credible set not based on SuSiE - & (f.col("finemappingMethod") != "SuSiE-inf"), + & ( + ~f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ), StudyLocusQualityCheck.EXPLAINED_BY_SUSIE, ), ) @@ -1268,7 +1295,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: df = self.df.withColumn( "confidence", f.when( - (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) + ( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ) & ( ~f.array_contains( f.col("qualityControls"), @@ -1278,7 +1310,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value, ) .when( - (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) + ( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ) & ( f.array_contains( f.col("qualityControls"), @@ -1288,7 +1325,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.FINEMAPPED_OUT_OF_SAMPLE_LD.value, ) .when( - (f.col("finemappingMethod") == "pics") + (f.col("finemappingMethod") == FinemappingMethod.PICS.value) & ( ~f.array_contains( f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value @@ -1297,7 +1334,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.PICSED_SUMMARY_STATS.value, ) .when( - (f.col("finemappingMethod") == "pics") + (f.col("finemappingMethod") == FinemappingMethod.PICS.value) & ( f.array_contains( f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index ea4264fdd..0db240350 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -17,7 +17,7 @@ from gentropy.common.session import Session from gentropy.common.utils import parse_pvalue -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex if TYPE_CHECKING: @@ -166,7 +166,7 @@ def parse_susie_results( f.col("se").alias("standardError"), f.col("credibleSetIndex"), f.col("logBF"), - f.lit("SuSie").alias("finemappingMethod"), + f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"), # Study metadata f.col("molecular_trait_id").alias("traitFromSource"), f.col("gene_id").alias("geneId"), diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 723d918bf..e0f39689d 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -13,7 +13,7 @@ from gentropy.common.spark_helpers import get_top_ranked_in_window from gentropy.common.utils import parse_pvalue -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus @dataclass @@ -319,7 +319,7 @@ def from_finngen_susie_finemapping( # Add standard error, and allele frequency information. f.col("se").cast("double").alias("standardError"), f.col("maf").cast("float").alias("effectAlleleFrequencyFromSource"), - f.lit("SuSie").cast("string").alias("finemappingMethod"), + f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"), *[ f.col(f"alpha{i}").cast(t.DoubleType()).alias(f"alpha_{i}") for i in range(1, 11) diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 918850527..96d0902c3 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -8,7 +8,11 @@ import pyspark.sql.types as t from scipy.stats import norm -from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck +from gentropy.dataset.study_locus import ( + FinemappingMethod, + StudyLocus, + StudyLocusQualityCheck, +) if TYPE_CHECKING: from pyspark.sql import Row @@ -213,9 +217,11 @@ def finemap( """ # Finemapping method is an optional column: finemapping_method_expression = ( - f.lit("pics") + f.lit(FinemappingMethod.PICS.value) if "finemappingMethod" not in associations.df.columns - else f.coalesce(f.col("finemappingMethod"), f.lit("pics")) + else f.coalesce( + f.col("finemappingMethod"), f.lit(FinemappingMethod.PICS.value) + ) ) # Flagging expression for loci that do not qualify for PICS: diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 03a8730ef..94ad918a5 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -26,7 +26,11 @@ order_array_of_structs_by_field, ) from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck +from gentropy.dataset.study_locus import ( + FinemappingMethod, + StudyLocus, + StudyLocusQualityCheck, +) from gentropy.method.carma import CARMA from gentropy.method.ld import LDAnnotator from gentropy.method.ld_matrix_interface import LDMatrixInterface @@ -290,7 +294,7 @@ def susie_inf_to_studylocus( # noqa: C901 "region": f.lit(region), "credibleSetIndex": f.lit(counter), "credibleSetlog10BF": f.lit(cs_lbf_value * 0.4342944819), - "finemappingMethod": f.lit("SuSiE-inf"), + "finemappingMethod": f.lit(FinemappingMethod.SUSIE_INF.value), } ) .withColumn(