From 4837a4b4add387ac9e1edb3fba7fbbbea712bc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Wed, 27 Nov 2024 10:07:42 +0000 Subject: [PATCH] feat(gold_standard): add traitFromSourceMappedId to schema (#924) * feat(gold_standard): add traitFromSourceMappedId to schema * chore: adapt tests * feat(feature_matrix): consider `traitFromSourceMappedId` a static column * feat(feature_matrix): consider `traitFromSourceMappedId` an optional column --- src/gentropy/assets/schemas/l2g_gold_standard.json | 6 ++++++ src/gentropy/dataset/l2g_feature_matrix.py | 2 ++ tests/gentropy/dataset/test_l2g_feature_matrix.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gentropy/assets/schemas/l2g_gold_standard.json b/src/gentropy/assets/schemas/l2g_gold_standard.json index 6af921d61..6ba715963 100644 --- a/src/gentropy/assets/schemas/l2g_gold_standard.json +++ b/src/gentropy/assets/schemas/l2g_gold_standard.json @@ -25,6 +25,12 @@ "nullable": false, "metadata": {} }, + { + "name": "traitFromSourceMappedId", + "type": "string", + "nullable": true, + "metadata": {} + }, { "name": "goldStandardSet", "type": "string", diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index f59e1e725..8c3d97e88 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -39,6 +39,8 @@ def __init__( self.fixed_cols = ["studyLocusId", "geneId"] if self.with_gold_standard: self.fixed_cols.append("goldStandardSet") + if "traitFromSourceMappedId" in _df.columns: + self.fixed_cols.append("traitFromSourceMappedId") self.features_list = features_list or [ col for col in _df.columns if col not in self.fixed_cols diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 4fe338254..6677d123e 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -87,7 +87,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: """Setup fixture.""" self.sample_gold_standard = L2GGoldStandard( _df=spark.createDataFrame( - [(1, "var1", "gwas1", "g1", "positive", ["a_source"])], + [(1, "var1", "gwas1", "g1", "efo1", "positive", ["a_source"])], L2GGoldStandard.get_schema(), ), _schema=L2GGoldStandard.get_schema(),