Skip to content

Commit

Permalink
refactor(L2GFeatureMatrix)!: streamline feature matrix management (opentargets#745)
Browse files Browse the repository at this point in the history

* refactor(L2GFeatureMatrix): remove schema validation

* refactor(FeatureFactory): reshape feature generation WIP

* chore: pre-commit auto fixes [...]

* chore: set l2gfeature properties with decorator

* chore(l2gfeature): make credible_set and input_dependency instance attributes

* chore(l2gfeature): make credible_set and input_dependency instance attributes

* chore(featurefactory): distanceTssMeanFeature working

* refactor(l2g): improve step dependency management

* feat: implement

* chore: fix mypy issues

* feat: l2gfeaturematrix.from_features_list working

* chore: comment out obsolete refs

* chore(L2GFeatureMatrix): change `mode` attribute to `with_gold_standard`

* refactor(l2g): move feature matrix writing to training module

* feat(L2GFeatureMatrix): accept L2GGoldStandard or StudyLocus as inputs

* feat: implement methods to build a feature matrix based on a studylocus/L2GGoldStandard instance

* feat: coloc logic prototype

* feat(l2g): filter non gwas credible sets at the start of the step

* feat: rewrite colocalisation feature factory

* test: add `test_colocalisation_feature_type`

* test(colocalisation): add test_extract_maximum_coloc_probability_per_region_and_gene

* feat(L2GFeatureInputLoader): support multiple deps by passing loader as kwarg

* test: add integration tests `test_build_feature_matrix`

* chore: drop config yamls

* refactor: move feature classes to datasets module

* docs: update feature docs

* refactor(colocalisation): cleaner joins in `append_right_study_metadata`

* chore: better logging abstract methods

* test: add `L2GFeatureMatrix.test_from_features_list` unit tests

* fix: add goldStandardSet when a gs instance is passed to `from_features_list`

* fix: lowercase colocalisation type and add semantic test

* test: add semantic test for `append_right_study_metadata`

* feat(colocalisation): make `append_right_study_metadata` extensible to left metadata

* fix(colocalisation): `append_study_metadata` can't take a gold standard

* fix(colocalisation): `extract_maximum_coloc_probability_per_region_and_gene` can't take a gold standard

* feat: add `StudyLocus` as a dependency of colocalisation features

* fix: add studylocus to input loader in test

* fix: add studylocus to input loader in test

* fix: add studylocus to input loader in test
  • Loading branch information
ireneisdoomed authored Sep 23, 2024
1 parent 58fb726 commit b93842a
Show file tree
Hide file tree
Showing 25 changed files with 1,491 additions and 1,005 deletions.
22 changes: 21 additions & 1 deletion docs/python_api/datasets/l2g_feature.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,27 @@
title: L2G Feature
---

::: gentropy.method.l2g.feature_factory.L2GFeature
## Abstract Class

::: gentropy.dataset.l2g_feature.L2GFeature

## Feature Classes

### Derived from colocalisation

::: gentropy.dataset.l2g_feature.EQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.PQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.SQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.TuQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.EQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.PQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.SQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.TuQtlColocH4MaximumFeature

### Derived from distance

::: gentropy.dataset.l2g_feature.DistanceTssMinimumFeature
::: gentropy.dataset.l2g_feature.DistanceTssMeanFeature

## Schema

Expand Down
4 changes: 2 additions & 2 deletions docs/python_api/methods/l2g/feature_factory.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
title: L2G Feature Factory
---

::: gentropy.method.l2g.feature_factory.ColocalisationFactory
::: gentropy.method.l2g.feature_factory.FeatureFactory

::: gentropy.method.l2g.feature_factory.StudyLocusFactory
::: gentropy.method.l2g.feature_factory.L2GFeatureInputLoader
155 changes: 0 additions & 155 deletions src/gentropy/assets/schemas/l2g_feature_matrix.json

This file was deleted.

55 changes: 11 additions & 44 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,50 +227,16 @@ class LocusToGeneConfig(StepConfig):
gene_interactions_path: str | None = None
features_list: list[str] = field(
default_factory=lambda: [
# average distance of all tagging variants to gene TSS
"distanceTssMean",
# minimum distance of all tagging variants to gene TSS
"distanceTssMinimum",
# maximum vep consequence score of the locus 95% credible set among all genes in the vicinity
"vepMaximumNeighborhood",
# maximum vep consequence score of the locus 95% credible set split by gene
"vepMaximum",
# mean vep consequence score of the locus 95% credible set among all genes in the vicinity
"vepMeanNeighborhood",
# mean vep consequence score of the locus 95% credible set split by gene
"vepMean",
# max clpp for each (study, locus, gene) aggregating over all eQTLs
"eqtlColocClppMaximum",
# max clpp for each (study, locus) aggregating over all eQTLs
"eqtlColocClppMaximumNeighborhood",
# max clpp for each (study, locus, gene) aggregating over all pQTLs
"pqtlColocClppMaximum",
# max clpp for each (study, locus) aggregating over all pQTLs
"pqtlColocClppMaximumNeighborhood",
# max clpp for each (study, locus, gene) aggregating over all sQTLs
"sqtlColocClppMaximum",
# max clpp for each (study, locus) aggregating over all sQTLs
"sqtlColocClppMaximumNeighborhood",
# max clpp for each (study, locus, gene) aggregating over all tuQTLs
"tuqtlColocClppMaximum",
# max clpp for each (study, locus) aggregating over all tuQTLs
"tuqtlColocClppMaximumNeighborhood",
# max log-likelihood ratio value for each (study, locus, gene) aggregating over all eQTLs
"eqtlColocLlrMaximum",
# max log-likelihood ratio value for each (study, locus) aggregating over all eQTLs
"eqtlColocLlrMaximumNeighborhood",
# max log-likelihood ratio value for each (study, locus, gene) aggregating over all pQTLs
"pqtlColocLlrMaximum",
# max log-likelihood ratio value for each (study, locus) aggregating over all pQTLs
"pqtlColocLlrMaximumNeighborhood",
# max log-likelihood ratio value for each (study, locus, gene) aggregating over all sQTLs
"sqtlColocLlrMaximum",
# max log-likelihood ratio value for each (study, locus) aggregating over all sQTLs
"sqtlColocLlrMaximumNeighborhood",
# max log-likelihood ratio value for each (study, locus, gene) aggregating over all tuQTLs
"tuqtlColocLlrMaximum",
# max log-likelihood ratio value for each (study, locus) aggregating over all tuQTLs
"tuqtlColocLlrMaximumNeighborhood",
# max CLPP for each (study, locus, gene) aggregating over a specific qtl type
"eQtlColocClppMaximum",
"pQtlColocClppMaximum",
"sQtlColocClppMaximum",
"tuQtlColocClppMaximum",
# max H4 for each (study, locus, gene) aggregating over a specific qtl type
"eQtlColocH4Maximum",
"pQtlColocH4Maximum",
"sQtlColocH4Maximum",
"tuQtlColocH4Maximum",
]
)
hyperparameters: dict[str, Any] = field(
Expand All @@ -283,6 +249,7 @@ class LocusToGeneConfig(StepConfig):
wandb_run_name: str | None = None
hf_hub_repo_id: str | None = "opentargets/locus_to_gene"
download_from_hub: bool = True
write_feature_matrix: bool = True
_target_: str = "gentropy.l2g.LocusToGeneStep"


Expand Down
Loading

0 comments on commit b93842a

Please sign in to comment.