From af2683f36b611c2a0a11fa407107e033a1c28c11 Mon Sep 17 00:00:00 2001 From: ashuaibi7 Date: Mon, 20 Jan 2025 10:15:32 -0500 Subject: [PATCH] modified identify script to track and save cbase p values for each gene in results files --- src/dialect/utils/identify.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/dialect/utils/identify.py b/src/dialect/utils/identify.py index a87d777..5a99cad 100644 --- a/src/dialect/utils/identify.py +++ b/src/dialect/utils/identify.py @@ -30,21 +30,36 @@ def save_cbase_stats_to_gene_objects(genes, cbase_stats): f"{row['gene']}_M": row["phi_m_pos_or_p(m=0|s)"] for _, row in cbase_stats.iterrows() } + missense_gene_to_positive_selection_p = { + f"{row['gene']}_M": row["p_phi_m_pos"] + for _, row in cbase_stats.iterrows() + } nonsense_gene_to_positive_selection_phi = { f"{row['gene']}_N": row["phi_k_pos_or_p(k=0|s)"] for _, row in cbase_stats.iterrows() } + nonsense_gene_to_positive_selection_p = { + f"{row['gene']}_N": row["p_phi_k_pos"] + for _, row in cbase_stats.iterrows() + } gene_to_positive_selection_phi = { **missense_gene_to_positive_selection_phi, **nonsense_gene_to_positive_selection_phi, } + gene_to_positive_select_p = { + **missense_gene_to_positive_selection_p, + **nonsense_gene_to_positive_selection_p, + } for name, gene in genes.items(): if name not in gene_to_positive_selection_phi: - raise ValueError(f"Gene {name} not found in the CBaSE results file.") + raise ValueError( + f"Gene {name} not found in the CBaSE results file." + ) gene.cbase_phi = gene_to_positive_selection_phi[name] + gene.cbase_p = gene_to_positive_select_p[name] logging.info("Finished saving CBaSE phi statistic to gene objects.") return True @@ -66,6 +81,7 @@ def create_single_gene_results(genes, output_path, cbase_phi_vals_present): expected_mutations = gene.calculate_expected_mutations() obs_minus_exp_mutations = observed_mutations - expected_mutations cbase_phi = gene.cbase_phi + cbase_p = gene.cbase_p results.append( { @@ -77,6 +93,7 @@ def create_single_gene_results(genes, output_path, cbase_phi_vals_present): "Expected Mutations": expected_mutations, "Obs. - Exp. Mutations": obs_minus_exp_mutations, "CBaSE Pos. Sel. Phi": cbase_phi, + "CBaSE Pos. Sel. P-Val": cbase_p, } ) results_df = pd.DataFrame(results) @@ -210,6 +227,10 @@ def identify_pairwise_interactions( _, interactions = initialize_interaction_objects(k, genes.values()) estimate_taus_for_each_interaction(interactions) - cbase_phi_vals_present = save_cbase_stats_to_gene_objects(genes, cbase_stats) - create_single_gene_results(genes.values(), single_gene_fout, cbase_phi_vals_present) + cbase_phi_vals_present = save_cbase_stats_to_gene_objects( + genes, cbase_stats + ) + create_single_gene_results( + genes.values(), single_gene_fout, cbase_phi_vals_present + ) create_pairwise_results(interactions, pairwise_interaction_fout)