Skip to content

Commit

Permalink
Modified identify script to track and save CBaSE p-values for each gene in results files
Browse files Browse the repository at this point in the history
  • Loading branch information
ashuaibi7 committed Jan 20, 2025
1 parent cb6353c commit af2683f
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions src/dialect/utils/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,36 @@ def save_cbase_stats_to_gene_objects(genes, cbase_stats):
f"{row['gene']}_M": row["phi_m_pos_or_p(m=0|s)"]
for _, row in cbase_stats.iterrows()
}
missense_gene_to_positive_selection_p = {
f"{row['gene']}_M": row["p_phi_m_pos"]
for _, row in cbase_stats.iterrows()
}

nonsense_gene_to_positive_selection_phi = {
f"{row['gene']}_N": row["phi_k_pos_or_p(k=0|s)"]
for _, row in cbase_stats.iterrows()
}
nonsense_gene_to_positive_selection_p = {
f"{row['gene']}_N": row["p_phi_k_pos"]
for _, row in cbase_stats.iterrows()
}

gene_to_positive_selection_phi = {
**missense_gene_to_positive_selection_phi,
**nonsense_gene_to_positive_selection_phi,
}
gene_to_positive_select_p = {
**missense_gene_to_positive_selection_p,
**nonsense_gene_to_positive_selection_p,
}

for name, gene in genes.items():
if name not in gene_to_positive_selection_phi:
raise ValueError(f"Gene {name} not found in the CBaSE results file.")
raise ValueError(
f"Gene {name} not found in the CBaSE results file."
)
gene.cbase_phi = gene_to_positive_selection_phi[name]
gene.cbase_p = gene_to_positive_select_p[name]

logging.info("Finished saving CBaSE phi statistic to gene objects.")
return True
Expand All @@ -66,6 +81,7 @@ def create_single_gene_results(genes, output_path, cbase_phi_vals_present):
expected_mutations = gene.calculate_expected_mutations()
obs_minus_exp_mutations = observed_mutations - expected_mutations
cbase_phi = gene.cbase_phi
cbase_p = gene.cbase_p

results.append(
{
Expand All @@ -77,6 +93,7 @@ def create_single_gene_results(genes, output_path, cbase_phi_vals_present):
"Expected Mutations": expected_mutations,
"Obs. - Exp. Mutations": obs_minus_exp_mutations,
"CBaSE Pos. Sel. Phi": cbase_phi,
"CBaSE Pos. Sel. P-Val": cbase_p,
}
)
results_df = pd.DataFrame(results)
Expand Down Expand Up @@ -210,6 +227,10 @@ def identify_pairwise_interactions(
_, interactions = initialize_interaction_objects(k, genes.values())
estimate_taus_for_each_interaction(interactions)

cbase_phi_vals_present = save_cbase_stats_to_gene_objects(genes, cbase_stats)
create_single_gene_results(genes.values(), single_gene_fout, cbase_phi_vals_present)
cbase_phi_vals_present = save_cbase_stats_to_gene_objects(
genes, cbase_stats
)
create_single_gene_results(
genes.values(), single_gene_fout, cbase_phi_vals_present
)
create_pairwise_results(interactions, pairwise_interaction_fout)

0 comments on commit af2683f

Please sign in to comment.