Skip to content

Commit

Permalink
Modified the decoy gene fraction analysis script to make it more user-friendly
Browse files Browse the repository at this point in the history
  • Loading branch information
ashuaibi7 committed Jan 13, 2025
1 parent 5e6c2c5 commit 8a35d41
Showing 1 changed file with 77 additions and 19 deletions.
96 changes: 77 additions & 19 deletions analysis/decoy_genes_top_ranking_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,49 @@
import logging
import pandas as pd

from argparse import ArgumentParser

def get_decoy_gene_fraction_across_methods(ixn_res_df, decoy_genes, k):

# ---------------------------------------------------------------------------- #
# HELPER FUNCTIONS #
# ---------------------------------------------------------------------------- #
def build_argument_parser():
    """Build the command-line parser for the decoy gene analysis script.

    Recognised options:
        -k / --top_k            int, optional, defaults to 100.
        -r / --results_dir      str, required.
        -d / --decoy_genes_dir  str, required.
        -o / --out              str, required.

    Returns:
        ArgumentParser: the configured parser; call ``parse_args()`` on it
        to obtain a namespace with ``top_k``, ``results_dir``,
        ``decoy_genes_dir`` and ``out`` attributes.
    """
    parser = ArgumentParser(description="Decoy Gene Analysis")
    parser.add_argument(
        "-k",
        "--top_k",
        type=int,
        default=100,
        help="Number of top ranking pairs to analyze",
    )
    parser.add_argument(
        "-r",
        "--results_dir",
        type=str,
        required=True,
        help="Directory with results for all subtypes",
    )
    parser.add_argument(
        "-d",
        "--decoy_genes_dir",
        type=str,
        required=True,
        help="Directory with all decoy gene files",
    )
    parser.add_argument(
        "-o",
        "--out",
        type=str,
        required=True,
        help="Output directory",
    )
    return parser


# ---------------------------------------------------------------------------- #
# MAIN FUNCTIONS #
# ---------------------------------------------------------------------------- #
def compute_decoy_gene_fraction_across_methods(ixn_res_df, decoy_genes, k):
if ixn_res_df.empty:
raise ValueError("Input DataFrame is empty")

Expand All @@ -30,33 +71,50 @@ def get_decoy_gene_fraction_across_methods(ixn_res_df, decoy_genes, k):
return fractions


def compute_decoy_gene_fractions_across_subtypes(results_dir, decoy_genes_dir, top_k):
    """Compute decoy gene fractions for every subtype that has input files.

    Each entry of ``results_dir`` is treated as a subtype name. A subtype is
    processed only when both of these files exist:

    * ``<results_dir>/<subtype>/complete_pairwise_ixn_results.csv``
    * ``<decoy_genes_dir>/<subtype>_decoy_genes.txt``

    Otherwise the subtype is skipped and an INFO-level message is logged.

    Args:
        results_dir: Directory whose entries are named after subtypes.
        decoy_genes_dir: Directory holding the ``<subtype>_decoy_genes.txt``
            files. Each file is read without a header row and its ``Gene``
            column is collected into a set.
        top_k: Forwarded as ``k`` to
            ``compute_decoy_gene_fraction_across_methods``.

    Returns:
        dict: subtype name -> the value returned by
        ``compute_decoy_gene_fraction_across_methods`` for that subtype.
        Subtypes with missing inputs are absent from the mapping.
    """
    subtype_decoy_gene_fractions = {}
    # os.listdir returns entries in arbitrary order; sort so that the result
    # (and anything written from it) is reproducible between runs.
    for subtype in sorted(os.listdir(results_dir)):
        results_fn = os.path.join(
            results_dir, subtype, "complete_pairwise_ixn_results.csv"
        )
        decoy_genes_fn = os.path.join(decoy_genes_dir, f"{subtype}_decoy_genes.txt")
        if not os.path.exists(results_fn) or not os.path.exists(decoy_genes_fn):
            logging.info("Skipping %s since input files not found", subtype)
            continue
        ixn_res_df = pd.read_csv(results_fn)
        decoy_genes = set(
            pd.read_csv(decoy_genes_fn, header=None, names=["Gene"])["Gene"]
        )
        subtype_decoy_gene_fractions[subtype] = (
            compute_decoy_gene_fraction_across_methods(
                ixn_res_df,
                decoy_genes,
                k=top_k,
            )
        )

    return subtype_decoy_gene_fractions


def save_output(subtype_decoy_gene_fractions, out):
    """Write the per-subtype, per-method decoy gene fractions to a CSV file.

    The nested mapping is flattened to one row per (subtype, method) pair and
    written to ``<out>/decoy_gene_fractions_by_method.csv`` with the columns
    ``Subtype``, ``Method`` and ``Fraction`` and no index column.

    Args:
        subtype_decoy_gene_fractions: Mapping of subtype name to a mapping of
            method name -> fraction.
        out: Output directory. It is created (including parents) if it does
            not exist yet.
    """
    # Create the directory up front so a fresh output path does not make
    # to_csv fail.
    os.makedirs(out, exist_ok=True)
    fout = os.path.join(out, "decoy_gene_fractions_by_method.csv")
    gene_fraction_data = [
        {"Subtype": subtype, "Method": method, "Fraction": fraction}
        for subtype, fractions in subtype_decoy_gene_fractions.items()
        for method, fraction in fractions.items()
    ]
    # Explicit columns keep the header row present even when there are no rows.
    df = pd.DataFrame(gene_fraction_data, columns=["Subtype", "Method", "Fraction"])
    df.to_csv(fout, index=False)


if __name__ == "__main__":
    # Messages logged with logging.info are dropped under the default WARNING
    # level; enable INFO so skipped subtypes are actually reported to the user.
    logging.basicConfig(level=logging.INFO)

    parser = build_argument_parser()
    args = parser.parse_args()

    # Compute the fractions for every subtype, then write them as one CSV.
    subtype_decoy_gene_fractions = compute_decoy_gene_fractions_across_subtypes(
        args.results_dir,
        args.decoy_genes_dir,
        args.top_k,
    )
    save_output(subtype_decoy_gene_fractions, args.out)

0 comments on commit 8a35d41

Please sign in to comment.