diff --git a/analysis/cbase_results_analysis.py b/analysis/cbase_results_analysis.py index fa76f8b..1633176 100644 --- a/analysis/cbase_results_analysis.py +++ b/analysis/cbase_results_analysis.py @@ -1,20 +1,20 @@ """TODO: Add docstring.""" +from argparse import ArgumentParser from pathlib import Path + import pandas as pd -from argparse import ArgumentParser +from dialect.utils.helpers import ( + load_likely_passenger_genes, + load_putative_driver_genes, +) from dialect.utils.plotting import ( draw_cbase_likely_passenger_proportion_barplot, draw_cbase_top_likely_passenger_upset, draw_gene_expected_and_observed_mutations_barplot, ) -from dialect.utils.helpers import ( - load_putative_driver_genes, - load_likely_passenger_genes, -) - # ------------------------------------------------------------------------------------ # # HELPER FUNCTIONS # @@ -59,8 +59,12 @@ def load_all_subtype_single_gene_results(results_dir: Path, num_genes: int) -> d """TODO: Add docstring.""" subtype_to_results_df = {} for single_gene_results_fn in results_dir.iterdir(): - subtype = single_gene_results_fn.stem # TODO change for real data - subtype_results_df = pd.read_csv(single_gene_results_fn) # TODO update path + subtype = ( + single_gene_results_fn.stem + ) # TODO: update to take in server results dir + subtype_results_df = pd.read_csv( + single_gene_results_fn + ) # TODO: update to take in server results dir sorted_subtype_results_df = subtype_results_df.sort_values( by="CBaSE Pos. Sel. Phi", ascending=False, @@ -166,8 +170,6 @@ def main() -> None: putative_driver_genes, out_fn=args.out_dir / f"{subtype}", ) - print(args.out_dir / f"{subtype}") - quit() if __name__ == "__main__": diff --git a/analysis/decoy_genes_top_ranking_pairs.py b/analysis/decoy_genes_top_ranking_pairs.py deleted file mode 100644 index deaaa75..0000000 --- a/analysis/decoy_genes_top_ranking_pairs.py +++ /dev/null @@ -1,214 +0,0 @@ -"""TODO: Add docstring.""" - -import os -from argparse import ArgumentParser -from pathlib import Path - -import pandas as pd - -from dialect.utils.plotting import plot_decoy_gene_fractions -from dialect.utils.postprocessing import generate_top_ranking_tables - -EPSILON_MUTATION_COUNT = 10 -PVALUE_THRESHOLD = 1 - -# ------------------------------------------------------------------------------------ # -# HELPER FUNCTIONS # -# ------------------------------------------------------------------------------------ # -def build_argument_parser() -> ArgumentParser: - """TODO: Add docstring.""" - parser = ArgumentParser(description="Decoy Gene Analysis") - parser.add_argument( - "-n", - "--num_pairs", - type=int, - default=10, - help="Number of top ranking pairs to analyze", - ) - parser.add_argument( - "-r", - "--results_dir", - type=str, - required=True, - help="Directory with results for all subtypes", - ) - parser.add_argument( - "-d", - "--decoy_genes_dir", - type=str, - default="data/decoy_genes", - help="Directory with all decoy gene files", - ) - parser.add_argument( - "-o", - "--out", - type=str, - default="output/RESULTS", - help="Output directory", - ) - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument( - "--me", - action="store_true", - help="Perform analysis for mutual exclusivity", - ) - group.add_argument( - "--co", - action="store_true", - help="Perform analysis for co-occurrence", - ) - return parser - - -def compute_prop_pairs_with_at_least_one_decoy( - decoy_genes: set, - top_ranking_pairs: pd.DataFrame, -) -> float: - """TODO: Add docstring.""" - 
pairs_with_at_least_one_decoy_gene = ( - top_ranking_pairs["Gene A"].isin(decoy_genes) - | top_ranking_pairs["Gene B"].isin(decoy_genes) - ).sum() - total_pairs = top_ranking_pairs.shape[0] - return pairs_with_at_least_one_decoy_gene / total_pairs - - -def compute_prop_unique_decoy_genes_in_top_pairs( - decoy_genes: set, - top_ranking_pairs: pd.DataFrame, -) -> float: - """TODO: Add docstring.""" - total_unique_genes = set( - top_ranking_pairs["Gene A"].tolist() + top_ranking_pairs["Gene B"].tolist(), - ) - total_unique_decoy_genes = len(total_unique_genes.intersection(decoy_genes)) - return total_unique_decoy_genes / len(total_unique_genes) - - -def compute_prop_decoy_genes_in_top_pairs( - decoy_genes: set, - top_ranking_pairs: pd.DataFrame, -) -> float: - """TODO: Add docstring.""" - all_genes_list = ( - top_ranking_pairs["Gene A"].tolist() + top_ranking_pairs["Gene B"].tolist() - ) - total_decoy_genes = len([x for x in all_genes_list if x in decoy_genes]) - return total_decoy_genes / len(all_genes_list) - - -# ------------------------------------------------------------------------------------ # -# MAIN FUNCTIONS # -# ------------------------------------------------------------------------------------ # -def compute_decoy_gene_fraction_across_methods( - ixn_res_df: pd.DataFrame, - decoy_genes: set, - num_samples: int, - num_pairs: int, - ixn_type: str, -) -> dict: - """TODO: Add docstring.""" - comp_scheme = 3 - comp_schemes = [0, 1, 2] - if ixn_res_df.empty: - msg = "Input DataFrame is empty" - raise ValueError(msg) - - top_tables = generate_top_ranking_tables( - results_df=ixn_res_df, - ixn_type=ixn_type, - num_pairs=num_pairs, - num_samples=num_samples, - ) - proportions = {} - for method, top_ranking_pairs in top_tables.items(): - if top_ranking_pairs is None or top_ranking_pairs.empty: - decoy_gene_proportion = 0 - elif comp_scheme == comp_schemes[0]: - decoy_gene_proportion = compute_prop_pairs_with_at_least_one_decoy( - decoy_genes, - top_ranking_pairs, - ) - elif comp_scheme == comp_schemes[1]: - decoy_gene_proportion = compute_prop_unique_decoy_genes_in_top_pairs( - decoy_genes, - top_ranking_pairs, - ) - else: - decoy_gene_proportion = compute_prop_decoy_genes_in_top_pairs( - decoy_genes, - top_ranking_pairs, - ) - proportions[method] = decoy_gene_proportion - - return proportions - - -def compute_decoy_gene_fractions_across_subtypes( - results_dir: str, - decoy_genes_dir: str, - num_pairs: int, - ixn_type: str, -) -> dict: - """TODO: Add docstring.""" - subtypes = os.listdir(results_dir) - subtype_decoy_gene_fractions = {} - for subtype in subtypes: - results_fn = Path(results_dir) / subtype / "complete_pairwise_ixn_results.csv" - cnt_mtx_fn = Path(results_dir) / subtype / "count_matrix.csv" - decoy_genes_fn = Path(decoy_genes_dir) / f"{subtype}_decoy_genes.txt" - if not results_fn.exists() or not decoy_genes_fn.exists(): - continue - ixn_res_df = pd.read_csv(results_fn) - decoy_genes = set( - pd.read_csv(decoy_genes_fn, header=None, names=["Gene"])["Gene"], - ) - num_samples = pd.read_csv(cnt_mtx_fn, index_col=0).shape[0] - subtype_decoy_gene_fractions[subtype] = ( - compute_decoy_gene_fraction_across_methods( - ixn_res_df, - decoy_genes, - num_samples, - num_pairs, - ixn_type, - ) - ) - - return subtype_decoy_gene_fractions - - -def save_output(subtype_decoy_gene_fractions: dict, fout: str) -> None: - """TODO: Add docstring.""" - gene_fraction_data = [ - {"Subtype": subtype, "Method": method, "Fraction": fraction} - for subtype, fractions in 
subtype_decoy_gene_fractions.items() - for method, fraction in fractions.items() - ] - results_df = pd.DataFrame(gene_fraction_data) - results_df.to_csv(fout, index=False) - - -def main() -> None: - """TODO: Add docstring.""" - parser = build_argument_parser() - args = parser.parse_args() - - ixn_type = "ME" if args.me else "CO" - subtype_decoy_gene_fractions = compute_decoy_gene_fractions_across_subtypes( - args.results_dir, - args.decoy_genes_dir, - args.num_pairs, - ixn_type, - ) - fout = Path(args.out) / f"{ixn_type}_decoy_gene_fractions_by_method.csv" - save_output(subtype_decoy_gene_fractions, fout) - plot_decoy_gene_fractions( - fout, - args.num_pairs, - args.me, - args.out, - ) - - -if __name__ == "__main__": - main() diff --git a/analysis/likely_passenger_proportion_across_methods.py b/analysis/likely_passenger_proportion_across_methods.py new file mode 100644 index 0000000..b7f9e41 --- /dev/null +++ b/analysis/likely_passenger_proportion_across_methods.py @@ -0,0 +1,152 @@ +"""TODO: Add docstring.""" + +from argparse import ArgumentParser +from pathlib import Path + +import pandas as pd + +from dialect.utils.helpers import load_likely_passenger_genes +from dialect.utils.plotting import draw_likely_passenger_gene_proportion_violinplot +from dialect.utils.postprocessing import ( + generate_top_ranked_co_interaction_tables, + generate_top_ranked_me_interaction_tables, +) + +# ------------------------------------------------------------------------------------ # +# CONSTANTS # +# ------------------------------------------------------------------------------------ # +ME_METHODS = ["DIALECT", "DISCOVER", "Fisher's Exact Test", "MEGSA", "WeSME"] +CO_METHODS = ["DIALECT", "DISCOVER", "Fisher's Exact Test", "WeSME"] + + +# ------------------------------------------------------------------------------------ # +# HELPER FUNCTIONS # +# ------------------------------------------------------------------------------------ # +def build_argument_parser() -> ArgumentParser: + """TODO: Add docstring.""" + parser = ArgumentParser() + parser.add_argument( + "-n", + "--num_pairs", + type=int, + default=10, + ) + parser.add_argument( + "-r", + "--results_dir", + type=Path, + required=True, + ) + parser.add_argument( + "-lp", + "--likely_passenger_dir", + type=Path, + required=True, + ) + parser.add_argument( + "-o", + "--out_dir", + type=Path, + required=True, + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "-me", + "--mutual_exclusivity", + action="store_true", + ) + group.add_argument( + "-co", + "--cooccurrence", + action="store_true", + ) + return parser + + +def compute_num_likely_passengers_in_top_ranked_pairs( + likely_passengers: set, + top_ranking_pairs: pd.DataFrame, +) -> float: + """TODO: Add docstring.""" + all_genes_list = ( + top_ranking_pairs["Gene A"].tolist() + top_ranking_pairs["Gene B"].tolist() + ) + total_likely_passengers = len([x for x in all_genes_list if x in likely_passengers]) + return ( + 0 if not len(all_genes_list) else total_likely_passengers / len(all_genes_list) + ) + + +def compute_likely_passenger_proportions( + results_dir: Path, + subtype_to_likely_passengers: dict, + num_pairs: int, + methods: list, + generate_top_ranked_interaction_table: callable, +) -> dict: + """TODO: Add docstring.""" + method_to_subtype_to_likely_passenger_proportion = {} + for subtype_dir in results_dir.iterdir(): + subtype = subtype_dir.name + cnt_mtx_fn = subtype_dir / "count_matrix.csv" + results_df = pd.read_csv(subtype_dir / 
"complete_pairwise_ixn_results.csv") + likely_passengers = subtype_to_likely_passengers[subtype] + num_samples = pd.read_csv(cnt_mtx_fn, index_col=0).shape[0] + method_to_top_ranked_interaction_table = generate_top_ranked_interaction_table( + results_df=results_df, + num_pairs=num_pairs, + num_samples=num_samples, + methods=methods, + ) + for ( + method, + top_ranked_me_interaction_table, + ) in method_to_top_ranked_interaction_table.items(): + if method not in method_to_subtype_to_likely_passenger_proportion: + method_to_subtype_to_likely_passenger_proportion[method] = {} + likely_passenger_proportion = ( + compute_num_likely_passengers_in_top_ranked_pairs( + likely_passengers, + top_ranked_me_interaction_table, + ) + ) + method_to_subtype_to_likely_passenger_proportion[method][subtype] = ( + likely_passenger_proportion + ) + return method_to_subtype_to_likely_passenger_proportion + + +# ------------------------------------------------------------------------------------ # +# MAIN FUNCTION # +# ------------------------------------------------------------------------------------ # +def main() -> None: + """TODO: Add docstring.""" + parser = build_argument_parser() + args = parser.parse_args() + subtype_to_likely_passengers = load_likely_passenger_genes( + args.likely_passenger_dir, + ) + if args.mutual_exclusivity: + method_to_subtype_to_passenger_proportion = compute_likely_passenger_proportions( + results_dir=args.results_dir, + subtype_to_likely_passengers=subtype_to_likely_passengers, + num_pairs=args.num_pairs, + methods=ME_METHODS, + generate_top_ranked_interaction_table=generate_top_ranked_me_interaction_tables, + ) + else: # args.cooccurrence + method_to_subtype_to_passenger_proportion = compute_likely_passenger_proportions( + results_dir=args.results_dir, + subtype_to_likely_passengers=subtype_to_likely_passengers, + num_pairs=args.num_pairs, + methods=CO_METHODS, + generate_top_ranked_interaction_table=generate_top_ranked_co_interaction_tables, + ) + draw_likely_passenger_gene_proportion_violinplot( + method_to_subtype_to_passenger_proportion, + out_fn=args.out_dir / "likely_passenger_proportion_violinplot", + ) + + +if __name__ == "__main__": + main() diff --git a/src/dialect/utils/helpers.py b/src/dialect/utils/helpers.py index 4313abd..035748e 100644 --- a/src/dialect/utils/helpers.py +++ b/src/dialect/utils/helpers.py @@ -20,7 +20,7 @@ def load_likely_passenger_genes(likely_passenger_dir: Path) -> set: continue subtype = likely_passenger_fn.stem likely_passengers = pd.read_csv( - likely_passenger_fn, header=None, names=["Gene"] + likely_passenger_fn, header=None, names=["Gene"], )["Gene"] subtype_to_likely_passengers[subtype] = set(likely_passengers) return subtype_to_likely_passengers diff --git a/src/dialect/utils/plotting.py b/src/dialect/utils/plotting.py index bdb0ed0..cab292e 100644 --- a/src/dialect/utils/plotting.py +++ b/src/dialect/utils/plotting.py @@ -11,11 +11,9 @@ import numpy as np import pandas as pd import scienceplots # noqa: F401 -import seaborn as sns from matplotlib import rcParams from matplotlib.lines import Line2D from matplotlib.patches import Patch -from matplotlib.ticker import AutoMinorLocator from plotnine import ( aes, element_text, @@ -328,92 +326,62 @@ def draw_gene_expected_and_observed_mutations_barplot( # ------------------------------------------------------------------------------------ # # TOP RANKED LIKELY PASSENGER PROPORTION # # ------------------------------------------------------------------------------------ # -def 
plot_decoy_gene_fractions( - data_filepath: str, - num_pairs: int, - is_me: bool, - out_dir: str, +def draw_likely_passenger_gene_proportion_violinplot( + method_to_subtype_to_passenger_proportion: dict, + out_fn: str, + figsize: tuple = (6, 4), + font_scale: float = FONT_SCALE, ) -> None: """TODO: Add docstring.""" - data_df = pd.read_csv(data_filepath) - if not is_me: - data_df = data_df[data_df["Method"] != "MEGSA"] - - subtypes = data_df["Subtype"].unique() - colors = [ - "green", - "blue", - "red", - "purple", - "yellow", - "orange", - "black", - "brown", - ] - shapes = ["o", "s", "^", "D"] + plt.rcParams["font.serif"] = FONT_FAMILY + plt.rcParams["font.family"] = FONT_STYLE - color_shape_combinations = list(product(colors, shapes)) - color_shape_mapping = { - subtypes[i]: color_shape_combinations[i] for i in range(len(subtypes)) - } - color_mapping = { - subtype: combo[0] for subtype, combo in color_shape_mapping.items() - } - shape_mapping = { - subtype: combo[1] for subtype, combo in color_shape_mapping.items() - } - ixn_type = "ME" if is_me else "CO" + methods = list(method_to_subtype_to_passenger_proportion.keys()) + values = [ + list(method_to_subtype_to_passenger_proportion[method].values()) + for method in methods + ] + methods = [ + method.replace("Fisher's Exact Test", "Fisher's\nExact Test") + for method in methods + ] - plot = ( - ggplot( - data_df, - aes(x="Method", y="Fraction", color="Subtype", shape="Subtype"), - ) - + geom_boxplot( - aes(group="Method"), - alpha=0.5, - outlier_alpha=0, - show_legend=False, - ) - + geom_point( - position=position_jitter(width=0.25), - size=5, - alpha=0.75, - show_legend=True, - ) - + scale_color_manual(values=color_mapping) - + scale_shape_manual(values=shape_mapping) - + labs( - title=f"Proportion of Top-Ranked {ixn_type} Pairs w/ Likely Passengers", - x="Method", - y=f"Proportion of Top {num_pairs} {ixn_type} Pairs with Decoy Genes", - color="Subtype", - shape="Subtype", - ) - + theme_tufte(base_family="Computer Modern") - + theme( - figure_size=(12, 8), - plot_title=element_text(size=20, weight="bold"), - axis_title=element_text(size=18), - axis_text=element_text(size=16), - legend_title=element_text(size=14, hjust=0.5), - legend_text=element_text(size=12), - legend_position="bottom", - legend_box="horizontal", - ) - + guides( - color=guide_legend(title="Subtypes", ncol=11), - shape=guide_legend(title="Subtypes", ncol=11), - ) - + ylim(0, 1) + x_positions = np.arange(len(methods)) + fig, ax = plt.subplots(figsize=figsize) + vp = ax.violinplot( + values, + positions=x_positions, + showextrema=True, + showmedians=True, ) + ax.set_ylim(-0.05, 1.05) + ax.set_xticks(x_positions) + ax.set_xticklabels(methods) + ax.set_xlabel("Method", fontsize=font_scale * 10) + ax.set_ylabel("Likely Passenger\nProportion", fontsize=font_scale * 10) + + for body in vp["bodies"]: + body.set_facecolor("lightslategray") + body.set_edgecolor("darkslategray") + body.set_alpha(0.8) + + plt.setp(vp["cmedians"], color="maroon", linewidth=font_scale) + plt.setp(vp["cmins"], color="slategray", linewidth=font_scale) + plt.setp(vp["cmaxes"], color="slategray", linewidth=font_scale) + plt.setp(vp["cbars"], color="slategray", linewidth=font_scale) + + ax.minorticks_on() + ax.tick_params(axis="both", direction="in", length=font_scale * 4, width=font_scale) + ax.tick_params(axis="x", which="minor", top=False, bottom=False) + ax.tick_params(axis="y", which="minor", left=True, right=True) + ax.tick_params(axis="x", which="major", top=False, bottom=True) + 
ax.tick_params(axis="y", which="major", left=True, right=True) + ax.patch.set_alpha(0) - dout = Path(out_dir) - dout.mkdir(parents=True, exist_ok=True) - plot.save( - f"{out_dir}/{ixn_type}_decoy_gene_fractions_boxplot.svg", - dpi=300, - ) + plt.tight_layout() + plt.savefig(f"{out_fn}.png", dpi=300, transparent=True) + plt.savefig(f"{out_fn}.svg", dpi=300, transparent=True) + plt.close() # ------------------------------------------------------------------------------------ # diff --git a/src/dialect/utils/postprocessing.py b/src/dialect/utils/postprocessing.py index 2520a86..93d8e90 100644 --- a/src/dialect/utils/postprocessing.py +++ b/src/dialect/utils/postprocessing.py @@ -9,104 +9,25 @@ # ------------------------------------------------------------------------------------ # # CONSTANTS # # ------------------------------------------------------------------------------------ # -MIN_DRIVER_COUNT = 10 -PVALUE_THRESHOLD = 1.0 - -ME_COLUMN_MAP = { - "DIALECT": "Rho", - "DISCOVER": "Discover ME P-Val", - "Fisher's Exact Test": "Fisher's ME P-Val", - "MEGSA": "MEGSA S-Score (LRT)", - "WeSME": "WeSME P-Val", +ME_METHOD_RANKING_CRITERIA = { + "DIALECT": ("Rho", "ascending"), + "DISCOVER": ("Discover ME P-Val", "ascending"), + "Fisher's Exact Test": ("Fisher's ME P-Val", "ascending"), + "MEGSA": ("MEGSA S-Score (LRT)", "descending"), + "WeSME": ("WeSME P-Val", "ascending"), } -CO_COLUMN_MAP = { - "DIALECT": "Rho", - "DISCOVER": "Discover CO P-Val", - "Fisher's Exact Test": "Fisher's CO P-Val", - "MEGSA": None, - "WeSME": "WeSCO P-Val", +CO_METHOD_RANKING_CRITERIA = { + "DIALECT": ("Rho", "descending"), + "DISCOVER": ("Discover CO P-Val", "ascending"), + "Fisher's Exact Test": ("Fisher's CO P-Val", "ascending"), + "WeSCO": ("WeSCO P-Val", "ascending"), } # ------------------------------------------------------------------------------------ # # HELPER FUNCTIONS # # ------------------------------------------------------------------------------------ # -def get_sort_column(method: str, is_me: bool) -> str: - """TODO: Add docstring.""" - if is_me: - return ME_COLUMN_MAP.get(method) - return CO_COLUMN_MAP.get(method) - -def filter_by_dialect( - top_ranking_pairs: pd.DataFrame, - num_samples: int, - is_me: bool, -) -> pd.DataFrame: - """TODO: Add docstring.""" - epsilon = compute_epsilon_threshold(num_samples) - filtered_pairs = top_ranking_pairs[ - (top_ranking_pairs["Tau_1X"] > epsilon) - & (top_ranking_pairs["Tau_X1"] > epsilon) - ] - return ( - filtered_pairs[filtered_pairs["Rho"] < 0] - if is_me - else filtered_pairs[filtered_pairs["Rho"] > 0] - ) - - -def filter_by_method( - top_ranking_pairs: pd.DataFrame, - method: str, - is_me: bool, - num_samples: int, -) -> pd.DataFrame: - """TODO: Add docstring.""" - if method == "MEGSA" and not is_me: - return None - - if method == "DIALECT": - top_ranking_pairs = filter_by_dialect(top_ranking_pairs, num_samples, is_me) - - elif method == "MEGSA": - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["MEGSA S-Score (LRT)"] > 0 - ] - - elif method == "DISCOVER": - if is_me: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["Discover ME P-Val"] < PVALUE_THRESHOLD - ] - else: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["Discover CO P-Val"] < PVALUE_THRESHOLD - ] - - elif method == "Fisher's Exact Test": - if is_me: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["Fisher's ME P-Val"] < PVALUE_THRESHOLD - ] - else: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["Fisher's CO P-Val"] < PVALUE_THRESHOLD 
- ] - - elif method == "WeSME": - if is_me: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["WeSME P-Val"] < PVALUE_THRESHOLD - ] - else: - top_ranking_pairs = top_ranking_pairs[ - top_ranking_pairs["WeSCO P-Val"] < PVALUE_THRESHOLD - ] - - return top_ranking_pairs - - def compute_epsilon_threshold(num_samples: int, alpha: float = 0.001) -> float: r"""Compute the epsilon threshold for the one-sided normal approximation CI. @@ -140,61 +61,62 @@ def f(eps: float) -> float: return brentq(f, 1e-6, 1.0) -def get_top_ranked_pairs_by_method( +def generate_top_ranked_co_interaction_tables( results_df: pd.DataFrame, - method: str, - is_me: bool, num_pairs: int, num_samples: int, -) -> pd.DataFrame: + methods: list[str], +) -> dict: """TODO: Add docstring.""" - sort_col = get_sort_column(method, is_me) - if sort_col is None: - return None - - if method == "DIALECT": - ascending = is_me - elif method == "MEGSA": - ascending = False - else: - ascending = True - - top_ranking_pairs = results_df.sort_values( - by=sort_col, - ascending=ascending, - ) - top_ranking_pairs = filter_by_method( - top_ranking_pairs, - method, - is_me, - num_samples, - ) - if top_ranking_pairs is None or top_ranking_pairs.empty: - return None - top_ranking_pairs = top_ranking_pairs.head(num_pairs) - return top_ranking_pairs[["Gene A", "Gene B", sort_col]] - - -def generate_top_ranking_tables( + method_to_top_ranked_co_interaction_table = {} + for method in methods: + if method not in CO_METHOD_RANKING_CRITERIA: + continue + metric, sort_order = CO_METHOD_RANKING_CRITERIA[method] + top_ranked_co_interaction_table = results_df.sort_values( + by=metric, + ascending=sort_order == "ascending", + ) + if method == "DIALECT": + epsilon = compute_epsilon_threshold(num_samples) + top_ranked_co_interaction_table = top_ranked_co_interaction_table[ + (top_ranked_co_interaction_table["Tau_1X"] > epsilon) + & (top_ranked_co_interaction_table["Tau_X1"] > epsilon) + & (top_ranked_co_interaction_table["Rho"] > 0) + ] + method_to_top_ranked_co_interaction_table[method] = ( + top_ranked_co_interaction_table[["Gene A", "Gene B", metric]].head( + num_pairs, + ) + ) + return method_to_top_ranked_co_interaction_table + +def generate_top_ranked_me_interaction_tables( results_df: pd.DataFrame, - is_me: bool, num_pairs: int, num_samples: int, + methods: list[str], ) -> dict: """TODO: Add docstring.""" - methods = ["DIALECT", "DISCOVER", "Fisher's Exact Test", "MEGSA", "WeSME"] - tables = {} + method_to_top_ranked_me_interaction_table = {} for method in methods: - top_df = get_top_ranked_pairs_by_method( - results_df=results_df, - method=method, - is_me=is_me, - num_pairs=num_pairs, - num_samples=num_samples, + if method not in ME_METHOD_RANKING_CRITERIA: + continue + metric, sort_order = ME_METHOD_RANKING_CRITERIA[method] + top_ranked_me_interaction_table = results_df.sort_values( + by=metric, + ascending=sort_order == "ascending", ) - top_df = ( - pd.DataFrame(columns=["Gene A", "Gene B"]) if top_df is None else top_df + if method == "DIALECT": + epsilon = compute_epsilon_threshold(num_samples) + top_ranked_me_interaction_table = top_ranked_me_interaction_table[ + (top_ranked_me_interaction_table["Tau_1X"] > epsilon) + & (top_ranked_me_interaction_table["Tau_X1"] > epsilon) + & (top_ranked_me_interaction_table["Rho"] < 0) + ] + method_to_top_ranked_me_interaction_table[method] = ( + top_ranked_me_interaction_table[["Gene A", "Gene B", metric]].head( + num_pairs, + ) ) - tables[method] = top_df - - return tables + return 
method_to_top_ranked_me_interaction_table
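
For reference, a minimal usage sketch (not part of the patch) of the new per-method ranking helpers introduced in postprocessing.py. It follows the same call pattern as analysis/likely_passenger_proportion_across_methods.py; the subtype directory below is a hypothetical placeholder, while the file names and column names come from the diff itself:

    from pathlib import Path

    import pandas as pd

    from dialect.utils.postprocessing import generate_top_ranked_me_interaction_tables

    # Hypothetical results directory for one subtype produced by a DIALECT run.
    subtype_dir = Path("results/UCEC")

    # Pairwise interaction results and the count matrix used to derive num_samples.
    results_df = pd.read_csv(subtype_dir / "complete_pairwise_ixn_results.csv")
    num_samples = pd.read_csv(subtype_dir / "count_matrix.csv", index_col=0).shape[0]

    # Top 10 mutually exclusive pairs per method; DIALECT rows are additionally
    # filtered by the epsilon threshold on Tau_1X / Tau_X1 and by Rho < 0.
    tables = generate_top_ranked_me_interaction_tables(
        results_df=results_df,
        num_pairs=10,
        num_samples=num_samples,
        methods=["DIALECT", "DISCOVER", "Fisher's Exact Test", "MEGSA", "WeSME"],
    )
    for method, table in tables.items():
        print(method, table[["Gene A", "Gene B"]].to_records(index=False))

The co-occurrence counterpart, generate_top_ranked_co_interaction_tables, takes the same arguments and is invoked the same way with the CO method list.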