From a3e513e5bc7bdac1489792afb08ea54c4ebd13a4 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 17:53:47 +0000 Subject: [PATCH 1/7] cosmetic and functional fixes --- scoring/performance_profile.py | 79 ++++++++++++++++++++++++++-------- scoring/score_submissions.py | 40 ++++++++++++----- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 8ee271804..f209c8610 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -31,9 +31,14 @@ import re from absl import logging +import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd +from tabulate import tabulate +import re + +import logging from algorithmic_efficiency.workloads.workloads import get_base_workload_name import algorithmic_efficiency.workloads.workloads as workloads_registry @@ -63,6 +68,30 @@ MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu'] +#MPL params +mpl.rcParams['figure.figsize'] = (16, 10) # Width, height in inches +mpl.rcParams['font.family'] = 'serif' +mpl.rcParams['font.serif'] = ['Times New Roman'] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice +mpl.rcParams['font.size'] = 22 +mpl.rcParams['savefig.dpi'] = 300 # Set resolution for saved figures + +# Plot Elements +mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed +mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed +mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) +mpl.rcParams['axes.labelsize'] = 22 # Axis label font size +mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size +mpl.rcParams['ytick.labelsize'] = 20 + +# Legends and Gridlines +mpl.rcParams['legend.fontsize'] = 20 # Legend font size +mpl.rcParams['legend.loc'] = 'best' # Let matplotlib decide the best legend location +mpl.rcParams['axes.grid'] = True # Enable grid +mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency + +def print_dataframe(df): + tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') + logging.info(tabulated_df) def generate_eval_cols(metrics): splits = ['train', 'validation'] @@ -177,10 +206,10 @@ def get_workloads_time_to_target(submission, num_trials = len(group) if num_trials != NUM_TRIALS and not self_tuning_ruleset: if strict: - raise ValueError(f'Expecting {NUM_TRIALS} trials for workload ' + raise ValueError(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') else: - logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' + logging.warning(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') # Get trial and time index that reaches target @@ -194,13 +223,14 @@ def get_workloads_time_to_target(submission, workloads.append({ 'submission': submission_name, - 'workload': workload, + 'workload': re.sub(r'_(jax|pytorch)$', '', workload), time_col: np.median(time_vals_per_study), }) df = pd.DataFrame.from_records(workloads) df = df.pivot(index='submission', columns='workload', values=time_col) - + logging.info("HELLOOOOOOOOO") + print_dataframe(df) return df @@ -269,26 +299,30 @@ def compute_performance_profiles(submissions, strict)) df = pd.concat(dfs) + logging.info("TIME TO TARGET") + print_dataframe(df) + # Set score to inf if not within 4x of fastest submission best_scores = df.min(axis=0) 
df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf + logging.info("4X of budget") + print_dataframe(df) + # For each held-out workload if variant target was not hit set submission to inf framework = None for workload in df.keys(): - # Check if this is a variant - framework = workload.split('_')[-1] - workload_ = workload.split(f'_{framework}')[0] - if workload_ not in BASE_WORKLOADS: + if workload not in BASE_WORKLOADS: # If variants do not have finite score set base_workload score to inf - base_workload = get_base_workload_name(workload_) + base_workload = get_base_workload_name(workload) df[base_workload] = df.apply( - variant_criteria_filter(base_workload + f'_{framework}', workload), + variant_criteria_filter(base_workload, workload), axis=1) + + logging.info("HELDOUT_WORKLOAD FILTER") + print_dataframe(df) - base_workloads = [w + f'_{framework}' for w in BASE_WORKLOADS] - df = df[base_workloads] - print(df) + df = df[BASE_WORKLOADS] if verbosity > 0: logging.info('\n`{time_col}` to reach target:') @@ -316,11 +350,17 @@ def compute_performance_profiles(submissions, 1000): logging.info(df) + logging.info('DIVIDE BY FASTEST') + print_dataframe(df) + # If no max_tau is supplied, choose the value of tau that would plot all non # inf or nan data. if max_tau is None: max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() + logging.info('AFTER MAYBE SETTING MAX TAU') + print_dataframe(df) + if scale == 'linear': points = np.linspace(min_tau, max_tau, num=num_points) elif scale == 'log': @@ -375,8 +415,8 @@ def plot_performance_profiles(perf_df, df_col, scale='linear', save_dir=None, - figsize=(30, 10), - font_size=18): + figsize=(30, 10) + ): """Plot performance profiles. Args: @@ -396,12 +436,13 @@ def plot_performance_profiles(perf_df, Returns: None. If a valid save_dir is provided, save both the plot and perf_df. """ - fig = perf_df.T.plot(figsize=figsize) + fig = perf_df.T.plot(figsize=figsize, alpha=0.7) df_col_display = f'log10({df_col})' if scale == 'log' else df_col fig.set_xlabel( - f'Ratio of `{df_col_display}` to best submission', size=font_size) - fig.set_ylabel('Proportion of workloads', size=font_size) - fig.legend(prop={'size': font_size}, bbox_to_anchor=(1.0, 1.0)) + f'Ratio of `{df_col_display}` to best submission') + fig.set_ylabel('Proportion of workloads') + fig.legend(bbox_to_anchor=(1.0, 1.0)) + plt.tight_layout() maybe_save_figure(save_dir, f'performance_profile_by_{df_col_display}') maybe_save_df_to_csv(save_dir, perf_df, diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0b768855e..4fd5b5b8e 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -22,8 +22,10 @@ import pandas as pd import scoring_utils from tabulate import tabulate +import json +import pickle -from scoring import performance_profile +import performance_profile flags.DEFINE_string( 'submission_directory', @@ -101,8 +103,13 @@ def get_summary_df(workload, workload_df, include_test_split=False): return summary_df -def print_submission_summary(df, include_test_split=True): +def get_submission_summary(df, include_test_split=True): + """Summarizes the submission results into metric and time tables + organized by workload. 
+ """ + dfs = [] + print(df) for workload, group in df.groupby('workload'): summary_df = get_summary_df( workload, group, include_test_split=include_test_split) @@ -115,15 +122,26 @@ def print_submission_summary(df, include_test_split=True): def main(_): results = {} - - for submission in os.listdir(FLAGS.submission_directory): - experiment_path = os.path.join(FLAGS.submission_directory, submission) - df = scoring_utils.get_experiment_df(experiment_path) - results[submission] = df - summary_df = print_submission_summary(df) - with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - 'w') as fout: - summary_df.to_csv(fout) + os.makedirs(FLAGS.output_dir, exist_ok=True) + +# for team in os.listdir(FLAGS.submission_directory): +# for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): +# print(submission) +# experiment_path = os.path.join(FLAGS.submission_directory, team, submission) +# df = scoring_utils.get_experiment_df(experiment_path) +# results[submission] = df +# summary_df = get_submission_summary(df) +# with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), +# 'w') as fout: +# summary_df.to_csv(fout) + +# # Save results +# with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: +# pickle.dump(results, f) + + # Read results + with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: + results = pickle.load(f) if not FLAGS.strict: logging.warning( From e125201e7452f6f659eec8c2738759e34cd26a68 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 17:58:08 +0000 Subject: [PATCH 2/7] formatting --- scoring/performance_profile.py | 54 +++++++++++++++++++--------------- scoring/score_submissions.py | 30 +++++++++---------- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index f209c8610..949ec9d45 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -70,28 +70,35 @@ #MPL params mpl.rcParams['figure.figsize'] = (16, 10) # Width, height in inches -mpl.rcParams['font.family'] = 'serif' -mpl.rcParams['font.serif'] = ['Times New Roman'] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice +mpl.rcParams['font.family'] = 'serif' +mpl.rcParams['font.serif'] = [ + 'Times New Roman' +] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice mpl.rcParams['font.size'] = 22 mpl.rcParams['savefig.dpi'] = 300 # Set resolution for saved figures # Plot Elements -mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed -mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed -mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) -mpl.rcParams['axes.labelsize'] = 22 # Axis label font size -mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size +mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed +mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed +mpl.rcParams['axes.prop_cycle'] = mpl.cycler( + color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", + "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) +mpl.rcParams['axes.labelsize'] = 22 # Axis label font size +mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size mpl.rcParams['ytick.labelsize'] = 20 # Legends and Gridlines -mpl.rcParams['legend.fontsize'] = 20 # Legend font size -mpl.rcParams['legend.loc'] = 'best' # Let matplotlib decide the best 
legend location -mpl.rcParams['axes.grid'] = True # Enable grid -mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency +mpl.rcParams['legend.fontsize'] = 20 # Legend font size +mpl.rcParams[ + 'legend.loc'] = 'best' # Let matplotlib decide the best legend location +mpl.rcParams['axes.grid'] = True # Enable grid +mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency + def print_dataframe(df): - tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') - logging.info(tabulated_df) + tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') + logging.info(tabulated_df) + def generate_eval_cols(metrics): splits = ['train', 'validation'] @@ -206,11 +213,13 @@ def get_workloads_time_to_target(submission, num_trials = len(group) if num_trials != NUM_TRIALS and not self_tuning_ruleset: if strict: - raise ValueError(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + raise ValueError( + f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') else: - logging.warning(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + logging.warning( + f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') # Get trial and time index that reaches target trial_idx, time_idx = get_best_trial_index( @@ -316,9 +325,8 @@ def compute_performance_profiles(submissions, # If variants do not have finite score set base_workload score to inf base_workload = get_base_workload_name(workload) df[base_workload] = df.apply( - variant_criteria_filter(base_workload, workload), - axis=1) - + variant_criteria_filter(base_workload, workload), axis=1) + logging.info("HELDOUT_WORKLOAD FILTER") print_dataframe(df) @@ -415,8 +423,7 @@ def plot_performance_profiles(perf_df, df_col, scale='linear', save_dir=None, - figsize=(30, 10) - ): + figsize=(30, 10)): """Plot performance profiles. 
Args: @@ -438,8 +445,7 @@ def plot_performance_profiles(perf_df, """ fig = perf_df.T.plot(figsize=figsize, alpha=0.7) df_col_display = f'log10({df_col})' if scale == 'log' else df_col - fig.set_xlabel( - f'Ratio of `{df_col_display}` to best submission') + fig.set_xlabel(f'Ratio of `{df_col_display}` to best submission') fig.set_ylabel('Proportion of workloads') fig.legend(bbox_to_anchor=(1.0, 1.0)) plt.tight_layout() diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 4fd5b5b8e..0ca56a4a8 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -124,21 +124,21 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) -# for team in os.listdir(FLAGS.submission_directory): -# for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): -# print(submission) -# experiment_path = os.path.join(FLAGS.submission_directory, team, submission) -# df = scoring_utils.get_experiment_df(experiment_path) -# results[submission] = df -# summary_df = get_submission_summary(df) -# with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), -# 'w') as fout: -# summary_df.to_csv(fout) - -# # Save results -# with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: -# pickle.dump(results, f) - + # for team in os.listdir(FLAGS.submission_directory): + # for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): + # print(submission) + # experiment_path = os.path.join(FLAGS.submission_directory, team, submission) + # df = scoring_utils.get_experiment_df(experiment_path) + # results[submission] = df + # summary_df = get_submission_summary(df) + # with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + # 'w') as fout: + # summary_df.to_csv(fout) + + # # Save results + # with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: + # pickle.dump(results, f) + # Read results with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: results = pickle.load(f) From ea586fe2f2882b55d2d269905da70ed3f83b9d9b Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:22:01 +0000 Subject: [PATCH 3/7] sorting --- scoring/performance_profile.py | 4 +--- scoring/score_submissions.py | 7 +++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 949ec9d45..ae5984381 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -26,6 +26,7 @@ the dictionary of submissions. 
""" import itertools +import logging import operator import os import re @@ -36,9 +37,6 @@ import numpy as np import pandas as pd from tabulate import tabulate -import re - -import logging from algorithmic_efficiency.workloads.workloads import get_base_workload_name import algorithmic_efficiency.workloads.workloads as workloads_registry diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0ca56a4a8..0e99a19c6 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -12,20 +12,19 @@ --compute_performance_profiles """ +import json import operator import os +import pickle from absl import app from absl import flags from absl import logging import numpy as np import pandas as pd +import performance_profile import scoring_utils from tabulate import tabulate -import json -import pickle - -import performance_profile flags.DEFINE_string( 'submission_directory', From 32121151412c6a27063d44b3ab22d5d61f2a4e24 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:34:33 +0000 Subject: [PATCH 4/7] add flags for saving results files --- scoring/score_submissions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0e99a19c6..4c7f91eaa 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -46,6 +46,16 @@ 'self_tuning_ruleset', False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') +flags.DEFINE_string( + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions' +) +flags.DEFINE_boolean( + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions' +) FLAGS = flags.FLAGS From ffc1ee512e1bcfabc1a3965e7e9fd2b063053e15 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:42:22 +0000 Subject: [PATCH 5/7] remove debugging statements --- scoring/performance_profile.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index ae5984381..31106f057 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -236,8 +236,6 @@ def get_workloads_time_to_target(submission, df = pd.DataFrame.from_records(workloads) df = df.pivot(index='submission', columns='workload', values=time_col) - logging.info("HELLOOOOOOOOO") - print_dataframe(df) return df @@ -306,16 +304,10 @@ def compute_performance_profiles(submissions, strict)) df = pd.concat(dfs) - logging.info("TIME TO TARGET") - print_dataframe(df) - # Set score to inf if not within 4x of fastest submission best_scores = df.min(axis=0) df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf - logging.info("4X of budget") - print_dataframe(df) - # For each held-out workload if variant target was not hit set submission to inf framework = None for workload in df.keys(): @@ -325,9 +317,6 @@ def compute_performance_profiles(submissions, df[base_workload] = df.apply( variant_criteria_filter(base_workload, workload), axis=1) - logging.info("HELDOUT_WORKLOAD FILTER") - print_dataframe(df) - df = df[BASE_WORKLOADS] if verbosity > 0: @@ -356,17 +345,11 @@ def compute_performance_profiles(submissions, 1000): logging.info(df) - logging.info('DIVIDE BY FASTEST') - print_dataframe(df) - # If no max_tau is supplied, choose the value of tau that would plot all non # inf or nan data. 
if max_tau is None: max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() - logging.info('AFTER MAYBE SETTING MAX TAU') - print_dataframe(df) - if scale == 'linear': points = np.linspace(min_tau, max_tau, num=num_points) elif scale == 'log': From 90c465220195e567e2ab4a7f8f192cc02259ca33 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:43:42 +0000 Subject: [PATCH 6/7] add flags --- scoring/score_submissions.py | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 4c7f91eaa..6bf51d72e 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -47,15 +47,13 @@ False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') flags.DEFINE_string( - 'save_results_to_filename', - None, - 'Filename to save the processed results that are fed into the performance profile functions' -) + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions.') flags.DEFINE_boolean( - 'load_results_from_filename', - None, - 'Filename to load processed results from that are fed into performance profile functions' -) + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions') FLAGS = flags.FLAGS @@ -133,24 +131,26 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) - # for team in os.listdir(FLAGS.submission_directory): - # for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): - # print(submission) - # experiment_path = os.path.join(FLAGS.submission_directory, team, submission) - # df = scoring_utils.get_experiment_df(experiment_path) - # results[submission] = df - # summary_df = get_submission_summary(df) - # with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - # 'w') as fout: - # summary_df.to_csv(fout) - - # # Save results - # with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: - # pickle.dump(results, f) - - # Read results - with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: - results = pickle.load(f) + # Optionally read results to filename + if FLAGS.load_results_from_filename: + with open(os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), 'rb') as f: + results = pickle.load(f) + else: + for team in os.listdir(FLAGS.submission_directory): + for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): + print(submission) + experiment_path = os.path.join(FLAGS.submission_directory, team, submission) + df = scoring_utils.get_experiment_df(experiment_path) + results[submission] = df + summary_df = get_submission_summary(df) + with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + 'w') as fout: + summary_df.to_csv(fout) + + # Optionally save results to filename + if FLAGS.save_results_to_filename: + with open(os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), 'wb') as f: + pickle.dump(results, f) if not FLAGS.strict: logging.warning( From be6560ebf5d3bc0754a614df10f0599215ea9a95 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:46:09 +0000 Subject: [PATCH 7/7] formatting --- scoring/score_submissions.py | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 6bf51d72e..97264748f 100644 --- 
a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -47,13 +47,15 @@ False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') flags.DEFINE_string( - 'save_results_to_filename', - None, - 'Filename to save the processed results that are fed into the performance profile functions.') + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions.' +) flags.DEFINE_boolean( - 'load_results_from_filename', - None, - 'Filename to load processed results from that are fed into performance profile functions') + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions' +) FLAGS = flags.FLAGS @@ -131,25 +133,33 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) - # Optionally read results to filename + # Optionally read results to filename if FLAGS.load_results_from_filename: - with open(os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), 'rb') as f: + with open( + os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), + 'rb') as f: results = pickle.load(f) else: for team in os.listdir(FLAGS.submission_directory): - for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): - print(submission) - experiment_path = os.path.join(FLAGS.submission_directory, team, submission) - df = scoring_utils.get_experiment_df(experiment_path) - results[submission] = df - summary_df = get_submission_summary(df) - with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - 'w') as fout: - summary_df.to_csv(fout) + for submission in os.listdir( + os.path.join(FLAGS.submission_directory, team)): + print(submission) + experiment_path = os.path.join(FLAGS.submission_directory, + team, + submission) + df = scoring_utils.get_experiment_df(experiment_path) + results[submission] = df + summary_df = get_submission_summary(df) + with open( + os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + 'w') as fout: + summary_df.to_csv(fout) # Optionally save results to filename if FLAGS.save_results_to_filename: - with open(os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), 'wb') as f: + with open( + os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), + 'wb') as f: pickle.dump(results, f) if not FLAGS.strict:
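
For context on patches 4/7 and 6/7: the new `--save_results_to_filename` and `--load_results_from_filename` flags implement a simple pickle cache for the processed per-submission DataFrames, so repeated scoring runs do not have to re-parse every experiment log. The sketch below is an illustrative, standalone version of that caching pattern only; the function name `load_or_build_results` and the `build_fn` callback are placeholders invented for this example and do not exist in the repository.

```python
import os
import pickle


def load_or_build_results(output_dir, cache_filename, build_fn):
  """Return cached results if a cache file exists, otherwise build and save them.

  Illustrative sketch of the save/load pattern added to score_submissions.py;
  `build_fn` stands in for walking the submission directory and parsing logs.
  """
  cache_path = os.path.join(output_dir, cache_filename)
  if os.path.exists(cache_path):
    # Reuse previously processed results instead of re-parsing logs.
    with open(cache_path, 'rb') as f:
      return pickle.load(f)
  results = build_fn()  # e.g. {submission_name: experiment DataFrame}
  os.makedirs(output_dir, exist_ok=True)
  with open(cache_path, 'wb') as f:
    pickle.dump(results, f)
  return results
```

In the script itself, a first run that passes `--save_results_to_filename` writes the pickle under `--output_dir`, and a later run that passes `--load_results_from_filename` reads it back and skips log parsing entirely, which is convenient when iterating only on the performance-profile plotting.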