From a3e513e5bc7bdac1489792afb08ea54c4ebd13a4 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 17:53:47 +0000 Subject: [PATCH 1/7] cosmetic and functional fixes --- scoring/performance_profile.py | 79 ++++++++++++++++++++++++++-------- scoring/score_submissions.py | 40 ++++++++++++----- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 8ee271804..f209c8610 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -31,9 +31,14 @@ import re from absl import logging +import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd +from tabulate import tabulate +import re + +import logging from algorithmic_efficiency.workloads.workloads import get_base_workload_name import algorithmic_efficiency.workloads.workloads as workloads_registry @@ -63,6 +68,30 @@ MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu'] +#MPL params +mpl.rcParams['figure.figsize'] = (16, 10) # Width, height in inches +mpl.rcParams['font.family'] = 'serif' +mpl.rcParams['font.serif'] = ['Times New Roman'] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice +mpl.rcParams['font.size'] = 22 +mpl.rcParams['savefig.dpi'] = 300 # Set resolution for saved figures + +# Plot Elements +mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed +mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed +mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) +mpl.rcParams['axes.labelsize'] = 22 # Axis label font size +mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size +mpl.rcParams['ytick.labelsize'] = 20 + +# Legends and Gridlines +mpl.rcParams['legend.fontsize'] = 20 # Legend font size +mpl.rcParams['legend.loc'] = 'best' # Let matplotlib decide the best legend location +mpl.rcParams['axes.grid'] = True # Enable grid +mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency + +def print_dataframe(df): + tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') + logging.info(tabulated_df) def generate_eval_cols(metrics): splits = ['train', 'validation'] @@ -177,10 +206,10 @@ def get_workloads_time_to_target(submission, num_trials = len(group) if num_trials != NUM_TRIALS and not self_tuning_ruleset: if strict: - raise ValueError(f'Expecting {NUM_TRIALS} trials for workload ' + raise ValueError(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') else: - logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' + logging.warning(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') # Get trial and time index that reaches target @@ -194,13 +223,14 @@ def get_workloads_time_to_target(submission, workloads.append({ 'submission': submission_name, - 'workload': workload, + 'workload': re.sub(r'_(jax|pytorch)$', '', workload), time_col: np.median(time_vals_per_study), }) df = pd.DataFrame.from_records(workloads) df = df.pivot(index='submission', columns='workload', values=time_col) - + logging.info("HELLOOOOOOOOO") + print_dataframe(df) return df @@ -269,26 +299,30 @@ def compute_performance_profiles(submissions, strict)) df = pd.concat(dfs) + logging.info("TIME TO TARGET") + print_dataframe(df) + # Set score to inf if not within 4x of fastest submission best_scores = df.min(axis=0) 
df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf + logging.info("4X of budget") + print_dataframe(df) + # For each held-out workload if variant target was not hit set submission to inf framework = None for workload in df.keys(): - # Check if this is a variant - framework = workload.split('_')[-1] - workload_ = workload.split(f'_{framework}')[0] - if workload_ not in BASE_WORKLOADS: + if workload not in BASE_WORKLOADS: # If variants do not have finite score set base_workload score to inf - base_workload = get_base_workload_name(workload_) + base_workload = get_base_workload_name(workload) df[base_workload] = df.apply( - variant_criteria_filter(base_workload + f'_{framework}', workload), + variant_criteria_filter(base_workload, workload), axis=1) + + logging.info("HELDOUT_WORKLOAD FILTER") + print_dataframe(df) - base_workloads = [w + f'_{framework}' for w in BASE_WORKLOADS] - df = df[base_workloads] - print(df) + df = df[BASE_WORKLOADS] if verbosity > 0: logging.info('\n`{time_col}` to reach target:') @@ -316,11 +350,17 @@ def compute_performance_profiles(submissions, 1000): logging.info(df) + logging.info('DIVIDE BY FASTEST') + print_dataframe(df) + # If no max_tau is supplied, choose the value of tau that would plot all non # inf or nan data. if max_tau is None: max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() + logging.info('AFTER MAYBE SETTING MAX TAU') + print_dataframe(df) + if scale == 'linear': points = np.linspace(min_tau, max_tau, num=num_points) elif scale == 'log': @@ -375,8 +415,8 @@ def plot_performance_profiles(perf_df, df_col, scale='linear', save_dir=None, - figsize=(30, 10), - font_size=18): + figsize=(30, 10) + ): """Plot performance profiles. Args: @@ -396,12 +436,13 @@ def plot_performance_profiles(perf_df, Returns: None. If a valid save_dir is provided, save both the plot and perf_df. """ - fig = perf_df.T.plot(figsize=figsize) + fig = perf_df.T.plot(figsize=figsize, alpha=0.7) df_col_display = f'log10({df_col})' if scale == 'log' else df_col fig.set_xlabel( - f'Ratio of `{df_col_display}` to best submission', size=font_size) - fig.set_ylabel('Proportion of workloads', size=font_size) - fig.legend(prop={'size': font_size}, bbox_to_anchor=(1.0, 1.0)) + f'Ratio of `{df_col_display}` to best submission') + fig.set_ylabel('Proportion of workloads') + fig.legend(bbox_to_anchor=(1.0, 1.0)) + plt.tight_layout() maybe_save_figure(save_dir, f'performance_profile_by_{df_col_display}') maybe_save_df_to_csv(save_dir, perf_df, diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0b768855e..4fd5b5b8e 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -22,8 +22,10 @@ import pandas as pd import scoring_utils from tabulate import tabulate +import json +import pickle -from scoring import performance_profile +import performance_profile flags.DEFINE_string( 'submission_directory', @@ -101,8 +103,13 @@ def get_summary_df(workload, workload_df, include_test_split=False): return summary_df -def print_submission_summary(df, include_test_split=True): +def get_submission_summary(df, include_test_split=True): + """Summarizes the submission results into metric and time tables + organized by workload. 
+ """ + dfs = [] + print(df) for workload, group in df.groupby('workload'): summary_df = get_summary_df( workload, group, include_test_split=include_test_split) @@ -115,15 +122,26 @@ def print_submission_summary(df, include_test_split=True): def main(_): results = {} - - for submission in os.listdir(FLAGS.submission_directory): - experiment_path = os.path.join(FLAGS.submission_directory, submission) - df = scoring_utils.get_experiment_df(experiment_path) - results[submission] = df - summary_df = print_submission_summary(df) - with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - 'w') as fout: - summary_df.to_csv(fout) + os.makedirs(FLAGS.output_dir, exist_ok=True) + +# for team in os.listdir(FLAGS.submission_directory): +# for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): +# print(submission) +# experiment_path = os.path.join(FLAGS.submission_directory, team, submission) +# df = scoring_utils.get_experiment_df(experiment_path) +# results[submission] = df +# summary_df = get_submission_summary(df) +# with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), +# 'w') as fout: +# summary_df.to_csv(fout) + +# # Save results +# with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: +# pickle.dump(results, f) + + # Read results + with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: + results = pickle.load(f) if not FLAGS.strict: logging.warning( From e125201e7452f6f659eec8c2738759e34cd26a68 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 17:58:08 +0000 Subject: [PATCH 2/7] formatting --- scoring/performance_profile.py | 54 +++++++++++++++++++--------------- scoring/score_submissions.py | 30 +++++++++---------- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index f209c8610..949ec9d45 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -70,28 +70,35 @@ #MPL params mpl.rcParams['figure.figsize'] = (16, 10) # Width, height in inches -mpl.rcParams['font.family'] = 'serif' -mpl.rcParams['font.serif'] = ['Times New Roman'] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice +mpl.rcParams['font.family'] = 'serif' +mpl.rcParams['font.serif'] = [ + 'Times New Roman' +] + mpl.rcParams['font.serif'] # Add Times New Roman as first choice mpl.rcParams['font.size'] = 22 mpl.rcParams['savefig.dpi'] = 300 # Set resolution for saved figures # Plot Elements -mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed -mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed -mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) -mpl.rcParams['axes.labelsize'] = 22 # Axis label font size -mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size +mpl.rcParams['lines.linewidth'] = 3 # Adjust line thickness if needed +mpl.rcParams['lines.markersize'] = 6 # Adjust marker size if needed +mpl.rcParams['axes.prop_cycle'] = mpl.cycler( + color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", + "#9467bd"]) # Example color cycle (consider ColorBrewer or viridis) +mpl.rcParams['axes.labelsize'] = 22 # Axis label font size +mpl.rcParams['xtick.labelsize'] = 20 # Tick label font size mpl.rcParams['ytick.labelsize'] = 20 # Legends and Gridlines -mpl.rcParams['legend.fontsize'] = 20 # Legend font size -mpl.rcParams['legend.loc'] = 'best' # Let matplotlib decide the best 
legend location -mpl.rcParams['axes.grid'] = True # Enable grid -mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency +mpl.rcParams['legend.fontsize'] = 20 # Legend font size +mpl.rcParams[ + 'legend.loc'] = 'best' # Let matplotlib decide the best legend location +mpl.rcParams['axes.grid'] = True # Enable grid +mpl.rcParams['grid.alpha'] = 0.4 # Gridline transparency + def print_dataframe(df): - tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') - logging.info(tabulated_df) + tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql') + logging.info(tabulated_df) + def generate_eval_cols(metrics): splits = ['train', 'validation'] @@ -206,11 +213,13 @@ def get_workloads_time_to_target(submission, num_trials = len(group) if num_trials != NUM_TRIALS and not self_tuning_ruleset: if strict: - raise ValueError(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + raise ValueError( + f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') else: - logging.warning(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + logging.warning( + f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') # Get trial and time index that reaches target trial_idx, time_idx = get_best_trial_index( @@ -316,9 +325,8 @@ def compute_performance_profiles(submissions, # If variants do not have finite score set base_workload score to inf base_workload = get_base_workload_name(workload) df[base_workload] = df.apply( - variant_criteria_filter(base_workload, workload), - axis=1) - + variant_criteria_filter(base_workload, workload), axis=1) + logging.info("HELDOUT_WORKLOAD FILTER") print_dataframe(df) @@ -415,8 +423,7 @@ def plot_performance_profiles(perf_df, df_col, scale='linear', save_dir=None, - figsize=(30, 10) - ): + figsize=(30, 10)): """Plot performance profiles. 
Args: @@ -438,8 +445,7 @@ def plot_performance_profiles(perf_df, """ fig = perf_df.T.plot(figsize=figsize, alpha=0.7) df_col_display = f'log10({df_col})' if scale == 'log' else df_col - fig.set_xlabel( - f'Ratio of `{df_col_display}` to best submission') + fig.set_xlabel(f'Ratio of `{df_col_display}` to best submission') fig.set_ylabel('Proportion of workloads') fig.legend(bbox_to_anchor=(1.0, 1.0)) plt.tight_layout() diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 4fd5b5b8e..0ca56a4a8 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -124,21 +124,21 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) -# for team in os.listdir(FLAGS.submission_directory): -# for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): -# print(submission) -# experiment_path = os.path.join(FLAGS.submission_directory, team, submission) -# df = scoring_utils.get_experiment_df(experiment_path) -# results[submission] = df -# summary_df = get_submission_summary(df) -# with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), -# 'w') as fout: -# summary_df.to_csv(fout) - -# # Save results -# with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: -# pickle.dump(results, f) - + # for team in os.listdir(FLAGS.submission_directory): + # for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): + # print(submission) + # experiment_path = os.path.join(FLAGS.submission_directory, team, submission) + # df = scoring_utils.get_experiment_df(experiment_path) + # results[submission] = df + # summary_df = get_submission_summary(df) + # with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + # 'w') as fout: + # summary_df.to_csv(fout) + + # # Save results + # with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: + # pickle.dump(results, f) + # Read results with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: results = pickle.load(f) From ea586fe2f2882b55d2d269905da70ed3f83b9d9b Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:22:01 +0000 Subject: [PATCH 3/7] sorting --- scoring/performance_profile.py | 4 +--- scoring/score_submissions.py | 7 +++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 949ec9d45..ae5984381 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -26,6 +26,7 @@ the dictionary of submissions. 
""" import itertools +import logging import operator import os import re @@ -36,9 +37,6 @@ import numpy as np import pandas as pd from tabulate import tabulate -import re - -import logging from algorithmic_efficiency.workloads.workloads import get_base_workload_name import algorithmic_efficiency.workloads.workloads as workloads_registry diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0ca56a4a8..0e99a19c6 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -12,20 +12,19 @@ --compute_performance_profiles """ +import json import operator import os +import pickle from absl import app from absl import flags from absl import logging import numpy as np import pandas as pd +import performance_profile import scoring_utils from tabulate import tabulate -import json -import pickle - -import performance_profile flags.DEFINE_string( 'submission_directory', From 32121151412c6a27063d44b3ab22d5d61f2a4e24 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:34:33 +0000 Subject: [PATCH 4/7] add flags for saving results files --- scoring/score_submissions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 0e99a19c6..4c7f91eaa 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -46,6 +46,16 @@ 'self_tuning_ruleset', False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') +flags.DEFINE_string( + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions' +) +flags.DEFINE_boolean( + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions' +) FLAGS = flags.FLAGS From ffc1ee512e1bcfabc1a3965e7e9fd2b063053e15 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:42:22 +0000 Subject: [PATCH 5/7] remove debugging statements --- scoring/performance_profile.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index ae5984381..31106f057 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -236,8 +236,6 @@ def get_workloads_time_to_target(submission, df = pd.DataFrame.from_records(workloads) df = df.pivot(index='submission', columns='workload', values=time_col) - logging.info("HELLOOOOOOOOO") - print_dataframe(df) return df @@ -306,16 +304,10 @@ def compute_performance_profiles(submissions, strict)) df = pd.concat(dfs) - logging.info("TIME TO TARGET") - print_dataframe(df) - # Set score to inf if not within 4x of fastest submission best_scores = df.min(axis=0) df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf - logging.info("4X of budget") - print_dataframe(df) - # For each held-out workload if variant target was not hit set submission to inf framework = None for workload in df.keys(): @@ -325,9 +317,6 @@ def compute_performance_profiles(submissions, df[base_workload] = df.apply( variant_criteria_filter(base_workload, workload), axis=1) - logging.info("HELDOUT_WORKLOAD FILTER") - print_dataframe(df) - df = df[BASE_WORKLOADS] if verbosity > 0: @@ -356,17 +345,11 @@ def compute_performance_profiles(submissions, 1000): logging.info(df) - logging.info('DIVIDE BY FASTEST') - print_dataframe(df) - # If no max_tau is supplied, choose the value of tau that would plot all non # inf or nan data. 
if max_tau is None: max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() - logging.info('AFTER MAYBE SETTING MAX TAU') - print_dataframe(df) - if scale == 'linear': points = np.linspace(min_tau, max_tau, num=num_points) elif scale == 'log': From 90c465220195e567e2ab4a7f8f192cc02259ca33 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:43:42 +0000 Subject: [PATCH 6/7] add flags --- scoring/score_submissions.py | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 4c7f91eaa..6bf51d72e 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -47,15 +47,13 @@ False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') flags.DEFINE_string( - 'save_results_to_filename', - None, - 'Filename to save the processed results that are fed into the performance profile functions' -) + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions.') flags.DEFINE_boolean( - 'load_results_from_filename', - None, - 'Filename to load processed results from that are fed into performance profile functions' -) + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions') FLAGS = flags.FLAGS @@ -133,24 +131,26 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) - # for team in os.listdir(FLAGS.submission_directory): - # for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): - # print(submission) - # experiment_path = os.path.join(FLAGS.submission_directory, team, submission) - # df = scoring_utils.get_experiment_df(experiment_path) - # results[submission] = df - # summary_df = get_submission_summary(df) - # with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - # 'w') as fout: - # summary_df.to_csv(fout) - - # # Save results - # with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f: - # pickle.dump(results, f) - - # Read results - with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f: - results = pickle.load(f) + # Optionally read results to filename + if FLAGS.load_results_from_filename: + with open(os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), 'rb') as f: + results = pickle.load(f) + else: + for team in os.listdir(FLAGS.submission_directory): + for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): + print(submission) + experiment_path = os.path.join(FLAGS.submission_directory, team, submission) + df = scoring_utils.get_experiment_df(experiment_path) + results[submission] = df + summary_df = get_submission_summary(df) + with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + 'w') as fout: + summary_df.to_csv(fout) + + # Optionally save results to filename + if FLAGS.save_results_to_filename: + with open(os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), 'wb') as f: + pickle.dump(results, f) if not FLAGS.strict: logging.warning( From be6560ebf5d3bc0754a614df10f0599215ea9a95 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Tue, 2 Jul 2024 18:46:09 +0000 Subject: [PATCH 7/7] formatting --- scoring/score_submissions.py | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 6bf51d72e..97264748f 100644 --- 
a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -47,13 +47,15 @@ False, 'Whether to score on self-tuning ruleset or externally tuned ruleset') flags.DEFINE_string( - 'save_results_to_filename', - None, - 'Filename to save the processed results that are fed into the performance profile functions.') + 'save_results_to_filename', + None, + 'Filename to save the processed results that are fed into the performance profile functions.' +) flags.DEFINE_boolean( - 'load_results_from_filename', - None, - 'Filename to load processed results from that are fed into performance profile functions') + 'load_results_from_filename', + None, + 'Filename to load processed results from that are fed into performance profile functions' +) FLAGS = flags.FLAGS @@ -131,25 +133,33 @@ def main(_): results = {} os.makedirs(FLAGS.output_dir, exist_ok=True) - # Optionally read results to filename + # Optionally read results to filename if FLAGS.load_results_from_filename: - with open(os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), 'rb') as f: + with open( + os.path.join(FLAGS.output_dir, FLAGS.load_results_from_filename), + 'rb') as f: results = pickle.load(f) else: for team in os.listdir(FLAGS.submission_directory): - for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)): - print(submission) - experiment_path = os.path.join(FLAGS.submission_directory, team, submission) - df = scoring_utils.get_experiment_df(experiment_path) - results[submission] = df - summary_df = get_submission_summary(df) - with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), - 'w') as fout: - summary_df.to_csv(fout) + for submission in os.listdir( + os.path.join(FLAGS.submission_directory, team)): + print(submission) + experiment_path = os.path.join(FLAGS.submission_directory, + team, + submission) + df = scoring_utils.get_experiment_df(experiment_path) + results[submission] = df + summary_df = get_submission_summary(df) + with open( + os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'), + 'w') as fout: + summary_df.to_csv(fout) # Optionally save results to filename if FLAGS.save_results_to_filename: - with open(os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), 'wb') as f: + with open( + os.path.join(FLAGS.output_dir, FLAGS.save_results_to_filename), + 'wb') as f: pickle.dump(results, f) if not FLAGS.strict:
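
For context on patches 4/7 and 6/7: the new `--save_results_to_filename` and `--load_results_from_filename` flags implement a simple pickle cache for the processed per-submission DataFrames, so repeated scoring runs do not have to re-parse every experiment log. The sketch below is an illustrative, standalone version of that caching pattern only; the function name `load_or_build_results` and the `build_fn` callback are placeholders invented for this example and do not exist in the repository.

```python
import os
import pickle


def load_or_build_results(output_dir, cache_filename, build_fn):
  """Return cached results if a cache file exists, otherwise build and save them.

  Illustrative sketch of the save/load pattern added to score_submissions.py;
  `build_fn` stands in for walking the submission directory and parsing logs.
  """
  cache_path = os.path.join(output_dir, cache_filename)
  if os.path.exists(cache_path):
    # Reuse previously processed results instead of re-parsing logs.
    with open(cache_path, 'rb') as f:
      return pickle.load(f)
  results = build_fn()  # e.g. {submission_name: experiment DataFrame}
  os.makedirs(output_dir, exist_ok=True)
  with open(cache_path, 'wb') as f:
    pickle.dump(results, f)
  return results
```

In the script itself, a first run that passes `--save_results_to_filename` writes the pickle under `--output_dir`, and a later run that passes `--load_results_from_filename` reads it back and skips log parsing entirely, which is convenient when iterating only on the performance-profile plotting.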