Scoring: Quality of Life improvements #776

Merged 5 commits on Jul 30, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@
> [!IMPORTANT]
> Submitters are no longer required to self-report results.
> We are currently in the process of evaluating and scoring received submissions.
-> We are aiming to release results by July 15th 2024.
+> Results coming soon!
> For other key dates please see [Call for Submissions](CALL_FOR_SUBMISSIONS.md).

## Table of Contents <!-- omit from toc -->
22 changes: 12 additions & 10 deletions scoring/performance_profile.py
@@ -26,7 +26,6 @@
the dictionary of submissions.
"""
import itertools
-import logging
import operator
import os
import re
@@ -184,10 +183,10 @@ def get_workloads_time_to_target(submission,
    if strict:
      raise ValueError(
          f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads '
-         f'but found {num_workloads} workloads.')
+         f'but found {num_workloads} workloads for {submission_name}.')
    logging.warning(
        f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads '
-       f'but found {num_workloads} workloads.')
+       f'but found {num_workloads} workloads for {submission_name}.')

  # For each workload, get the submission times to target.
for workload, group in submission.groupby('workload'):
@@ -198,11 +197,13 @@
    num_studies = len(group.groupby('study'))
    if num_studies != NUM_STUDIES:
      if strict:
-       raise ValueError(f'Expecting {NUM_STUDIES} trials for workload '
-                        f'{workload} but found {num_studies} trials.')
+       raise ValueError(f'Expecting {NUM_STUDIES} studies for workload '
+                        f'{workload} but found {num_studies} studies '
+                        f'for {submission_name}.')
      else:
-       logging.warning(f'Expecting {NUM_STUDIES} trials for workload '
-                       f'{workload} but found {num_studies} trials.')
+       logging.warning(f'Expecting {NUM_STUDIES} studies for workload '
+                       f'{workload} but found {num_studies} studies '
+                       f'for {submission_name}.')

# For each study check trials
for study, group in group.groupby('study'):
@@ -213,11 +214,13 @@
      if strict:
        raise ValueError(
            f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
-           f'{workload} but found {num_trials} trials.')
+           f'{workload} but found {num_trials} trials '
+           f'for {submission_name}.')
      else:
        logging.warning(
            f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
-           f'{workload} but found {num_trials} trials.')
+           f'{workload} but found {num_trials} trials '
+           f'for {submission_name}.')

# Get trial and time index that reaches target
trial_idx, time_idx = get_best_trial_index(
@@ -309,7 +312,6 @@ def compute_performance_profiles(submissions,
  df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf

  # For each held-out workload if variant target was not hit set submission to inf
- framework = None
  for workload in df.keys():
    if workload not in BASE_WORKLOADS:
      # If variants do not have finite score set base_workload score to inf
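The changes above apply one pattern at the workload, study, and trial levels: count the groups, then either raise (when scoring strictly) or log a warning, and now name the submission in the message. Below is a minimal sketch of that pattern, assuming placeholder workload counts and a hypothetical `check_workload_count` helper; the real code derives the counts from pandas group-bys inside `get_workloads_time_to_target`.

```python
import logging

# Assumed counts, for illustration only; the real values live in
# scoring/performance_profile.py.
NUM_BASE_WORKLOADS = 8
NUM_VARIANT_WORKLOADS = 6


def check_workload_count(num_workloads, submission_name, strict=False):
  """Raise in strict mode, otherwise warn, naming the offending submission."""
  expected = NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS
  if num_workloads != expected:
    msg = (f'Expecting {expected} workloads '
           f'but found {num_workloads} workloads for {submission_name}.')
    if strict:
      raise ValueError(msg)
    logging.warning(msg)


# An incomplete submission only triggers a warning unless strict
# scoring is requested.
check_workload_count(num_workloads=12, submission_name='example_submission')
```

Carrying the submission name into every message is what makes the warnings actionable when many submissions are scored in a single run.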
3 changes: 1 addition & 2 deletions scoring/score_submissions.py
@@ -12,7 +12,6 @@
--compute_performance_profiles
"""

-import json
import operator
import os
import pickle
@@ -136,7 +135,7 @@ def get_submission_summary(df, include_test_split=True):
  return df


-def compute_leaderboard_score(df, normalize=False):
+def compute_leaderboard_score(df, normalize=True):
"""Compute leaderboard score by taking integral of performance profile.

Args:
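The other behavioral change is that `compute_leaderboard_score` now normalizes by default. As a rough sketch of what "integral of performance profile" can mean, under assumed conventions rather than the repository's actual implementation (which may integrate over log-scaled tau with different bounds): each row of the profile gives the fraction of workloads a submission solves within a factor tau of the best, and the score is the area under that curve, optionally divided by the tau range so a perfect profile scores 1.0.

```python
import numpy as np
import pandas as pd


def leaderboard_score_sketch(perf_df: pd.DataFrame,
                             normalize: bool = True) -> pd.Series:
  """Integrate each submission's performance-profile curve over tau.

  perf_df: rows are submissions, columns are tau values, entries are the
  fraction of workloads solved within a factor tau of the best time.
  Illustrative only; not the repository's compute_leaderboard_score.
  """
  taus = perf_df.columns.to_numpy(dtype=float)
  scores = perf_df.apply(lambda row: np.trapz(row.to_numpy(), taus), axis=1)
  if normalize:
    # A profile that is 1.0 everywhere integrates to the full tau range,
    # so dividing maps scores into [0, 1].
    scores = scores / (taus[-1] - taus[0])
  return scores


# Toy example: submission 'a' hits every target at tau >= 1,
# submission 'b' only catches up by tau = 4.
profile = pd.DataFrame(
    {1.0: [1.0, 0.5], 2.0: [1.0, 0.75], 4.0: [1.0, 1.0]},
    index=['a', 'b'])
print(leaderboard_score_sketch(profile))
```

Normalizing maps scores into [0, 1], which reads more naturally on a leaderboard than a raw integral whose magnitude depends on the tau range.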