diff --git a/base/base/contest.py b/base/base/contest.py index 40080ba0..9979ba34 100644 --- a/base/base/contest.py +++ b/base/base/contest.py @@ -98,11 +98,11 @@ def normalize(baseline_value: float, benchmark_value: float, metric_type: Metric return (relative_improvement * self.metric_weights.get(metric_type, 0)) / total_weight score = sum([ - normalize(baseline.generation_time, benchmark.model.generation_time, MetricType.GENERATION_TIME), - normalize(baseline.size, benchmark.model.size, MetricType.SIZE), - normalize(baseline.vram_used, benchmark.model.vram_used, MetricType.VRAM_USED), - normalize(baseline.watts_used, benchmark.model.watts_used, MetricType.WATTS_USED), - normalize(baseline.load_time, benchmark.model.load_time, MetricType.LOAD_TIME) + normalize(baseline.generation_time, benchmark.metrics.generation_time, MetricType.GENERATION_TIME), + normalize(baseline.size, benchmark.metrics.size, MetricType.SIZE), + normalize(baseline.vram_used, benchmark.metrics.vram_used, MetricType.VRAM_USED), + normalize(baseline.watts_used, benchmark.metrics.watts_used, MetricType.WATTS_USED), + normalize(baseline.load_time, benchmark.metrics.load_time, MetricType.LOAD_TIME) ]) return score * similarity * self.metric_weights.get(MetricType.SIMILARITY_SCORE, 0) / total_weight diff --git a/base/testing/benchmarker.py b/base/testing/benchmarker.py index 82a6d5f3..11520c57 100644 --- a/base/testing/benchmarker.py +++ b/base/testing/benchmarker.py @@ -124,7 +124,7 @@ def benchmark_submissions(self, contest: Contest, submissions: dict[Key, Reposit average_benchmarking_time = self.get_average_benchmarking_time() if average_benchmarking_time: eta = (len(submissions) - len(self.benchmarks)) * average_benchmarking_time - logger.info(f"Average benchmark time: {average_benchmarking_time}, ETA: {timedelta(seconds=eta)}") + logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}") if self._is_done(submissions): logger.info("Benchmarking complete") diff --git a/validator/weight_setting/validator.py b/validator/weight_setting/validator.py index f4515866..d636c0f9 100644 --- a/validator/weight_setting/validator.py +++ b/validator/weight_setting/validator.py @@ -185,7 +185,7 @@ def update_benchmarks(self, benchmarking_results: list[BenchmarkingResults]): if average_benchmarking_time: eta = (len(self.contest_state.submissions) - len(self.contest_state.benchmarks)) * average_benchmarking_time - logger.info(f"Average benchmark time: {average_benchmarking_time}, ETA: {timedelta(seconds=eta)}") + logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}") def step(self): return self.contest_state.step if self.contest_state else 0