From 89e75c7b6d8d9e1d500ed4f511e91d28d5ddc5a4 Mon Sep 17 00:00:00 2001
From: Ashley Wright
Date: Thu, 28 Nov 2024 13:02:49 -0800
Subject: [PATCH 1/2] Fix score calculation

---
 base/base/contest.py                  | 10 +++++-----
 base/testing/benchmarker.py           |  2 +-
 validator/weight_setting/validator.py |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/base/base/contest.py b/base/base/contest.py
index 40080ba0..9979ba34 100644
--- a/base/base/contest.py
+++ b/base/base/contest.py
@@ -98,11 +98,11 @@ def normalize(baseline_value: float, benchmark_value: float, metric_type: Metric
             return (relative_improvement * self.metric_weights.get(metric_type, 0)) / total_weight
 
         score = sum([
-            normalize(baseline.generation_time, benchmark.model.generation_time, MetricType.GENERATION_TIME),
-            normalize(baseline.size, benchmark.model.size, MetricType.SIZE),
-            normalize(baseline.vram_used, benchmark.model.vram_used, MetricType.VRAM_USED),
-            normalize(baseline.watts_used, benchmark.model.watts_used, MetricType.WATTS_USED),
-            normalize(baseline.load_time, benchmark.model.load_time, MetricType.LOAD_TIME)
+            normalize(baseline.generation_time, benchmark.metrics.generation_time, MetricType.GENERATION_TIME),
+            normalize(baseline.size, benchmark.metrics.size, MetricType.SIZE),
+            normalize(baseline.vram_used, benchmark.metrics.vram_used, MetricType.VRAM_USED),
+            normalize(baseline.watts_used, benchmark.metrics.watts_used, MetricType.WATTS_USED),
+            normalize(baseline.load_time, benchmark.metrics.load_time, MetricType.LOAD_TIME)
         ])
 
         return score * similarity * self.metric_weights.get(MetricType.SIMILARITY_SCORE, 0) / total_weight
diff --git a/base/testing/benchmarker.py b/base/testing/benchmarker.py
index 82a6d5f3..11520c57 100644
--- a/base/testing/benchmarker.py
+++ b/base/testing/benchmarker.py
@@ -124,7 +124,7 @@ def benchmark_submissions(self, contest: Contest, submissions: dict[Key, Reposit
         average_benchmarking_time = self.get_average_benchmarking_time()
         if average_benchmarking_time:
             eta = (len(submissions) - len(self.benchmarks)) * average_benchmarking_time
-            logger.info(f"Average benchmark time: {average_benchmarking_time}, ETA: {timedelta(seconds=eta)}")
+            logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
 
         if self._is_done(submissions):
             logger.info("Benchmarking complete")
diff --git a/validator/weight_setting/validator.py b/validator/weight_setting/validator.py
index f4515866..d636c0f9 100644
--- a/validator/weight_setting/validator.py
+++ b/validator/weight_setting/validator.py
@@ -185,7 +185,7 @@ def update_benchmarks(self, benchmarking_results: list[BenchmarkingResults]):
 
         if average_benchmarking_time:
             eta = (len(self.contest_state.submissions) - len(self.contest_state.benchmarks)) * average_benchmarking_time
-            logger.info(f"Average benchmark time: {average_benchmarking_time}, ETA: {timedelta(seconds=eta)}")
+            logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
 
     def step(self):
         return self.contest_state.step if self.contest_state else 0

From 698d54eb611a8a63383ab887858371e024d1dce0 Mon Sep 17 00:00:00 2001
From: Ashley Wright
Date: Thu, 28 Nov 2024 14:15:05 -0800
Subject: [PATCH 2/2] Fix wandb, fix trying to send to inactive contest

---
 base/testing/benchmarker.py                  |  5 +-
 validator/weight_setting/benchmarking_api.py |  4 +-
 validator/weight_setting/validator.py        | 49 +++++++++++---------
 3 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/base/testing/benchmarker.py b/base/testing/benchmarker.py
index 11520c57..7d4970de 100644
--- a/base/testing/benchmarker.py
+++ b/base/testing/benchmarker.py
@@ -123,8 +123,9 @@ def benchmark_submissions(self, contest: Contest, submissions: dict[Key, Reposit
 
         average_benchmarking_time = self.get_average_benchmarking_time()
         if average_benchmarking_time:
-            eta = (len(submissions) - len(self.benchmarks)) * average_benchmarking_time
-            logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
+            submissions_left = len(submissions) - len(self.benchmarks)
+            eta = submissions_left * average_benchmarking_time
+            logger.info(f"{submissions_left}/{len(submissions)} benchmarked. Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
 
         if self._is_done(submissions):
             logger.info("Benchmarking complete")
diff --git a/validator/weight_setting/benchmarking_api.py b/validator/weight_setting/benchmarking_api.py
index f8554025..c91e9b3f 100644
--- a/validator/weight_setting/benchmarking_api.py
+++ b/validator/weight_setting/benchmarking_api.py
@@ -9,7 +9,7 @@
 from substrateinterface import Keypair
 
 from base.checkpoint import Key, Uid, Submissions
-from base.contest import ContestId, RepositoryInfo
+from base.contest import ContestId, RepositoryInfo, ACTIVE_CONTESTS
 from base_validator.api_data import BenchmarkingStartRequest, ApiMetadata, BenchmarkingResults, BenchmarkingInitializeRequest
 
 logger = get_logger(__name__)
@@ -105,7 +105,7 @@ def send_submissions_to_api(version: str, all_apis: list[BenchmarkingApi], submi
             contest_api_assignment[lowest_contest_id].append(api)
 
     for contest_id, apis in contest_api_assignment.items():
-        if contest_id not in submissions_by_contest:
+        if contest_id not in submissions_by_contest and contest_id in ACTIVE_CONTESTS:
             raise RuntimeError(f"No API compatible with contest type {contest_id.name}")
 
         contest_submissions = submissions_by_contest[contest_id]
diff --git a/validator/weight_setting/validator.py b/validator/weight_setting/validator.py
index d636c0f9..a2443df3 100644
--- a/validator/weight_setting/validator.py
+++ b/validator/weight_setting/validator.py
@@ -66,14 +66,7 @@ class Validator:
         netuid=metagraph.netuid,
     )
 
-    wandb_manager: WandbManager = WandbManager(
-        config=config,
-        validator_version=validator_version,
-        uid=metagraph.netuid,
-        netuid=metagraph.netuid,
-        hotkey=keypair.ss58_address,
-        signature=signature,
-    )
+    wandb_manager: WandbManager
 
     weight_setter: WeightSetter
     benchmarking_apis: list[BenchmarkingApi]
@@ -82,6 +75,22 @@ def __init__(self):
         self.metagraph.sync_nodes()
         self.uid = list(self.metagraph.nodes.keys()).index(self.keypair.ss58_address)
 
+        init_open_telemetry_logging({
+            "neuron.uid": self.uid,
+            "neuron.signature": self.signature,
+            "subtensor.chain_endpoint": self.substrate.url,
+            "validator.version": self.validator_version,
+        })
+
+        self.wandb_manager = WandbManager(
+            config=self.config,
+            validator_version=self.validator_version,
+            uid=self.uid,
+            netuid=self.metagraph.netuid,
+            hotkey=self.keypair.ss58_address,
+            signature=self.signature,
+        )
+
         self.weight_setter: WeightSetter = WeightSetter(
             version=self.validator_version,
             epoch_length=self.config["epoch_length"],
@@ -92,17 +101,13 @@ def __init__(self):
             contest_state=lambda: self.contest_state,
         )
 
-        self.contest_state = self.state_manager.load_state()
+        contest_state = self.state_manager.load_state()
+        if contest_state:
+            self.contest_state = contest_state
+            self.wandb_manager.init_wandb(self.contest_state)
 
         self.benchmarking_apis = [BenchmarkingApi(api=api, keypair=self.keypair) for api in self.config["benchmarker_api"]]
 
-        init_open_telemetry_logging({
-            "neuron.uid": self.uid,
-            "neuron.signature": self.signature,
-            "subtensor.chain_endpoint": self.substrate.url,
-            "validator.version": self.validator_version,
-        })
-
         self.run()
 
     @tracer.start_as_current_span("initialize_contest")
@@ -168,11 +173,10 @@ def update_benchmarks(self, benchmarking_results: list[BenchmarkingResults]):
         baseline = benchmarking_results[0].baseline
         average_benchmarking_time = benchmarking_results[0].average_benchmarking_time
 
-        if self.contest_state.baseline != baseline:
+        if baseline and baseline != self.contest_state.baseline:
             logger.info(f"Updating baseline to {baseline}")
             self.contest_state.baseline = baseline
 
-        self.contest_state.average_benchmarking_time = average_benchmarking_time
 
         for result in benchmarking_results:
             for key in result.benchmarks.keys() - self.contest_state.benchmarks.keys():
@@ -183,9 +187,12 @@ def update_benchmarks(self, benchmarking_results: list[BenchmarkingResults]):
             self.contest_state.benchmarks.update(result.benchmarks)
             self.contest_state.invalid_submissions.update(result.invalid_submissions)
 
-        if average_benchmarking_time:
-            eta = (len(self.contest_state.submissions) - len(self.contest_state.benchmarks)) * average_benchmarking_time
-            logger.info(f"Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
+        if average_benchmarking_time and average_benchmarking_time != self.contest_state.average_benchmarking_time:
+            submissions_left = len(self.contest_state.submissions) - len(self.contest_state.benchmarks)
+            eta = submissions_left * average_benchmarking_time
+            logger.info(f"{submissions_left}/{len(self.contest_state.submissions)} benchmarked. Average benchmark time: {average_benchmarking_time:.2f}s, ETA: {timedelta(seconds=eta)}")
+
+        self.contest_state.average_benchmarking_time = average_benchmarking_time
 
     def step(self):
         return self.contest_state.step if self.contest_state else 0