From ee09a3ffa6b56d9530d23a9371772915ab98e978 Mon Sep 17 00:00:00 2001
From: Ashley Wright
Date: Tue, 22 Oct 2024 05:41:53 +0300
Subject: [PATCH] Handle inference errors better (#68)

---
 miner/miner/submit.py                        |  5 +-
 neuron/neuron/__init__.py                    |  1 -
 neuron/neuron/submission_tester/__init__.py  |  2 +-
 neuron/neuron/submission_tester/metrics.py   |  2 +-
 neuron/neuron/submission_tester/testing.py   | 90 ++++++++++---------
 .../neuron/submission_tester/vram_monitor.py |  2 +-
 validator/base_validator/__init__.py         |  2 +-
 validator/submission_tester/benchmarker.py   |  2 +-
 validator/weight_setting/deduplication.py    |  2 +-
 validator/weight_setting/validator.py        |  2 +-
 10 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/miner/miner/submit.py b/miner/miner/submit.py
index 639cc435..e003bb61 100644
--- a/miner/miner/submit.py
+++ b/miner/miner/submit.py
@@ -22,15 +22,16 @@
     make_submission,
     random_inputs,
     ModelRepositoryInfo,
-    BaselineBenchmark,
     TextToImageRequest,
-    MetricData,
     GenerationOutput,
     BENCHMARKS_VERSION,
 )
+
 from neuron.submission_tester import (
     generate_baseline,
     compare_checkpoints,
+    BaselineBenchmark,
+    MetricData,
 )
 
 VALID_PROVIDER_REGEX = r'^[a-zA-Z0-9-.]+$'
diff --git a/neuron/neuron/__init__.py b/neuron/neuron/__init__.py
index e0dc60dd..a043c549 100644
--- a/neuron/neuron/__init__.py
+++ b/neuron/neuron/__init__.py
@@ -3,4 +3,3 @@
 from .config import *
 from .contest import *
 from .random_inputs import *
-from .submission_tester import *
diff --git a/neuron/neuron/submission_tester/__init__.py b/neuron/neuron/submission_tester/__init__.py
index 3a1d68e9..4bd6f8f6 100644
--- a/neuron/neuron/submission_tester/__init__.py
+++ b/neuron/neuron/submission_tester/__init__.py
@@ -3,4 +3,4 @@
 from .testing import *
 from .metrics import *
 from .vram_monitor import *
-from .hash import *
\ No newline at end of file
+from .hash import *
diff --git a/neuron/neuron/submission_tester/metrics.py b/neuron/neuron/submission_tester/metrics.py
index aadb3546..91b39e5d 100644
--- a/neuron/neuron/submission_tester/metrics.py
+++ b/neuron/neuron/submission_tester/metrics.py
@@ -2,8 +2,8 @@
 
 from pydantic import BaseModel
 
-from neuron import GenerationOutput
 from pipelines import TextToImageRequest
+from .. import GenerationOutput
 
 SIMILARITY_SCORE_THRESHOLD = 0.8
 
diff --git a/neuron/neuron/submission_tester/testing.py b/neuron/neuron/submission_tester/testing.py
index e80a21ec..e5322d9b 100644
--- a/neuron/neuron/submission_tester/testing.py
+++ b/neuron/neuron/submission_tester/testing.py
@@ -1,27 +1,28 @@
-import asyncio
 import logging
+from collections.abc import Iterable
 from concurrent.futures import ThreadPoolExecutor, CancelledError
+from io import BytesIO
 from pathlib import Path
 from statistics import mean
-from collections.abc import Iterable
-from io import BytesIO
 from threading import Event
 from time import perf_counter
-from .hash import load_image_hash, save_image_hash, GENERATION_TIME_DIFFERENCE_THRESHOLD
-from .metrics import CheckpointBenchmark, MetricData, BaselineBenchmark
 
 import imagehash
 from PIL import Image
 
-from neuron import (
+from pipelines import TextToImageRequest
+from . import InvalidSubmissionError
+from .hash import load_image_hash, save_image_hash, GENERATION_TIME_DIFFERENCE_THRESHOLD
+from .inference_sandbox import InferenceSandbox
+from .metrics import CheckpointBenchmark, MetricData, BaselineBenchmark
+from .vram_monitor import VRamMonitor
+from .. import (
     GenerationOutput,
     ModelRepositoryInfo,
     CURRENT_CONTEST,
-    Key, OutputComparator,
+    Key,
+    OutputComparator,
 )
-from .vram_monitor import VRamMonitor
-from pipelines import TextToImageRequest
-from .inference_sandbox import InferenceSandbox
 
 SANDBOX_DIRECTORY = Path("/sandbox")
 BASELINE_SANDBOX_DIRECTORY = Path("/baseline-sandbox")
@@ -116,49 +117,52 @@ def compare_checkpoints(
 
     image_hash = None
 
-    f"Take {len(inputs)} samples, keeping track of how fast/accurate generations have been"
-    for index, request in enumerate(inputs):
-        logger.info(f"Sample {index + 1}, prompt {request.prompt} and seed {request.seed}")
+    try:
+        f"Take {len(inputs)} samples, keeping track of how fast/accurate generations have been"
+        for index, request in enumerate(inputs):
+            logger.info(f"Sample {index + 1}, prompt {request.prompt} and seed {request.seed}")
 
-        if cancelled_event and cancelled_event.is_set():
-            raise CancelledError()
+            if cancelled_event and cancelled_event.is_set():
+                raise CancelledError()
 
-        output = generate(sandbox, request)
+            output = generate(sandbox, request)
 
-        if not image_hash:
-            with BytesIO(output.output) as data:
-                image_hash = imagehash.average_hash(Image.open(data))
+            if not image_hash:
+                with BytesIO(output.output) as data:
+                    image_hash = imagehash.average_hash(Image.open(data))
 
-            image_hash_bytes = save_image_hash(image_hash)
+                image_hash_bytes = save_image_hash(image_hash)
 
-            match = next(
-                (
-                    (key, existing_benchmark)
-                    for key, existing_benchmark in existing_benchmarks
-                    if (
-                        existing_benchmark and
-                        not (image_hash - load_image_hash(existing_benchmark.image_hash)) and
-                        abs(output.generation_time - existing_benchmark.model.generation_time) < GENERATION_TIME_DIFFERENCE_THRESHOLD
-                    )
-                ),
-                None,
-            )
+                match = next(
+                    (
+                        (key, existing_benchmark)
+                        for key, existing_benchmark in existing_benchmarks
+                        if (
+                            existing_benchmark and
+                            not (image_hash - load_image_hash(existing_benchmark.image_hash)) and
+                            abs(output.generation_time - existing_benchmark.model.generation_time) < GENERATION_TIME_DIFFERENCE_THRESHOLD
+                        )
+                    ),
+                    None,
+                )
 
-            if match:
-                key, benchmark = match
+                if match:
+                    key, benchmark = match
 
-                logger.info(f"Submission {submission} marked as duplicate of hotkey {key}'s submission")
+                    logger.info(f"Submission {submission} marked as duplicate of hotkey {key}'s submission")
 
-                return benchmark
+                    return benchmark
 
-        logger.info(
-            f"Sample {index + 1} Generated\n"
-            f"Generation Time: {output.generation_time}s\n"
-            f"VRAM Usage: {output.vram_used}b\n"
-            f"Power Usage: {output.watts_used}W"
-        )
+            logger.info(
+                f"Sample {index + 1} Generated\n"
+                f"Generation Time: {output.generation_time}s\n"
+                f"VRAM Usage: {output.vram_used}b\n"
+                f"Power Usage: {output.watts_used}W"
+            )
 
-        outputs.append(output)
+            outputs.append(output)
+    except Exception as e:
+        raise InvalidSubmissionError(f"Failed to run inference on {submission}") from e
 
     average_time = sum(output.generation_time for output in outputs) / len(outputs)
     vram_used = max(output.vram_used for output in outputs)
diff --git a/neuron/neuron/submission_tester/vram_monitor.py b/neuron/neuron/submission_tester/vram_monitor.py
index e25e9b5a..988a454d 100644
--- a/neuron/neuron/submission_tester/vram_monitor.py
+++ b/neuron/neuron/submission_tester/vram_monitor.py
@@ -1,7 +1,7 @@
 import threading
 import time
 
-from neuron import Contest
+from .. import Contest
 
 POLL_RATE_SECONDS = 0.1
 
diff --git a/validator/base_validator/__init__.py b/validator/base_validator/__init__.py
index 188c775e..b912a058 100644
--- a/validator/base_validator/__init__.py
+++ b/validator/base_validator/__init__.py
@@ -1,3 +1,3 @@
 from .metrics import *
 
-API_VERSION = "4.2.0"
+API_VERSION = "4.2.1"
diff --git a/validator/submission_tester/benchmarker.py b/validator/submission_tester/benchmarker.py
index 61d3ca63..d7409fe6 100644
--- a/validator/submission_tester/benchmarker.py
+++ b/validator/submission_tester/benchmarker.py
@@ -12,6 +12,7 @@
     MetricData,
     compare_checkpoints,
     generate_baseline,
+    InvalidSubmissionError,
 )
 
 from neuron import (
@@ -19,7 +20,6 @@
     ModelRepositoryInfo,
     TIMEZONE,
     random_inputs,
-    InvalidSubmissionError,
 )
 
 from pipelines import TextToImageRequest
diff --git a/validator/weight_setting/deduplication.py b/validator/weight_setting/deduplication.py
index 5486f968..adbacbed 100644
--- a/validator/weight_setting/deduplication.py
+++ b/validator/weight_setting/deduplication.py
@@ -3,7 +3,7 @@
 
 from imagehash import ImageHash
 
-from neuron import GENERATION_TIME_DIFFERENCE_THRESHOLD
+from neuron.submission_tester import GENERATION_TIME_DIFFERENCE_THRESHOLD
 
 
 @dataclass
diff --git a/validator/weight_setting/validator.py b/validator/weight_setting/validator.py
index ddb7d1d4..0a490ba7 100644
--- a/validator/weight_setting/validator.py
+++ b/validator/weight_setting/validator.py
@@ -51,7 +51,7 @@
 from .wandb_args import add_wandb_args
 from .winner_selection import get_scores, get_contestant_scores
 
-VALIDATOR_VERSION: tuple[int, int, int] = (4, 2, 0)
+VALIDATOR_VERSION: tuple[int, int, int] = (4, 2, 1)
 VALIDATOR_VERSION_STRING = ".".join(map(str, VALIDATOR_VERSION))
 
 WEIGHTS_VERSION = (
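
With this patch, compare_checkpoints no longer lets arbitrary exceptions from a
miner's submission escape: the whole sampling loop is wrapped in try/except and
any failure is re-raised as InvalidSubmissionError, which the validator now
imports from neuron.submission_tester (see the benchmarker.py hunk). Below is a
minimal sketch of how a caller might consume that error path. Only
compare_checkpoints and InvalidSubmissionError are names exported by this
patch; run_benchmark is a hypothetical wrapper, and the passthrough of
arguments is assumed since the full signature is not shown here.

    import logging

    from neuron.submission_tester import compare_checkpoints, InvalidSubmissionError

    logger = logging.getLogger(__name__)

    def run_benchmark(*args, **kwargs):
        # Hypothetical wrapper: a broken submission becomes a "no benchmark"
        # result instead of crashing the validator's benchmarking loop.
        try:
            return compare_checkpoints(*args, **kwargs)
        except InvalidSubmissionError:
            logger.exception("Submission failed inference; marking it invalid")
            return None

The design choice is that a submission that crashes during inference is a
scoring outcome (an invalid entry) rather than a validator fault, which is also
why the patch keeps CancelledError distinct: cancellation is raised before the
try block's work proceeds and is deliberately swallowed into the same
InvalidSubmissionError path only if it escapes the loop.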