Reward top winner a bigger cut, with a higher threshold for what defines a tier #69

Merged
merged 11 commits on Oct 22, 2024
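Alongside removing the image-hash deduplication path, this PR groups contestants into explicit score tiers: a submission starts a new tier only when it beats the current tier's reference score by more than TIER_SCORE_IMPROVEMENT_THRESHOLD (1.05 in the diff below). A minimal sketch of that grouping, mirroring the get_tiers() function added in validator/weight_setting/winner_selection.py — the UIDs and scores here are hypothetical, for illustration only:

```python
# Sketch of the tier grouping introduced in validator/weight_setting/winner_selection.py
# (see the get_tiers() diff below). UIDs and scores are hypothetical.
TIER_SCORE_IMPROVEMENT_THRESHOLD = 1.05

# (uid, score) pairs as produced by get_contestant_scores(), sorted by score.
contestants = [(5, 1.00), (9, 1.03), (7, 1.10), (3, 1.21)]

tiers = [[]]
last_tier_score = contestants[0][1]

for uid, score in contestants:
    if score > last_tier_score * TIER_SCORE_IMPROVEMENT_THRESHOLD:
        # Score improved by more than 5% over the tier's reference score: start a new tier.
        last_tier_score = score
        tiers.append([])

    tiers[-1].append(uid)

print(tiers)  # [[5, 9], [7], [3]]
```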
2 changes: 1 addition & 1 deletion miner/miner/submit.py
@@ -147,6 +147,7 @@ def start_benchmarking(submission: CheckpointSubmission):
baseline = generate_baseline(
inputs,
BASELINE_MODEL_DIRECTORY,
switch_user=False,
cache=True,
)
save_baseline_cache(baseline)
@@ -159,7 +160,6 @@ def start_benchmarking(submission: CheckpointSubmission):

compare_checkpoints(
ModelRepositoryInfo(url=submission.get_repo_link(), revision=submission.revision),
[],
inputs,
baseline,
MODEL_DIRECTORY,
1 change: 0 additions & 1 deletion neuron/neuron/submission_tester/__init__.py
@@ -3,4 +3,3 @@
from .testing import *
from .metrics import *
from .vram_monitor import *
from .hash import *
20 changes: 0 additions & 20 deletions neuron/neuron/submission_tester/hash.py

This file was deleted.

1 change: 0 additions & 1 deletion neuron/neuron/submission_tester/metrics.py
@@ -25,7 +25,6 @@ class CheckpointBenchmark(BaseModel):
model: MetricData
average_similarity: float
min_similarity: float
image_hash: bytes

def calculate_score(self, baseline_metrics: MetricData) -> float:
if self.min_similarity < SIMILARITY_SCORE_THRESHOLD:
39 changes: 1 addition & 38 deletions neuron/neuron/submission_tester/testing.py
@@ -1,26 +1,19 @@
import logging
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, CancelledError
from io import BytesIO
from pathlib import Path
from statistics import mean
from threading import Event
from time import perf_counter

import imagehash
from PIL import Image

from pipelines import TextToImageRequest
from . import InvalidSubmissionError
from .hash import load_image_hash, save_image_hash, GENERATION_TIME_DIFFERENCE_THRESHOLD
from .inference_sandbox import InferenceSandbox
from .metrics import CheckpointBenchmark, MetricData, BaselineBenchmark
from .vram_monitor import VRamMonitor
from .. import (
GenerationOutput,
ModelRepositoryInfo,
CURRENT_CONTEST,
Key,
OutputComparator,
)

@@ -100,7 +93,6 @@ def generate_baseline(

def compare_checkpoints(
submission: ModelRepositoryInfo,
existing_benchmarks: Iterable[tuple[Key, CheckpointBenchmark | None]],
inputs: list[TextToImageRequest],
baseline: BaselineBenchmark,
sandbox_directory: Path = SANDBOX_DIRECTORY,
@@ -115,8 +107,6 @@ def compare_checkpoints(
with InferenceSandbox(submission, False, sandbox_directory, switch_user, cache) as sandbox:
size = sandbox.model_size

image_hash = None

try:
f"Take {len(inputs)} samples, keeping track of how fast/accurate generations have been"
for index, request in enumerate(inputs):
@@ -127,32 +117,6 @@

output = generate(sandbox, request)

if not image_hash:
with BytesIO(output.output) as data:
image_hash = imagehash.average_hash(Image.open(data))

image_hash_bytes = save_image_hash(image_hash)

match = next(
(
(key, existing_benchmark)
for key, existing_benchmark in existing_benchmarks
if (
existing_benchmark and
not (image_hash - load_image_hash(existing_benchmark.image_hash)) and
abs(output.generation_time - existing_benchmark.model.generation_time) < GENERATION_TIME_DIFFERENCE_THRESHOLD
)
),
None,
)

if match:
key, benchmark = match

logger.info(f"Submission {submission} marked as duplicate of hotkey {key}'s submission")

return benchmark

logger.info(
f"Sample {index + 1} Generated\n"
f"Generation Time: {output.generation_time}s\n"
@@ -171,7 +135,7 @@ def compare_checkpoints(
with CURRENT_CONTEST.output_comparator() as output_comparator:
def calculate_similarity(comparator: OutputComparator, baseline_output: GenerationOutput, optimized_output: GenerationOutput):
try:
if cancelled_event.is_set():
if cancelled_event and cancelled_event.is_set():
raise CancelledError()

return comparator(
@@ -205,7 +169,6 @@ def calculate_similarity(comparator: OutputComparator, baseline_output: Generati
),
average_similarity=average_similarity,
min_similarity=min_similarity,
image_hash=image_hash_bytes,
)

logger.info(
1 change: 0 additions & 1 deletion validator/submission_tester/benchmarker.py
@@ -62,7 +62,6 @@ def _benchmark_key(self, hotkey: Key):
try:
self.benchmarks[hotkey] = compare_checkpoints(
submission,
self.benchmarks.items(),
self.inputs,
self.baseline,
cancelled_event=self.cancelled_event,
47 changes: 0 additions & 47 deletions validator/weight_setting/deduplication.py

This file was deleted.

26 changes: 6 additions & 20 deletions validator/weight_setting/validator.py
@@ -42,16 +42,14 @@
BENCHMARKS_VERSION,
)
from neuron.submission_tester import (
load_image_hash,
CheckpointBenchmark,
MetricData,
)
from .benchmarking_api import BenchmarkingApi, benchmarking_api
from .deduplication import find_duplicates, PotentiallyDuplicateSubmissionInfo
from .wandb_args import add_wandb_args
from .winner_selection import get_scores, get_contestant_scores
from .winner_selection import get_scores, get_contestant_scores, get_tiers

VALIDATOR_VERSION: tuple[int, int, int] = (4, 2, 1)
VALIDATOR_VERSION: tuple[int, int, int] = (4, 3, 0)
VALIDATOR_VERSION_STRING = ".".join(map(str, VALIDATOR_VERSION))

WEIGHTS_VERSION = (
@@ -123,8 +121,6 @@ class Validator:
benchmarking_state: BenchmarkState
failed: set[int] = set() # for backwards depickling compatibility
invalid: dict[int, str]
hash_prompt: str
hash_seed: int
contest: Contest

def __init__(self):
@@ -550,7 +546,10 @@ def set_weights(self):

logger.info("Setting weights")

weights = get_scores(get_contestant_scores(self.benchmarks, self.baseline_metrics), len(self.metagraph.nodes))
contestants = get_contestant_scores(self.benchmarks, self.baseline_metrics)
tiers = get_tiers(contestants)
blocks = [info.block if info else None for info in self.contest_state.miner_info]
weights = get_scores(tiers, blocks, len(self.metagraph.nodes))

self.send_wandb_metrics()

@@ -848,19 +847,6 @@ async def do_step(self, block: int):
)
logger.info(self.benchmarks)

benchmark_duplicate_info = [
PotentiallyDuplicateSubmissionInfo(
image_hash=load_image_hash(benchmark.image_hash),
generation_time=benchmark.model.generation_time,
block=self.contest_state.miner_info[uid].block,
) if benchmark else None
for uid, benchmark in enumerate(self.benchmarks)
]

for duplicate_uid, original_uid in find_duplicates(benchmark_duplicate_info):
self.benchmarks[duplicate_uid] = None
self.invalid[duplicate_uid] = f"Duplicate of UID {original_uid}'s submission"

self.benchmarking = False
self.step += 1

47 changes: 41 additions & 6 deletions validator/weight_setting/winner_selection.py
@@ -1,8 +1,10 @@
from operator import itemgetter

from neuron import Uid
from neuron.submission_tester import CheckpointBenchmark, MetricData

TIER_SCORE_IMPROVEMENT_THRESHOLD = 1.05
WINNER_PERCENTAGE = 0.5


def get_contestant_scores(benchmarks: list[CheckpointBenchmark | None], baseline_metrics: MetricData):
@@ -17,23 +19,56 @@ def get_contestant_scores(benchmarks: list[CheckpointBenchmark | None], baseline
return sorted_contestants


def get_scores(contestants: list[tuple[int, float]], node_count: int) -> list[float]:
def get_tiers(contestants: list[tuple[Uid, float]]) -> list[list[Uid]]:
if not contestants:
return []

_, last_tier_score = contestants[0]

scores = [0.0] * node_count
tier = 1
tiers = [[]]

for contestant in contestants:
uid, score = contestant

if score > last_tier_score * TIER_SCORE_IMPROVEMENT_THRESHOLD:
# No longer in top threshold
# New tier
last_tier_score = score
tier += 1
tiers.append([])

tiers[-1].append(uid)

return tiers


def get_scores(tiers: list[list[Uid]], blocks: list[int | None], node_count: int) -> list[float]:
if not tiers:
return [1.0] * node_count

ordered_tiers = [
sorted(tier, key=blocks.__getitem__) for tier in tiers
]

modified_tiers = []

last_tier = None

for tier in ordered_tiers:
if last_tier:
modified_tiers.append([tier[0], *last_tier[1:]])
else:
modified_tiers.append([tier[0]])

last_tier = tier

modified_tiers.append(last_tier[1:])

scores = [0.0] * node_count

for index, tier in enumerate(modified_tiers):
incentive_pool = WINNER_PERCENTAGE * ((1 - WINNER_PERCENTAGE) ** index)
score = incentive_pool / len(tier)

scores[uid] = (score + 1) ** (tier * 0.75)
for uid in tier:
scores[uid] = score

return scores
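
For reference, a sketch of how the WINNER_PERCENTAGE split in the new get_scores() divides incentive across pools: each successive pool receives half of what remains (0.5, 0.25, 0.125, ...), and members of a pool share it evenly. The pool membership below is hypothetical, and it assumes index 0 is the pool that ends up holding the overall winner:

```python
# Illustration of the incentive_pool formula from the new get_scores() above.
# Pool membership is hypothetical; index 0 is assumed to be the winner's pool.
WINNER_PERCENTAGE = 0.5

pools = [[3], [7, 12], [5, 9, 11]]  # hypothetical UIDs grouped into payout pools

scores: dict[int, float] = {}

for index, pool in enumerate(pools):
    # 0.5, 0.25, 0.125, ... of the total incentive per pool
    incentive_pool = WINNER_PERCENTAGE * ((1 - WINNER_PERCENTAGE) ** index)

    for uid in pool:
        # members of a pool split it evenly
        scores[uid] = incentive_pool / len(pool)

print(scores)  # {3: 0.5, 7: 0.125, 12: 0.125, 5: ~0.042, 9: ~0.042, 11: ~0.042}
```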