Skip to content

Commit

Permalink
Add system info
Browse files Browse the repository at this point in the history
  • Loading branch information
MsRandom committed Dec 11, 2024
1 parent ac33225 commit 7ef6368
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 5 deletions.
1 change: 0 additions & 1 deletion base/base/contest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import math
from dataclasses import dataclass
from enum import IntEnum
from functools import partial
Expand Down
45 changes: 45 additions & 0 deletions base/base/system_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import psutil
from pydantic import BaseModel


class SystemInfo(BaseModel):
    """Hardware description of a machine: CPU, core counts, RAM and GPU.

    Instances are built by ``get_system_info()`` and serialized over the
    ``/hardware`` API route.
    """

    # CPU model string; get_system_info() takes it from the "model name"
    # entry of /proc/cpuinfo.
    cpu: str
    # CPU frequency limits and current value as reported by psutil.cpu_freq()
    # (field names indicate MHz).
    min_frequency_mhz: float
    max_frequency_mhz: float
    current_frequency_mhz: float
    # Core counts from psutil.cpu_count(logical=False) / (logical=True).
    physical_cores: int
    total_cores: int
    # Total memory from psutil.virtual_memory().total
    # (presumably bytes, per psutil's documentation -- confirm before display).
    ram: int
    # Name of CUDA device 0 as returned by torch.cuda.get_device_name(0).
    gpu: str


def _read_cpu_model(cpuinfo_path: str = "/proc/cpuinfo") -> str:
    """Return the CPU model name listed in *cpuinfo_path*, or ``"Unknown"``.

    The first line containing ``model name`` wins. Everything after the first
    colon is kept, so model names that themselves contain a colon are not
    truncated. A missing/unreadable file or a file without such a line yields
    ``"Unknown"`` instead of raising.
    """
    try:
        with open(cpuinfo_path, "r") as f:
            for line in f:
                if "model name" not in line:
                    continue
                _, separator, value = line.partition(":")
                if separator:
                    return value.strip()
    except OSError:
        # /proc/cpuinfo only exists on Linux; treat its absence as "unknown".
        pass
    return "Unknown"


def get_system_info() -> SystemInfo:
    """Collect CPU, memory and GPU details of the current machine.

    Returns:
        A ``SystemInfo`` populated from ``/proc/cpuinfo``, ``psutil`` and
        ``torch.cuda``. Values that the platform cannot report fall back to
        neutral placeholders (``"Unknown"`` / ``0`` / ``0.0``) so that the
        result always validates.
    """
    cpu = _read_cpu_model()

    # psutil.cpu_freq() returns None when the frequency cannot be determined
    # (e.g. some VMs and containers).
    cpu_frequency = psutil.cpu_freq()
    if cpu_frequency is None:
        min_frequency = max_frequency = current_frequency = 0.0
    else:
        min_frequency = cpu_frequency.min
        max_frequency = cpu_frequency.max
        current_frequency = cpu_frequency.current

    # psutil.cpu_count() may return None if the count is undetermined; the
    # model requires ints, so fall back to the logical count (or 0).
    total_cores = psutil.cpu_count(logical=True) or 0
    physical_cores = psutil.cpu_count(logical=False) or total_cores

    ram = psutil.virtual_memory().total

    # Imported lazily so that importing this module does not pull in torch.
    import torch
    if torch.cuda.is_available():
        gpu = torch.cuda.get_device_name(0)
    else:
        # get_device_name(0) raises when no CUDA device is present.
        gpu = "Unknown"

    return SystemInfo(
        cpu=cpu,
        min_frequency_mhz=min_frequency,
        max_frequency_mhz=max_frequency,
        current_frequency_mhz=current_frequency,
        physical_cores=physical_cores,
        total_cores=total_cores,
        ram=ram,
        gpu=gpu,
    )
2 changes: 1 addition & 1 deletion validator/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "edge-maxxing-validator"
description = "The validator which checks models and checkpoints provided by miners"
requires-python = ">=3.10,<3.13"
version = "5.6.7"
version = "5.6.8"
dependencies = [
"edge-maxxing-base==1.0.0",
"opentelemetry-api>=1.28.2",
Expand Down
4 changes: 4 additions & 0 deletions validator/submission_tester/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from base_validator.api_data import BenchmarkingStartRequest, BenchmarkingResults, ApiMetadata, BenchmarkingInitializeRequest
from base_validator.auto_updater import AutoUpdater
from base_validator.telemetry import init_open_telemetry_logging
from base.system_info import get_system_info, SystemInfo
from testing.benchmarker import Benchmarker

hotkey = os.getenv("VALIDATOR_HOTKEY_SS58_ADDRESS")
Expand Down Expand Up @@ -128,6 +129,9 @@ def metadata(request: Request) -> ApiMetadata:
compatible_contests=request.state.compatible_contests,
)

@app.get("/hardware")
def hardware() -> SystemInfo:
    """Serve the hardware description of the machine running this API."""
    system_info = get_system_info()
    return system_info

@app.post("/initialize")
def initialize(
Expand Down
6 changes: 6 additions & 0 deletions validator/weight_setting/benchmarking_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from base.contest import ContestId, RepositoryInfo
from base.inputs_api import get_inputs_state
from base_validator.api_data import BenchmarkingStartRequest, ApiMetadata, BenchmarkingResults, BenchmarkingInitializeRequest
from base.system_info import SystemInfo

logger = get_logger(__name__)

Expand Down Expand Up @@ -60,6 +61,11 @@ def metadata(self) -> ApiMetadata:
response.raise_for_status()
return ApiMetadata.model_validate(response.json())

def hardware(self) -> SystemInfo:
    """Query the benchmarking API's ``/hardware`` route.

    Returns:
        The JSON reply validated into a ``SystemInfo``.

    Raises:
        requests.HTTPError: if the API answers with an error status
            (via ``raise_for_status``).
    """
    url = f"{self._api}/hardware"
    reply = requests.get(url)
    reply.raise_for_status()
    payload = reply.json()
    return SystemInfo.model_validate(payload)

def results(self) -> BenchmarkingResults:
response = requests.get(f"{self._api}/state")
response.raise_for_status()
Expand Down
9 changes: 7 additions & 2 deletions validator/weight_setting/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from base.contest import BenchmarkState
from base.inputs_api import get_inputs_state
from base.submissions import get_submissions
from base.system_info import SystemInfo
from base_validator.api_data import BenchmarkingResults
from base_validator.auto_updater import AutoUpdater
from base_validator.telemetry import init_open_telemetry_logging
Expand Down Expand Up @@ -70,6 +71,7 @@ class Validator:

weight_setter: WeightSetter
benchmarking_apis: list[BenchmarkingApi]
api_hardware: list[SystemInfo] = []

def __init__(self):
self.metagraph.sync_nodes()
Expand Down Expand Up @@ -107,6 +109,7 @@ def __init__(self):
keypair=self.keypair,
uid=self.uid,
contest_state=lambda: self.contest_state,
api_hardware=self.api_hardware,
wandb_manager=self.wandb_manager,
)

Expand All @@ -115,13 +118,15 @@ def __init__(self):
self.run()

def initialize_apis(self, untested_submissions: Submissions):
self.api_hardware.clear()
for api in self.benchmarking_apis:
api.initialize(
uid=self.uid,
signature=self.signature,
netuid=self.metagraph.netuid,
substrate_url=self.substrate.url,
)
self.api_hardware.append(api.hardware())
send_submissions_to_api(
version=self.validator_version,
all_apis=self.benchmarking_apis,
Expand Down Expand Up @@ -167,7 +172,7 @@ def do_step(self):
if not untested_submissions:
self.contest_state.benchmarking_state = BenchmarkState.FINISHED
self.state_manager.save_state(self.contest_state)
self.wandb_manager.send_metrics(self.contest_state)
self.wandb_manager.send_metrics(self.contest_state, self.api_hardware)
self.contest_state.sleep_to_next_contest(self._stop_flag)
return

Expand Down Expand Up @@ -228,7 +233,7 @@ def run(self):
if self.contest_state:
self.contest_state.step += 1
self.state_manager.save_state(self.contest_state)
self.wandb_manager.send_metrics(self.contest_state)
self.wandb_manager.send_metrics(self.contest_state, self.api_hardware)
except (ConnectionError, HTTPError) as e:
logger.error(f"Error connecting to API, retrying in 10 blocks: {e}")
self._stop_flag.wait(BENCHMARK_UPDATE_RATE_BLOCKS * 12)
Expand Down
3 changes: 3 additions & 0 deletions validator/weight_setting/wandb_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from wandb.apis.public import Run

from base.checkpoint import Uid, Key
from base.system_info import SystemInfo
from .contest_state import ContestState


Expand Down Expand Up @@ -69,6 +70,7 @@ def init_wandb(self, contest_state: ContestState):
def send_metrics(
self,
contest_state: ContestState,
api_hardware: list[SystemInfo],
scores: dict[Key, float] | None = None,
ranks: dict[Key, int] | None = None
):
Expand All @@ -77,6 +79,7 @@ def send_metrics(

data = {
"scores": scores or contest_state.get_scores(contest_state.benchmarks),
"api_hardware": [api.model_dump() for api in api_hardware],
"ranks": ranks or contest_state.get_ranks(scores),
"num_gpus": len(self.config["benchmarker_api"]),
} | contest_state.model_dump()
Expand Down
6 changes: 5 additions & 1 deletion validator/weight_setting/weight_setter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from substrateinterface import SubstrateInterface, Keypair

from base.inputs_api import get_blacklist, get_inputs_state
from base.system_info import SystemInfo
from weight_setting.contest_state import ContestState
from weight_setting.wandb_manager import WandbManager

Expand All @@ -26,6 +27,7 @@ class WeightSetter:
_keypair: Keypair
_uid: int
_contest_state: Callable[[], ContestState]
_api_hardware: list[SystemInfo]
_wandb_manager: WandbManager
_weights_version: int

Expand All @@ -38,6 +40,7 @@ def __init__(
keypair: Keypair,
uid: int,
contest_state: Callable[[], ContestState],
api_hardware: list[SystemInfo],
wandb_manager: WandbManager,
):
self._epoch_length = epoch_length
Expand All @@ -46,6 +49,7 @@ def __init__(
self._keypair = keypair
self._uid = uid
self._contest_state = contest_state
self._api_hardware = api_hardware
self._wandb_manager = wandb_manager

parts: list[str] = version.split(".")
Expand Down Expand Up @@ -114,7 +118,7 @@ def set_weights(self) -> bool:

weights_by_key = contest_state.calculate_weights(ranks=ranks)

self._wandb_manager.send_metrics(contest_state, scores, ranks)
self._wandb_manager.send_metrics(contest_state, self._api_hardware, scores, ranks)
return self._set_weights([
weights_by_key.get(key, 0)
for key in self._metagraph.nodes.keys()
Expand Down

0 comments on commit 7ef6368

Please sign in to comment.