Add system info

womboai · Dec 11, 2024 · 7ef6368 · 7ef6368
1 parent ac33225
commit 7ef6368
Show file tree

Hide file tree

Showing 8 changed files with 71 additions and 5 deletions.
diff --git a/base/base/contest.py b/base/base/contest.py
@@ -1,4 +1,3 @@
-import math
 from dataclasses import dataclass
 from enum import IntEnum
 from functools import partial

diff --git a/base/base/system_info.py b/base/base/system_info.py
@@ -0,0 +1,45 @@
+import psutil
+from pydantic import BaseModel
+
+
+class SystemInfo(BaseModel):
+    # Hardware description of one machine, as assembled by get_system_info().
+    # CPU model string parsed from the "model name" line of /proc/cpuinfo.
+    cpu: str
+    # Minimum, maximum and current CPU clock as reported by psutil.cpu_freq().
+    min_frequency_mhz: float
+    max_frequency_mhz: float
+    current_frequency_mhz: float
+    # psutil.cpu_count(logical=False).
+    physical_cores: int
+    # psutil.cpu_count(logical=True).
+    total_cores: int
+    # psutil.virtual_memory().total -- presumably bytes; confirm against psutil docs.
+    ram: int
+    # Name of CUDA device 0 as returned by torch.cuda.get_device_name(0).
+    gpu: str
+
+
+def get_system_info() -> SystemInfo:
+    """Collect a snapshot of the host's CPU, memory and GPU details.
+
+    Probes that the platform cannot answer fall back to a sentinel instead
+    of raising: ``"unknown"`` for the CPU model, ``0.0`` for the CPU
+    frequencies, ``0`` for the core counts and ``"none"`` for the GPU name.
+
+    Returns:
+        A populated ``SystemInfo``.
+
+    Raises:
+        OSError: If ``/proc/cpuinfo`` cannot be opened.
+    """
+    # Pre-seed the model so a cpuinfo file without a "model name" line does
+    # not leave the variable unbound.
+    cpu = "unknown"
+    with open("/proc/cpuinfo", "r") as f:
+        for line in f:
+            if "model name" in line:
+                # Split on the first ":" only, keeping any later colons that
+                # are part of the model string itself.
+                cpu = line.partition(":")[2].strip()
+                break
+
+    # psutil.cpu_freq() returns None when the frequency cannot be determined.
+    cpu_frequency = psutil.cpu_freq()
+    if cpu_frequency is None:
+        min_frequency = max_frequency = current_frequency = 0.0
+    else:
+        min_frequency = cpu_frequency.min
+        max_frequency = cpu_frequency.max
+        current_frequency = cpu_frequency.current
+
+    # psutil.cpu_count() returns None when undetermined; the model needs ints.
+    physical_cores = psutil.cpu_count(logical=False) or 0
+    total_cores = psutil.cpu_count(logical=True) or 0
+
+    ram = psutil.virtual_memory().total
+
+    # Imported here rather than at module level -- presumably so that modules
+    # importing only SystemInfo do not need torch installed; confirm.
+    import torch
+    # get_device_name(0) raises when no CUDA device is usable, so check first.
+    gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "none"
+
+    return SystemInfo(
+        cpu=cpu,
+        min_frequency_mhz=min_frequency,
+        max_frequency_mhz=max_frequency,
+        current_frequency_mhz=current_frequency,
+        physical_cores=physical_cores,
+        total_cores=total_cores,
+        ram=ram,
+        gpu=gpu,
+    )
diff --git a/validator/pyproject.toml b/validator/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "edge-maxxing-validator"
 description = "The validator which checks models and checkpoints provided by miners"
 requires-python = ">=3.10,<3.13"
-version = "5.6.7"
+version = "5.6.8"
 dependencies = [
     "edge-maxxing-base==1.0.0",
     "opentelemetry-api>=1.28.2",

diff --git a/validator/submission_tester/api.py b/validator/submission_tester/api.py
@@ -16,6 +16,7 @@
 from base_validator.api_data import BenchmarkingStartRequest, BenchmarkingResults, ApiMetadata, BenchmarkingInitializeRequest
 from base_validator.auto_updater import AutoUpdater
 from base_validator.telemetry import init_open_telemetry_logging
+from base.system_info import get_system_info, SystemInfo
 from testing.benchmarker import Benchmarker
 
 hotkey = os.getenv("VALIDATOR_HOTKEY_SS58_ADDRESS")
@@ -128,6 +129,9 @@ def metadata(request: Request) -> ApiMetadata:
         compatible_contests=request.state.compatible_contests,
     )
 
+@app.get("/hardware")
+def hardware() -> SystemInfo:
+    """Return the system information of the machine running this API process."""
+    system_info = get_system_info()
+    return system_info
 
 @app.post("/initialize")
 def initialize(

diff --git a/validator/weight_setting/benchmarking_api.py b/validator/weight_setting/benchmarking_api.py
@@ -12,6 +12,7 @@
 from base.contest import ContestId, RepositoryInfo
 from base.inputs_api import get_inputs_state
 from base_validator.api_data import BenchmarkingStartRequest, ApiMetadata, BenchmarkingResults, BenchmarkingInitializeRequest
+from base.system_info import SystemInfo
 
 logger = get_logger(__name__)
 
@@ -60,6 +61,11 @@ def metadata(self) -> ApiMetadata:
         response.raise_for_status()
         return ApiMetadata.model_validate(response.json())
 
+    def hardware(self) -> SystemInfo:
+        """Query this API's ``/hardware`` endpoint and parse the JSON reply.
+
+        An HTTP error status is surfaced through ``raise_for_status``.
+        """
+        reply = requests.get(f"{self._api}/hardware")
+        reply.raise_for_status()
+        payload = reply.json()
+        return SystemInfo.model_validate(payload)
+
     def results(self) -> BenchmarkingResults:
         response = requests.get(f"{self._api}/state")
         response.raise_for_status()

diff --git a/validator/weight_setting/validator.py b/validator/weight_setting/validator.py
@@ -17,6 +17,7 @@
 from base.contest import BenchmarkState
 from base.inputs_api import get_inputs_state
 from base.submissions import get_submissions
+from base.system_info import SystemInfo
 from base_validator.api_data import BenchmarkingResults
 from base_validator.auto_updater import AutoUpdater
 from base_validator.telemetry import init_open_telemetry_logging
@@ -70,6 +71,7 @@ class Validator:
 
     weight_setter: WeightSetter
     benchmarking_apis: list[BenchmarkingApi]
+    api_hardware: list[SystemInfo] = []
 
     def __init__(self):
         self.metagraph.sync_nodes()
@@ -107,6 +109,7 @@ def __init__(self):
             keypair=self.keypair,
             uid=self.uid,
             contest_state=lambda: self.contest_state,
+            api_hardware=self.api_hardware,
             wandb_manager=self.wandb_manager,
         )
 
@@ -115,13 +118,15 @@ def __init__(self):
         self.run()
 
     def initialize_apis(self, untested_submissions: Submissions):
+        self.api_hardware.clear()
         for api in self.benchmarking_apis:
             api.initialize(
                 uid=self.uid,
                 signature=self.signature,
                 netuid=self.metagraph.netuid,
                 substrate_url=self.substrate.url,
             )
+            self.api_hardware.append(api.hardware())
         send_submissions_to_api(
             version=self.validator_version,
             all_apis=self.benchmarking_apis,
@@ -167,7 +172,7 @@ def do_step(self):
         if not untested_submissions:
             self.contest_state.benchmarking_state = BenchmarkState.FINISHED
             self.state_manager.save_state(self.contest_state)
-            self.wandb_manager.send_metrics(self.contest_state)
+            self.wandb_manager.send_metrics(self.contest_state, self.api_hardware)
             self.contest_state.sleep_to_next_contest(self._stop_flag)
             return
 
@@ -228,7 +233,7 @@ def run(self):
                 if self.contest_state:
                     self.contest_state.step += 1
                     self.state_manager.save_state(self.contest_state)
-                    self.wandb_manager.send_metrics(self.contest_state)
+                    self.wandb_manager.send_metrics(self.contest_state, self.api_hardware)
             except (ConnectionError, HTTPError) as e:
                 logger.error(f"Error connecting to API, retrying in 10 blocks: {e}")
                 self._stop_flag.wait(BENCHMARK_UPDATE_RATE_BLOCKS * 12)

diff --git a/validator/weight_setting/wandb_manager.py b/validator/weight_setting/wandb_manager.py
@@ -5,6 +5,7 @@
 from wandb.apis.public import Run
 
 from base.checkpoint import Uid, Key
+from base.system_info import SystemInfo
 from .contest_state import ContestState
 
 
@@ -69,6 +70,7 @@ def init_wandb(self, contest_state: ContestState):
     def send_metrics(
         self,
         contest_state: ContestState,
+        api_hardware: list[SystemInfo],
         scores: dict[Key, float] | None = None,
         ranks: dict[Key, int] | None = None
     ):
@@ -77,6 +79,7 @@ def send_metrics(
 
         data = {
             "scores": scores or contest_state.get_scores(contest_state.benchmarks),
+            "api_hardware": [api.model_dump() for api in api_hardware],
             "ranks": ranks or contest_state.get_ranks(scores),
             "num_gpus": len(self.config["benchmarker_api"]),
         } | contest_state.model_dump()

diff --git a/validator/weight_setting/weight_setter.py b/validator/weight_setting/weight_setter.py
@@ -9,6 +9,7 @@
 from substrateinterface import SubstrateInterface, Keypair
 
 from base.inputs_api import get_blacklist, get_inputs_state
+from base.system_info import SystemInfo
 from weight_setting.contest_state import ContestState
 from weight_setting.wandb_manager import WandbManager
 
@@ -26,6 +27,7 @@ class WeightSetter:
     _keypair: Keypair
     _uid: int
     _contest_state: Callable[[], ContestState]
+    _api_hardware: list[SystemInfo]
     _wandb_manager: WandbManager
     _weights_version: int
 
@@ -38,6 +40,7 @@ def __init__(
         keypair: Keypair,
         uid: int,
         contest_state: Callable[[], ContestState],
+        api_hardware: list[SystemInfo],
         wandb_manager: WandbManager,
     ):
         self._epoch_length = epoch_length
@@ -46,6 +49,7 @@ def __init__(
         self._keypair = keypair
         self._uid = uid
         self._contest_state = contest_state
+        self._api_hardware = api_hardware
         self._wandb_manager = wandb_manager
 
         parts: list[str] = version.split(".")
@@ -114,7 +118,7 @@ def set_weights(self) -> bool:
 
         weights_by_key = contest_state.calculate_weights(ranks=ranks)
 
-        self._wandb_manager.send_metrics(contest_state, scores, ranks)
+        self._wandb_manager.send_metrics(contest_state, self._api_hardware, scores, ranks)
         return self._set_weights([
             weights_by_key.get(key, 0)
             for key in self._metagraph.nodes.keys()