chore: a lot of stuff

thuasta · Feb 7, 2025 · 6ca1c44 · 6ca1c44
1 parent d0d6b3d
commit 6ca1c44
Show file tree

Hide file tree

Showing 11 changed files with 205 additions and 22 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,52 @@
+on:
+  release:
+    types: 
+      - published
+
+env:
+  REGISTRY: ghcr.io
+  NAMESPACE: thuasta
+  IMAGE_NAME: saiblo-worker
+
+jobs:
+  publish-docker-image:
+    permissions:
+      packages: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - id: get-version
+        uses: frabert/replace-string-action@v2
+        with:
+          pattern: '^v'
+          string: ${{ github.event.release.tag_name }}
+          replace-with: ''
+
+      - uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - uses: docker/build-push-action@v5
+        with:
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.NAMESPACE }}/${{ env.IMAGE_NAME }}:${{ steps.get-version.outputs.replaced }}
+            ${{ env.REGISTRY }}/${{ env.NAMESPACE }}/${{ env.IMAGE_NAME }}:latest
+
+  release:
+    permissions:
+      contents: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - id: extract-release-notes
+        uses: ffurrer2/extract-release-notes@v2
+
+      - uses: softprops/action-gh-release@v2
+        with:
+          body: |
+            ${{ steps.extract-release-notes.outputs.release_notes }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/),
+and this project adheres to [Semantic Versioning](https://semver.org/).
+
+## [0.1.0] - 2025-02-07
+
+First version
+
+[0.1.0]: https://github.com/thuasta/saiblo-worker/releases/tag/v0.1.0
diff --git a/Dockerfile b/Dockerfile
@@ -3,4 +3,6 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --disable-pip-version-check --no-cache-dir -r requirements.txt
 COPY . .
-ENTRYPOINT ["python", "main.py"]
+# Install the entrypoint script on PATH. The chmod must target the installed
+# copy by absolute path: a relative path would resolve against WORKDIR (/app)
+# and leave the script that ENTRYPOINT actually runs without its exec bit.
+COPY entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/entrypoint.sh
+ENTRYPOINT ["entrypoint.sh"]
diff --git a/README.md b/README.md
@@ -2,10 +2,90 @@
 
 A worker for Saiblo
 
-## Install
-
 ## Usage
 
+### Run with Docker
+
+We provide a pre-built Docker image for the worker. You can run it with the following command:
+
+```sh
+docker run -d --env-file .env ghcr.io/thuasta/saiblo-worker
+```
+
+Or set environment variables directly:
+
+```sh
+docker run -dit -e GAME_HOST_IMAGE=<your-game-host-image> -e NAME=<worker-name> --rm --privileged ghcr.io/thuasta/saiblo-worker
+```
+
+The worker will automatically connect to the Saiblo server and start processing matches.
+
+If you want to build the Docker image yourself, follow these steps:
+
+1. Build the Docker image:
+
+    ```sh
+    docker build -t saiblo-worker .
+    ```
+
+2. Run the Docker container:
+
+    ```sh
+    docker run -d --env-file .env saiblo-worker
+    ```
+
+   You can also set environment variables directly:
+
+    ```sh
+    docker run -dit -e GAME_HOST_IMAGE=<your-game-host-image> -e NAME=<worker-name> --rm --privileged saiblo-worker
+    ```
+
+### Run Manually
+
+1. Set up environment variables in a `.env` file:
+
+    ```sh
+    GAME_HOST_IMAGE=<your-game-host-image>  # Required: Docker image for the game host
+    NAME=<worker-name>                      # Required: Unique name for this worker to set on Saiblo
+
+    HTTP_BASE_URL=<url>                     # Optional: API base URL (default: https://api.dev.saiblo.net)
+    WEBSOCKET_URL=<url>                     # Optional: WebSocket URL (default: wss://api.dev.saiblo.net/ws/)
+    ```
+
+2. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Run the worker:
+   ```bash
+   python main.py
+   ```
+
+The worker will:
+- Connect to the Saiblo server
+- Build Docker images for submitted code
+- Run matches using the game host image
+- Report results back to the server
+
+## Environment Variables
+
+The worker will pass the following environment variables to the game host Docker container:
+
+- `TOKENS`: A comma-separated list of tokens for the players
+
+And the following environment variables to the agent Docker container:
+
+- `TOKEN`: The token for the player
+- `GAME_HOST`: The address of the game host
+
+## Match Result Retrieval
+
+The worker automatically retrieves the match result from the game host container after each match and reports it to the Saiblo server. Make sure your game host Docker image creates the following files by the end of each match:
+
+- `/app/data/result.json`: The match result in JSON format
+- `/app/data/replay.dat`: The match replay in binary format
+
 ## Contributing
 
 PRs are welcome!

diff --git a/build_task.py b/build_task.py
@@ -1,5 +1,6 @@
 """Contains the task for building agents."""
 
+import logging
 from typing import Optional
 
 from base_agent_code_fetcher import BaseAgentCodeFetcher
@@ -32,10 +33,13 @@ def __init__(
         self._reporter = reporter
 
     async def execute(self) -> BuildResult:
+        logging.info("Fetching agent code %s", self._code_id)
         agent_code_tarball_path = await self._fetcher.fetch(self._code_id)
 
+        logging.info("Building agent code %s", self._code_id)
         build_result = await self._builder.build(self._code_id, agent_code_tarball_path)
 
+        logging.info("Reporting build result for agent code %s", self._code_id)
         await self._reporter.report(build_result)
 
         self._result = build_result

diff --git a/entrypoint.sh b/entrypoint.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+# Start the Docker daemon in the background; its output is discarded.
+dockerd-entrypoint.sh > /dev/null 2>&1 &
+
+# Wait for the Docker daemon to start up
+while ! docker info > /dev/null 2>&1; do
+    echo "Waiting for the Docker daemon to start up..."
+    sleep 3
+done
+
+# Forward the container arguments to the worker. "$@" is quoted so that
+# arguments containing whitespace or glob characters reach main.py unchanged.
+python main.py "$@"
diff --git a/judge_task.py b/judge_task.py
@@ -1,6 +1,6 @@
 """Contains the task for judging matches."""
 
-import asyncio
+import logging
 from dataclasses import dataclass
 from typing import List, Optional
 
@@ -56,24 +56,24 @@ def match_id(self) -> str:
         return self._match_id
 
     async def execute(self) -> MatchResult:
-        agent_build_results = await asyncio.gather(
-            *[
-                BuildTask(
-                    code_id,
-                    self._fetcher,
-                    self._builder,
-                    self._build_result_reporter,
-                ).execute()
-                for code_id in self._agent_code_ids
-            ]
-        )
-
+        agent_build_results = [
+            await BuildTask(
+                code_id,
+                self._fetcher,
+                self._builder,
+                self._build_result_reporter,
+            ).execute()
+            for code_id in self._agent_code_ids
+        ]
+
+        logging.info("Judging match %s", self._match_id)
         match_result = await self._judger.judge(
             self._match_id,
             self._game_host_image_tag,
             [x.image for x in agent_build_results],
         )
 
+        logging.info("Reporting match result for match %s", self._match_id)
         await self._match_result_reporter.report(match_result)
 
         self._result = match_result

diff --git a/main.py b/main.py
@@ -1,6 +1,7 @@
 """Main module."""
 
 import asyncio
+import logging
 import os
 
 import aiohttp
@@ -18,6 +19,7 @@
 from task_scheduler import TaskScheduler
 
 DEFAULT_HTTP_BASE_URL = "https://api.dev.saiblo.net"
+DEFAULT_LOGGING_LEVEL = "INFO"
 DEFAULT_WEBSOCKET_URL = "wss://api.dev.saiblo.net/ws/"
 
 
@@ -37,9 +39,13 @@ async def main():
 
     http_base_url = yarl.URL(os.getenv("HTTP_BASE_URL", DEFAULT_HTTP_BASE_URL))
 
+    logging_level = os.getenv("LOGGING_LEVEL", DEFAULT_LOGGING_LEVEL)
+
     websocket_url = os.getenv("WEBSOCKET_URL", DEFAULT_WEBSOCKET_URL)
 
     # Set up everything.
+    logging.getLogger().setLevel(logging_level)
+
     task_scheduler = TaskScheduler()
 
     session = aiohttp.ClientSession(http_base_url)

diff --git a/match_judger.py b/match_judger.py
@@ -20,14 +20,17 @@
 from match_result import MatchResult
 
 _AGENT_CONTAINER_NAME_PREFIX = "saiblo-worker-agent"
+_AGENT_MEM_LIMIT = "1g"
+_AGENT_NANO_CPUS = 500000000  # 0.5 CPU
 _GAME_HOST_APP_DATA_DIR_PATH = "/app/data/"
 _GAME_HOST_CONTAINER_NAME_PREFIX = "saiblo-worker-game-host"
+_GAME_HOST_MEM_LIMIT = "1.8g"
+_GAME_HOST_NANO_CPUS = 900000000  # 0.9 CPU
 _GAME_HOST_REPLAY_FILE_NAME = "data/replay.dat"
 _GAME_HOST_RESULT_FILE_NAME = "data/result.json"
+_JUDGE_TIMEOUT = 600  # In seconds
 _NETWORK_NAME_PREFIX = "saiblo-worker-network"
 
-JUDGE_TIMEOUT = 600  # In seconds
-
 
 @dataclass
 class _AgentInfo:
@@ -141,7 +144,9 @@ async def judge(
                         ]
                     )
                 },
+                mem_limit=_GAME_HOST_MEM_LIMIT,
                 name=game_host_container_name,
+                nano_cpus=_GAME_HOST_NANO_CPUS,
             )
 
             # Run agent containers.
@@ -163,7 +168,9 @@ async def judge(
                         "TOKEN": agent_info.token,
                         "GAME_HOST": f"ws://{game_host_container_name}:14514",
                     },
+                    mem_limit=_AGENT_MEM_LIMIT,
                     name=agent_info.container_name,
+                    network=agent_info.network_name,
                 )
                 agent_containers.append(agent_container)
 
@@ -174,7 +181,7 @@ async def judge(
                 agent_networks.append(agent_network)
 
             # Wait until the game host finishes or timeout.
-            await asyncio.to_thread(game_host_container.wait, timeout=JUDGE_TIMEOUT)
+            await asyncio.to_thread(game_host_container.wait, timeout=_JUDGE_TIMEOUT)
 
             # Stop the game host and agent containers.
             # For game host, we give it some time after SIGTERM to write the result file.
@@ -260,7 +267,7 @@ async def judge(
             return match_result
 
         except Exception as e:  # pylint: disable=broad-except
-            return MatchResult(
+            match_result = MatchResult(
                 match_id=match_id,
                 agent_results=[
                     MatchResult.AgentResult(
@@ -280,6 +287,8 @@ async def judge(
                 ),
             )
 
+            return match_result
+
         finally:
             # Clean networks.
             for network in self._client.networks.list():

diff --git a/saiblo_client.py b/saiblo_client.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import json
+import logging
 
 import websockets.asyncio.client
 from websockets import ClientConnection, ConnectionClosed
@@ -42,6 +43,8 @@ def __init__(
     async def start(self) -> None:
         async for connection in websockets.asyncio.client.connect(self._websocket_url):
             try:
+                logging.info("Connected to %s", self._websocket_url)
+
                 await connection.send(
                     json.dumps(
                         {
@@ -61,6 +64,7 @@ async def start(self) -> None:
                 )
 
             except ConnectionClosed:
+                logging.error("Connection closed. Reconnecting...")
                 continue
 
     async def _keep_finish_judge_task(self, connection: ClientConnection) -> None:
@@ -90,10 +94,11 @@ async def _keep_receive_message(self, connection: ClientConnection) -> None:
                     await self._task_scheduler.schedule(task)
 
                 case "judge_task":
-                    self._request_judge_task_condition.notify()
+                    async with self._request_judge_task_condition:
+                        self._request_judge_task_condition.notify()
 
                     task = self._judge_task_factory.create(
-                        message["data"]["match_id"],
+                        str(message["data"]["match_id"]),
                         [x["code_id"] for x in message["data"]["players"]],
                     )
 

diff --git a/task_scheduler.py b/task_scheduler.py
@@ -40,6 +40,8 @@ async def start(self) -> None:
         while True:
             task = await self._pending_tasks.get()
 
+            logging.info("Executing task %s", type(task).__name__)
+
             try:
                 await task.execute()
             except Exception as e:  # pylint: disable=broad-except