Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 82 additions & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,85 @@ tasks:
dir: backend
desc: Run backend application
cmds:
- go run main.go
- go run main.go

#
# kubernetes tasks
#
# Installs the Go port of envsubst; only runs on Windows (see `platforms`).
k8s:install-envsubst:
desc: Install envsubst for Windows (required for k8s manifest templating)
platforms: [windows]
cmds:
- go install github.com/a8m/envsubst/cmd/envsubst@latest
status:
# Skip the install when envsubst is already on PATH. `command -v` is a shell
# builtin, so unlike the external `which` binary it does not depend on extra
# tooling being installed on Windows.
- command -v envsubst

# Installs the kind CLI via `go install`.
k8s:install-kind:
desc: Install kind (Kubernetes in Docker) for local cluster development
cmds:
- go install sigs.k8s.io/kind@latest
status:
# Skip the install when kind is already on PATH. `command -v` is a shell
# builtin and therefore works on every platform the Taskfile targets,
# whereas `which` is an external program that may be missing on Windows.
- command -v kind

# Creates the local kind cluster named "towerfall" and points kubectl at it.
k8s:cluster-create:
desc: Create an empty kind cluster (use k8s:cluster-setup for full setup)
deps:
# The kind binary must be available before the cluster can be created.
- k8s:install-kind
cmds:
- kind create cluster --name towerfall
# kind registers its kubeconfig context as "kind-<cluster name>".
- kubectl config use-context kind-towerfall
- echo "Cluster created! Run 'task k8s:build-load' then 'task k8s:deploy'"
status:
# Considered up to date when a cluster named exactly "towerfall" is listed.
- kind get clusters | grep -q "^towerfall$"

# Deletes the "towerfall" kind cluster.
k8s:cluster-destroy:
desc: Delete the kind cluster and all resources
cmds:
- kind delete cluster --name towerfall
status:
# Negated check: considered up to date when no cluster named "towerfall" is listed.
- '! kind get clusters | grep -q "^towerfall$"'

# Runs the platform-specific install script from the k8s/ directory with its apply flag.
k8s:deploy:
dir: k8s
desc: Generate manifests and deploy to current cluster
preconditions:
# Fail early with a helpful message when k8s/.env has not been created yet.
- sh: test -f .env
msg: "k8s/.env file not found. Copy .env.example to .env and configure it."
cmds:
# Exactly one of the following runs, selected by the host platform.
- cmd: bash install.sh --apply
platforms: [linux, darwin]
- cmd: powershell -ExecutionPolicy Bypass -File install.ps1 -Apply
platforms: [windows]

# Builds the backend, frontend and bot2 images, loads them into the
# "towerfall" kind cluster, then tries to restart the matching deployments.
k8s:build-load:
desc: Rebuild all Docker images and load into kind cluster
cmds:
- docker build -t towerfall/backend:latest ./backend
- docker build -t towerfall/frontend:latest ./frontend
- docker build -t towerfall/bot2:latest ./bot2
# Copy the locally built images into the kind cluster.
- kind load docker-image towerfall/backend:latest --name towerfall
- kind load docker-image towerfall/frontend:latest --name towerfall
- kind load docker-image towerfall/bot2:latest --name towerfall
# Best-effort restart: stderr is discarded and a failure only prints a note,
# so this step does not fail the task when the deployments do not exist yet.
- cmd: kubectl rollout restart -n towerfall deployment/backend deployment/frontend deployment/bot2 2>/dev/null || echo "Note - Deployments not yet created, skipping restart"
ignore_error: true

# One-shot setup: runs cluster creation, image build/load and deployment in order.
k8s:cluster-setup:
desc: Create a kind cluster, build images, and deploy all services
cmds:
# Listed under `cmds` (not `deps`), so the sub-tasks run in this order.
- task: k8s:cluster-create
- task: k8s:build-load
- task: k8s:deploy
- echo "Cluster ready! Use 'kubectl get pods -n towerfall' to check status"

# Prints the last 50 log lines for each app label in the towerfall namespace.
k8s:logs:
desc: View logs from all towerfall pods
cmds:
# Pods are selected by their `app` label, one kubectl call per component.
- kubectl logs -n towerfall -l app=backend --tail=50
- kubectl logs -n towerfall -l app=frontend --tail=50
- kubectl logs -n towerfall -l app=bot2 --tail=50
- kubectl logs -n towerfall -l app=cloudflared --tail=50

# Lists pods, services, deployments and configmaps in the towerfall namespace.
k8s:status:
desc: Show status of all towerfall resources
cmds:
- kubectl get pods,svc,deployments,configmaps -n towerfall
21 changes: 21 additions & 0 deletions bot2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# CUDA runtime base image for the bot service.
# NVIDIA publishes CUDA image tags with a full three-part version (for example
# 12.1.1); the two-part "12.1-runtime-ubuntu22.04" tag cannot be pulled.
# NOTE(review): confirm the exact patch version wanted against the registry.
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Python 3.11 toolchain plus curl, which is needed below to fetch the uv
# installer. The apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y \
    python3.11 python3.11-venv python3-pip curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv, then put /root/.local/bin on PATH so later steps can call `uv`
# (presumably the installer's target directory - verify if the installer changes).
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"

WORKDIR /app
# Copy only the dependency manifests first so the dependency layer stays
# cached when just the source code changes.
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen

COPY src/ ./src/

# bot.service provides a FastAPI wrapper around ML bots for Kubernetes orchestration
# It exposes /health on port 8080 for liveness/readiness probes
# Default game server endpoints; these ENV values can be overridden at deploy time.
ENV GAME_SERVER_HTTP_URL="http://backend-service:4000"
ENV GAME_SERVER_WS_URL="ws://backend-service:4000/ws"

CMD ["uv", "run", "python", "-m", "bot.service"]
24 changes: 17 additions & 7 deletions bot2/src/bot/client/game_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import asyncio
import json
import logging
import os
from enum import Enum
from typing import Awaitable, Callable

Expand All @@ -38,6 +39,13 @@ class ClientMode(Enum):
REST = "rest" # Training mode with REST-based actions


# Default URLs from environment variables.
# Note: These are evaluated at module import time. Set environment variables
# before importing this module, or pass URLs explicitly to GameClient.__init__.
DEFAULT_HTTP_URL = os.getenv("GAME_SERVER_HTTP_URL", "http://localhost:4000")
DEFAULT_WS_URL = os.getenv("GAME_SERVER_WS_URL", "ws://localhost:4000/ws")


class GameClientError(GameHTTPClientError):
"""Base exception for GameClient errors."""

Expand Down Expand Up @@ -76,27 +84,29 @@ class GameClient:

def __init__(
self,
http_url: str = "http://localhost:4000",
ws_url: str = "ws://localhost:4000/ws",
http_url: str | None = None,
ws_url: str | None = None,
mode: ClientMode = ClientMode.WEBSOCKET,
timeout: float = 30.0,
):
"""Initialize the GameClient.

Args:
http_url: Base URL for REST API.
ws_url: WebSocket endpoint URL.
http_url: Base URL for REST API. If not provided, uses
GAME_SERVER_HTTP_URL env var or defaults to http://localhost:4000.
ws_url: WebSocket endpoint URL. If not provided, uses
GAME_SERVER_WS_URL env var or defaults to ws://localhost:4000/ws.
mode: Operating mode (WEBSOCKET or REST).
timeout: Request timeout in seconds.
"""
self.http_url = http_url
self.ws_url = ws_url
self.http_url = http_url if http_url is not None else DEFAULT_HTTP_URL
self.ws_url = ws_url if ws_url is not None else DEFAULT_WS_URL
self.mode = mode
self.timeout = timeout

# HTTP client for REST operations
self._http_client = GameHTTPClient(
base_url=http_url,
base_url=self.http_url,
timeout=timeout,
)

Expand Down
14 changes: 11 additions & 3 deletions bot2/src/bot/client/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import asyncio
import logging
import os
from typing import Any, TypeVar

from httpx import AsyncClient, HTTPError, TimeoutException
Expand All @@ -35,6 +36,11 @@

T = TypeVar("T", bound=BaseModel)

# Default URL from environment variable.
# Note: Evaluated at module import time. Set GAME_SERVER_HTTP_URL before importing,
# or pass base_url explicitly to GameHTTPClient.__init__.
DEFAULT_HTTP_URL = os.getenv("GAME_SERVER_HTTP_URL", "http://localhost:4000")


# =============================================================================
# Custom Exceptions
Expand Down Expand Up @@ -81,18 +87,20 @@ class GameHTTPClient:

def __init__(
self,
base_url: str = "http://localhost:4000",
base_url: str | None = None,
timeout: float = 30.0,
max_retries: int = 3,
):
"""Initialize the HTTP client.

Args:
base_url: Base URL for the game server (default: http://localhost:4000)
base_url: Base URL for the game server. If not provided, uses
GAME_SERVER_HTTP_URL env var or defaults to http://localhost:4000.
timeout: Request timeout in seconds (default: 30.0)
max_retries: Maximum retry attempts for transient failures (default: 3)
"""
self.base_url = base_url.rstrip("/")
resolved_url = base_url if base_url is not None else DEFAULT_HTTP_URL
self.base_url = resolved_url.rstrip("/")
self.timeout = timeout
self.max_retries = max_retries
self._client: AsyncClient | None = None
Expand Down
17 changes: 12 additions & 5 deletions bot2/src/bot/gym/towerfall_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from __future__ import annotations

import asyncio
import os
from typing import Any

import gymnasium as gym
Expand All @@ -23,6 +24,12 @@
from bot.models.constants import GAME_CONSTANTS
from bot.observation import ObservationBuilder, ObservationConfig

# Default URLs from environment variables.
# Note: These are evaluated at module import time. Set environment variables
# before importing this module, or pass URLs explicitly to TowerfallEnv.__init__.
DEFAULT_HTTP_URL = os.getenv("GAME_SERVER_HTTP_URL", "http://localhost:4000")
DEFAULT_WS_URL = os.getenv("GAME_SERVER_WS_URL", "ws://localhost:4000/ws")


class TowerfallEnv(gym.Env[NDArray[np.float32], int]):
"""Gymnasium environment for TowerFall RL training.
Expand All @@ -45,8 +52,8 @@ class TowerfallEnv(gym.Env[NDArray[np.float32], int]):

def __init__(
self,
http_url: str = "http://localhost:4000",
ws_url: str = "ws://localhost:4000/ws",
http_url: str | None = None,
ws_url: str | None = None,
player_name: str = "MLBot",
room_name: str = "Training",
map_type: str = "default",
Expand Down Expand Up @@ -81,9 +88,9 @@ def __init__(
"""
super().__init__()

# Configuration
self.http_url = http_url
self.ws_url = ws_url
# Configuration (use environment variable defaults if not provided)
self.http_url = http_url if http_url is not None else DEFAULT_HTTP_URL
self.ws_url = ws_url if ws_url is not None else DEFAULT_WS_URL
self.player_name = player_name
self.room_name = room_name
self.map_type = map_type
Expand Down
16 changes: 12 additions & 4 deletions bot2/src/bot/gym/vectorized_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from __future__ import annotations

import asyncio
import os
import uuid
from typing import Any

Expand All @@ -29,6 +30,12 @@
from bot.models.constants import GAME_CONSTANTS
from bot.observation import ObservationBuilder, ObservationConfig

# Default URLs from environment variables.
# Note: These are evaluated at module import time. Set environment variables
# before importing this module, or pass URLs explicitly to VectorizedTowerfallEnv.__init__.
DEFAULT_HTTP_URL = os.getenv("GAME_SERVER_HTTP_URL", "http://localhost:4000")
DEFAULT_WS_URL = os.getenv("GAME_SERVER_WS_URL", "ws://localhost:4000/ws")


class VectorizedTowerfallEnv(gym.vector.VectorEnv):
"""Vectorized wrapper for running multiple Towerfall environments in parallel.
Expand Down Expand Up @@ -68,8 +75,8 @@ class VectorizedTowerfallEnv(gym.vector.VectorEnv):
def __init__(
self,
num_envs: int,
http_url: str = "http://localhost:4000",
ws_url: str = "ws://localhost:4000/ws",
http_url: str | None = None,
ws_url: str | None = None,
player_name: str = "MLBot",
room_name_prefix: str = "Training",
map_type: str = "default",
Expand Down Expand Up @@ -101,8 +108,9 @@ def __init__(
termination_config: Optional episode termination configuration.
"""
self.num_envs = num_envs
self.http_url = http_url
self.ws_url = ws_url
# Use environment variable defaults if not provided
self.http_url = http_url if http_url is not None else DEFAULT_HTTP_URL
self.ws_url = ws_url if ws_url is not None else DEFAULT_WS_URL
self.player_name = player_name
self.room_name_prefix = room_name_prefix
self.map_type = map_type
Expand Down
14 changes: 11 additions & 3 deletions bot2/src/bot/training/server_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import logging
import os
import time
from dataclasses import dataclass, field

Expand All @@ -22,6 +23,11 @@
MaxGamesExceededError,
)

# Default URL from environment variable.
# Note: Evaluated at module import time. Set GAME_SERVER_HTTP_URL before importing,
# or pass base_url explicitly to GameServerManager.__init__.
DEFAULT_HTTP_URL = os.getenv("GAME_SERVER_HTTP_URL", "http://localhost:4000")

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -71,16 +77,18 @@ class GameServerManager:

def __init__(
self,
http_url: str = "http://localhost:4000",
http_url: str | None = None,
max_concurrent_games: int = 10,
) -> None:
"""Initialize the game server manager.

Args:
http_url: Base URL of the game server HTTP API.
http_url: Base URL of the game server HTTP API. If not provided, uses
GAME_SERVER_HTTP_URL env var or defaults to http://localhost:4000.
max_concurrent_games: Maximum number of concurrent game instances.
"""
self._http_url = http_url.rstrip("/")
resolved_url = http_url if http_url is not None else DEFAULT_HTTP_URL
self._http_url = resolved_url.rstrip("/")
self._max_concurrent_games = max_concurrent_games
self._client: httpx.AsyncClient | None = None
self._games: dict[str, GameInstance] = {}
Expand Down
12 changes: 12 additions & 0 deletions k8s/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Cloudflare Tunnel Configuration
CF_TUNNEL_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
CF_ACCOUNT_TAG=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
CF_TUNNEL_SECRET=base64-encoded-tunnel-secret

# Domain Configuration
API_DOMAIN=api.example.com
FRONTEND_DOMAIN=game.example.com

# Container Registry
# Use "towerfall" for local kind clusters, or your registry prefix for remote
IMAGE_REGISTRY=towerfall
2 changes: 2 additions & 0 deletions k8s/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
generated/
.env
Loading
Loading