Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/build-pr-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Pull-request check for the monitoring service.
# Delegates the Docker build to the shared reusable workflow and asks it
# not to push the resulting image (push: false).
name: 'PR Build Check: monitoring'

on:
  pull_request:
    # Only pull requests touching files under monitoring/ trigger this check.
    paths:
      - 'monitoring/**'

jobs:
  build:
    name: Build monitoring (PR check)
    uses: ./.github/workflows/_build-push.yaml
    with:
      service_name: monitoring
      # Docker build context and Dockerfile, relative to the repository root.
      context_path: monitoring
      dockerfile_path: monitoring/docker/Dockerfile
      # Placeholder image reference for the check build.
      image_name: local/monitoring
      image_tag: pr-check
      push: false
25 changes: 25 additions & 0 deletions .github/workflows/build-preprod-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Preprod pipeline for the monitoring service.
# Runs on pushes to `develop` that touch monitoring/ and hands the build to
# the shared reusable workflow with the preprod deploy trigger enabled.
name: 'Build & Deploy to Preprod: monitoring'

on:
  push:
    branches:
      - develop
    # Ignore pushes that do not change the monitoring service.
    paths:
      - 'monitoring/**'

jobs:
  build:
    name: Build monitoring (preprod)
    uses: ./.github/workflows/_build-push.yaml
    with:
      service_name: monitoring
      # Docker build context and Dockerfile, relative to the repository root.
      context_path: monitoring
      dockerfile_path: monitoring/docker/Dockerfile
      # Image is tagged with the branch name in the Harbor registry.
      image_name: harbor.dyingstar-game.space/dyingstar/monitoring
      image_tag: develop
      chart_name: service-monitoring
      trigger_preprod_deploy: true
    # Credentials forwarded to the reusable workflow.
    secrets:
      HARBOR_USERNAME: ${{ secrets.HARBOR_USERNAME }}
      HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
      KUBERNETES_REPO_TOKEN: ${{ secrets.KUBERNETES_REPO_TOKEN }}
37 changes: 37 additions & 0 deletions .github/workflows/build-prod-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Production image build for the monitoring service.
# Triggered by pushing a tag of the form `monitoring-v*`; the part of the tag
# after the `monitoring-` prefix becomes the image tag.
name: 'Build Production Image: monitoring'

on:
  push:
    tags:
      - 'monitoring-v*'

jobs:
  # Derives the image version from the pushed tag name.
  extract-version:
    name: Extract version from tag
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.tag.outputs.version }}
    steps:
      - name: Extract version
        id: tag
        # Strips the leading "monitoring-" from the tag name,
        # e.g. monitoring-v1.2.3 -> v1.2.3, and publishes it as a step output.
        run: |
          TAG="${GITHUB_REF_NAME}"
          VERSION="${TAG#monitoring-}"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

  # Builds the image via the shared reusable workflow using the extracted version.
  build:
    name: Build monitoring (prod)
    needs: extract-version
    uses: ./.github/workflows/_build-push.yaml
    with:
      service_name: monitoring
      # Docker build context and Dockerfile, relative to the repository root.
      context_path: monitoring
      dockerfile_path: monitoring/docker/Dockerfile
      image_name: harbor.dyingstar-game.space/dyingstar/monitoring
      image_tag: ${{ needs.extract-version.outputs.version }}
      additional_tags: latest
      chart_name: service-monitoring
      # Production builds do not trigger the preprod deployment.
      trigger_preprod_deploy: false
    # Registry credentials forwarded to the reusable workflow.
    secrets:
      HARBOR_USERNAME: ${{ secrets.HARBOR_USERNAME }}
      HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
4 changes: 4 additions & 0 deletions monitoring/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Environment variables for local development.
# Copy this file to .env and adjust as needed.
# The service loads .env at startup via dotenvy; a missing file is not an error.
# PORT: TCP port the HTTP server binds to. The service falls back to 9300 when
# PORT is unset or cannot be parsed as a port number.
PORT=9300
# RUST_LOG: log filter consumed by tracing-subscriber's EnvFilter. When it is
# unset or invalid the service uses "monitoring=debug,tower_http=info".
RUST_LOG=monitoring=debug
34 changes: 34 additions & 0 deletions monitoring/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Cargo manifest for the `monitoring` service binary.
[package]
name = "monitoring"
version = "0.1.0"
edition = "2021"

# Single binary target built from src/main.rs.
[[bin]]
name = "monitoring"
path = "src/main.rs"

[dependencies]
# Async runtime
tokio = { version = "1", features = ["full"] }

# Web framework + WebSocket
# The "ws" feature backs the /ws route registered in src/main.rs.
axum = { version = "0.8", features = ["ws", "macros"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["trace"] }

# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"

# Prometheus metrics
# NOTE(review): the "process" feature is expected to add process-level
# metrics on Linux; confirm against the prometheus crate documentation.
prometheus = { version = "0.13", features = ["process"] }

# Logging / tracing
# "env-filter" provides the EnvFilter used to honour RUST_LOG in src/main.rs.
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }

# Error handling
anyhow = "1"

# Environment / config
# Loads the optional .env file at startup.
dotenvy = "0.15"
31 changes: 31 additions & 0 deletions monitoring/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# ─── Stage 1: build ────────────────────────────────────────────────────────
FROM rust:1.87-slim AS builder

# Build-time system packages. --no-install-recommends keeps apt from pulling
# in optional packages that the build does not need.
RUN apt-get update \
    && apt-get install -y --no-install-recommends pkg-config libssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Cache dependencies separately from source code: build once against a stub
# main.rs so the dependency layer is reused until Cargo.toml/Cargo.lock change.
COPY Cargo.toml Cargo.lock* ./
RUN mkdir -p src && echo 'fn main() {}' > src/main.rs
RUN cargo build --release && rm -rf src

# Copy real source and rebuild. The touch bumps main.rs's mtime so cargo does
# not consider the stub build up to date.
COPY src ./src
RUN touch src/main.rs && cargo build --release

# ─── Stage 2: minimal runtime ──────────────────────────────────────────────
FROM debian:bookworm-slim AS runtime

# Runtime libraries only; recommended extras are skipped to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libssl3 ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged system user (UID 1001, primary group root) that runs the service.
RUN useradd -r -u 1001 -g root monitoring
WORKDIR /app

COPY --from=builder /app/target/release/monitoring /app/monitoring

USER 1001

# Default port of the service (overridable through the PORT variable).
EXPOSE 9300

ENTRYPOINT ["/app/monitoring"]
19 changes: 19 additions & 0 deletions monitoring/src/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use std::env;

/// Application configuration loaded from environment variables.
#[derive(Debug, Clone)]
pub struct Config {
    /// Port for the combined HTTP server (WebSocket /ws + metrics /metrics).
    pub port: u16,
}

impl Config {
    /// Port used when `PORT` is unset or does not hold a valid `u16`.
    pub const DEFAULT_PORT: u16 = 9300;

    /// Builds the configuration from the process environment.
    ///
    /// Reads the `PORT` variable. Leading and trailing whitespace (for example
    /// a trailing newline picked up from a mounted file or a quoted value) is
    /// ignored before parsing. When the variable is missing, is not valid
    /// Unicode, or does not parse as a `u16`, [`Config::DEFAULT_PORT`] is used
    /// instead; this function never fails.
    pub fn from_env() -> Self {
        let port = env::var("PORT")
            .ok()
            // Trim first: `str::parse::<u16>` rejects surrounding whitespace,
            // which would otherwise silently select the default port.
            .and_then(|raw| raw.trim().parse().ok())
            .unwrap_or(Self::DEFAULT_PORT);

        Self { port }
    }
}
61 changes: 61 additions & 0 deletions monitoring/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
pub mod config;
pub mod metrics;
pub mod websocket;

use std::{net::SocketAddr, sync::Arc};

use axum::{routing::get, Router};
use prometheus::{Encoder, TextEncoder};
use tokio::net::TcpListener;
use tracing::info;

use crate::{config::Config, metrics::Metrics, websocket::ws_handler};

/// Service entry point: sets up logging and metrics, then serves the
/// `/ws`, `/metrics` and `/health` routes on `0.0.0.0:<port>` until the
/// server stops or fails.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // A .env file is optional, so the result of loading it is deliberately ignored.
    let _ = dotenvy::dotenv();

    // Structured logging: use the filter from the environment when it is
    // present and valid, otherwise fall back to the built-in default.
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| "monitoring=debug,tower_http=info".parse().unwrap());
    tracing_subscriber::fmt().with_env_filter(env_filter).init();

    let config = Arc::new(Config::from_env());
    info!(?config, "Starting monitoring service");

    // Metrics must be registered before the first /metrics scrape can arrive.
    Metrics::init();

    // Routes:
    //   GET /ws      — WebSocket endpoint (ds_bridge connects here)
    //   GET /metrics — Prometheus text endpoint (Prometheus scrapes here)
    //   GET /health  — simple liveness probe
    let health = || async { "ok" };
    let app = Router::new()
        .route("/ws", get(ws_handler))
        .route("/metrics", get(metrics_handler))
        .route("/health", get(health));

    // Listen on every IPv4 interface at the configured port.
    let addr = SocketAddr::from(([0, 0, 0, 0], config.port));
    let listener = TcpListener::bind(addr).await?;
    info!(%addr, "listening");

    axum::serve(listener, app).await?;
    Ok(())
}

/// Render all registered Prometheus metrics in the text exposition format.
///
/// On success the response carries the encoder's content type and the encoded
/// metrics as its body. If encoding fails, the handler answers with
/// `500 Internal Server Error` and the error text instead of returning a
/// successful response with an empty or truncated body, so a broken scrape is
/// visible to the scraper rather than silently swallowed.
async fn metrics_handler() -> impl axum::response::IntoResponse {
    use axum::{
        http::{header::CONTENT_TYPE, StatusCode},
        response::IntoResponse,
    };

    let encoder = TextEncoder::new();
    let metric_families = prometheus::gather();
    let mut buf = Vec::new();

    match encoder.encode(&metric_families, &mut buf) {
        Ok(()) => {
            let content_type = encoder.format_type().to_owned();
            ([(CONTENT_TYPE, content_type)], buf).into_response()
        }
        Err(err) => (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("failed to encode metrics: {err}"),
        )
            .into_response(),
    }
}
84 changes: 84 additions & 0 deletions monitoring/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use prometheus::{
register_gauge, register_int_counter, register_int_counter_vec,
Gauge, IntCounter, IntCounterVec,
};
use std::sync::OnceLock;

/// Handles to every Prometheus metric exposed by the monitoring service.
///
/// A single instance is created by `Metrics::init()` and shared for the
/// lifetime of the process.
pub struct Metrics {
    /// Gauge: players currently connected to Horizon.
    pub players_connected: Gauge,
    /// Counter: player connections observed since startup.
    pub players_connect_total: IntCounter,
    /// Counter: player disconnections observed since startup.
    pub players_disconnect_total: IntCounter,
    /// Gauge: Godot game server instances currently active.
    pub godot_servers_active: Gauge,
    /// Counter vector: items created, partitioned by the `kind` label.
    pub items_created_total: IntCounterVec,
    /// Counter: successful player authentications.
    pub auth_success_total: IntCounter,
}

/// Process-wide metrics instance, filled in by the first `Metrics::init()` call.
static METRICS: OnceLock<Metrics> = OnceLock::new();

impl Metrics {
    /// Registers every metric with the default Prometheus registry and returns
    /// the shared instance.
    ///
    /// Intended to be called at startup, before the HTTP server starts. The
    /// registration runs only on the first call; later calls return the
    /// instance created then.
    ///
    /// # Panics
    ///
    /// Panics if any metric cannot be registered.
    pub fn init() -> &'static Metrics {
        METRICS.get_or_init(Self::register)
    }

    /// Creates and registers each metric, in declaration order.
    fn register() -> Metrics {
        const REGISTRATION_FAILED: &str = "metric registration failed";

        let players_connected = register_gauge!(
            "horizon_players_connected",
            "Current number of players connected to Horizon"
        )
        .expect(REGISTRATION_FAILED);

        let players_connect_total = register_int_counter!(
            "horizon_players_connect_total",
            "Total player connections since service start"
        )
        .expect(REGISTRATION_FAILED);

        let players_disconnect_total = register_int_counter!(
            "horizon_players_disconnect_total",
            "Total player disconnections since service start"
        )
        .expect(REGISTRATION_FAILED);

        let godot_servers_active = register_gauge!(
            "horizon_godot_servers_active",
            "Number of active Godot game server instances registered with Horizon"
        )
        .expect(REGISTRATION_FAILED);

        let items_created_total = register_int_counter_vec!(
            "horizon_items_created_total",
            "Total items created in Horizon, by kind",
            &["kind"]
        )
        .expect(REGISTRATION_FAILED);

        let auth_success_total = register_int_counter!(
            "horizon_auth_success_total",
            "Total successful player authentications"
        )
        .expect(REGISTRATION_FAILED);

        Metrics {
            players_connected,
            players_connect_total,
            players_disconnect_total,
            godot_servers_active,
            items_created_total,
            auth_success_total,
        }
    }

    /// Returns the shared metrics instance.
    ///
    /// # Panics
    ///
    /// Panics if `Metrics::init()` has not been called yet.
    pub fn get() -> &'static Metrics {
        let instance = METRICS.get();
        instance.expect("Metrics not initialised — call Metrics::init() first")
    }

    /// Resets every metric (called when Horizon restarts): gauges are set to
    /// zero and counters, including the labelled item counter, are reset.
    pub fn reset(&self) {
        // Gauges.
        self.players_connected.set(0.0);
        self.godot_servers_active.set(0.0);

        // Counters.
        self.players_connect_total.reset();
        self.players_disconnect_total.reset();
        self.auth_success_total.reset();
        self.items_created_total.reset();
    }
}
Loading
Loading