diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..ff0d4e68e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,12 @@ +.git +.gitignore +.node_modules +.tmp +.uv-cache +**/.terraform +**/.uv-cache +**/node_modules +**/target +coverage +frontend-pwa/dist +target diff --git a/Makefile b/Makefile index ff8e1a813..91c28ed42 100644 --- a/Makefile +++ b/Makefile @@ -38,11 +38,12 @@ MARKDOWNLINT_CLI2_VERSION ?= 0.14.0 YAMLLINT_VERSION ?= 1.35.1 OPENAPI_SPEC ?= spec/openapi.json -# Place one consolidated PHONY declaration near the top of the file -.PHONY: all clean be fe fe-build openapi gen docker-up docker-down fmt lint test test-rust test-frontend typecheck deps lockfile lint-specs \ - check-fmt markdownlint markdownlint-docs mermaid-lint nixie yamllint audit \ - lint-rust lint-frontend lint-asyncapi lint-openapi lint-makefile lint-actions \ - lint-architecture workspace-sync +.PHONY: all clean be fe fe-build openapi gen docker-up docker-down +.PHONY: local-k8s-up local-k8s-down local-k8s-status local-k8s-logs +.PHONY: fmt lint test test-rust test-frontend typecheck deps lockfile lint-specs +.PHONY: check-fmt markdownlint markdownlint-docs mermaid-lint nixie yamllint audit +.PHONY: lint-rust lint-frontend lint-asyncapi lint-openapi lint-makefile lint-actions +.PHONY: lint-architecture workspace-sync workspace-sync: ./scripts/sync_workspace_members.py @@ -78,6 +79,18 @@ docker-up: docker-down: cd deploy && docker compose down +local-k8s-up: + uv run scripts/local_k8s.py up + +local-k8s-down: + uv run scripts/local_k8s.py down + +local-k8s-status: + uv run scripts/local_k8s.py status + +local-k8s-logs: + uv run scripts/local_k8s.py logs + fmt: workspace-sync cargo fmt --all $(call exec_or_bunx,biome,format --write frontend-pwa packages,@biomejs/biome@$(BIOME_VERSION)) diff --git a/backend/src/domain/health.rs b/backend/src/domain/health.rs new file mode 100644 index 000000000..80f5348e3 --- /dev/null +++ b/backend/src/domain/health.rs @@ -0,0 
+1,214 @@ +//! Domain health observations for process liveness and readiness. +//! +//! The health model is intentionally small: it records whether the process +//! should be considered alive and whether it is ready to receive traffic. HTTP, +//! Kubernetes, Docker, and Helm adapters map these domain observations to their +//! own protocols. + +use std::sync::atomic::{AtomicBool, Ordering}; + +use crate::domain::ports::HealthObserver; + +/// Health status reported by a domain health observation. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum HealthStatus { + /// The observed capability is available. + Healthy, + /// The observed capability is unavailable. + Unhealthy, +} + +impl HealthStatus { + /// Return whether this status represents a healthy observation. + /// + /// # Examples + /// + /// ``` + /// use backend::domain::HealthStatus; + /// + /// assert!(HealthStatus::Healthy.is_healthy()); + /// assert!(!HealthStatus::Unhealthy.is_healthy()); + /// ``` + pub fn is_healthy(self) -> bool { + matches!(self, Self::Healthy) + } +} + +/// A liveness or readiness observation owned by the domain layer. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct HealthObservation { + status: HealthStatus, +} + +impl HealthObservation { + /// Build a healthy observation. + /// + /// # Examples + /// + /// ``` + /// use backend::domain::HealthObservation; + /// + /// assert!(HealthObservation::healthy().is_healthy()); + /// ``` + pub fn healthy() -> Self { + Self { + status: HealthStatus::Healthy, + } + } + + /// Build an unhealthy observation. + /// + /// # Examples + /// + /// ``` + /// use backend::domain::HealthObservation; + /// + /// assert!(!HealthObservation::unhealthy().is_healthy()); + /// ``` + pub fn unhealthy() -> Self { + Self { + status: HealthStatus::Unhealthy, + } + } + + /// Return this observation's status. + pub fn status(self) -> HealthStatus { + self.status + } + + /// Return whether this observation is healthy. 
+ pub fn is_healthy(self) -> bool { + self.status.is_healthy() + } +} + +/// Shared process health state used by runtime adapters. +/// +/// New instances start live but not ready. The server composition root marks +/// readiness once the HTTP listener has been constructed. +pub struct ProcessHealth { + ready: AtomicBool, + live: AtomicBool, +} + +impl Default for ProcessHealth { + fn default() -> Self { + Self { + ready: AtomicBool::new(false), + live: AtomicBool::new(true), + } + } +} + +impl ProcessHealth { + /// Create health state starting live but not ready. + /// + /// # Examples + /// + /// ``` + /// use backend::domain::ProcessHealth; + /// use backend::domain::ports::HealthObserver; + /// + /// let health = ProcessHealth::new(); + /// assert!(health.observe_liveness().is_healthy()); + /// assert!(!health.observe_readiness().is_healthy()); + /// ``` + pub fn new() -> Self { + Self::default() + } + + /// Mark the process as ready to serve traffic. + pub fn mark_ready(&self) { + self.ready.store(true, Ordering::Release); + } + + /// Mark the process as not ready to serve traffic. + pub fn mark_not_ready(&self) { + self.ready.store(false, Ordering::Release); + } + + /// Mark the process unhealthy so liveness checks fail. + pub fn mark_unhealthy(&self) { + self.live.store(false, Ordering::Release); + } + + fn observation_from(is_healthy: bool) -> HealthObservation { + if is_healthy { + HealthObservation::healthy() + } else { + HealthObservation::unhealthy() + } + } +} + +impl HealthObserver for ProcessHealth { + fn observe_liveness(&self) -> HealthObservation { + Self::observation_from(self.live.load(Ordering::Acquire)) + } + + fn observe_readiness(&self) -> HealthObservation { + Self::observation_from(self.ready.load(Ordering::Acquire)) + } +} + +#[cfg(test)] +mod tests { + //! Tests for domain health observations and state transitions. 
+ + use super::{HealthObservation, HealthStatus, ProcessHealth}; + use crate::domain::ports::HealthObserver; + use rstest::{fixture, rstest}; + + #[fixture] + fn health() -> ProcessHealth { + ProcessHealth::new() + } + + #[rstest] + fn default_health_starts_live_but_not_ready(health: ProcessHealth) { + assert_eq!( + health.observe_liveness().status(), + HealthStatus::Healthy, + "process should start live" + ); + assert_eq!( + health.observe_readiness().status(), + HealthStatus::Unhealthy, + "process should not start ready before runtime initialisation" + ); + } + + #[rstest] + fn marking_ready_makes_readiness_healthy(health: ProcessHealth) { + health.mark_ready(); + + assert!(health.observe_readiness().is_healthy()); + } + + #[rstest] + fn marking_not_ready_makes_readiness_unhealthy(health: ProcessHealth) { + health.mark_ready(); + health.mark_not_ready(); + + assert!(!health.observe_readiness().is_healthy()); + } + + #[rstest] + fn marking_unhealthy_makes_liveness_unhealthy(health: ProcessHealth) { + health.mark_unhealthy(); + + assert!(!health.observe_liveness().is_healthy()); + } + + #[rstest] + #[case(HealthObservation::healthy(), HealthStatus::Healthy, true)] + #[case(HealthObservation::unhealthy(), HealthStatus::Unhealthy, false)] + fn observations_report_status_and_predicate( + #[case] observation: HealthObservation, + #[case] expected_status: HealthStatus, + #[case] expected_healthy: bool, + ) { + assert_eq!(observation.status(), expected_status); + assert_eq!(observation.is_healthy(), expected_healthy); + } +} diff --git a/backend/src/domain/mod.rs b/backend/src/domain/mod.rs index bcfe9eac2..8f7e3dc39 100644 --- a/backend/src/domain/mod.rs +++ b/backend/src/domain/mod.rs @@ -66,6 +66,7 @@ pub mod er_diagram; pub mod error; #[cfg(feature = "example-data")] pub mod example_data; +pub mod health; pub mod idempotency; pub mod interest_theme; pub mod localization; @@ -111,6 +112,7 @@ pub use self::er_diagram::{ pub use self::error::{Error, ErrorCode, 
ErrorValidationError}; #[cfg(feature = "example-data")] pub use self::example_data::{ExampleDataSeedOutcome, ExampleDataSeeder, ExampleDataSeedingError}; +pub use self::health::{HealthObservation, HealthStatus, ProcessHealth}; pub use self::idempotency::{ IdempotencyConfig, IdempotencyKey, IdempotencyKeyValidationError, IdempotencyLookupQuery, IdempotencyLookupResult, IdempotencyRecord, MutationType, ParseMutationTypeError, PayloadHash, diff --git a/backend/src/domain/ports/health_observer.rs b/backend/src/domain/ports/health_observer.rs new file mode 100644 index 000000000..35b9793a9 --- /dev/null +++ b/backend/src/domain/ports/health_observer.rs @@ -0,0 +1,12 @@ +//! Domain port for observing runtime health. + +use crate::domain::HealthObservation; + +/// Observes process health without leaking adapter protocols into the domain. +pub trait HealthObserver { + /// Report whether the process should be considered alive. + fn observe_liveness(&self) -> HealthObservation; + + /// Report whether the process is ready to receive traffic. 
+ fn observe_readiness(&self) -> HealthObservation; +} diff --git a/backend/src/domain/ports/mod.rs b/backend/src/domain/ports/mod.rs index d69c35809..13801b5e0 100644 --- a/backend/src/domain/ports/mod.rs +++ b/backend/src/domain/ports/mod.rs @@ -12,6 +12,7 @@ mod enrichment_job_metrics; mod enrichment_provenance_repository; mod example_data_runs_repository; mod example_data_seed_repository; +mod health_observer; mod idempotency_metrics; mod idempotency_repository; mod login_service; @@ -92,6 +93,7 @@ pub use example_data_seed_repository::{ ExampleDataSeedRepository, ExampleDataSeedRepositoryError, ExampleDataSeedRequest, ExampleDataSeedUser, }; +pub use health_observer::HealthObserver; pub use idempotency_metrics::{ IdempotencyMetricLabels, IdempotencyMetrics, IdempotencyMetricsError, NoOpIdempotencyMetrics, }; diff --git a/backend/src/inbound/http/health.rs b/backend/src/inbound/http/health.rs index e91f21531..7c098341c 100644 --- a/backend/src/inbound/http/health.rs +++ b/backend/src/inbound/http/health.rs @@ -1,61 +1,44 @@ //! Health endpoints: liveness & readiness probes for orchestration and load balancers. //! Document endpoints in OpenAPI via Utoipa. use actix_web::{HttpResponse, get, http::header, web}; -use std::sync::atomic::{AtomicBool, Ordering}; +use serde::Serialize; +use std::collections::BTreeMap; -/// Shared health state for readiness and liveness checks. -/// Track readiness and whether the process should report itself as alive to orchestrators. -pub struct HealthState { - ready: AtomicBool, - live: AtomicBool, -} +use crate::domain::ProcessHealth; +use crate::domain::ports::HealthObserver; -impl Default for HealthState { - fn default() -> Self { - Self { - ready: AtomicBool::new(false), - live: AtomicBool::new(true), - } - } -} - -impl HealthState { - /// Create a new health state starting as not ready but live. - pub fn new() -> Self { - Self::default() - } +/// Backwards-compatible name for the domain-owned process health state. 
+pub type HealthState = ProcessHealth; - /// Mark the service as ready. - pub fn mark_ready(&self) { - self.ready.store(true, Ordering::Release); - } - - /// Flag the service as unhealthy so liveness checks fail fast during shutdown. - pub fn mark_unhealthy(&self) { - self.live.store(false, Ordering::Release); - } +#[derive(Serialize)] +struct HealthProbeCheck { + status: &'static str, +} - /// Return readiness state. - pub fn is_ready(&self) -> bool { - self.ready.load(Ordering::Acquire) - } +#[derive(Serialize)] +struct HealthProbeBody { + status: &'static str, + checks: BTreeMap<&'static str, HealthProbeCheck>, +} - /// Return liveness state. When false, liveness probes emit 503 to trigger restarts. - pub fn is_alive(&self) -> bool { - self.live.load(Ordering::Acquire) +impl HealthProbeBody { + fn new(check_name: &'static str, probe_ok: bool) -> Self { + let status = if probe_ok { "pass" } else { "fail" }; + let checks = BTreeMap::from([(check_name, HealthProbeCheck { status })]); + Self { status, checks } } +} - fn probe_response(probe_ok: bool) -> HttpResponse { - let mut response = if probe_ok { - HttpResponse::Ok() - } else { - HttpResponse::ServiceUnavailable() - }; +fn probe_response(check_name: &'static str, probe_ok: bool) -> HttpResponse { + let mut response = if probe_ok { + HttpResponse::Ok() + } else { + HttpResponse::ServiceUnavailable() + }; - response - .insert_header((header::CACHE_CONTROL, "no-store")) - .finish() - } + response + .insert_header((header::CACHE_CONTROL, "no-store")) + .json(HealthProbeBody::new(check_name, probe_ok)) } /// Readiness probe. Return 200 when dependencies are initialised and the server can handle traffic; return 503 otherwise. @@ -75,7 +58,7 @@ impl HealthState { )] #[get("/health/ready")] pub async fn ready(state: web::Data) -> HttpResponse { - HealthState::probe_response(state.is_ready()) + probe_response("readiness", state.observe_readiness().is_healthy()) } /// Liveness probe. 
Return 200 while the process is marked alive and 503 once draining. @@ -99,5 +82,88 @@ pub async fn ready(state: web::Data) -> HttpResponse { )] #[get("/health/live")] pub async fn live(state: web::Data) -> HttpResponse { - HealthState::probe_response(state.is_alive()) + probe_response("liveness", state.observe_liveness().is_healthy()) +} + +#[cfg(test)] +mod tests { + //! Tests for HTTP health probe response mapping. + + use super::{HealthState, live, ready}; + use actix_web::http::StatusCode; + use actix_web::{App, http::header, test, web}; + use rstest::{fixture, rstest}; + + #[fixture] + fn health_state() -> web::Data { + web::Data::new(HealthState::new()) + } + + #[rstest] + #[actix_web::test] + async fn readiness_returns_unavailable_until_ready(health_state: web::Data) { + let app = test::init_service( + App::new() + .app_data(health_state.clone()) + .service(ready) + .service(live), + ) + .await; + let request = test::TestRequest::get().uri("/health/ready").to_request(); + + let response = test::call_service(&app, request).await; + + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + assert_eq!( + response.headers().get(header::CACHE_CONTROL), + Some(&header::HeaderValue::from_static("no-store")) + ); + } + + #[rstest] + #[actix_web::test] + async fn readiness_returns_ok_after_ready(health_state: web::Data) { + health_state.mark_ready(); + let app = test::init_service(App::new().app_data(health_state).service(ready)).await; + let request = test::TestRequest::get().uri("/health/ready").to_request(); + + let response = test::call_service(&app, request).await; + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!( + response.headers().get(header::CACHE_CONTROL), + Some(&header::HeaderValue::from_static("no-store")) + ); + } + + #[rstest] + #[actix_web::test] + async fn liveness_returns_ok_while_live(health_state: web::Data) { + let app = test::init_service(App::new().app_data(health_state).service(live)).await; + let request = 
test::TestRequest::get().uri("/health/live").to_request(); + + let response = test::call_service(&app, request).await; + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!( + response.headers().get(header::CACHE_CONTROL), + Some(&header::HeaderValue::from_static("no-store")) + ); + } + + #[rstest] + #[actix_web::test] + async fn liveness_returns_unavailable_after_unhealthy(health_state: web::Data) { + health_state.mark_unhealthy(); + let app = test::init_service(App::new().app_data(health_state).service(live)).await; + let request = test::TestRequest::get().uri("/health/live").to_request(); + + let response = test::call_service(&app, request).await; + + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + assert_eq!( + response.headers().get(header::CACHE_CONTROL), + Some(&header::HeaderValue::from_static("no-store")) + ); + } } diff --git a/backend/src/lib.rs b/backend/src/lib.rs index a881c5a03..5a0656abd 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -22,6 +22,6 @@ pub use middleware::Trace; /// Public OpenAPI surface used by Swagger UI and tooling. 
pub use doc::ApiDoc; +pub use domain::ProcessHealth; pub use inbound::http; pub use inbound::http::error::ApiResult; -pub use inbound::http::health::HealthState; diff --git a/backend/src/tests.rs b/backend/src/tests.rs index 58b4ddd72..905b0c429 100644 --- a/backend/src/tests.rs +++ b/backend/src/tests.rs @@ -8,6 +8,7 @@ use actix_web::cookie::{Key, SameSite}; use actix_web::web; #[cfg(feature = "metrics")] use actix_web_prom::PrometheusMetricsBuilder; +use backend::domain::ports::HealthObserver; use rstest::{fixture, rstest}; use std::net::SocketAddr; use tokio::time::{Duration, timeout}; @@ -115,7 +116,10 @@ async fn assert_server_marks_ready( health_state: web::Data, server_config: ServerConfig, ) { - assert!(!health_state.is_ready(), "state should start unready"); + assert!( + !health_state.observe_readiness().is_healthy(), + "state should start unready" + ); let server = match create_server(health_state.clone(), server_config) { Ok(server) => server, @@ -125,7 +129,7 @@ async fn assert_server_marks_ready( let server_join = actix_rt::spawn(server); assert!( - health_state.is_ready(), + health_state.observe_readiness().is_healthy(), "server creation should mark readiness" ); if let Err(error) = timeout(Duration::from_secs(5), handle.stop(true)).await { diff --git a/backend/tests/features/health_probes.feature b/backend/tests/features/health_probes.feature new file mode 100644 index 000000000..8c8d944aa --- /dev/null +++ b/backend/tests/features/health_probes.feature @@ -0,0 +1,28 @@ +Feature: Health probes + Orchestrators observe Wildside through stable liveness and readiness probes. 
+ + Scenario: Readiness rejects traffic before startup completes + Given a live Wildside runtime + When the readiness probe is requested + Then the probe response status is 503 + And the probe response is not cacheable + + Scenario: Readiness accepts traffic after startup completes + Given a live Wildside runtime + And the runtime is ready + When the readiness probe is requested + Then the probe response status is 200 + And the probe response is not cacheable + + Scenario: Liveness accepts traffic while the runtime is live + Given a live Wildside runtime + When the liveness probe is requested + Then the probe response status is 200 + And the probe response is not cacheable + + Scenario: Liveness fails after the runtime is marked unhealthy + Given a live Wildside runtime + And the runtime is unhealthy + When the liveness probe is requested + Then the probe response status is 503 + And the probe response is not cacheable diff --git a/backend/tests/health_probes_bdd.rs b/backend/tests/health_probes_bdd.rs new file mode 100644 index 000000000..f0b331833 --- /dev/null +++ b/backend/tests/health_probes_bdd.rs @@ -0,0 +1,183 @@ +//! Behaviour tests for externally observable health probes. 
+ +use actix_web::http::StatusCode; +use actix_web::http::header::{CACHE_CONTROL, CONTENT_TYPE, HeaderValue}; +use actix_web::{App, test, web}; +use backend::inbound::http::health::{HealthState, live, ready}; +use insta::assert_json_snapshot; +use rstest::{fixture, rstest}; +use rstest_bdd_macros::{given, scenario, then, when}; +use serde_json::{Value, json}; +use std::cell::RefCell; + +struct HealthProbeWorld { + health: web::Data, + response: RefCell>, +} + +struct ProbeResponse { + uri: &'static str, + status: StatusCode, + cache_control: Option, + content_type: Option, + body: Value, +} + +#[derive(Clone, Copy)] +enum ProbeSetup { + Default, + Ready, + Unhealthy, +} + +impl HealthProbeWorld { + fn new() -> Self { + Self { + health: web::Data::new(HealthState::new()), + response: RefCell::new(None), + } + } + + async fn request_probe(&self, uri: &'static str) { + let app = test::init_service( + App::new() + .app_data(self.health.clone()) + .service(ready) + .service(live), + ) + .await; + let request = test::TestRequest::get().uri(uri).to_request(); + let response = test::call_service(&app, request).await; + let status = response.status(); + let cache_control = response.headers().get(CACHE_CONTROL).cloned(); + let content_type = response.headers().get(CONTENT_TYPE).cloned(); + let body = test::read_body(response).await; + let body = serde_json::from_slice(body.as_ref()).expect("health probe JSON body"); + let response = ProbeResponse { + uri, + status, + cache_control, + content_type, + body, + }; + *self.response.borrow_mut() = Some(response); + } + + async fn request_readiness(&self) { + self.request_probe("/health/ready").await; + } + + async fn request_liveness(&self) { + self.request_probe("/health/live").await; + } + + fn with_response(&self, f: F) + where + F: FnOnce(&ProbeResponse), + { + let response = self.response.borrow(); + let response = response.as_ref().expect("probe response"); + f(response); + } +} + +impl ProbeResponse { + fn 
snapshot_name(&self) -> String { + let uri = self.uri.trim_start_matches('/').replace('/', "_"); + format!("health_probe_{uri}_{}", self.status.as_u16()) + } + + fn snapshot_payload(&self) -> Value { + json!({ + "uri": self.uri, + "status": self.status.as_u16(), + "headers": { + "cache-control": header_value_to_str(&self.cache_control), + "content-type": header_value_to_str(&self.content_type), + }, + "body": self.body, + }) + } +} + +fn header_value_to_str(header: &Option) -> Option<&str> { + header + .as_ref() + .map(|value| value.to_str().expect("health probe header value")) +} + +#[fixture] +fn world() -> HealthProbeWorld { + HealthProbeWorld::new() +} + +#[given("a live Wildside runtime")] +fn live_runtime(world: &HealthProbeWorld) { + let _ = world; +} + +#[given("the runtime is ready")] +fn runtime_is_ready(world: &HealthProbeWorld) { + world.health.mark_ready(); +} + +#[given("the runtime is unhealthy")] +fn runtime_is_unhealthy(world: &HealthProbeWorld) { + world.health.mark_unhealthy(); +} + +#[when("the readiness probe is requested")] +async fn readiness_probe_is_requested(world: &HealthProbeWorld) { + world.request_readiness().await; +} + +#[when("the liveness probe is requested")] +async fn liveness_probe_is_requested(world: &HealthProbeWorld) { + world.request_liveness().await; +} + +#[then("the probe response status is {status}")] +fn probe_response_status_is(world: &HealthProbeWorld, status: u16) { + let expected = StatusCode::from_u16(status).expect("valid feature status"); + world.with_response(|response| assert_eq!(response.status, expected)); +} + +#[then("the probe response is not cacheable")] +fn probe_response_is_not_cacheable(world: &HealthProbeWorld) { + world.with_response(|response| { + assert_eq!( + response.cache_control.as_ref(), + Some(&HeaderValue::from_static("no-store")) + ); + }); +} + +#[rstest] +#[case::readiness_not_ready("/health/ready", ProbeSetup::Default)] +#[case::readiness_ready("/health/ready", ProbeSetup::Ready)] 
+#[case::liveness_live("/health/live", ProbeSetup::Default)] +#[case::liveness_unhealthy("/health/live", ProbeSetup::Unhealthy)] +#[tokio::test(flavor = "current_thread")] +async fn health_probe_responses_match_snapshots( + #[case] uri: &'static str, + #[case] setup: ProbeSetup, +) { + let world = HealthProbeWorld::new(); + match setup { + ProbeSetup::Default => {} + ProbeSetup::Ready => world.health.mark_ready(), + ProbeSetup::Unhealthy => world.health.mark_unhealthy(), + } + + world.request_probe(uri).await; + + world.with_response(|response| { + assert_json_snapshot!(response.snapshot_name(), response.snapshot_payload()); + }); +} + +#[scenario(path = "tests/features/health_probes.feature")] +#[tokio::test(flavor = "current_thread")] +async fn health_probe_scenarios(world: HealthProbeWorld) { + drop(world); +} diff --git a/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_200.snap b/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_200.snap new file mode 100644 index 000000000..15b99dd47 --- /dev/null +++ b/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_200.snap @@ -0,0 +1,20 @@ +--- +source: backend/tests/health_probes_bdd.rs +expression: response.snapshot_payload() +--- +{ + "body": { + "checks": { + "liveness": { + "status": "pass" + } + }, + "status": "pass" + }, + "headers": { + "cache-control": "no-store", + "content-type": "application/json" + }, + "status": 200, + "uri": "/health/live" +} diff --git a/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_503.snap b/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_503.snap new file mode 100644 index 000000000..c32bf56d3 --- /dev/null +++ b/backend/tests/snapshots/health_probes_bdd__health_probe_health_live_503.snap @@ -0,0 +1,20 @@ +--- +source: backend/tests/health_probes_bdd.rs +expression: response.snapshot_payload() +--- +{ + "body": { + "checks": { + "liveness": { + "status": "fail" + } + }, + "status": "fail" + 
}, + "headers": { + "cache-control": "no-store", + "content-type": "application/json" + }, + "status": 503, + "uri": "/health/live" +} diff --git a/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_200.snap b/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_200.snap new file mode 100644 index 000000000..36e15dff4 --- /dev/null +++ b/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_200.snap @@ -0,0 +1,20 @@ +--- +source: backend/tests/health_probes_bdd.rs +expression: response.snapshot_payload() +--- +{ + "body": { + "checks": { + "readiness": { + "status": "pass" + } + }, + "status": "pass" + }, + "headers": { + "cache-control": "no-store", + "content-type": "application/json" + }, + "status": 200, + "uri": "/health/ready" +} diff --git a/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_503.snap b/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_503.snap new file mode 100644 index 000000000..9d67ffcca --- /dev/null +++ b/backend/tests/snapshots/health_probes_bdd__health_probe_health_ready_503.snap @@ -0,0 +1,20 @@ +--- +source: backend/tests/health_probes_bdd.rs +expression: response.snapshot_payload() +--- +{ + "body": { + "checks": { + "readiness": { + "status": "fail" + } + }, + "status": "fail" + }, + "headers": { + "cache-control": "no-store", + "content-type": "application/json" + }, + "status": 503, + "uri": "/health/ready" +} diff --git a/deploy/charts/wildside/templates/_helpers.tpl b/deploy/charts/wildside/templates/_helpers.tpl index 1da5e7d21..9b66e87c5 100644 --- a/deploy/charts/wildside/templates/_helpers.tpl +++ b/deploy/charts/wildside/templates/_helpers.tpl @@ -28,11 +28,26 @@ app.kubernetes.io/name: {{ include "wildside.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end -}} +{{- define "wildside.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (include "wildside.fullname" .) 
.Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{- define "wildside.effectiveSecretName" -}} +{{- $externalSecret := .Values.externalSecret | default dict -}} +{{- $externalSecretEnabled := $externalSecret.enabled | default false -}} +{{- $externalSecretTarget := $externalSecret.targetName | default "" -}} +{{- .Values.existingSecretName | default (ternary (default (include "wildside.fullname" .) $externalSecretTarget) "" $externalSecretEnabled) -}} +{{- end -}} + {{/* Validate that secretEnvFromKeys references an existing Secret when set. -- Requires .Values.existingSecretName when secretEnvFromKeys has entries. -- Optionally fails if the Secret is missing and allowMissingSecret is false. +- Requires an effective Secret name when secretEnvFromKeys has entries. +- Optionally validates the live Secret when validateExistingSecret is true. - Validates that listed keys exist within the referenced Secret. */}} {{- define "wildside.validateSecrets" -}} @@ -41,12 +56,22 @@ Validate that secretEnvFromKeys references an existing Secret when set. {{- fail (printf "secretEnvFromKeys must be a map, got %s" (typeOf $raw)) -}} {{- end -}} {{- $sec := $raw | default dict -}} -{{- $name := .Values.existingSecretName -}} +{{- $name := include "wildside.effectiveSecretName" . 
-}} {{- $allowMissing := .Values.allowMissingSecret | default true -}} +{{- $validateExistingSecret := .Values.validateExistingSecret | default false -}} {{- if and (gt (len $sec) 0) (not $name) -}} -{{- fail "existingSecretName is required when secretEnvFromKeys is set" -}} +{{- fail "existingSecretName (or externalSecret.targetName / externalSecret.enabled) is required when secretEnvFromKeys is set" -}} {{- end -}} -{{- if and (gt (len $sec) 0) $name -}} +{{- if gt (len $sec) 0 -}} +{{- range $k, $secretKey := $sec -}} +{{- if not (regexMatch "^[A-Za-z_][A-Za-z0-9_]*$" $k) -}} +{{- fail (printf "secretEnvFromKeys has invalid env var name %q (must match ^[A-Za-z_][A-Za-z0-9_]*$)" $k) -}} +{{- end -}} +{{- if not $secretKey -}} +{{- fail (printf "secretEnvFromKeys maps %q to an empty secret key" $k) -}} +{{- end -}} +{{- end -}} +{{- if and $validateExistingSecret $name -}} {{- if not (semverCompare ">=3.2.0" .Capabilities.HelmVersion.Version) -}} {{- fail "wildside.validateSecrets requires Helm >= 3.2.0" -}} {{- end -}} @@ -60,12 +85,6 @@ Validate that secretEnvFromKeys references an existing Secret when set. {{- $stringData := (get $found "stringData") | default dict -}} {{- $missing := list -}} {{- range $k, $secretKey := $sec -}} -{{- if not (regexMatch "^[A-Za-z_][A-Za-z0-9_]*$" $k) -}} -{{- fail (printf "secretEnvFromKeys has invalid env var name %q (must match ^[A-Za-z_][A-Za-z0-9_]*$)" $k) -}} -{{- end -}} -{{- if not $secretKey -}} -{{- fail (printf "secretEnvFromKeys maps %q to an empty secret key" $k) -}} -{{- end -}} {{- if not (or (hasKey $data $secretKey) (hasKey $stringData $secretKey)) -}} {{- $missing = append $missing $secretKey -}} {{- end -}} @@ -76,3 +95,4 @@ Validate that secretEnvFromKeys references an existing Secret when set. 
{{- end -}} {{- end -}} {{- end -}} +{{- end -}} diff --git a/deploy/charts/wildside/templates/deployment.yaml b/deploy/charts/wildside/templates/deployment.yaml index 72f91001c..c045ba286 100644 --- a/deploy/charts/wildside/templates/deployment.yaml +++ b/deploy/charts/wildside/templates/deployment.yaml @@ -25,15 +25,20 @@ spec: {{- include "wildside.labels" . | nindent 8 }} app.kubernetes.io/component: backend app.kubernetes.io/part-of: {{ .Chart.Name }} + {{- if or .Values.config .Values.sessionSecret.enabled }} annotations: - checksum/config: {{ if .Values.config }}{{ toYaml .Values.config | sha256sum }}{{ end }} + {{- if .Values.config }} + checksum/config: {{ toYaml .Values.config | sha256sum }} + {{- end }} {{- if .Values.sessionSecret.enabled }} {{- $secret := lookup "v1" "Secret" .Release.Namespace (.Values.sessionSecret.name | default "wildside-session-key") }} {{- if $secret }} checksum/session-secret: {{ $secret.data | toYaml | sha256sum }} {{- end }} {{- end }} + {{- end }} spec: + serviceAccountName: {{ include "wildside.serviceAccountName" . }} terminationGracePeriodSeconds: 30 {{- with .Values.podSecurityContext }} securityContext: @@ -66,7 +71,7 @@ spec: containerPort: 8080 {{- $cfg := .Values.config | default dict }} {{- $sec := .Values.secretEnvFromKeys | default dict }} - {{- $name := .Values.existingSecretName }} + {{- $name := include "wildside.effectiveSecretName" . }} {{- include "wildside.validateSecrets" . -}} {{- if or (gt (len $cfg) 0) (gt (len $sec) 0) }} env: @@ -84,23 +89,23 @@ spec: name: {{ $name | quote }} key: {{ $secretKey | quote }} optional: {{ $.Values.allowMissingSecret | default true }} - {{- end }} - {{- end }} + {{- end }} + {{- end }} {{- with .Values.securityContext }} securityContext: {{ toYaml . | replace "drop:\n -" "drop:\n -" | nindent 12 }} {{- end }} {{- with .Values.container.readinessProbe }} - readinessProbe: -{{- toYaml . | nindent 14 }} + readinessProbe: +{{- toYaml . 
| nindent 12 }} {{- end }} {{- with .Values.container.livenessProbe }} - livenessProbe: -{{- toYaml . | nindent 14 }} + livenessProbe: +{{- toYaml . | nindent 12 }} {{- end }} {{- with .Values.container.startupProbe }} - startupProbe: -{{- toYaml . | nindent 14 }} + startupProbe: +{{- toYaml . | nindent 12 }} {{- end }} {{- with .Values.resources }} resources: diff --git a/deploy/charts/wildside/templates/externalsecret.yaml b/deploy/charts/wildside/templates/externalsecret.yaml new file mode 100644 index 000000000..04f99a1eb --- /dev/null +++ b/deploy/charts/wildside/templates/externalsecret.yaml @@ -0,0 +1,18 @@ +{{- if .Values.externalSecret.enabled -}} +{{- $storeRef := .Values.externalSecret.secretStoreRef -}} +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: {{ include "wildside.fullname" . }} + labels: + {{- include "wildside.labels" . | nindent 4 }} +spec: + refreshInterval: {{ .Values.externalSecret.refreshInterval }} + secretStoreRef: + name: {{ $storeRef.name | quote }} + kind: {{ $storeRef.kind | quote }} + target: + name: {{ include "wildside.effectiveSecretName" . }} + data: + {{- toYaml .Values.externalSecret.data | nindent 4 }} +{{- end }} diff --git a/deploy/charts/wildside/templates/ingress.yaml b/deploy/charts/wildside/templates/ingress.yaml index a0650e4cf..6a0ce1eef 100644 --- a/deploy/charts/wildside/templates/ingress.yaml +++ b/deploy/charts/wildside/templates/ingress.yaml @@ -13,24 +13,36 @@ spec: {{- with .Values.ingress.className }} ingressClassName: {{ . 
| quote }} {{- end }} - {{- with .Values.ingress.hostname }} - {{- if $.Values.ingress.tlsSecretName }} + {{- $hosts := .Values.ingress.hosts | default list }} + {{- if and (eq (len $hosts) 0) .Values.ingress.hostname }} + {{- $hosts = list (dict "host" .Values.ingress.hostname "paths" (list (dict "path" "/" "pathType" "Prefix"))) }} + {{- end }} + {{- $tls := .Values.ingress.tls | default list }} + {{- if and (eq (len $tls) 0) .Values.ingress.tlsSecretName .Values.ingress.hostname }} + {{- $tls = list (dict "secretName" .Values.ingress.tlsSecretName "hosts" (list .Values.ingress.hostname)) }} + {{- end }} + {{- if $tls }} tls: - - hosts: - - {{ . | quote }} - secretName: {{ $.Values.ingress.tlsSecretName | quote }} + {{- toYaml $tls | nindent 4 }} {{- end }} + {{- if $hosts }} rules: - - host: {{ . | quote }} + {{- range $hostConfig := $hosts }} + - {{- with $hostConfig.host }} + host: {{ . | quote }} + {{- end }} http: paths: - - path: / - pathType: Prefix + {{- range $path := ($hostConfig.paths | default (list (dict "path" "/" "pathType" "Prefix"))) }} + - path: {{ $path.path | default "/" | quote }} + pathType: {{ $path.pathType | default "Prefix" }} backend: service: name: {{ include "wildside.fullname" $ }} port: name: {{ $.Values.service.portName | default "http" | quote }} + {{- end }} + {{- end }} {{- else }} # Leave host unset to apply the rule to all hosts rules: diff --git a/deploy/charts/wildside/templates/serviceaccount.yaml b/deploy/charts/wildside/templates/serviceaccount.yaml new file mode 100644 index 000000000..91adab2b2 --- /dev/null +++ b/deploy/charts/wildside/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "wildside.serviceAccountName" . }} + labels: + {{- include "wildside.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/deploy/charts/wildside/values.local.yaml b/deploy/charts/wildside/values.local.yaml new file mode 100644 index 000000000..25ddc7ca8 --- /dev/null +++ b/deploy/charts/wildside/values.local.yaml @@ -0,0 +1,25 @@ +image: + repository: wildside-backend + tag: local + pullPolicy: IfNotPresent + +config: + APP_ENV: "local-k8s" + +ingress: + enabled: true + className: traefik + hosts: + - host: "" + paths: + - path: / + pathType: Prefix + +allowMissingSecret: true +validateExistingSecret: false + +externalSecret: + enabled: false + +serviceAccount: + create: true diff --git a/deploy/charts/wildside/values.schema.json b/deploy/charts/wildside/values.schema.json index 5ae8c6307..31fb7a1f9 100644 --- a/deploy/charts/wildside/values.schema.json +++ b/deploy/charts/wildside/values.schema.json @@ -2,42 +2,68 @@ "$schema": "https://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": false, + "definitions": { + "probe": { + "type": "object", + "required": ["httpGet"], + "properties": { + "httpGet": { + "type": "object", + "required": ["path", "port"], + "properties": { + "path": { "type": "string", "pattern": "^/" }, + "port": { "type": ["integer", "string"] } + }, + "additionalProperties": false + }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 }, + "successThreshold": { "type": "integer", "minimum": 1 } + }, + "additionalProperties": false + }, + "ingressPath": { + "type": "object", + "properties": { + "path": { "type": "string", "minLength": 1 }, + "pathType": { "type": "string", "enum": ["Exact", "Prefix", "ImplementationSpecific"] } + }, + "additionalProperties": false + }, + "ingressHost": { + "type": "object", + "properties": { + "host": { "type": "string" }, + "paths": { + "type": "array", + "minItems": 1, + 
"items": { "$ref": "#/definitions/ingressPath" } + } + }, + "additionalProperties": false + } + }, "properties": { "replicaCount": { "type": "integer", "minimum": 1 }, "nameOverride": { "type": "string" }, "fullnameOverride": { "type": "string" }, "image": { "type": "object", + "required": ["repository"], "properties": { - "repository": { "type": "string" }, + "repository": { "type": "string", "minLength": 1 }, "tag": { "type": "string" }, - "pullPolicy": { - "type": "string", - "enum": ["Always", "IfNotPresent", "Never"] - } + "pullPolicy": { "type": "string", "enum": ["Always", "IfNotPresent", "Never"] } }, - "required": ["repository"], "additionalProperties": false }, "resources": { "type": "object", "properties": { - "requests": { - "type": "object", - "properties": { - "cpu": { "type": "string" }, - "memory": { "type": "string" } - }, - "additionalProperties": false - }, - "limits": { - "type": "object", - "properties": { - "cpu": { "type": "string" }, - "memory": { "type": "string" } - }, - "additionalProperties": false - } + "requests": { "type": "object", "additionalProperties": { "type": "string" } }, + "limits": { "type": "object", "additionalProperties": { "type": "string" } } }, "additionalProperties": false }, @@ -45,29 +71,20 @@ "type": "object", "properties": { "enabled": { "type": "boolean" }, - "minAvailable": { "type": "integer" }, - "maxUnavailable": { "type": "integer" } + "minAvailable": { "type": "integer", "minimum": 0 }, + "maxUnavailable": { "type": "integer", "minimum": 0 } }, - "allOf": [ - { - "not": { - "required": ["minAvailable", "maxUnavailable"] - } - } - ], + "allOf": [{ "not": { "required": ["minAvailable", "maxUnavailable"] } }], "additionalProperties": false }, "service": { "type": "object", "properties": { - "type": { - "type": "string", - "enum": ["ClusterIP", "NodePort", "LoadBalancer"] - }, - "portName": { "type": "string" }, + "type": { "type": "string", "enum": ["ClusterIP", "NodePort", "LoadBalancer"] }, + "portName": { 
"type": "string", "minLength": 1 }, "port": { "type": "integer", "minimum": 1, "maximum": 65535 }, "targetPort": { "type": ["integer", "string"] }, - "annotations": { "type": "object" } + "annotations": { "type": "object", "additionalProperties": { "type": "string" } } }, "additionalProperties": false }, @@ -75,13 +92,10 @@ "type": "object", "properties": { "enabled": { "type": "boolean" }, - "minReplicas": { "type": "integer" }, - "maxReplicas": { "type": "integer" }, - "targetCPUUtilizationPercentage": { "type": "integer" }, - "behavior": { - "type": "object", - "additionalProperties": false - } + "minReplicas": { "type": "integer", "minimum": 1 }, + "maxReplicas": { "type": "integer", "minimum": 1 }, + "targetCPUUtilizationPercentage": { "type": "integer", "minimum": 1 }, + "behavior": { "type": "object" } }, "additionalProperties": false }, @@ -90,50 +104,91 @@ "properties": { "enabled": { "type": "boolean" }, "className": { "type": "string" }, - "hostname": { - "type": "string", - "oneOf": [{ "const": "" }, { "format": "hostname", "minLength": 1 }] + "hostname": { "type": "string" }, + "hosts": { + "type": "array", + "items": { "$ref": "#/definitions/ingressHost" } }, - "tlsSecretName": { - "type": "string", - "oneOf": [{ "const": "" }, { "minLength": 1 }] + "tlsSecretName": { "type": "string" }, + "tls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "secretName": { "type": "string", "minLength": 1 }, + "hosts": { "type": "array", "items": { "type": "string" } } + }, + "required": ["secretName", "hosts"], + "additionalProperties": false + } }, - "annotations": { - "type": "object", - "additionalProperties": { "type": "string" } - } + "annotations": { "type": "object", "additionalProperties": { "type": "string" } } }, - "allOf": [ - { - "if": { "properties": { "enabled": { "const": true } } }, - "then": { "required": ["hostname"] } - } - ] + "additionalProperties": false }, "config": { "type": "object", - "propertyNames": { - "type": 
"string", - "pattern": "^[A-Za-z0-9._-]{1,253}$" - }, - "patternProperties": { - "^[A-Za-z_][A-Za-z0-9_]*$": { - "type": ["string", "number", "boolean"] - } - }, - "additionalProperties": false + "propertyNames": { "type": "string", "pattern": "^[A-Za-z_][A-Za-z0-9_]*$" }, + "additionalProperties": { "type": ["string", "number", "boolean"] } }, "existingSecretName": { "type": "string" }, "secretEnvFromKeys": { "type": "object", - "propertyNames": { "type": "string", "pattern": "^[A-Z_][A-Z0-9_]*$" }, + "propertyNames": { "type": "string", "pattern": "^[A-Za-z_][A-Za-z0-9_]*$" }, "additionalProperties": { "type": "string", "minLength": 1 } }, "allowMissingSecret": { "type": "boolean", "default": true }, + "validateExistingSecret": { "type": "boolean", "default": false }, + "externalSecret": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "refreshInterval": { "type": "string", "minLength": 1 }, + "secretStoreRef": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "kind": { "type": "string", "enum": ["SecretStore", "ClusterSecretStore"] } + }, + "required": ["name", "kind"], + "additionalProperties": false + }, + "targetName": { "type": "string" }, + "data": { "type": "array", "items": { "type": "object" } } + }, + "required": ["enabled", "refreshInterval", "secretStoreRef", "data"], + "additionalProperties": false, + "if": { "properties": { "enabled": { "const": true } }, "required": ["enabled"] }, + "then": { + "properties": { + "secretStoreRef": { + "properties": { "name": { "minLength": 1 } } + }, + "data": { "minItems": 1 } + } + } + }, "affinity": { "type": "object" }, "podSecurityContext": { "type": "object" }, "securityContext": { "type": "object" }, - "container": { "type": "object" }, + "container": { + "type": "object", + "properties": { + "livenessProbe": { "$ref": "#/definitions/probe" }, + "readinessProbe": { "$ref": "#/definitions/probe" }, + "startupProbe": { "$ref": "#/definitions/probe" } + }, + 
"additionalProperties": false + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { "type": "boolean" }, + "name": { "type": "string" }, + "annotations": { "type": "object", "additionalProperties": { "type": "string" } } + }, + "additionalProperties": false + }, "sessionSecret": { "type": "object", "properties": { @@ -143,28 +198,8 @@ "mountPath": { "type": "string", "minLength": 1 } }, "additionalProperties": false, - "if": { - "properties": { "enabled": { "const": true } }, - "required": ["enabled"] - }, - "then": { - "required": ["name", "keyName", "mountPath"] - } - } - }, - "allOf": [ - { - "if": { - "properties": { - "secretEnvFromKeys": { "minProperties": 1 } - } - }, - "then": { - "required": ["existingSecretName"], - "properties": { - "existingSecretName": { "type": "string", "minLength": 1 } - } - } + "if": { "properties": { "enabled": { "const": true } }, "required": ["enabled"] }, + "then": { "required": ["name", "keyName", "mountPath"] } } - ] + } } diff --git a/deploy/charts/wildside/values.yaml b/deploy/charts/wildside/values.yaml index f76698831..5e34a4f81 100644 --- a/deploy/charts/wildside/values.yaml +++ b/deploy/charts/wildside/values.yaml @@ -38,7 +38,9 @@ ingress: enabled: false className: "" hostname: "" + hosts: [] tlsSecretName: "" + tls: [] annotations: {} # Non-secret configuration. Keys become environment variables via a ConfigMap. @@ -49,8 +51,20 @@ config: existingSecretName: "" # Allow rendering to proceed even if the Secret is missing; set to false to fail. allowMissingSecret: true +# Validate existingSecretName against the live cluster. Leave false for GitOps +# and offline renders. +validateExistingSecret: false secretEnvFromKeys: {} +externalSecret: + enabled: false + refreshInterval: 1h + secretStoreRef: + name: "" + kind: ClusterSecretStore + targetName: "" + data: [] + # Session signing key secret configuration for zero-downtime key rotation. 
# When enabled, the secret is mounted as a file at mountPath/session_key. # See docs/runbooks/session-key-rotation.md for the rotation procedure. @@ -99,8 +113,13 @@ container: failureThreshold: 3 startupProbe: httpGet: - path: /health/ready + path: /health/live port: http periodSeconds: 5 timeoutSeconds: 2 failureThreshold: 12 + +serviceAccount: + create: true + name: "" + annotations: {} diff --git a/deploy/docker/backend.Dockerfile b/deploy/docker/backend.Dockerfile index dae821e2a..80802c164 100644 --- a/deploy/docker/backend.Dockerfile +++ b/deploy/docker/backend.Dockerfile @@ -1,48 +1,52 @@ -ARG RUST_VERSION=1.79 -FROM rust:${RUST_VERSION}-alpine AS build +ARG RUST_VERSION=1.90.0 +FROM rust:${RUST_VERSION}-bookworm AS build -ARG BUILD_BASE_VERSION=0.5-r3 -ARG MUSL_DEV_VERSION=1.2.4-r3 -ARG PKGCONF_VERSION=1.9.5-r0 -ARG OPENSSL_DEV_VERSION=3.1.8-r0 - -RUN apk add --no-cache \ - build-base=${BUILD_BASE_VERSION} \ - musl-dev=${MUSL_DEV_VERSION} \ - pkgconf=${PKGCONF_VERSION} \ - openssl-dev=${OPENSSL_DEV_VERSION} && \ - rustup target add x86_64-unknown-linux-musl -ENV OPENSSL_STATIC=1 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + libpq-dev \ + libsqlite3-dev \ + libssl-dev \ + pkg-config \ + protobuf-compiler && \ + rm -rf /var/lib/apt/lists/* WORKDIR /app -# Cache dependencies independently from source to speed up rebuilds. -COPY Cargo.toml Cargo.lock backend/Cargo.toml ./ -RUN cargo fetch --locked --manifest-path backend/Cargo.toml -COPY backend/ backend/ -RUN cargo build --locked --release --target x86_64-unknown-linux-musl \ - --manifest-path backend/Cargo.toml +# Copy the workspace so path dependencies and git-locked dependencies resolve +# exactly as they do in local and CI builds. +COPY . . 
+RUN cargo fetch --locked --manifest-path backend/Cargo.toml && \ + cargo build --locked --release --bin backend --manifest-path backend/Cargo.toml -ARG ALPINE_VERSION=3.18 -FROM alpine:${ALPINE_VERSION} AS runtime +FROM debian:bookworm-slim AS runtime -ARG CURL_VERSION=8.12.1-r0 -ARG CERTS_VERSION=20241121-r1 -RUN apk add --no-cache \ - curl=${CURL_VERSION} \ - ca-certificates=${CERTS_VERSION} && \ - adduser -D -u 1000 app +ARG APP_UID=10001 +ARG APP_GID=10001 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + libpq5 \ + libsqlite3-0 \ + libssl3 && \ + rm -rf /var/lib/apt/lists/* && \ + groupadd --system --gid "${APP_GID}" app && \ + useradd --system --uid "${APP_UID}" --gid "${APP_GID}" \ + --home-dir /srv --shell /usr/sbin/nologin app WORKDIR /srv -COPY --from=build --chown=1000:1000 /app/target/x86_64-unknown-linux-musl/release/backend /srv/app -USER app +COPY --from=build --chown=${APP_UID}:${APP_GID} /app/target/release/backend /srv/app +USER ${APP_UID}:${APP_GID} ARG HEALTHCHECK_PORT=8080 -ARG HEALTHCHECK_PATH=/health +ARG HEALTHCHECK_PATH=/health/live ENV HEALTHCHECK_PORT=${HEALTHCHECK_PORT} ENV HEALTHCHECK_PATH=${HEALTHCHECK_PATH} +ENV HOST=0.0.0.0 +ENV PORT=${HEALTHCHECK_PORT} +ENV RUST_LOG=info EXPOSE ${HEALTHCHECK_PORT} -ENV RUST_LOG=info # Basic liveness probe; override port or path with build args or env vars. HEALTHCHECK --interval=30s --timeout=5s --retries=3 CMD \ - curl -f http://localhost:${HEALTHCHECK_PORT}${HEALTHCHECK_PATH} || exit 1 + curl -fsS "http://127.0.0.1:${HEALTHCHECK_PORT}${HEALTHCHECK_PATH}" || exit 1 ENTRYPOINT ["/srv/app"] diff --git a/docs/backend-roadmap.md b/docs/backend-roadmap.md index 136467331..c13165e14 100644 --- a/docs/backend-roadmap.md +++ b/docs/backend-roadmap.md @@ -338,3 +338,7 @@ publish events and metrics through their ports only. Deployment automation, preview environment workflows, and operational runbooks are maintained in the Nile Valley repository. 
Coordinate roadmap changes with that repository when deployment or infrastructure behaviour needs updates. + +- [x] 7.1. Align Wildside with Nile Valley preview and GitOps workflows by + hardening the backend health contract, container image, Helm chart, and + repository-local k3d preview helper. diff --git a/docs/contents.md b/docs/contents.md index 6f2984a98..1802a7b44 100644 --- a/docs/contents.md +++ b/docs/contents.md @@ -13,6 +13,9 @@ developers._ - [Backend MVP architecture and observability](backend-design.md) – details monolithic backend and observability plan. _Audience: backend developers._ +- [Local k3d preview and Nile Valley integration design](local-k8s-preview-design.md) + – describes the backend health, container, Helm, and local preview contracts. + _Audience: backend developers and platform engineers._ - [Values class diagram](values-class-diagram.mmd) – Mermaid diagram of Helm chart values. _Audience: platform engineers._ @@ -92,7 +95,8 @@ - Infrastructure automation, GitOps workflows, and ephemeral preview environments are documented in the Nile Valley repository (`../../nile-valley`). This repository keeps the application code, container - images, and Helm chart that Nile Valley deploys. + images, Helm chart, and developer-local k3d preview helper that Nile Valley + deploys or exercises. ## Operational runbooks diff --git a/docs/developers-guide.md b/docs/developers-guide.md index 8d9f4dad4..8b70ab72b 100644 --- a/docs/developers-guide.md +++ b/docs/developers-guide.md @@ -25,6 +25,26 @@ All suites run through the same quality gateways: - `make lint` - `make test` +## Local Kubernetes preview + +Use the repository-local k3d preview when validating the backend image and Helm +chart before handing values to Nile Valley. The preview workflow is documented +in [Local k3d preview and Nile Valley integration +design](local-k8s-preview-design.md). 
+ +```bash +make local-k8s-up +make local-k8s-status +make local-k8s-logs +make local-k8s-down +``` + +The Makefile targets call `uv run scripts/local_k8s.py ...`. Keep helper logic +in `scripts/local_k8s/`, unit-test pure validation behaviour under +`scripts/local_k8s/unittests/`, and keep cluster creation idempotent. The +helper must fail before making changes when required tools such as Docker, +Helm, `k3d`, or `kubectl` are missing. + ## Front-end development The Wildside Progressive Web Application (PWA) lives under `frontend-pwa/`. diff --git a/docs/execplans/backend-nile-valley-integration.md b/docs/execplans/backend-nile-valley-integration.md new file mode 100644 index 000000000..cb21cdca5 --- /dev/null +++ b/docs/execplans/backend-nile-valley-integration.md @@ -0,0 +1,807 @@ +# Integrate Wildside with Nile Valley previews + +This ExecPlan (execution plan) is a living document. The sections +`Constraints`, `Tolerances`, `Risks`, `Progress`, `Surprises & Discoveries`, +`Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work +proceeds. + +Status: COMPLETE + +This plan was approved for implementation on 2026-05-21. + +## Purpose / big picture + +Wildside already has a partial deployment surface: an Actix Web runtime, +`/health/live` and `/health/ready` endpoints, a backend container image, and a +Helm chart under `deploy/charts/wildside`. The current surface is not yet fully +aligned with the Nile Valley preview and GitOps contract. The main gaps are +that health semantics live in the inbound HTTP adapter instead of a +domain-owned port, the chart lacks the full ExternalSecret and local preview +contract used by Corbusier and Nile Valley, and this repository has no local +`k3d` orchestration workflow. 
+ +After this change, a developer can run the Wildside backend as the production +HTTP entry point, build a non-root container image with stable health probes, +render and install a Nile Valley-compatible Helm chart, and use Makefile targets +to create, inspect, tail, and tear down a local `k3d` preview environment. A +reviewer can observe success by requesting `/health/live` and `/health/ready`, +rendering the Helm chart with local and GitOps-style values, and running the +full repository gates. + +Observable success criteria: + +- `backend/src/domain` owns a health observation port and default + implementation, with no Actix Web or Kubernetes types in the domain layer. +- `backend/src/inbound/http/health.rs` maps the domain health result to + Actix Web responses for `GET /health/live` and `GET /health/ready`. +- `backend/src/main.rs` and `backend/src/server/*` keep the HTTP server as the + Wildside runtime entry point. +- `deploy/docker/backend.Dockerfile` and the repository `.dockerignore` build a + release backend image, run it as a non-root user, expose the configured HTTP + port, and probe `/health/live` or `/health/ready` consistently. +- `deploy/charts/wildside` supports configurable ingress, non-secret config, + Secret-derived environment variables, session Secret mounting, and + ExternalSecret rendering compatible with Nile Valley. +- `scripts/local_k8s.py`, `scripts/local_k8s/*`, and `make local-k8s-*` + provide a Python, Cyclopts, and `uv` driven local `k3d` preview workflow. +- `docs/local-k8s-preview-design.md`, this ExecPlan, `docs/users-guide.md`, + `docs/developers-guide.md`, `docs/wildside-backend-architecture.md`, and + `docs/backend-roadmap.md` describe the user-facing and internal contracts. +- Focused `rstest` unit tests and `rstest-bdd` behavioural tests cover healthy + and unhealthy health observations plus relevant adapter and command-line + behaviour. 
+- `make check-fmt`, `make lint`, and `make test` pass, with command output + retained in `/tmp` logs. + +## Constraints + +- Do not implement this plan until the user explicitly approves it. +- Preserve the hexagonal dependency rule from the `hexagonal-architecture` + skill and `docs/wildside-backend-architecture.md`: dependencies point + inward, the domain defines ports, and adapters implement or consume them. +- Keep health domain code free of Actix Web, Kubernetes, Docker, Helm, or + environment-variable types. +- Keep the inbound HTTP adapter thin: place no business policy there, and map + only domain health observations to HTTP status codes and headers. +- Use the existing Actix Web runtime as the server entry point. Do not add a + second long-running process or sidecar-specific health server. +- Prefer extending the existing backend Dockerfile and Helm chart over adding + duplicate deployment assets, unless validation proves the existing assets are + beyond repair. +- Align with the Corbusier Nile Valley integration where it fits Wildside: + health endpoints, startup probe semantics, ExternalSecret support, + loopback-bound `k3d` ingress, Cyclopts CLI shape, and Makefile target names. +- Keep local preview automation repository-local and developer-focused. The + Nile Valley repository remains the owner of shared preview infrastructure and + GitOps automation. +- Use Makefile targets for validation. Run tests, lints, and format checks + sequentially, with `tee` writing logs under `/tmp`. +- Do not run sub-agent tests. Sub-agents may inspect and summarize, but the + coordinator owns all gate runs. +- Keep source files under 400 lines. Split new Python and Rust modules by + feature when needed. +- New Rust modules must begin with `//!` module documentation. Public Rust APIs + need Rustdoc with examples when the examples add useful information. +- Documentation uses en-GB-oxendict spelling and follows + `docs/documentation-style-guide.md`.
+- Update `docs/contents.md` when new long-lived documentation is added. +- Use `coderabbit review --agent` after each major implementation milestone and + resolve concerns before moving to the next milestone. +- Commit frequently after gated, atomic changes. Use file-based commit messages + through `git commit -F`, never `git commit -m`. + +## Tolerances + +- Scope tolerance: if implementation requires replacing Actix Web, splitting + the backend into multiple deployable services, or changing public API paths + beyond `/health/live` and `/health/ready`, stop and request approval. +- Architecture tolerance: if a domain module needs to import Actix Web, + Kubernetes, Helm, Docker, `plumbum`, or other infrastructure types, stop and + redesign the port boundary before continuing. +- Chart tolerance: if the Wildside chart cannot be made compatible with Nile + Valley without breaking existing `deploy/docker-compose.yml` or documented + chart values, stop and record options in the Decision Log. +- Local preview tolerance: if a full local preview needs cluster-admin actions + outside `k3d`, `kubectl`, Helm, CloudNativePG, or Valkey operator + installation, stop and ask whether that belongs in Nile Valley instead. +- Dependency tolerance: adding `cyclopts` and `plumbum` as inline `uv` script + dependencies is expected. Adding more than two new Rust production + dependencies or more than four Python dependencies requires explicit + approval. +- Test tolerance: if `rstest-bdd` cannot reasonably exercise a network or CLI + boundary without excessive runtime, add a narrower behavioural test and + document the reason. Do not add superficial BDD scenarios. +- Proof tolerance: no Verus, Kani, or proptest work is expected for this + feature because the planned logic is configuration, boundary mapping, and + orchestration rather than a broad state-space invariant. 
If implementation + introduces non-trivial port-allocation, state-transition, or retry invariants, + revisit this decision. +- Gate tolerance: after three repair loops on the same gate failure, stop, + record the failing command and log path, and ask for direction. +- Environment tolerance: if `k3d`, Docker, `kubectl`, Helm, `coderabbit`, or + local networking are unavailable, document the blocker and validate the + nearest render or unit-test substitute instead of faking success. + +## Risks + +- Risk: Wildside already has health endpoints, but their semantics are owned by + `backend/src/inbound/http/health.rs`. + Severity: medium. + Likelihood: high. + Mitigation: move policy into a domain-owned health module and keep the HTTP + adapter as response mapping only. + +- Risk: the current backend Dockerfile probes `/health`, while the server and + chart use `/health/live` and `/health/ready`. + Severity: high. + Likelihood: high. + Mitigation: standardize probes on `/health/live` for liveness/startup and + `/health/ready` for readiness, matching the Corbusier follow-up decision. + +- Risk: the Wildside chart currently resembles Nile Valley's generic + `example-app` chart more than Corbusier's hardened chart. + Severity: medium. + Likelihood: high. + Mitigation: port only the chart contract that Nile Valley expects: + ExternalSecret, schema validation, Secret lookup controls, service account, + ingress hosts, and stable probe schema. + +- Risk: local `k3d` orchestration may overlap with Nile Valley ownership. + Severity: medium. + Likelihood: medium. + Mitigation: keep the repository-local workflow as a developer preview that + builds Wildside's image and installs Wildside's chart, while documenting that + multi-application GitOps automation remains in Nile Valley. + +- Risk: a full local preview may be slow or flaky because it depends on Docker, + `k3d`, Kubernetes controllers, CloudNativePG, and Valkey. + Severity: medium. + Likelihood: medium. 
+ Mitigation: unit-test helper parsing and validation logic, use bounded waits, + surface clear preflight errors, and reserve end-to-end preview execution for + explicit local validation. + +- Risk: documentation requirements mention `docs/users-guide.md`, but this + repository currently uses component-specific user guides instead of that + global file. + Severity: low. + Likelihood: high. + Mitigation: create `docs/users-guide.md` as the user-facing entry point for + server and preview behaviour, and link it from `docs/contents.md`. + +## Relevant skills and documentation + +Use these skills during implementation: + +- `leta`: navigate code symbols, references, call hierarchy, and refactors. +- `rust-router`: route Rust design issues to smaller Rust skills as needed. +- `hexagonal-architecture`: keep health policy in the domain and transport + concerns in adapters. +- `execplans`: keep this plan current throughout implementation. +- `firecrawl-mcp`: refresh external prior art only when local context is + insufficient or external conventions may have changed. +- `commit-message`: commit with a file-based message after gated changes. + +Signpost these repository documents while working: + +- `AGENTS.md` +- `docs/wildside-backend-architecture.md` +- `docs/rust-testing-with-rstest-fixtures.md` +- `docs/rstest-bdd-users-guide.md` +- `docs/rust-doctest-dry-guide.md` +- `docs/complexity-antipatterns-and-refactoring-strategies.md` +- `docs/pg-embed-setup-unpriv-users-guide.md` +- `docs/documentation-style-guide.md` +- `docs/developers-guide.md` +- `docs/backend-roadmap.md` +- `docs/execplans/remove-ephemeral-previews.md` +- `docs/repository-structure.md` + +Planning used a Wyvern agent team and context pack +`wildside-nile-valley-planning` (`pk_tz64s5e4`) to share code references for +the existing health adapter, server bootstrap, and Helm values. + +## Prior art and external references + +Firecrawl was used to resolve gaps around Corbusier and Nile Valley. 
The +implementation should keep these references nearby but must still adapt them +to Wildside's existing structure. + +- Corbusier Makefile: + +- Corbusier local preview CLI: + +- Corbusier local preview orchestration: + +- Corbusier local preview config: + +- Corbusier chart values and ExternalSecret template: + + and + +- Nile Valley repository overview: + +- Nile Valley example chart values: + + +The Corbusier implementation records several useful decisions: use +`/health/live` for startup probes, keep local ingress bound to `127.0.0.1`, +retry `k3d` cluster creation on loopback port collisions, make Helm Secret +lookup opt-in for offline renders, and let ExternalSecret provide the effective +Secret name when `existingSecretName` is unset. + +## Current repository orientation + +The existing runtime and deployment surface is spread across these files: + +- `backend/src/main.rs` wires runtime configuration and starts the backend. +- `backend/src/server/mod.rs` builds the Actix Web application and marks the + current health state ready after binding the server. +- `backend/src/server/config.rs` and + `backend/src/server/state_builders.rs` own runtime configuration and service + composition. +- `backend/src/inbound/http/health.rs` currently owns both health state and + Actix Web endpoint mapping. +- `backend/src/doc.rs` registers health endpoints in OpenAPI. +- `deploy/docker/backend.Dockerfile` builds and runs the backend image. +- `deploy/docker-compose.yml` uses the backend health endpoint locally. +- `deploy/charts/wildside/*` contains the Helm chart, values, templates, and + schema. +- `Makefile` defines the quality gates and currently lacks `local-k8s-*` + targets. + +The implementation should first verify these files rather than assuming they +are absent. + +## Implementation plan + +### Milestone 0: Baseline and plan approval + +Do not edit implementation files in this milestone. 
Confirm the branch, check +for a clean or understood worktree, and keep this ExecPlan in `DRAFT` until the +user approves it. + +Run: + +```bash +git branch --show-current +git status --short +leta workspace info +``` + +Expected result: the branch is not `main`, no unrelated dirty changes are +mistaken for implementation work, and Leta reports this worktree as the active +workspace. + +Once the plan is approved, change `Status` to `IN PROGRESS`, record the +approval in `Progress`, and continue. + +### Milestone 1: Establish failing health tests + +Add focused tests before changing production health code. + +Create or extend tests so they prove: + +- a default health observer starts live but not ready; +- marking readiness makes readiness report healthy; +- marking liveness unhealthy makes liveness report unhealthy; +- the HTTP readiness route returns `200` only for ready observations; +- the HTTP liveness route returns `200` for live observations and `503` for + unhealthy observations; +- all health probe responses include `Cache-Control: no-store`. + +Use `rstest` for domain and adapter unit tests. Add `rstest-bdd` only where it +captures externally observable behaviour more clearly than a direct Actix test, +for example a feature describing probe responses across ready and not-ready +states. + +Expected initial result: at least one new test fails because health semantics +are not yet domain-owned. + +### Milestone 2: Move health semantics behind a domain port + +Introduce a domain health module and port without leaking infrastructure: + +- Add `backend/src/domain/health.rs` with health status/value types such as + `HealthStatus`, `HealthObservation`, and a default implementation suitable + for process readiness and liveness. +- Add or extend `backend/src/domain/ports/*` with a `HealthObserver` or + equivalent trait that returns domain-owned observations. +- Export the module through `backend/src/domain/mod.rs` and + `backend/src/domain/ports/mod.rs`. 
+- Refactor `backend/src/inbound/http/health.rs` so Actix Web receives an + injected health observer/state and maps it to HTTP only. +- Update `backend/src/server/mod.rs` and related wiring so readiness is marked + after the server is constructed, as today, but through the domain-owned + abstraction. +- Preserve OpenAPI registration in `backend/src/doc.rs`. + +The domain module may use standard library synchronization primitives if the +runtime needs shared mutable process state. It must not know about HTTP, +Kubernetes probes, or container health checks. + +Run the targeted health tests. Then run: + +```bash +action=health-unit +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +cargo test --manifest-path backend/Cargo.toml health 2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +``` + +Commit this milestone only after the targeted tests pass. Run +`coderabbit review --agent` and resolve concerns before continuing. + +### Milestone 3: Harden the container image + +Audit and update the container build surface: + +- Add a repository `.dockerignore` if one is absent. +- Update `deploy/docker/backend.Dockerfile` to use a current Rust toolchain + compatible with this repository. +- Keep the image multi-stage. +- Build the release backend binary with locked dependencies. +- Run as a non-root user with a stable numeric UID/GID. +- Install only runtime packages required by the final binary and health probe. +- Expose the server port, defaulting to `8080`. +- Set `RUST_LOG=info`. +- Configure the Docker health check to probe `/health/live` by default. +- Ensure the health path and port agree with Helm and docker-compose. + +Validate with a local image build when Docker is available: + +```bash +action=docker-build +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +docker build -f deploy/docker/backend.Dockerfile -t wildside-backend:local . 
2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +``` + +If Docker is unavailable, record the preflight failure and still run any +available Dockerfile lint or static validation. Commit the container changes +only after validation. Run `coderabbit review --agent` and resolve concerns. + +### Milestone 4: Align the Helm chart with Nile Valley + +Harden `deploy/charts/wildside` using Corbusier and Nile Valley as prior art. + +Required chart work: + +- Add or complete `templates/externalsecret.yaml`. +- Add `externalSecret.enabled`, `refreshInterval`, `secretStoreRef`, + `targetName`, and `data` values. +- Add `validateExistingSecret` so offline renders do not require a live + cluster lookup. +- Resolve the effective Secret name from `existingSecretName` or the + ExternalSecret target default. +- Keep `allowMissingSecret` behaviour explicit and schema-validated. +- Add or verify service account support if Nile Valley values expect it. +- Support ingress hosts, paths, annotations, class name, and TLS in a way that + covers both hostless local ingress and GitOps overlays. +- Use `/health/live` for liveness and startup probes, and `/health/ready` for + readiness. +- Make config checksum annotations conditional so empty config renders cleanly. +- Keep Pod Security Context and container security context non-root and + restricted. +- Update `values.schema.json` to reject malformed probe, Secret, ingress, and + ExternalSecret values. +- Add `values.local.yaml` if the local preview workflow needs local image and + hostless ingress overrides. 
+ +Validate with Helm: + +```bash +action=helm-template +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +helm lint deploy/charts/wildside 2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +helm template wildside deploy/charts/wildside 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +``` + +If a local values file is added, also render it: + +```bash +helm template wildside deploy/charts/wildside \ + --values deploy/charts/wildside/values.local.yaml \ + 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +``` + +Commit chart changes after the render checks pass. Run +`coderabbit review --agent` and resolve concerns. + +### Milestone 5: Add local `k3d` orchestration + +Add the developer preview workflow modelled on Corbusier but adapted to +Wildside's existing deployment assets. + +Create: + +- `scripts/local_k8s.py` as the Cyclopts CLI entry point using inline `uv` + metadata. +- `scripts/local_k8s/config.py` for default names and paths. +- `scripts/local_k8s/validation.py` for executable checks, port validation, and + local errors. +- `scripts/local_k8s/k3d.py` for cluster creation, deletion, image import, + kubeconfig environment, and loopback ingress-port discovery. +- `scripts/local_k8s/k8s.py` for namespace and Kubernetes helper operations. +- `scripts/local_k8s/deployment.py` for Docker build, Secret creation, Helm + install or upgrade, status, and logs. +- Optional `scripts/local_k8s/cnpg.py` and `scripts/local_k8s/valkey.py` if the + preview provisions PostgreSQL and Valkey locally rather than relying on + caller-provided URLs. +- Unit tests under the existing Python test layout, or a new `tests/` layout if + this repository has no Python script tests yet. + +The CLI must provide: + +- `up` +- `down` +- `status` +- `logs` + +The CLI should accept environment-variable overrides with a `WILDSIDE_` prefix +for cluster name, namespace, and ingress port. 
It should expose a `--skip-build` +option for `up` and `--follow` for `logs`. + +Add Makefile targets: + +- `local-k8s-up` +- `local-k8s-down` +- `local-k8s-status` +- `local-k8s-logs` + +The targets should call `uv run scripts/local_k8s.py ...`. The helper must +print clear preflight errors when `k3d`, `kubectl`, Helm, Docker, or required +controllers are unavailable. + +Validate the CLI without creating a cluster first: + +```bash +action=local-k8s-help +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +uv run scripts/local_k8s.py --help 2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +uv run scripts/local_k8s.py up --help 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +make local-k8s-status 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +``` + +When local tooling is available, validate the full preview: + +```bash +action=local-k8s-preview +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +make local-k8s-up 2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +make local-k8s-status 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +curl -fsS "http://127.0.0.1:${WILDSIDE_K3D_PORT:-<port>}/health/live" +make local-k8s-down 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +``` + +Replace `<port>` with the port printed by the CLI if no explicit port +was supplied. If a preview fails because local infrastructure is unavailable, +record the precise blocker and keep the non-cluster tests green. + +Commit local orchestration changes after validation. Run +`coderabbit review --agent` and resolve concerns. + +### Milestone 6: Documentation and roadmap + +Add and update documentation while the implementation details are fresh: + +- Add `docs/local-k8s-preview-design.md` covering the architecture of the + local preview workflow, Nile Valley boundaries, container contract, Helm + values, and expected operator dependencies. 
+- Add or update `docs/users-guide.md` with user-facing server behaviour: + health endpoints, container defaults, Helm deployment values, and local + preview commands. +- Update `docs/developers-guide.md` with internal conventions for health + ports, chart validation, and local preview helper maintenance. +- Update `docs/wildside-backend-architecture.md` to document the domain health + observation port and the inbound HTTP adapter mapping. +- Update `docs/repository-structure.md` if new script and chart files change + the repository layout. +- Update `docs/contents.md` with any new long-lived documents. +- Add an Architectural Decision Record only if implementation makes a + long-lived architectural choice not adequately captured by the design doc. + If needed, use the next `docs/adr-NNN-*.md` number after + `docs/adr-001-websockets-on-actix-ws.md`. +- Update `docs/backend-roadmap.md` by adding a deployment coordination task + under section 7 if no suitable task exists. Mark it done only after all + gates pass. + +Run documentation validation: + +```bash +action=docs +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" +make fmt 2>&1 | tee "/tmp/${action}-${project}-${branch}.out" +make markdownlint 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +make nixie 2>&1 | tee -a "/tmp/${action}-${project}-${branch}.out" +``` + +Commit documentation changes after validation. Run +`coderabbit review --agent` and resolve concerns. 
+ +### Milestone 7: Full gates and closeout + +Run the full repository gates sequentially: + +```bash +branch="$(git branch --show-current | tr '/ ' '--')" +project="$(basename "$(git rev-parse --show-toplevel)")" + +make check-fmt 2>&1 | tee "/tmp/check-fmt-${project}-${branch}.out" +make lint 2>&1 | tee "/tmp/lint-${project}-${branch}.out" +make test 2>&1 | tee "/tmp/test-${project}-${branch}.out" +``` + +If all gates pass, update this ExecPlan: + +- set `Status: COMPLETE`; +- complete the relevant `Progress` entries; +- record final gate evidence and log paths; +- fill in `Outcomes & Retrospective`; +- ensure the relevant roadmap entry is marked done. + +Commit the closeout documentation. Run a final `coderabbit review --agent` and +resolve concerns before declaring the work complete. + +## Test strategy + +Domain tests should cover health status values and transitions without Actix, +networking, Docker, or Kubernetes. Use `rstest` fixtures for repeated initial +state and status cases. + +Adapter tests should use Actix Web test utilities to exercise the health routes +with injected domain health observers. These tests prove status-code and header +mapping without binding a real socket. + +Behavioural tests with `rstest-bdd` should be used where they describe +externally observable contracts: readiness before and after startup, liveness +after unhealthy marking, and local preview CLI preflight behaviour if the CLI +surface is easier to understand as Given/When/Then scenarios. + +Python helper tests should cover pure parsing and validation: port bounds, +`k3d` JSON shape handling, loopback ingress extraction, command construction, +and missing-tool errors. Avoid requiring a live cluster for unit tests. + +End-to-end validation should run the local preview only when Docker, `k3d`, +`kubectl`, Helm, and local ports are available. The E2E proof is the rendered +or installed chart plus successful HTTP requests to the health endpoints. 
+ +No property tests, Kani harnesses, or Verus proofs are planned initially. If +implementation introduces non-trivial invariants over retry sequences, port +selection, or state transitions, add a Decision Log entry and expand the test +strategy before continuing. + +## Progress + +- [x] 2026-05-21: Loaded `leta`, `rust-router`, `hexagonal-architecture`, + `execplans`, `firecrawl-mcp`, and `commit-message` skills for planning. +- [x] 2026-05-21: Created a Leta workspace for this worktree and confirmed + `leta workspace info` resolves it. +- [x] 2026-05-21: Used a Wyvern agent team to inspect runtime structure, + testing constraints, roadmap conventions, and documentation touchpoints. +- [x] 2026-05-21: Created context pack + `wildside-nile-valley-planning` (`pk_tz64s5e4`) for agent-team code + references. +- [x] 2026-05-21: Used Firecrawl to inspect Corbusier and Nile Valley prior + art for Makefile targets, local `k3d` orchestration, chart values, + ExternalSecret support, and Nile Valley chart expectations. +- [x] 2026-05-21: Drafted this ExecPlan for approval. +- [x] 2026-05-21: Ran `make fmt`, `make markdownlint`, and + `make check-fmt` against the draft plan. +- [x] 2026-05-21: Attempted `coderabbit review --agent` twice for the planning + milestone; both attempts were blocked by the external CodeRabbit usage rate + limit before any review findings were returned. +- [x] 2026-05-21: Received explicit user approval to proceed with + implementation as set out in this ExecPlan. +- [x] 2026-05-21: Recorded approval and set `Status: IN PROGRESS`. +- [x] 2026-05-21: Established health unit and BDD coverage for domain state, + Actix probe mapping, unhealthy liveness, unready readiness, and + `Cache-Control: no-store`. +- [x] 2026-05-21: Moved health semantics into a domain-owned + `ProcessHealth` implementation and `HealthObserver` port, keeping the Actix + adapter as HTTP response mapping. 
+- [x] 2026-05-21: Ran health milestone gates successfully: + `make check-fmt`, `make lint`, and `make test`. +- [x] 2026-05-21: Committed the health milestone as + `4e3083f Move health observation into domain`. +- [x] 2026-05-21: Ran `coderabbit review --agent` for the health milestone; + CodeRabbit completed with zero findings. +- [x] 2026-05-21: Hardened the backend container image definition by moving + to an edition-2024-capable Rust builder, a Debian slim runtime, a non-root + UID/GID, explicit runtime libraries, `HOST`/`PORT` defaults, and + `/health/live` as the image liveness check. +- [x] 2026-05-21: Added a root `.dockerignore` so local build artefacts, + VCS metadata, frontend output, and dependency trees are excluded from image + contexts. +- [x] 2026-05-21: Ran container milestone gates successfully: + `make check-fmt`, `make lint`, and `make test`; the Docker image build + remains blocked locally because Docker is not installed in this environment. +- [x] 2026-05-21: Committed the container milestone as + `4b41354 Harden backend container image`. +- [x] 2026-05-21: Ran `coderabbit review --agent` for the container + milestone; CodeRabbit completed with zero findings. +- [x] 2026-05-21: Aligned the Helm chart with Nile Valley conventions: + ExternalSecret rendering, effective Secret name resolution, optional live + Secret validation, service account support, hostless/local ingress values, + schema validation, and `/health/live` startup probes. +- [x] 2026-05-21: Validated the Helm milestone with `helm lint`, + `helm template --kube-version 1.31.0`, local values rendering, and an + ExternalSecret render in `/tmp/helm-template-wildside-backend-nile-valley-integration.out`. +- [x] 2026-05-21: Committed the Helm milestone as + `66cf831 Align Helm chart with Nile Valley`. +- [x] 2026-05-21: Ran `coderabbit review --agent` for the Helm milestone; + CodeRabbit completed with zero findings. 
+- [x] 2026-05-21: Added local `k3d` orchestration scaffolding with a + Cyclopts CLI, Makefile targets, configuration, validation, k3d, + Kubernetes, and Helm deployment helpers. +- [x] 2026-05-21: Validated the local preview CLI help and pure Python + validation tests; `make local-k8s-status` now reports the expected local + blocker because `k3d` and `kubectl` are not installed in this environment. +- [x] 2026-05-21: Ran local preview milestone gates successfully: + `make check-fmt`, `make lint`, and `make test`. +- [x] 2026-05-21: Committed the local preview milestone as + `5aaf44f Add local k3d preview workflow`. +- [x] 2026-05-21: Ran `coderabbit review --agent` for the local preview + milestone; CodeRabbit completed with zero findings. +- [x] 2026-05-21: Updated design, user, developer, architecture, contents, + repository-structure, and roadmap docs for the local preview and Nile Valley + integration contracts. +- [x] 2026-05-21: Ran documentation validation successfully: `make fmt`, + `make markdownlint`, and `make nixie`. +- [x] 2026-05-21: Committed the documentation milestone as + `fea1a1d Document Nile Valley preview integration`. +- [x] 2026-05-21: Ran `coderabbit review --agent` for the documentation + milestone; CodeRabbit completed with zero findings. +- [x] 2026-05-21: Ran final gates successfully: `make check-fmt`, + `make lint`, and `make test`. +- [x] 2026-05-21: Closed this ExecPlan after all implementation, validation, + documentation, roadmap, and review requirements were satisfied except for + environment-blocked Docker/k3d execution. + +## Surprises & discoveries + +- Wildside already has `backend/src/inbound/http/health.rs` with + `/health/live` and `/health/ready`, so the implementation is a refactor and + hardening task rather than a fresh endpoint addition. 
+- Wildside already has `deploy/docker/backend.Dockerfile` and + `deploy/charts/wildside`, but the Docker health check currently targets + `/health` and the chart is missing Corbusier-style ExternalSecret support. +- No repository-local `k3d` preview workflow exists today. Existing docs say + preview infrastructure ownership moved to Nile Valley, so the new local + workflow must be documented as developer preview tooling rather than shared + GitOps ownership. +- The repository does not currently contain `docs/users-guide.md`, despite the + requested update. Creating it is part of this plan. +- `docs/contents.md` may contain stale references; documentation updates should + reconcile new links carefully without broad housekeeping. +- `coderabbit review --agent` can return exit code 0 while reporting a + recoverable rate-limit error in its JSON output. The planning milestone could + not obtain CodeRabbit findings after two attempts on 2026-05-21. +- `rstest-bdd` async scenarios need an explicit async test runtime. The health + probe BDD test uses `#[tokio::test(flavor = "current_thread")]` so async + Actix route tests do not attempt to start a nested runtime. +- Docker is not installed in this environment. The container milestone cannot + run + `docker build -f deploy/docker/backend.Dockerfile -t wildside-backend:local .` + here; validation must rely on static review and the repository gates until a + Docker-enabled host runs the image build. +- This Helm binary defaults `helm template` capabilities to Kubernetes v1.20, + while the chart declares `kubeVersion: >=1.26.0-0 <1.32.0-0`; Helm renders + need `--kube-version 1.31.0` in this environment. +- Local preview cluster validation cannot create or inspect a cluster in this + environment because `k3d` and `kubectl` are not installed. The CLI preflight + now reports that blocker concisely. +- `make nixie` runs `bun install`, which attempted to refresh `bun.lock` for + the unrelated `ip-address` override. 
That lockfile change was excluded from + the documentation milestone. + +## Decision Log + +- 2026-05-21: Treat this as a hardening and alignment project, not a greenfield + add. + Rationale: the current repository already contains an Actix runtime, health + routes, Dockerfile, and Helm chart. Replacing them wholesale would increase + risk and violate the local preference to extend existing patterns. + +- 2026-05-21: Use `/health/live` for liveness and startup probes and + `/health/ready` for readiness. + Rationale: this matches Kubernetes probe semantics and the Corbusier follow-up + decision discovered via Firecrawl. + +- 2026-05-21: Create a domain-owned health observation port before changing the + HTTP adapter. + Rationale: the requested architecture uses hexagonal boundaries, and health + policy currently lives in the inbound adapter. + +- 2026-05-21: Plan to create `docs/users-guide.md`. + Rationale: the user explicitly requested that file, and the repository does + not currently have a global users guide covering server and deployment + behaviour. + +- 2026-05-21: Do not plan Verus, Kani, or proptest work for the initial scope. + Rationale: the planned changes are boundary mapping and orchestration, not + a broad algorithmic state space. This decision must be revisited if the + implementation introduces retry or state invariants that merit stronger + verification. + +- 2026-05-21: Begin implementation with the domain health port and HTTP + adapter tests. + Rationale: this is the architecture-bearing change. Container, Helm, and + local preview work should depend on stable health semantics rather than the + current adapter-owned state. + +- 2026-05-21: Keep `backend::inbound::http::health::HealthState` as a type + alias for `backend::domain::ProcessHealth` during the refactor. + Rationale: existing server wiring and callers can keep their current import + path while the actual health semantics and state live in the domain layer. 
+ +- 2026-05-21: Use a Debian slim runtime image rather than continuing the + Alpine musl image. + Rationale: the backend depends on PostgreSQL, OpenSSL, and SQLite-linked + crates. A glibc runtime with explicit `libpq5`, `libssl3`, and + `libsqlite3-0` packages keeps the container build simpler and avoids the + brittle exact Alpine package pins that were already stale. + +- 2026-05-21: Keep live Secret lookup validation opt-in through + `validateExistingSecret`. + Rationale: GitOps, CI, and local preview renders must work without access to + the target cluster, while operators can still request a live lookup when + installing against a cluster that already contains the Secret. + +## Outcomes & Retrospective + +Implemented. + +Wildside now has a domain-owned health observation model and port, with Actix +Web probe handlers mapping that policy to `/health/live` and `/health/ready`. +The backend container image uses a multi-stage build, a Debian slim non-root +runtime, explicit runtime libraries, stable `HOST`/`PORT` defaults, and a +`/health/live` image health check. + +The Helm chart now supports Nile Valley-oriented deployment concerns: +ExternalSecret rendering, effective Secret name resolution, optional live +Secret validation, service account configuration, local and host-based ingress +forms, schema validation, and Kubernetes probes aligned with the runtime health +contract. + +The repository now provides `make local-k8s-up`, `make local-k8s-status`, +`make local-k8s-logs`, and `make local-k8s-down` targets backed by a +Cyclopts/`uv` Python helper. The helper preflights Docker, Helm, `k3d`, and +`kubectl`; builds and imports the local backend image; and installs the chart +with `values.local.yaml` when the required tools are present. 
+ +Documentation now covers user-facing server and preview behaviour in +`docs/users-guide.md`, the local preview and Nile Valley design in +`docs/local-k8s-preview-design.md`, and internal conventions in the developer, +architecture, repository-structure, contents, and roadmap documents. The +backend roadmap entry for Nile Valley preview and GitOps alignment is marked +done. + +Validation completed: + +- `make check-fmt` +- `make lint` +- `make test` +- `make fmt` +- `make markdownlint` +- `make nixie` +- `helm lint deploy/charts/wildside` +- `helm template deploy/charts/wildside --kube-version 1.31.0` +- local values and ExternalSecret Helm render checks +- local preview CLI help and Python unit tests +- CodeRabbit reviews for health, container, Helm, local preview, and + documentation milestones, all with zero findings + +Residual environment gaps: + +- Docker is not installed in this environment, so the backend image build could + not be executed here. +- `k3d` and `kubectl` are not installed in this environment, so the full local + preview cluster lifecycle could not be executed here. + +Both gaps are covered by preflight checks and documented as local environment +requirements. diff --git a/docs/local-k8s-preview-design.md b/docs/local-k8s-preview-design.md new file mode 100644 index 000000000..33ed02a3b --- /dev/null +++ b/docs/local-k8s-preview-design.md @@ -0,0 +1,109 @@ +# Local k3d preview and Nile Valley integration design + +## Purpose + +Wildside ships the application artefacts that Nile Valley preview and GitOps +workflows consume: the backend container image, Helm chart, and health contract. +Nile Valley remains responsible for shared cluster automation, environment +overlays, and cross-application GitOps reconciliation. This repository owns a +developer-focused local preview loop that proves the Wildside chart can install +into a small `k3d` cluster. + +## Runtime health contract + +The backend binary is the production runtime entry point. 
It starts the Actix +Web server and exposes two unauthenticated health endpoints: + +- `GET /health/live` reports process liveness. +- `GET /health/ready` reports whether startup completed and the service is + ready for traffic. + +Health semantics live in the domain layer through `ProcessHealth` and the +`HealthObserver` port. The HTTP adapter only maps domain observations to HTTP +status codes, cache headers, and a small JSON health envelope. This keeps +Kubernetes, Actix Web, and other transport concerns outside the domain. + +## Container image contract + +The backend image is built by `deploy/docker/backend.Dockerfile`. It uses a +multi-stage Rust build and a Debian slim runtime with explicit runtime +libraries. The runtime process runs as a non-root user and defaults to +`HOST=0.0.0.0` and `PORT=8080`. + +The image health check probes `/health/live` on the configured port. Kubernetes +readiness remains a chart concern and probes `/health/ready`. + +## Helm chart contract + +The Wildside chart under `deploy/charts/wildside` is the deployment interface +consumed by Nile Valley. It renders: + +- a Deployment and Service for the backend; +- a ConfigMap for non-secret environment values; +- Secret-derived environment variables through `secretEnvFromKeys`; +- optional `ExternalSecret` resources for external-secrets operators; +- optional service accounts, ingress, autoscaling, and disruption budgets. + +Secret validation is opt-in through `validateExistingSecret` so GitOps and +offline `helm template` runs do not require live cluster access. When +`externalSecret.enabled` is true and `existingSecretName` is unset, the +ExternalSecret target name becomes the effective Secret name used by the +Deployment. + +Use `deploy/charts/wildside/values.local.yaml` for the local preview. Nile +Valley should provide environment-specific values in its GitOps overlays. + +## Local preview workflow + +The local preview CLI is `scripts/local_k8s.py`. 
It uses `uv` inline script +metadata and a Cyclopts command surface: + +```bash +make local-k8s-up +make local-k8s-status +make local-k8s-logs +make local-k8s-down +``` + +`make local-k8s-up` validates required tools, creates or reuses the `k3d` +cluster, builds the backend image, imports it into the cluster, and installs or +upgrades the Helm release with the local values file. The ingress load balancer +is bound to `127.0.0.1` to avoid exposing the preview outside the developer +machine. + +The workflow expects these executables on `PATH`: + +- `docker` +- `helm` +- `k3d` +- `kubectl` +- `uv` + +Configuration can be overridden with environment variables: + +| Variable | Default | Purpose | +| --------------------------- | ------------------------ | ---------------------- | +| `WILDSIDE_K3D_CLUSTER` | `wildside-preview` | k3d cluster name. | +| `WILDSIDE_K3D_PORT` | `8088` | Loopback ingress port. | +| `WILDSIDE_K8S_NAMESPACE` | `wildside` | Kubernetes namespace. | +| `WILDSIDE_HELM_RELEASE` | `wildside` | Helm release name. | +| `WILDSIDE_IMAGE` | `wildside-backend:local` | Local image reference. | + +`WILDSIDE_IMAGE` must include a tag because the Helm chart receives repository +and tag as separate values. + +## Validation + +The local preview helper has unit coverage for preflight validation and image +reference parsing. Full end-to-end preview validation requires Docker, `k3d`, +`kubectl`, Helm, and an available loopback port. If those tools are absent, +the CLI must fail early with a clear missing-executable message rather than +partially creating infrastructure. + +Repository-wide validation remains: + +```bash +make check-fmt +make lint +make test +``` diff --git a/docs/repository-structure.md b/docs/repository-structure.md index dfad30fe7..8b4c71f2a 100644 --- a/docs/repository-structure.md +++ b/docs/repository-structure.md @@ -485,6 +485,13 @@ Deployment automation for clusters and preview environments lives in the Nile Valley repository. 
Refer to its documentation for GitOps workflows and infrastructure state handling. +This repository also provides a developer-local k3d preview helper: +`scripts/local_k8s.py` and `scripts/local_k8s/*`. The helper builds the +backend image, imports it into a local `k3d` cluster, and installs the Wildside +chart with `deploy/charts/wildside/values.local.yaml`. It validates the +application packaging contract; Nile Valley remains the owner of shared +cluster and GitOps automation. + ______________________________________________________________________ ## 8) Local DX (Makefile targets) diff --git a/docs/users-guide.md b/docs/users-guide.md new file mode 100644 index 000000000..34fbb2d71 --- /dev/null +++ b/docs/users-guide.md @@ -0,0 +1,47 @@ +# Wildside users' guide + +## Backend health endpoints + +The backend exposes unauthenticated health probes for operators and local +preview tooling: + +- `GET /health/live` returns `200 OK` while the process is live. +- `GET /health/ready` returns `200 OK` after startup completes and `503 + Service Unavailable` while the process is not ready. + +Both endpoints send `Cache-Control: no-store` and a JSON body with a top-level +`status` field (`pass` or `fail`) plus a `checks` object keyed by `liveness` or +`readiness`. + +## Local k3d preview + +Developers can run a local Kubernetes preview when Docker, Helm, `k3d`, +`kubectl`, and `uv` are installed: + +```bash +make local-k8s-up +make local-k8s-status +make local-k8s-logs +make local-k8s-down +``` + +The default preview is reachable through loopback ingress at +`http://127.0.0.1:8088`. Override the port with `WILDSIDE_K3D_PORT` if that +port is already in use. 
+ +Useful overrides: + +| Variable | Default | +| --------------------------- | ------------------------ | +| `WILDSIDE_K3D_CLUSTER` | `wildside-preview` | +| `WILDSIDE_K3D_PORT` | `8088` | +| `WILDSIDE_K8S_NAMESPACE` | `wildside` | +| `WILDSIDE_HELM_RELEASE` | `wildside` | +| `WILDSIDE_IMAGE` | `wildside-backend:local` | + +`WILDSIDE_IMAGE` must include a tag. The preview helper splits the value into +the Helm chart's `image.repository` and `image.tag` settings. + +Nile Valley owns shared preview and GitOps automation. The local preview in +this repository is for developer validation of the Wildside chart and runtime +contract. diff --git a/docs/wildside-backend-architecture.md b/docs/wildside-backend-architecture.md index b9ae38290..0199f5440 100644 --- a/docs/wildside-backend-architecture.md +++ b/docs/wildside-backend-architecture.md @@ -202,9 +202,9 @@ flowchart TB #### Port usage examples -Ports are defined in the domain and implemented by outbound adapters. Inbound -adapters consume ports via injected state rather than importing outbound -modules directly. +Ports are defined in the domain and implemented by outbound adapters or by +domain-owned default implementations. Inbound adapters consume ports via +injected state rather than importing outbound modules directly. - **Port (domain):** `backend/src/domain/ports/user_repository.rs` defines the `UserRepository` trait and `UserPersistenceError`. @@ -215,6 +215,12 @@ modules directly. - **Contract (tests):** `backend/tests/ports_behaviour.rs` exercises the port semantics against a Postgres-backed implementation using `pg-embed-setup-unpriv`. +- **Health port (domain):** `backend/src/domain/ports/health_observer.rs` + defines `HealthObserver`, while `backend/src/domain/health.rs` provides the + process-local `ProcessHealth` implementation. 
The Actix adapter in + `backend/src/inbound/http/health.rs` only maps those observations to + `/health/live` and `/health/ready` HTTP status codes, cache headers, and JSON + response envelopes. The intended runtime wiring pattern is: diff --git a/package.json b/package.json index a72f601af..ae0d35e79 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ "minimatch": "10.2.3", "lodash-es": "4.18.1", "pino": "9.13.1", - "qs": "6.14.2", + "qs": "6.15.2", "rollup": "4.59.0", "basic-ftp": "5.3.1", "dompurify": "3.4.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6a3aca44d..213185885 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -17,7 +17,7 @@ overrides: minimatch: 10.2.3 lodash-es: 4.18.1 pino: 9.13.1 - qs: 6.14.2 + qs: 6.15.2 rollup: 4.59.0 basic-ftp: 5.3.1 dompurify: 3.4.0 @@ -2554,8 +2554,8 @@ packages: deprecated: < 24.15.0 is no longer supported hasBin: true - qs@6.14.2: - resolution: {integrity: sha512-V/yCWTTF7VJ9hIh18Ugr2zhJMP01MY7c5kh4J870L7imm6/DIzBsNLTXzMwUA3yZ5b/KBqLx8Kp3uRvd7xSe3Q==} + qs@6.15.2: + resolution: {integrity: sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw==} engines: {node: '>=0.6'} queue-microtask@1.2.3: @@ -5790,7 +5790,7 @@ snapshots: - typescript - utf-8-validate - qs@6.14.2: + qs@6.15.2: dependencies: side-channel: 1.1.0 @@ -6237,7 +6237,7 @@ snapshots: url@0.11.4: dependencies: punycode: 1.4.1 - qs: 6.14.2 + qs: 6.15.2 use-sync-external-store@1.5.0(react@19.1.1): dependencies: diff --git a/scripts/local_k8s.py b/scripts/local_k8s.py new file mode 100644 index 000000000..5681f6c98 --- /dev/null +++ b/scripts/local_k8s.py @@ -0,0 +1,64 @@ +#!/usr/bin/env -S uv run +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "cyclopts==4.10.1", +# "plumbum==1.9.0", +# ] +# /// +"""Run the Wildside local Kubernetes preview workflow.""" + +from __future__ import annotations + +from cyclopts import App + +from local_k8s.config import PreviewConfig +from 
local_k8s.deployment import deploy_preview, print_logs, print_status +from local_k8s.k3d import delete_cluster +from local_k8s.validation import LocalK8sError + +app = App(help="Manage a local k3d Wildside preview environment.") + + +def _run(operation: str, func: object) -> None: + try: + if callable(func): + func() + except LocalK8sError as exc: + raise SystemExit(f"{operation} failed: {exc}") from exc + + +@app.command +def up(skip_build: bool = False) -> None: + """Create or update the local preview environment.""" + + config = PreviewConfig.from_env() + _run("local preview up", lambda: deploy_preview(config, skip_build=skip_build)) + + +@app.command +def down() -> None: + """Delete the local preview cluster.""" + + config = PreviewConfig.from_env() + _run("local preview down", lambda: delete_cluster(config)) + + +@app.command +def status() -> None: + """Print cluster, namespace, Helm release, and pod status.""" + + config = PreviewConfig.from_env() + _run("local preview status", lambda: print_status(config)) + + +@app.command +def logs(follow: bool = False) -> None: + """Print logs from the Wildside backend pods.""" + + config = PreviewConfig.from_env() + _run("local preview logs", lambda: print_logs(config, follow=follow)) + + +if __name__ == "__main__": + app() diff --git a/scripts/local_k8s/__init__.py b/scripts/local_k8s/__init__.py new file mode 100644 index 000000000..b13fee39b --- /dev/null +++ b/scripts/local_k8s/__init__.py @@ -0,0 +1 @@ +"""Local Kubernetes preview helpers for Wildside.""" diff --git a/scripts/local_k8s/commands.py b/scripts/local_k8s/commands.py new file mode 100644 index 000000000..7a65ba81f --- /dev/null +++ b/scripts/local_k8s/commands.py @@ -0,0 +1,34 @@ +"""Command execution primitives for the local preview workflow.""" + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from plumbum import local +from plumbum.commands.processes import ProcessExecutionError + +from 
.validation import LocalK8sError + + +@dataclass(frozen=True, slots=True) +class CommandResult: + """Captured stdout and stderr from an external command.""" + + stdout: str + stderr: str + + +def run(command: str, args: Sequence[str], *, cwd: str | None = None) -> CommandResult: + """Run a command and raise a local preview error on failure.""" + + try: + executable = local[command] + if cwd: + with local.cwd(cwd): + out = executable.run(args) + else: + out = executable.run(args) + except ProcessExecutionError as exc: + raise LocalK8sError(exc.stderr.strip() or str(exc)) from exc + return CommandResult(stdout=out[1], stderr=out[2]) diff --git a/scripts/local_k8s/config.py b/scripts/local_k8s/config.py new file mode 100644 index 000000000..3e4c0dc3b --- /dev/null +++ b/scripts/local_k8s/config.py @@ -0,0 +1,53 @@ +"""Configuration for the local k3d Wildside preview.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from pathlib import Path + +from .validation import validate_port + +DEFAULT_CLUSTER_NAME = "wildside-preview" +DEFAULT_NAMESPACE = "wildside" +DEFAULT_RELEASE_NAME = "wildside" +DEFAULT_IMAGE_NAME = "wildside-backend:local" +DEFAULT_INGRESS_PORT = 8088 + + +@dataclass(frozen=True, slots=True) +class PreviewConfig: + """Repository-local configuration for a Wildside preview deployment.""" + + repository_root: Path + cluster_name: str + namespace: str + release_name: str + image_name: str + ingress_port: int + chart_path: Path + local_values_path: Path + dockerfile_path: Path + + @classmethod + def from_env(cls) -> "PreviewConfig": + """Build configuration from defaults and `WILDSIDE_` overrides.""" + + repository_root = Path(__file__).resolve().parents[2] + ingress_port = validate_port( + os.environ.get("WILDSIDE_K3D_PORT"), + default=DEFAULT_INGRESS_PORT, + name="WILDSIDE_K3D_PORT", + ) + chart_path = repository_root / "deploy" / "charts" / "wildside" + return cls( + repository_root=repository_root, + 
cluster_name=os.environ.get("WILDSIDE_K3D_CLUSTER", DEFAULT_CLUSTER_NAME), + namespace=os.environ.get("WILDSIDE_K8S_NAMESPACE", DEFAULT_NAMESPACE), + release_name=os.environ.get("WILDSIDE_HELM_RELEASE", DEFAULT_RELEASE_NAME), + image_name=os.environ.get("WILDSIDE_IMAGE", DEFAULT_IMAGE_NAME), + ingress_port=ingress_port, + chart_path=chart_path, + local_values_path=chart_path / "values.local.yaml", + dockerfile_path=repository_root / "deploy" / "docker" / "backend.Dockerfile", + ) diff --git a/scripts/local_k8s/deployment.py b/scripts/local_k8s/deployment.py new file mode 100644 index 000000000..9bab08051 --- /dev/null +++ b/scripts/local_k8s/deployment.py @@ -0,0 +1,118 @@ +"""Build, deploy, inspect, and log the Wildside local preview.""" + +from __future__ import annotations + +from .commands import run +from .config import PreviewConfig +from .k3d import ensure_cluster, import_image, print_cluster_status +from .k8s import ensure_namespace, print_kubernetes_status +from .validation import LocalK8sError, require_tools + + +def deploy_preview(config: PreviewConfig, *, skip_build: bool) -> None: + """Build the image and install or upgrade the Wildside Helm release.""" + + require_tools(_deploy_preview_tools(skip_build=skip_build)) + ensure_cluster(config) + ensure_namespace(config) + if not skip_build: + build_image(config) + import_image(config) + helm_upgrade(config) + print_status(config) + + +def _deploy_preview_tools(*, skip_build: bool) -> tuple[str, ...]: + """Return the required command-line tools for the requested deploy mode.""" + + if skip_build: + return ("helm", "k3d", "kubectl") + return ("docker", "helm", "k3d", "kubectl") + + +def build_image(config: PreviewConfig) -> None: + """Build the Wildside backend image for local k3d import.""" + + run( + "docker", + [ + "build", + "-f", + str(config.dockerfile_path), + "-t", + config.image_name, + str(config.repository_root), + ], + ) + + +def helm_upgrade(config: PreviewConfig) -> None: + """Install or 
upgrade the Wildside Helm release.""" + + image_repository, image_tag = image_repository_and_tag(config.image_name) + run( + "helm", + [ + "upgrade", + "--install", + config.release_name, + str(config.chart_path), + "--namespace", + config.namespace, + "--values", + str(config.local_values_path), + "--set", + f"image.repository={image_repository}", + "--set", + f"image.tag={image_tag}", + "--wait", + "--timeout", + "5m", + ], + ) + + +def _image_ref_lacks_tag(repository: str, separator: str, tag: str) -> bool: + """Return True when the parsed parts do not form a valid image:tag reference.""" + return not separator or "/" in tag or not repository or not tag + + +def image_repository_and_tag(image_name: str) -> tuple[str, str]: + """Split a Docker image reference into Helm repository and tag values.""" + + repository, separator, tag = image_name.rpartition(":") + if _image_ref_lacks_tag(repository, separator, tag): + raise LocalK8sError( + "WILDSIDE_IMAGE must include a tag, for example wildside-backend:local" + ) + return repository, tag + + +def print_status(config: PreviewConfig) -> None: + """Print cluster and workload status.""" + + require_tools(("helm", "k3d", "kubectl")) + print_cluster_status(config) + release = run("helm", ["-n", config.namespace, "status", config.release_name]) + print(release.stdout.strip()) + print_kubernetes_status(config) + + +def print_logs(config: PreviewConfig, *, follow: bool) -> None: + """Print backend pod logs from the preview namespace.""" + + require_tools(("kubectl",)) + args = [ + "-n", + config.namespace, + "logs", + "-l", + f"app.kubernetes.io/instance={config.release_name}", + "-c", + "app", + "--tail", + "200", + ] + if follow: + args.append("--follow") + print(run("kubectl", args).stdout, end="") diff --git a/scripts/local_k8s/k3d.py b/scripts/local_k8s/k3d.py new file mode 100644 index 000000000..9e42898c9 --- /dev/null +++ b/scripts/local_k8s/k3d.py @@ -0,0 +1,66 @@ +"""k3d cluster lifecycle helpers for the Wildside 
local preview.""" + +from __future__ import annotations + +import json + +from .commands import run +from .config import PreviewConfig +from .validation import LocalK8sError, require_tools + + +def ensure_cluster(config: PreviewConfig) -> None: + """Create the preview cluster when it does not already exist.""" + + require_tools(("k3d", "kubectl", "helm")) + if _cluster_exists(config.cluster_name): + print(f"k3d cluster {config.cluster_name!r} already exists") + return + args = [ + "cluster", + "create", + config.cluster_name, + "--servers", + "1", + "--agents", + "1", + "--port", + f"127.0.0.1:{config.ingress_port}:80@loadbalancer", + "--wait", + ] + run("k3d", args) + + +def delete_cluster(config: PreviewConfig) -> None: + """Delete the preview cluster if it exists.""" + + require_tools(("k3d",)) + if not _cluster_exists(config.cluster_name): + print(f"k3d cluster {config.cluster_name!r} does not exist") + return + run("k3d", ["cluster", "delete", config.cluster_name]) + + +def import_image(config: PreviewConfig) -> None: + """Import the local backend image into the preview cluster.""" + + require_tools(("k3d",)) + run("k3d", ["image", "import", config.image_name, "--cluster", config.cluster_name]) + + +def print_cluster_status(config: PreviewConfig) -> None: + """Print a short description of the preview cluster.""" + + require_tools(("k3d",)) + if not _cluster_exists(config.cluster_name): + raise LocalK8sError(f"k3d cluster {config.cluster_name!r} does not exist") + print(f"cluster: {config.cluster_name}") + print(f"ingress: http://127.0.0.1:{config.ingress_port}") + + +def _cluster_exists(cluster_name: str) -> bool: + result = run("k3d", ["cluster", "list", "--output", "json"]) + clusters = json.loads(result.stdout or "[]") + if not isinstance(clusters, list): + raise LocalK8sError("unexpected k3d cluster list JSON shape") + return any(isinstance(cluster, dict) and cluster.get("name") == cluster_name for cluster in clusters) diff --git a/scripts/local_k8s/k8s.py 
b/scripts/local_k8s/k8s.py new file mode 100644 index 000000000..335905baf --- /dev/null +++ b/scripts/local_k8s/k8s.py @@ -0,0 +1,59 @@ +"""Kubernetes helper operations for the Wildside local preview.""" + +from __future__ import annotations + +from .commands import run +from .config import PreviewConfig +from .validation import require_tools + + +def ensure_namespace(config: PreviewConfig) -> None: + """Create the preview namespace when it does not already exist.""" + + require_tools(("kubectl",)) + result = run("kubectl", ["get", "namespace", config.namespace, "--ignore-not-found"]) + if result.stdout.strip(): + return + run("kubectl", ["create", "namespace", config.namespace]) + + +def _helm_fullname(config: PreviewConfig) -> str: + """Return the chart fullname used for Kubernetes object names.""" + + chart_name = config.chart_path.name + if config.release_name == chart_name: + return chart_name + return f"{config.release_name}-{chart_name}"[:63].rstrip("-") + + +def print_kubernetes_status(config: PreviewConfig) -> None: + """Print namespace, service, and pod status for the preview release.""" + + require_tools(("kubectl",)) + print(f"namespace: {config.namespace}") + pods = run( + "kubectl", + [ + "-n", + config.namespace, + "get", + "pods", + "-l", + f"app.kubernetes.io/instance={config.release_name}", + "-o", + "wide", + ], + ) + print(pods.stdout.strip() or "pods: none") + services = run( + "kubectl", + [ + "-n", + config.namespace, + "get", + "service", + _helm_fullname(config), + "--ignore-not-found", + ], + ) + print(services.stdout.strip() or "service: none") diff --git a/scripts/local_k8s/unittests/test_deployment.py b/scripts/local_k8s/unittests/test_deployment.py new file mode 100644 index 000000000..3a8c7280d --- /dev/null +++ b/scripts/local_k8s/unittests/test_deployment.py @@ -0,0 +1,83 @@ +"""Unit tests for local preview deployment orchestration. 
+ +These tests exercise the orchestration logic in ``local_k8s.deployment`` +without invoking Kubernetes, Helm, k3d, or Docker. They document the preflight +contract for full build-and-deploy runs and the ``skip_build`` path used with +prebuilt images. The key invariant is that Docker is required only when the +deployment will build an image locally; Helm, k3d, and kubectl remain required +for both deployment modes. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from local_k8s.config import PreviewConfig +from local_k8s.deployment import deploy_preview + + +@pytest.fixture +def preview_config() -> PreviewConfig: + """Representative local preview configuration for deployment tests. + + Returns + ------- + PreviewConfig + Configuration for a local preview release named ``preview`` in the + ``wildside`` namespace. The image tag, chart path, values path, and + Dockerfile path match the deployment fields that ``deploy_preview`` + passes through its build, import, and Helm orchestration steps. + """ + + return PreviewConfig( + repository_root=Path("/repo"), + cluster_name="wildside-preview", + namespace="wildside", + release_name="preview", + image_name="wildside-backend:local", + ingress_port=8088, + chart_path=Path("/repo/deploy/charts/wildside"), + local_values_path=Path("/repo/deploy/charts/wildside/values.local.yaml"), + dockerfile_path=Path("/repo/deploy/docker/backend.Dockerfile"), + ) + + +@pytest.mark.parametrize( + ("skip_build", "expected_tools"), + [ + (True, ("helm", "k3d", "kubectl")), + (False, ("docker", "helm", "k3d", "kubectl")), + ], + ids=["skip-build", "build-image"], +) +def test_deploy_preview_docker_requirement_conditional_on_skip_build( + monkeypatch: pytest.MonkeyPatch, + preview_config: PreviewConfig, + skip_build: bool, # noqa: FBT001 - pytest parametrize documents both boolean modes. 
+ expected_tools: tuple[str, ...], +) -> None: + """Verify that Docker preflight follows the selected build mode.""" + required_tools: list[tuple[str, ...]] = [] + + def no_op(_: PreviewConfig) -> None: + """Replace deployment side effects during preflight assertions.""" + + monkeypatch.setattr( + "local_k8s.deployment.require_tools", + lambda tools: required_tools.append(tuple(tools)), + ) + monkeypatch.setattr("local_k8s.deployment.ensure_cluster", no_op) + monkeypatch.setattr("local_k8s.deployment.ensure_namespace", no_op) + monkeypatch.setattr("local_k8s.deployment.import_image", no_op) + monkeypatch.setattr("local_k8s.deployment.helm_upgrade", no_op) + monkeypatch.setattr("local_k8s.deployment.print_status", no_op) + monkeypatch.setattr("local_k8s.deployment.build_image", no_op) + + deploy_preview(preview_config, skip_build=skip_build) + + assert required_tools == [expected_tools], ( + f"expected require_tools to be called once with {expected_tools}, " + f"but got {required_tools}" + ) diff --git a/scripts/local_k8s/unittests/test_k8s.py b/scripts/local_k8s/unittests/test_k8s.py new file mode 100644 index 000000000..00b46628a --- /dev/null +++ b/scripts/local_k8s/unittests/test_k8s.py @@ -0,0 +1,45 @@ +"""Unit tests for local preview Kubernetes helpers.""" + +from __future__ import annotations + +import typing +from pathlib import Path +from types import SimpleNamespace + +from local_k8s.config import PreviewConfig +from local_k8s.k8s import print_kubernetes_status + +if typing.TYPE_CHECKING: + import pytest + + +def test_print_kubernetes_status_uses_helm_fullname_for_service( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Verify that status queries the Helm-derived Service name.""" + config = PreviewConfig( + repository_root=Path("/repo"), + cluster_name="wildside-preview", + namespace="wildside", + release_name="preview", + image_name="wildside-backend:local", + ingress_port=8088, + chart_path=Path("/repo/deploy/charts/wildside"), + 
local_values_path=Path("/repo/deploy/charts/wildside/values.local.yaml"), + dockerfile_path=Path("/repo/deploy/docker/backend.Dockerfile"), + ) + calls: list[tuple[str, list[str]]] = [] + + def fake_run(command: str, args: list[str]) -> SimpleNamespace: + calls.append((command, args)) + return SimpleNamespace(stdout="") + + monkeypatch.setattr("local_k8s.k8s.require_tools", lambda _: None) + monkeypatch.setattr("local_k8s.k8s.run", fake_run) + + print_kubernetes_status(config) + + assert ( + "kubectl", + ["-n", "wildside", "get", "service", "preview-wildside", "--ignore-not-found"], + ) in calls, "expected kubectl get service preview-wildside call to be present in calls" diff --git a/scripts/local_k8s/unittests/test_validation.py b/scripts/local_k8s/unittests/test_validation.py new file mode 100644 index 000000000..b974b06be --- /dev/null +++ b/scripts/local_k8s/unittests/test_validation.py @@ -0,0 +1,45 @@ +"""Unit tests for local preview validation helpers.""" + +from __future__ import annotations + +import pytest + +from local_k8s.deployment import image_repository_and_tag +from local_k8s.validation import LocalK8sError, require_tools, validate_port + + +def test_validate_port_uses_default_for_missing_value() -> None: + assert validate_port(None, default=8088, name="WILDSIDE_K3D_PORT") == 8088 + + +@pytest.mark.parametrize("raw_value", ["1", "8088", "65535"]) +def test_validate_port_accepts_valid_tcp_ports(raw_value: str) -> None: + assert validate_port(raw_value, default=8088, name="WILDSIDE_K3D_PORT") == int(raw_value) + + +@pytest.mark.parametrize("raw_value", ["0", "65536", "not-a-port"]) +def test_validate_port_rejects_invalid_values(raw_value: str) -> None: + with pytest.raises(LocalK8sError): + validate_port(raw_value, default=8088, name="WILDSIDE_K3D_PORT") + + +def test_require_tools_reports_missing_executables() -> None: + with pytest.raises(LocalK8sError, match="definitely-not-a-wildside-tool"): + require_tools(("definitely-not-a-wildside-tool",)) + + 
+@pytest.mark.parametrize( + ("image_name", "expected"), + [ + ("wildside-backend:local", ("wildside-backend", "local")), + ("registry.example.test:5000/wildside/backend:preview", ("registry.example.test:5000/wildside/backend", "preview")), + ], +) +def test_image_repository_and_tag_accepts_tagged_images(image_name: str, expected: tuple[str, str]) -> None: + assert image_repository_and_tag(image_name) == expected + + +@pytest.mark.parametrize("image_name", ["wildside-backend", "registry.example.test:5000/wildside/backend", ":local"]) +def test_image_repository_and_tag_rejects_untagged_images(image_name: str) -> None: + with pytest.raises(LocalK8sError, match="WILDSIDE_IMAGE"): + image_repository_and_tag(image_name) diff --git a/scripts/local_k8s/validation.py b/scripts/local_k8s/validation.py new file mode 100644 index 000000000..afa2a2e7f --- /dev/null +++ b/scripts/local_k8s/validation.py @@ -0,0 +1,43 @@ +"""Validation helpers for local Kubernetes preview commands.""" + +from __future__ import annotations + +from collections.abc import Iterable + +from plumbum import local +from plumbum.commands.processes import CommandNotFound + + +class LocalK8sError(RuntimeError): + """Raised when a local preview preflight or command fails.""" + + +def validate_port(raw_value: str | None, *, default: int, name: str) -> int: + """Return a TCP port from an optional environment variable value.""" + + if raw_value is None or raw_value == "": + return default + try: + port = int(raw_value) + except ValueError as exc: + raise LocalK8sError(f"{name} must be an integer TCP port") from exc + if not 1 <= port <= 65535: + raise LocalK8sError(f"{name} must be between 1 and 65535") + return port + + +def require_tools(tools: Iterable[str]) -> None: + """Fail with a concise preflight error when required executables are absent.""" + + missing = [tool for tool in tools if _is_missing(tool)] + if missing: + joined = ", ".join(missing) + raise LocalK8sError(f"missing required executable(s): 
{joined}") + + +def _is_missing(tool: str) -> bool: + try: + local.which(tool) + except CommandNotFound: + return True + return False