From e54f93a735ea526b61d4120d1233c2bc54a3417e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Thu, 12 Mar 2026 18:08:39 +0100 Subject: [PATCH] feat(ops): add update channels and rollout rings --- CHANGELOG.md | 1 + README.md | 5 +++ config.yaml | 2 + docs/FOUNDRYGATE-ROADMAP.md | 2 +- docs/PUBLISHING.md | 2 + foundrygate/config.py | 12 ++++++ foundrygate/main.py | 3 +- foundrygate/updates.py | 70 ++++++++++++++++++++++++++++--- tests/test_config.py | 6 +++ tests/test_updates.py | 82 +++++++++++++++++++++++++++++++++++++ 10 files changed, 178 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7934f9..d725a4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel - Added `GET /api/operator-events` plus operator-event metrics for update checks and helper-driven auto-update attempts - Added dashboard cards and tables for operator-side update checks and apply attempts - Added provider-health rollout guardrails so helper-driven auto-updates can block when gateway health is already degraded +- Added `update_check.release_channel` and `auto_update.rollout_ring` so operators can distinguish stable vs preview checks and tighter rollout rings ## v0.6.0 - 2026-03-12 diff --git a/README.md b/README.md index 0bee303..94fbd98 100644 --- a/README.md +++ b/README.md @@ -522,6 +522,7 @@ Supported fields in `update_check`: - `api_base` - `timeout_seconds` - `check_interval_seconds` +- `release_channel` Example: @@ -532,6 +533,7 @@ update_check: api_base: "https://api.github.com" timeout_seconds: 5 check_interval_seconds: 21600 + release_channel: "stable" ``` The status is exposed through `GET /api/update`, the dashboard, and the helper script `foundrygate-update-check`. @@ -543,6 +545,7 @@ Supported fields in `auto_update`: - `enabled` - `allow_major` +- `rollout_ring` - `require_healthy_providers` - `max_unhealthy_providers` - `apply_command` @@ -553,6 +556,7 @@ Example: auto_update: enabled: true allow_major: false + rollout_ring: "early" require_healthy_providers: true max_unhealthy_providers: 0 apply_command: "foundrygate-update" @@ -564,6 +568,7 @@ What the current runtime does with it: - shows the same state in the dashboard - lets `foundrygate-auto-update --apply` run only when the current release state is eligible - can block helper-driven rollout when provider health is already degraded +- lets operators separate `stable` vs `preview` release checks and `stable` / `early` / `canary` rollout rings What it still does not do: diff --git a/config.yaml b/config.yaml index 64364d8..52eb154 100644 --- a/config.yaml +++ b/config.yaml @@ -881,6 +881,7 @@ update_check: api_base: "https://api.github.com" timeout_seconds: 5 check_interval_seconds: 21600 + release_channel: "stable" # ── Optional Auto-Update Enabler ──────────────────────────────────────────── # This does not make the API mutate the checkout. It only marks whether the @@ -888,6 +889,7 @@ update_check: auto_update: enabled: false allow_major: false + rollout_ring: "early" require_healthy_providers: true max_unhealthy_providers: 0 apply_command: "foundrygate-update" diff --git a/docs/FOUNDRYGATE-ROADMAP.md b/docs/FOUNDRYGATE-ROADMAP.md index 9d47a8f..6b333a2 100644 --- a/docs/FOUNDRYGATE-ROADMAP.md +++ b/docs/FOUNDRYGATE-ROADMAP.md @@ -361,7 +361,7 @@ Current baseline: - local helper access via `foundrygate-update-check` - opt-in eligibility reporting and helper-driven apply flow via `foundrygate-auto-update` -This should remain opt-in and operationally conservative as it expands toward scheduled helper use, stronger rollout controls, and clearer operator approval boundaries. +This should remain opt-in and operationally conservative as it expands toward scheduled helper use, stronger rollout controls, clearer operator approval boundaries, and small rollout-ring/channel distinctions. ### 7. Distribution channels diff --git a/docs/PUBLISHING.md b/docs/PUBLISHING.md index 74b7867..9c4f6d2 100644 --- a/docs/PUBLISHING.md +++ b/docs/PUBLISHING.md @@ -61,6 +61,8 @@ Publishing creates a tagged release. Applying that release on a host should rema If you want scheduled update application: - keep `auto_update.enabled: true` explicit in `config.yaml` +- keep `update_check.release_channel` on `stable` unless you intentionally want preview releases in the check path +- keep `auto_update.rollout_ring` on `stable` or `early` for normal environments; use `canary` only for faster adopters - keep `allow_major: false` unless you are ready to absorb breaking changes automatically - keep `require_healthy_providers: true` unless you are intentionally allowing rollouts while the gateway is degraded - prefer the reviewed examples in [examples/foundrygate-auto-update.service](./examples/foundrygate-auto-update.service) and [examples/foundrygate-auto-update.timer](./examples/foundrygate-auto-update.timer) diff --git a/foundrygate/config.py b/foundrygate/config.py index 6baa372..40b6133 100644 --- a/foundrygate/config.py +++ b/foundrygate/config.py @@ -865,6 +865,10 @@ def _normalize_update_check(data: dict[str, Any]) -> dict[str, Any]: if check_interval_seconds <= 0: raise ConfigError("'update_check.check_interval_seconds' must be positive") + release_channel = raw.get("release_channel", "stable") + if release_channel not in {"stable", "preview"}: + raise ConfigError("'update_check.release_channel' must be 'stable' or 'preview'") + normalized = dict(data) normalized["update_check"] = { "enabled": enabled, @@ -872,6 +876,7 @@ def _normalize_update_check(data: dict[str, Any]) -> dict[str, Any]: "api_base": api_base.strip().rstrip("/"), "timeout_seconds": float(timeout_seconds), "check_interval_seconds": check_interval_seconds, + "release_channel": release_channel, } return normalized @@ -892,6 +897,10 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: if not isinstance(allow_major, bool): raise ConfigError("'auto_update.allow_major' must be a boolean") + rollout_ring = raw.get("rollout_ring", "early") + if rollout_ring not in {"stable", "early", "canary"}: + raise ConfigError("'auto_update.rollout_ring' must be 'stable', 'early', or 'canary'") + require_healthy_providers = raw.get("require_healthy_providers", True) if not isinstance(require_healthy_providers, bool): raise ConfigError("'auto_update.require_healthy_providers' must be a boolean") @@ -910,6 +919,7 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: normalized["auto_update"] = { "enabled": enabled, "allow_major": allow_major, + "rollout_ring": rollout_ring, "require_healthy_providers": require_healthy_providers, "max_unhealthy_providers": max_unhealthy_providers, "apply_command": apply_command.strip(), @@ -991,6 +1001,7 @@ def update_check(self) -> dict: "api_base": "https://api.github.com", "timeout_seconds": 5.0, "check_interval_seconds": 21600, + "release_channel": "stable", }, ) @@ -1001,6 +1012,7 @@ def auto_update(self) -> dict: { "enabled": False, "allow_major": False, + "rollout_ring": "early", "require_healthy_providers": True, "max_unhealthy_providers": 0, "apply_command": "foundrygate-update", diff --git a/foundrygate/main.py b/foundrygate/main.py index 3cf2473..e8392a0 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -632,6 +632,7 @@ async def lifespan(app: FastAPI): api_base=str(_config.update_check.get("api_base", "https://api.github.com")), check_interval_seconds=int(_config.update_check.get("check_interval_seconds", 21600)), timeout_seconds=float(_config.update_check.get("timeout_seconds", 5.0)), + release_channel=str(_config.update_check.get("release_channel", "stable")), auto_update=_config.auto_update, ) @@ -1639,7 +1640,7 @@ def main():
Healthy Providers
${healthyProviders}/${providers.length}
${unhealthyProviders} unhealthy
Capability Coverage
${coverageEntries.length}
${coverageEntries.map(([name]) => name).slice(0,3).join(', ') || 'none'}
Top Modality
${esc(topModality)}
${modalityRows.length} modality groups
-
Release Status
${esc(update.latest_version || update.current_version || 'n/a')}
${update.enabled ? (update.status === 'ok' ? `${esc(update.update_type || 'current')} / ${esc(update.recommended_action || (update.update_available ? 'Upgrade recommended' : 'No action needed'))}${update.auto_update && update.auto_update.enabled ? ` / auto: ${esc(update.auto_update.eligible ? 'eligible' : (update.auto_update.blocked_reason || 'blocked'))}` : ''}` : esc(update.recommended_action || 'Update check unavailable')) : 'Update checks disabled'}
+
Release Status
${esc(update.latest_version || update.current_version || 'n/a')}
${update.enabled ? (update.status === 'ok' ? `${esc(update.release_channel || 'stable')} / ${esc(update.update_type || 'current')} / ${esc(update.recommended_action || (update.update_available ? 'Upgrade recommended' : 'No action needed'))}${update.auto_update && update.auto_update.enabled ? ` / ring: ${esc(update.auto_update.rollout_ring || 'early')} / auto: ${esc(update.auto_update.eligible ? 'eligible' : (update.auto_update.blocked_reason || 'blocked'))}` : ''}` : esc(update.recommended_action || 'Update check unavailable')) : 'Update checks disabled'}
Operator Actions
${fmtTok((operatorEvents.events || []).length)}
${latestOperatorEvent ? `${esc(latestOperatorEvent.action || 'update-check')} / ${esc(latestOperatorEvent.status || 'unknown')}` : 'No recent operator events'}
`; diff --git a/foundrygate/updates.py b/foundrygate/updates.py index 8e39c84..91061ac 100644 --- a/foundrygate/updates.py +++ b/foundrygate/updates.py @@ -74,6 +74,34 @@ def alert_level_for_update(update_type: str, *, available: bool, status: str) -> return "warning" +def allowed_update_types_for_ring(rollout_ring: str, *, allow_major: bool) -> list[str]: + """Return the allowed update types for one rollout ring.""" + if rollout_ring == "stable": + allowed = ["patch"] + elif rollout_ring == "canary": + allowed = ["patch", "minor"] + else: + allowed = ["patch", "minor"] + + if allow_major and rollout_ring == "canary": + allowed.append("major") + return allowed + + +def select_release_payload(payload: Any, *, release_channel: str) -> dict[str, Any]: + """Select one release object from the GitHub API payload.""" + if release_channel == "preview": + if not isinstance(payload, list): + return {} + for item in payload: + if isinstance(item, dict) and not item.get("draft"): + return item + return {} + if isinstance(payload, dict): + return payload + return {} + + def apply_auto_update_guardrails( auto_update: dict[str, Any], *, @@ -118,6 +146,7 @@ class UpdateStatus: release_url: str = "" checked_at: float = 0.0 status: str = "disabled" + release_channel: str = "stable" update_type: str = "current" alert_level: str = "disabled" recommended_action: str = "" @@ -134,6 +163,7 @@ def to_dict(self) -> dict[str, Any]: "release_url": self.release_url, "checked_at": self.checked_at, "status": self.status, + "release_channel": self.release_channel, "update_type": self.update_type, "alert_level": self.alert_level, "recommended_action": self.recommended_action, @@ -154,6 +184,7 @@ def __init__( api_base: str = "https://api.github.com", check_interval_seconds: int = 21600, timeout_seconds: float = 5.0, + release_channel: str = "stable", auto_update: dict[str, Any] | None = None, ): self.current_version = current_version @@ -162,9 +193,11 @@ def __init__( self.api_base = api_base.rstrip("/") self.check_interval_seconds = check_interval_seconds self.timeout_seconds = timeout_seconds + self.release_channel = release_channel self.auto_update = { "enabled": bool((auto_update or {}).get("enabled", False)), "allow_major": bool((auto_update or {}).get("allow_major", False)), + "rollout_ring": str((auto_update or {}).get("rollout_ring", "early")), "require_healthy_providers": bool( (auto_update or {}).get("require_healthy_providers", True) ), @@ -175,6 +208,7 @@ def __init__( enabled=enabled, current_version=current_version, repository=repository, + release_channel=release_channel, ) self._client = httpx.AsyncClient( timeout=httpx.Timeout(timeout_seconds, connect=min(timeout_seconds, 5.0)), @@ -198,10 +232,9 @@ def _auto_update_status( """Return opt-in auto-update eligibility for operator tooling.""" enabled = bool(self.auto_update.get("enabled", False)) allow_major = bool(self.auto_update.get("allow_major", False)) + rollout_ring = str(self.auto_update.get("rollout_ring", "early")) apply_command = str(self.auto_update.get("apply_command", "foundrygate-update")) - allowed_types = ["patch", "minor"] - if allow_major: - allowed_types.append("major") + allowed_types = allowed_update_types_for_ring(rollout_ring, allow_major=allow_major) blocked_reason = "" eligible = False @@ -223,6 +256,7 @@ def _auto_update_status( "strategy": "script", "allowed_update_types": allowed_types, "allow_major": allow_major, + "rollout_ring": rollout_ring, "require_healthy_providers": bool( self.auto_update.get("require_healthy_providers", True) ), @@ -243,6 +277,7 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: repository=self.repository, checked_at=time.time(), status="disabled", + release_channel=self.release_channel, update_type="current", alert_level="disabled", recommended_action="Update checks are disabled", @@ -263,7 +298,10 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: ): return self._cached - url = f"{self.api_base}/repos/{self.repository}/releases/latest" + if self.release_channel == "preview": + url = f"{self.api_base}/repos/{self.repository}/releases?per_page=10" + else: + url = f"{self.api_base}/repos/{self.repository}/releases/latest" try: response = await self._client.get(url) if response.status_code >= 400: @@ -273,6 +311,7 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: repository=self.repository, checked_at=now, status="unavailable", + release_channel=self.release_channel, update_type="unknown", alert_level="warning", recommended_action="Inspect release connectivity and retry later", @@ -285,9 +324,28 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: ) return self._cached - payload = response.json() + payload = select_release_payload(response.json(), release_channel=self.release_channel) latest_version = str(payload.get("tag_name") or "").strip() release_url = str(payload.get("html_url") or "").strip() + if not latest_version: + self._cached = UpdateStatus( + enabled=True, + current_version=self.current_version, + repository=self.repository, + checked_at=now, + status="unavailable", + release_channel=self.release_channel, + update_type="unknown", + alert_level="warning", + recommended_action="Inspect release connectivity and retry later", + auto_update=self._auto_update_status( + status="unavailable", + update_available=False, + update_type="unknown", + ), + error="No release found for the selected channel", + ) + return self._cached update_available = is_update_available(self.current_version, latest_version) update_type = classify_update(self.current_version, latest_version) alert_level = alert_level_for_update( @@ -304,6 +362,7 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: release_url=release_url, checked_at=now, status="ok", + release_channel=self.release_channel, update_type=update_type, alert_level=alert_level, recommended_action=( @@ -324,6 +383,7 @@ async def get_status(self, *, force: bool = False) -> UpdateStatus: repository=self.repository, checked_at=now, status="unavailable", + release_channel=self.release_channel, update_type="unknown", alert_level="warning", recommended_action="Inspect release connectivity and retry later", diff --git a/tests/test_config.py b/tests/test_config.py index f9409be..2fc3756 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -87,6 +87,12 @@ def test_auto_update_defaults_are_exposed(): cfg = load_config(Path(__file__).parent.parent / "config.yaml") assert cfg.auto_update["enabled"] is False assert cfg.auto_update["allow_major"] is False + assert cfg.auto_update["rollout_ring"] == "early" assert cfg.auto_update["require_healthy_providers"] is True assert cfg.auto_update["max_unhealthy_providers"] == 0 assert cfg.auto_update["apply_command"] == "foundrygate-update" + + +def test_update_check_defaults_include_stable_release_channel(): + cfg = load_config(Path(__file__).parent.parent / "config.yaml") + assert cfg.update_check["release_channel"] == "stable" diff --git a/tests/test_updates.py b/tests/test_updates.py index 559f545..7d9173a 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -7,9 +7,11 @@ from foundrygate.updates import ( UpdateChecker, alert_level_for_update, + allowed_update_types_for_ring, apply_auto_update_guardrails, classify_update, is_update_available, + select_release_payload, ) @@ -58,6 +60,26 @@ def test_alert_level_maps_update_type_and_status(): assert alert_level_for_update("unknown", available=False, status="unavailable") == "warning" +def test_allowed_update_types_follow_rollout_ring(): + assert allowed_update_types_for_ring("stable", allow_major=False) == ["patch"] + assert allowed_update_types_for_ring("early", allow_major=False) == ["patch", "minor"] + assert allowed_update_types_for_ring("canary", allow_major=False) == ["patch", "minor"] + assert allowed_update_types_for_ring("canary", allow_major=True) == [ + "patch", + "minor", + "major", + ] + + +def test_select_release_payload_uses_first_preview_release(): + payload = [ + {"tag_name": "v0.8.0-rc1", "draft": False, "html_url": "https://example.test/rc1"}, + {"tag_name": "v0.7.0", "draft": False, "html_url": "https://example.test/stable"}, + ] + chosen = select_release_payload(payload, release_channel="preview") + assert chosen["tag_name"] == "v0.8.0-rc1" + + def test_auto_update_guardrails_block_when_too_many_providers_are_unhealthy(): guarded = apply_auto_update_guardrails( { @@ -136,6 +158,7 @@ async def test_update_checker_reports_latest_release(): assert status.recommended_action == "Upgrade to the latest release" assert status.auto_update["enabled"] is True assert status.auto_update["eligible"] is True + assert status.release_channel == "stable" assert status.auto_update["allowed_update_types"] == ["patch", "minor"] assert status.release_url.endswith("/v0.5.0") @@ -214,6 +237,65 @@ async def test_major_updates_are_blocked_when_auto_update_disallows_them(): assert status.auto_update["blocked_reason"] == "Major updates require manual approval" +@pytest.mark.asyncio +async def test_stable_rollout_ring_blocks_minor_updates(): + checker = UpdateChecker( + current_version="0.6.0", + enabled=True, + repository="typelicious/FoundryGate", + auto_update={"enabled": True, "rollout_ring": "stable", "allow_major": False}, + ) + checker._client = _FakeClient( + _FakeResponse( + 200, + { + "tag_name": "v0.7.0", + "html_url": "https://github.com/typelicious/FoundryGate/releases/tag/v0.7.0", + }, + ) + ) + + status = await checker.get_status(force=True) + + assert status.update_type == "minor" + assert status.auto_update["rollout_ring"] == "stable" + assert status.auto_update["eligible"] is False + assert status.auto_update["blocked_reason"] == "Minor updates require manual approval" + + +@pytest.mark.asyncio +async def test_preview_release_channel_reads_latest_preview_release(): + checker = UpdateChecker( + current_version="0.6.0", + enabled=True, + repository="typelicious/FoundryGate", + release_channel="preview", + auto_update={"enabled": True, "rollout_ring": "canary", "allow_major": False}, + ) + checker._client = _FakeClient( + _FakeResponse( + 200, + [ + { + "tag_name": "v0.7.0-rc1", + "draft": False, + "html_url": "https://github.com/typelicious/FoundryGate/releases/tag/v0.7.0-rc1", + }, + { + "tag_name": "v0.6.2", + "draft": False, + "html_url": "https://github.com/typelicious/FoundryGate/releases/tag/v0.6.2", + }, + ], + ) + ) + + status = await checker.get_status(force=True) + + assert status.release_channel == "preview" + assert status.latest_version == "v0.7.0-rc1" + + @pytest.mark.asyncio async def test_auto_update_disabled_status_is_reported_cleanly(): checker = UpdateChecker(