From 3d8347b9f3edfa89f4929bef868c02026e499e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Thu, 12 Mar 2026 18:58:17 +0100 Subject: [PATCH 1/2] feat(ops): add rollout provider scopes --- CHANGELOG.md | 1 + README.md | 5 +++++ config.yaml | 3 +++ docs/PUBLISHING.md | 1 + docs/TROUBLESHOOTING.md | 1 + foundrygate/config.py | 46 +++++++++++++++++++++++++++++++++++++++++ foundrygate/main.py | 35 +++++++++++++++++++++++++++++-- foundrygate/updates.py | 8 +++++++ tests/test_config.py | 4 ++++ tests/test_updates.py | 31 ++++++++++++++++++++++++++- 10 files changed, 132 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 989d825..512eee5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel - Added `update_check.release_channel` and `auto_update.rollout_ring` so operators can distinguish stable vs preview checks and tighter rollout rings - Added `auto_update.min_release_age_hours` so helper-driven auto-updates can wait for a release to age before becoming eligible - Added `auto_update.maintenance_window` so helper-driven auto-updates can stay inside explicit local maintenance hours +- Added `auto_update.provider_scope` so rollout-health guardrails can evaluate only a selected provider subset ## v0.6.0 - 2026-03-12 diff --git a/README.md b/README.md index f93b8ff..121544a 100644 --- a/README.md +++ b/README.md @@ -549,6 +549,7 @@ Supported fields in `auto_update`: - `require_healthy_providers` - `max_unhealthy_providers` - `min_release_age_hours` +- `provider_scope` - `maintenance_window` - `apply_command` @@ -562,6 +563,9 @@ auto_update: require_healthy_providers: true max_unhealthy_providers: 0 min_release_age_hours: 24 + provider_scope: + allow_providers: ["local-worker", "deepseek-chat"] + deny_providers: ["openrouter-fallback"] maintenance_window: enabled: true timezone: "Europe/Berlin" @@ -577,6 +581,7 @@ What the current runtime does with it: - shows the same state in the dashboard - lets `foundrygate-auto-update --apply` run only when the current release state is eligible - can block helper-driven rollout when provider health is already degraded +- can scope rollout-health checks to a specific provider subset instead of the whole runtime - lets operators separate `stable` vs `preview` release checks and `stable` / `early` / `canary` rollout rings - can require that a release has aged for a minimum number of hours before helper-driven rollout - can restrict helper-driven rollout to explicit local maintenance windows diff --git a/config.yaml b/config.yaml index 9441c2c..4104477 100644 --- a/config.yaml +++ b/config.yaml @@ -893,6 +893,9 @@ auto_update: require_healthy_providers: true max_unhealthy_providers: 0 min_release_age_hours: 0 + provider_scope: + allow_providers: [] + deny_providers: ["openrouter-fallback"] maintenance_window: enabled: false timezone: "UTC" diff --git a/docs/PUBLISHING.md b/docs/PUBLISHING.md index d553291..69e21dd 100644 --- a/docs/PUBLISHING.md +++ b/docs/PUBLISHING.md @@ -66,6 +66,7 @@ If you want scheduled update application: - keep `allow_major: false` unless you are ready to absorb breaking changes automatically - keep `require_healthy_providers: true` unless you are intentionally allowing rollouts while the gateway is degraded - set `min_release_age_hours` above `0` if you want scheduled rollouts to wait before applying newly published releases +- use `provider_scope.allow_providers` / `deny_providers` if rollout health should only consider a subset of providers - add `maintenance_window` if scheduled updates should only run in explicit local maintenance hours - prefer the reviewed examples in [examples/foundrygate-auto-update.service](./examples/foundrygate-auto-update.service) and [examples/foundrygate-auto-update.timer](./examples/foundrygate-auto-update.timer) - use the cron example in [examples/foundrygate-auto-update.cron](./examples/foundrygate-auto-update.cron) only when `systemd` timers are not practical diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index da3c230..1daed6d 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -181,6 +181,7 @@ If `foundrygate-auto-update --apply` refuses to run, inspect the `auto_update` b - `auto_update.enabled: false` - the latest release is a major upgrade while `allow_major: false` +- `provider_scope.allow_providers` / `deny_providers` resolves to no matching providers - one or more providers are unhealthy while `require_healthy_providers: true` - the number of unhealthy providers exceeds `max_unhealthy_providers` - the current time is outside the configured `maintenance_window.days` or `maintenance_window.start_hour` / `end_hour` diff --git a/foundrygate/config.py b/foundrygate/config.py index 81b0d99..f6a2501 100644 --- a/foundrygate/config.py +++ b/foundrygate/config.py @@ -918,6 +918,44 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: if min_release_age_hours < 0: raise ConfigError("'auto_update.min_release_age_hours' must be non-negative") + provider_scope = raw.get("provider_scope", {}) + if provider_scope is None: + provider_scope = {} + if not isinstance(provider_scope, dict): + raise ConfigError("'auto_update.provider_scope' must be a mapping") + + provider_names = set((data.get("providers") or {}).keys()) + allow_providers = _normalize_string_list( + provider_scope.get("allow_providers", []), + field_name="allow_providers", + rule_name="auto_update.provider_scope", + allow_empty=True, + ) + deny_providers = _normalize_string_list( + provider_scope.get("deny_providers", []), + field_name="deny_providers", + rule_name="auto_update.provider_scope", + allow_empty=True, + ) + unknown_allowed = sorted(name for name in allow_providers if name not in provider_names) + if unknown_allowed: + raise ConfigError( + "'auto_update.provider_scope.allow_providers' references unknown providers: " + + ", ".join(unknown_allowed) + ) + unknown_denied = sorted(name for name in deny_providers if name not in provider_names) + if unknown_denied: + raise ConfigError( + "'auto_update.provider_scope.deny_providers' references unknown providers: " + + ", ".join(unknown_denied) + ) + overlap = sorted(set(allow_providers) & set(deny_providers)) + if overlap: + raise ConfigError( + "'auto_update.provider_scope' cannot allow and deny the same providers: " + + ", ".join(overlap) + ) + maintenance_window = raw.get("maintenance_window", {}) if maintenance_window is None: maintenance_window = {} @@ -969,6 +1007,10 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: "require_healthy_providers": require_healthy_providers, "max_unhealthy_providers": max_unhealthy_providers, "min_release_age_hours": min_release_age_hours, + "provider_scope": { + "allow_providers": allow_providers, + "deny_providers": deny_providers, + }, "maintenance_window": { "enabled": window_enabled, "timezone": timezone.strip(), @@ -1070,6 +1112,10 @@ def auto_update(self) -> dict: "require_healthy_providers": True, "max_unhealthy_providers": 0, "min_release_age_hours": 0, + "provider_scope": { + "allow_providers": [], + "deny_providers": [], + }, "maintenance_window": { "enabled": False, "timezone": "UTC", diff --git a/foundrygate/main.py b/foundrygate/main.py index c3485c5..6239a60 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -263,6 +263,28 @@ def _health_summary() -> dict[str, int]: } +def _rollout_provider_summary(provider_scope: dict[str, Any] | None) -> dict[str, Any]: + """Return provider-health totals for the configured rollout scope.""" + scope = dict(provider_scope or {}) + allow = set(scope.get("allow_providers") or []) + deny = set(scope.get("deny_providers") or []) + + rows = [] + for name, provider in _providers.items(): + if allow and name not in allow: + continue + if name in deny: + continue + rows.append((name, provider)) + + return { + "providers": [name for name, _ in rows], + "providers_total": len(rows), + "providers_healthy": sum(1 for _, provider in rows if provider.health.healthy), + "providers_unhealthy": sum(1 for _, provider in rows if not provider.health.healthy), + } + + def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]: """Return lightweight request-dimension estimates for debugging and routing preview.""" messages = body.get("messages", []) @@ -831,12 +853,21 @@ async def update_status(request: Request, force: bool = False): """Return cached or fresh release update metadata.""" headers = _collect_routing_headers(request) status = await _update_checker.get_status(force=force) + rollout_summary = _rollout_provider_summary((status.auto_update or {}).get("provider_scope")) status.auto_update = apply_auto_update_guardrails( status.auto_update or {}, - providers_healthy=_health_summary()["providers_healthy"], - providers_unhealthy=_health_summary()["providers_unhealthy"], + providers_total=rollout_summary["providers_total"], + providers_healthy=rollout_summary["providers_healthy"], + providers_unhealthy=rollout_summary["providers_unhealthy"], ) status.auto_update = apply_maintenance_window_guardrail(status.auto_update or {}) + status.auto_update.setdefault("provider_scope", {}) + status.auto_update["provider_scope"]["matched_providers"] = rollout_summary["providers"] + status.auto_update["provider_scope"]["summary"] = { + "providers_total": rollout_summary["providers_total"], + "providers_healthy": rollout_summary["providers_healthy"], + "providers_unhealthy": rollout_summary["providers_unhealthy"], + } operator_action, client_tag = _collect_operator_context(headers) auto_update = status.auto_update or {} _metrics.log_operator_event( diff --git a/foundrygate/updates.py b/foundrygate/updates.py index 5129bfa..81e44e1 100644 --- a/foundrygate/updates.py +++ b/foundrygate/updates.py @@ -119,6 +119,7 @@ def release_age_hours(published_at: str, *, now: datetime | None = None) -> floa def apply_auto_update_guardrails( auto_update: dict[str, Any], *, + providers_total: int, providers_healthy: int, providers_unhealthy: int, ) -> dict[str, Any]: @@ -133,6 +134,11 @@ def apply_auto_update_guardrails( if not require_healthy_providers: return result + if providers_total <= 0: + result["eligible"] = False + result["blocked_reason"] = "No providers match rollout provider scope" + return result + if providers_healthy <= 0: result["eligible"] = False result["blocked_reason"] = "No healthy providers available" @@ -304,6 +310,7 @@ def __init__( ), "max_unhealthy_providers": int((auto_update or {}).get("max_unhealthy_providers", 0)), "min_release_age_hours": int((auto_update or {}).get("min_release_age_hours", 0)), + "provider_scope": dict((auto_update or {}).get("provider_scope") or {}), "maintenance_window": dict((auto_update or {}).get("maintenance_window") or {}), "apply_command": str((auto_update or {}).get("apply_command", "foundrygate-update")), } @@ -365,6 +372,7 @@ def _auto_update_status( ), "max_unhealthy_providers": int(self.auto_update.get("max_unhealthy_providers", 0)), "min_release_age_hours": int(self.auto_update.get("min_release_age_hours", 0)), + "provider_scope": dict(self.auto_update.get("provider_scope") or {}), "maintenance_window": dict(self.auto_update.get("maintenance_window") or {}), "eligible": eligible, "blocked_reason": blocked_reason, diff --git a/tests/test_config.py b/tests/test_config.py index 8c1ce91..68ef849 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -91,6 +91,10 @@ def test_auto_update_defaults_are_exposed(): assert cfg.auto_update["require_healthy_providers"] is True assert cfg.auto_update["max_unhealthy_providers"] == 0 assert cfg.auto_update["min_release_age_hours"] == 0 + assert cfg.auto_update["provider_scope"] == { + "allow_providers": [], + "deny_providers": ["openrouter-fallback"], + } assert cfg.auto_update["maintenance_window"]["enabled"] is False assert cfg.auto_update["maintenance_window"]["timezone"] == "UTC" assert cfg.auto_update["maintenance_window"]["days"] == ["sat", "sun"] diff --git a/tests/test_updates.py b/tests/test_updates.py index a90d777..586ad12 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -114,6 +114,7 @@ def test_auto_update_guardrails_block_when_too_many_providers_are_unhealthy(): "max_unhealthy_providers": 0, "blocked_reason": "", }, + providers_total=2, providers_healthy=1, providers_unhealthy=1, ) @@ -131,6 +132,7 @@ def test_auto_update_guardrails_allow_updates_when_health_budget_is_met(): "max_unhealthy_providers": 1, "blocked_reason": "", }, + providers_total=3, providers_healthy=2, providers_unhealthy=1, ) @@ -147,6 +149,7 @@ def test_auto_update_guardrails_block_when_no_provider_is_healthy(): "max_unhealthy_providers": 2, "blocked_reason": "", }, + providers_total=2, providers_healthy=0, providers_unhealthy=2, ) @@ -155,6 +158,24 @@ def test_auto_update_guardrails_block_when_no_provider_is_healthy(): assert guarded["blocked_reason"] == "No healthy providers available" +def test_auto_update_guardrails_block_when_provider_scope_matches_nothing(): + guarded = apply_auto_update_guardrails( + { + "enabled": True, + "eligible": True, + "require_healthy_providers": True, + "max_unhealthy_providers": 0, + "blocked_reason": "", + }, + providers_total=0, + providers_healthy=0, + providers_unhealthy=0, + ) + + assert guarded["eligible"] is False + assert guarded["blocked_reason"] == "No providers match rollout provider scope" + + def test_maintenance_window_guardrail_allows_updates_when_window_is_disabled(): guarded = apply_maintenance_window_guardrail( { @@ -271,7 +292,11 @@ async def test_update_checker_reports_latest_release(): current_version="0.4.0", enabled=True, repository="typelicious/FoundryGate", - auto_update={"enabled": True, "allow_major": False}, + auto_update={ + "enabled": True, + "allow_major": False, + "provider_scope": {"allow_providers": ["deepseek-chat"], "deny_providers": []}, + }, ) checker._client = _FakeClient( _FakeResponse( @@ -295,6 +320,10 @@ async def test_update_checker_reports_latest_release(): assert status.auto_update["eligible"] is True assert status.release_channel == "stable" assert status.auto_update["allowed_update_types"] == ["patch", "minor"] + assert status.auto_update["provider_scope"] == { + "allow_providers": ["deepseek-chat"], + "deny_providers": [], + } assert status.release_url.endswith("/v0.5.0") From 29a47a12e36cbaa49ce98b658f3639f4c7a773ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Thu, 12 Mar 2026 19:51:20 +0100 Subject: [PATCH 2/2] fix(ci): restore python 3.10 compatibility --- foundrygate/updates.py | 6 +++--- tests/test_updates.py | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/foundrygate/updates.py b/foundrygate/updates.py index 81e44e1..392c678 100644 --- a/foundrygate/updates.py +++ b/foundrygate/updates.py @@ -4,7 +4,7 @@ import time from dataclasses import dataclass -from datetime import UTC, datetime +from datetime import datetime, timezone from typing import Any from zoneinfo import ZoneInfo, ZoneInfoNotFoundError @@ -112,7 +112,7 @@ def release_age_hours(published_at: str, *, now: datetime | None = None) -> floa published = datetime.fromisoformat(published_at.replace("Z", "+00:00")) except ValueError: return None - current = now or datetime.now(UTC) + current = now or datetime.now(timezone.utc) return max(0.0, (current - published).total_seconds() / 3600) @@ -209,7 +209,7 @@ def apply_maintenance_window_guardrail( result["maintenance_window"] = window return result - current = (now or datetime.now(UTC)).astimezone(zone) + current = (now or datetime.now(timezone.utc)).astimezone(zone) day_name = current.strftime("%a").lower()[:3] allowed_days = list(window.get("days") or []) start_hour = int(window.get("start_hour", 0)) diff --git a/tests/test_updates.py b/tests/test_updates.py index 586ad12..5ca2ee9 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -2,7 +2,7 @@ from __future__ import annotations -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, timezone import pytest @@ -86,7 +86,7 @@ def test_select_release_payload_uses_first_preview_release(): def test_release_age_hours_reports_elapsed_time(): - now = datetime(2026, 3, 12, 18, 0, tzinfo=UTC) + now = datetime(2026, 3, 12, 18, 0, tzinfo=timezone.utc) published = (now - timedelta(hours=6)).isoformat().replace("+00:00", "Z") assert release_age_hours(published, now=now) == 6.0 @@ -99,7 +99,9 @@ def test_release_age_guardrail_blocks_new_releases(): "min_release_age_hours": 24, "blocked_reason": "", }, - published_at=(datetime.now(UTC) - timedelta(hours=2)).isoformat().replace("+00:00", "Z"), + published_at=(datetime.now(timezone.utc) - timedelta(hours=2)) + .isoformat() + .replace("+00:00", "Z"), ) assert guarded["eligible"] is False assert guarded["blocked_reason"].startswith("Release is too new") @@ -190,7 +192,7 @@ def test_maintenance_window_guardrail_allows_updates_when_window_is_disabled(): "end_hour": 24, }, }, - now=datetime(2026, 3, 12, 12, 0, tzinfo=UTC), + now=datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc), ) assert guarded["eligible"] is True @@ -211,7 +213,7 @@ def test_maintenance_window_guardrail_blocks_outside_allowed_days(): "end_hour": 24, }, }, - now=datetime(2026, 3, 12, 12, 0, tzinfo=UTC), + now=datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc), ) assert guarded["eligible"] is False @@ -233,7 +235,7 @@ def test_maintenance_window_guardrail_blocks_outside_allowed_hours(): "end_hour": 5, }, }, - now=datetime(2026, 3, 12, 12, 0, tzinfo=UTC), + now=datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc), ) assert guarded["eligible"] is False @@ -255,7 +257,7 @@ def test_maintenance_window_guardrail_allows_inside_matching_window(): "end_hour": 14, }, }, - now=datetime(2026, 3, 12, 12, 0, tzinfo=UTC), + now=datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc), ) assert guarded["eligible"] is True @@ -278,7 +280,7 @@ def test_maintenance_window_guardrail_blocks_unknown_timezone(): "end_hour": 24, }, }, - now=datetime(2026, 3, 12, 12, 0, tzinfo=UTC), + now=datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc), ) assert guarded["eligible"] is False @@ -479,7 +481,7 @@ async def test_min_release_age_blocks_auto_update_until_release_has_aged(): { "tag_name": "v0.6.1", "html_url": "https://github.com/typelicious/FoundryGate/releases/tag/v0.6.1", - "published_at": (datetime.now(UTC) - timedelta(hours=1)) + "published_at": (datetime.now(timezone.utc) - timedelta(hours=1)) .isoformat() .replace("+00:00", "Z"), },