Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel
- Added `update_check.release_channel` and `auto_update.rollout_ring` so operators can distinguish stable vs preview checks and tighter rollout rings
- Added `auto_update.min_release_age_hours` so helper-driven auto-updates can wait for a release to age before becoming eligible
- Added `auto_update.maintenance_window` so helper-driven auto-updates can stay inside explicit local maintenance hours
- Added `auto_update.provider_scope` so rollout-health guardrails can evaluate only a selected provider subset

## v0.6.0 - 2026-03-12

Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ Supported fields in `auto_update`:
- `require_healthy_providers`
- `max_unhealthy_providers`
- `min_release_age_hours`
- `provider_scope`
- `maintenance_window`
- `apply_command`

Expand All @@ -562,6 +563,9 @@ auto_update:
require_healthy_providers: true
max_unhealthy_providers: 0
min_release_age_hours: 24
provider_scope:
allow_providers: ["local-worker", "deepseek-chat"]
deny_providers: ["openrouter-fallback"]
maintenance_window:
enabled: true
timezone: "Europe/Berlin"
Expand All @@ -577,6 +581,7 @@ What the current runtime does with it:
- shows the same state in the dashboard
- lets `foundrygate-auto-update --apply` run only when the current release state is eligible
- can block helper-driven rollout when provider health is already degraded
- can scope rollout-health checks to a specific provider subset instead of the whole runtime
- lets operators separate `stable` vs `preview` release checks and `stable` / `early` / `canary` rollout rings
- can require that a release has aged for a minimum number of hours before helper-driven rollout
- can restrict helper-driven rollout to explicit local maintenance windows
Expand Down
3 changes: 3 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,9 @@ auto_update:
require_healthy_providers: true
max_unhealthy_providers: 0
min_release_age_hours: 0
provider_scope:
allow_providers: []
deny_providers: ["openrouter-fallback"]
maintenance_window:
enabled: false
timezone: "UTC"
Expand Down
1 change: 1 addition & 0 deletions docs/PUBLISHING.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ If you want scheduled update application:
- keep `allow_major: false` unless you are ready to absorb breaking changes automatically
- keep `require_healthy_providers: true` unless you are intentionally allowing rollouts while the gateway is degraded
- set `min_release_age_hours` above `0` if you want scheduled rollouts to wait before applying newly published releases
- use `provider_scope.allow_providers` / `deny_providers` if rollout health should only consider a subset of providers
- add `maintenance_window` if scheduled updates should only run in explicit local maintenance hours
- prefer the reviewed examples in [examples/foundrygate-auto-update.service](./examples/foundrygate-auto-update.service) and [examples/foundrygate-auto-update.timer](./examples/foundrygate-auto-update.timer)
- use the cron example in [examples/foundrygate-auto-update.cron](./examples/foundrygate-auto-update.cron) only when `systemd` timers are not practical
Expand Down
1 change: 1 addition & 0 deletions docs/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ If `foundrygate-auto-update --apply` refuses to run, inspect the `auto_update` b

- `auto_update.enabled: false`
- the latest release is a major upgrade while `allow_major: false`
- `provider_scope.allow_providers` / `deny_providers` resolves to no matching providers
- one or more providers are unhealthy while `require_healthy_providers: true`
- the number of unhealthy providers exceeds `max_unhealthy_providers`
- the current time is outside the configured `maintenance_window.days` or `maintenance_window.start_hour` / `end_hour`
Expand Down
46 changes: 46 additions & 0 deletions foundrygate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,44 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]:
if min_release_age_hours < 0:
raise ConfigError("'auto_update.min_release_age_hours' must be non-negative")

provider_scope = raw.get("provider_scope", {})
if provider_scope is None:
provider_scope = {}
if not isinstance(provider_scope, dict):
raise ConfigError("'auto_update.provider_scope' must be a mapping")

provider_names = set((data.get("providers") or {}).keys())
allow_providers = _normalize_string_list(
provider_scope.get("allow_providers", []),
field_name="allow_providers",
rule_name="auto_update.provider_scope",
allow_empty=True,
)
deny_providers = _normalize_string_list(
provider_scope.get("deny_providers", []),
field_name="deny_providers",
rule_name="auto_update.provider_scope",
allow_empty=True,
)
unknown_allowed = sorted(name for name in allow_providers if name not in provider_names)
if unknown_allowed:
raise ConfigError(
"'auto_update.provider_scope.allow_providers' references unknown providers: "
+ ", ".join(unknown_allowed)
)
unknown_denied = sorted(name for name in deny_providers if name not in provider_names)
if unknown_denied:
raise ConfigError(
"'auto_update.provider_scope.deny_providers' references unknown providers: "
+ ", ".join(unknown_denied)
)
overlap = sorted(set(allow_providers) & set(deny_providers))
if overlap:
raise ConfigError(
"'auto_update.provider_scope' cannot allow and deny the same providers: "
+ ", ".join(overlap)
)

maintenance_window = raw.get("maintenance_window", {})
if maintenance_window is None:
maintenance_window = {}
Expand Down Expand Up @@ -969,6 +1007,10 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]:
"require_healthy_providers": require_healthy_providers,
"max_unhealthy_providers": max_unhealthy_providers,
"min_release_age_hours": min_release_age_hours,
"provider_scope": {
"allow_providers": allow_providers,
"deny_providers": deny_providers,
},
"maintenance_window": {
"enabled": window_enabled,
"timezone": timezone.strip(),
Expand Down Expand Up @@ -1070,6 +1112,10 @@ def auto_update(self) -> dict:
"require_healthy_providers": True,
"max_unhealthy_providers": 0,
"min_release_age_hours": 0,
"provider_scope": {
"allow_providers": [],
"deny_providers": [],
},
"maintenance_window": {
"enabled": False,
"timezone": "UTC",
Expand Down
35 changes: 33 additions & 2 deletions foundrygate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,28 @@ def _health_summary() -> dict[str, int]:
}


def _rollout_provider_summary(provider_scope: dict[str, Any] | None) -> dict[str, Any]:
    """Summarize provider health for the providers inside the rollout scope.

    Args:
        provider_scope: Mapping that may carry ``allow_providers`` and
            ``deny_providers`` name lists. ``None`` or an empty mapping
            selects every registered provider.

    Returns:
        A dict with the matched provider names (``providers``, in the
        iteration order of ``_providers``) plus the ``providers_total``,
        ``providers_healthy`` and ``providers_unhealthy`` counts for that
        subset.
    """
    scope_cfg = dict(provider_scope or {})
    allowed = set(scope_cfg.get("allow_providers") or [])
    denied = set(scope_cfg.get("deny_providers") or [])

    def _in_scope(name: Any) -> bool:
        # An empty allow-list places no restriction; the deny-list always applies.
        return (not allowed or name in allowed) and name not in denied

    selected = [
        (name, provider) for name, provider in _providers.items() if _in_scope(name)
    ]
    healthy_count = len([entry for entry in selected if entry[1].health.healthy])
    unhealthy_count = len([entry for entry in selected if not entry[1].health.healthy])

    return {
        "providers": [entry[0] for entry in selected],
        "providers_total": len(selected),
        "providers_healthy": healthy_count,
        "providers_unhealthy": unhealthy_count,
    }


def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]:
"""Return lightweight request-dimension estimates for debugging and routing preview."""
messages = body.get("messages", [])
Expand Down Expand Up @@ -831,12 +853,21 @@ async def update_status(request: Request, force: bool = False):
"""Return cached or fresh release update metadata."""
headers = _collect_routing_headers(request)
status = await _update_checker.get_status(force=force)
rollout_summary = _rollout_provider_summary((status.auto_update or {}).get("provider_scope"))
status.auto_update = apply_auto_update_guardrails(
status.auto_update or {},
providers_healthy=_health_summary()["providers_healthy"],
providers_unhealthy=_health_summary()["providers_unhealthy"],
providers_total=rollout_summary["providers_total"],
providers_healthy=rollout_summary["providers_healthy"],
providers_unhealthy=rollout_summary["providers_unhealthy"],
)
status.auto_update = apply_maintenance_window_guardrail(status.auto_update or {})
status.auto_update.setdefault("provider_scope", {})
status.auto_update["provider_scope"]["matched_providers"] = rollout_summary["providers"]
status.auto_update["provider_scope"]["summary"] = {
"providers_total": rollout_summary["providers_total"],
"providers_healthy": rollout_summary["providers_healthy"],
"providers_unhealthy": rollout_summary["providers_unhealthy"],
}
operator_action, client_tag = _collect_operator_context(headers)
auto_update = status.auto_update or {}
_metrics.log_operator_event(
Expand Down
8 changes: 8 additions & 0 deletions foundrygate/updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def release_age_hours(published_at: str, *, now: datetime | None = None) -> floa
def apply_auto_update_guardrails(
auto_update: dict[str, Any],
*,
providers_total: int,
providers_healthy: int,
providers_unhealthy: int,
) -> dict[str, Any]:
Expand All @@ -133,6 +134,11 @@ def apply_auto_update_guardrails(
if not require_healthy_providers:
return result

if providers_total <= 0:
result["eligible"] = False
result["blocked_reason"] = "No providers match rollout provider scope"
return result

if providers_healthy <= 0:
result["eligible"] = False
result["blocked_reason"] = "No healthy providers available"
Expand Down Expand Up @@ -304,6 +310,7 @@ def __init__(
),
"max_unhealthy_providers": int((auto_update or {}).get("max_unhealthy_providers", 0)),
"min_release_age_hours": int((auto_update or {}).get("min_release_age_hours", 0)),
"provider_scope": dict((auto_update or {}).get("provider_scope") or {}),
"maintenance_window": dict((auto_update or {}).get("maintenance_window") or {}),
"apply_command": str((auto_update or {}).get("apply_command", "foundrygate-update")),
}
Expand Down Expand Up @@ -365,6 +372,7 @@ def _auto_update_status(
),
"max_unhealthy_providers": int(self.auto_update.get("max_unhealthy_providers", 0)),
"min_release_age_hours": int(self.auto_update.get("min_release_age_hours", 0)),
"provider_scope": dict(self.auto_update.get("provider_scope") or {}),
"maintenance_window": dict(self.auto_update.get("maintenance_window") or {}),
"eligible": eligible,
"blocked_reason": blocked_reason,
Expand Down
4 changes: 4 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def test_auto_update_defaults_are_exposed():
assert cfg.auto_update["require_healthy_providers"] is True
assert cfg.auto_update["max_unhealthy_providers"] == 0
assert cfg.auto_update["min_release_age_hours"] == 0
assert cfg.auto_update["provider_scope"] == {
"allow_providers": [],
"deny_providers": ["openrouter-fallback"],
}
assert cfg.auto_update["maintenance_window"]["enabled"] is False
assert cfg.auto_update["maintenance_window"]["timezone"] == "UTC"
assert cfg.auto_update["maintenance_window"]["days"] == ["sat", "sun"]
Expand Down
31 changes: 30 additions & 1 deletion tests/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def test_auto_update_guardrails_block_when_too_many_providers_are_unhealthy():
"max_unhealthy_providers": 0,
"blocked_reason": "",
},
providers_total=2,
providers_healthy=1,
providers_unhealthy=1,
)
Expand All @@ -133,6 +134,7 @@ def test_auto_update_guardrails_allow_updates_when_health_budget_is_met():
"max_unhealthy_providers": 1,
"blocked_reason": "",
},
providers_total=3,
providers_healthy=2,
providers_unhealthy=1,
)
Expand All @@ -149,6 +151,7 @@ def test_auto_update_guardrails_block_when_no_provider_is_healthy():
"max_unhealthy_providers": 2,
"blocked_reason": "",
},
providers_total=2,
providers_healthy=0,
providers_unhealthy=2,
)
Expand All @@ -157,6 +160,24 @@ def test_auto_update_guardrails_block_when_no_provider_is_healthy():
assert guarded["blocked_reason"] == "No healthy providers available"


def test_auto_update_guardrails_block_when_provider_scope_matches_nothing():
    """A rollout scope that matches zero providers must block the update."""
    # Otherwise-eligible state: only the empty scope should cause the block.
    eligible_state = {
        "enabled": True,
        "eligible": True,
        "require_healthy_providers": True,
        "max_unhealthy_providers": 0,
        "blocked_reason": "",
    }
    empty_scope_counts = {
        "providers_total": 0,
        "providers_healthy": 0,
        "providers_unhealthy": 0,
    }

    guarded = apply_auto_update_guardrails(eligible_state, **empty_scope_counts)

    assert guarded["eligible"] is False
    assert guarded["blocked_reason"] == "No providers match rollout provider scope"


def test_maintenance_window_guardrail_allows_updates_when_window_is_disabled():
guarded = apply_maintenance_window_guardrail(
{
Expand Down Expand Up @@ -273,7 +294,11 @@ async def test_update_checker_reports_latest_release():
current_version="0.4.0",
enabled=True,
repository="typelicious/FoundryGate",
auto_update={"enabled": True, "allow_major": False},
auto_update={
"enabled": True,
"allow_major": False,
"provider_scope": {"allow_providers": ["deepseek-chat"], "deny_providers": []},
},
)
checker._client = _FakeClient(
_FakeResponse(
Expand All @@ -297,6 +322,10 @@ async def test_update_checker_reports_latest_release():
assert status.auto_update["eligible"] is True
assert status.release_channel == "stable"
assert status.auto_update["allowed_update_types"] == ["patch", "minor"]
assert status.auto_update["provider_scope"] == {
"allow_providers": ["deepseek-chat"],
"deny_providers": [],
}
assert status.release_url.endswith("/v0.5.0")


Expand Down
Loading