diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fcc7d2..989d825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel - Added provider-health rollout guardrails so helper-driven auto-updates can block when gateway health is already degraded - Added `update_check.release_channel` and `auto_update.rollout_ring` so operators can distinguish stable vs preview checks and tighter rollout rings - Added `auto_update.min_release_age_hours` so helper-driven auto-updates can wait for a release to age before becoming eligible +- Added `auto_update.maintenance_window` so helper-driven auto-updates can stay inside explicit local maintenance hours ## v0.6.0 - 2026-03-12 diff --git a/README.md b/README.md index df43a53..f93b8ff 100644 --- a/README.md +++ b/README.md @@ -549,6 +549,7 @@ Supported fields in `auto_update`: - `require_healthy_providers` - `max_unhealthy_providers` - `min_release_age_hours` +- `maintenance_window` - `apply_command` Example: @@ -561,6 +562,12 @@ auto_update: require_healthy_providers: true max_unhealthy_providers: 0 min_release_age_hours: 24 + maintenance_window: + enabled: true + timezone: "Europe/Berlin" + days: ["sat", "sun"] + start_hour: 2 + end_hour: 5 apply_command: "foundrygate-update" ``` @@ -572,6 +579,7 @@ What the current runtime does with it: - can block helper-driven rollout when provider health is already degraded - lets operators separate `stable` vs `preview` release checks and `stable` / `early` / `canary` rollout rings - can require that a release has aged for a minimum number of hours before helper-driven rollout +- can restrict helper-driven rollout to explicit local maintenance windows What it still does not do: @@ -848,6 +856,7 @@ What it does not do: - it does not download releases - it does not modify the checkout - it does not auto-update the service unless an operator explicitly wires `foundrygate-auto-update --apply` into their own 
scheduler +- it does not bypass maintenance windows, release-age gates, rollout rings, or provider-health guardrails Manual check: diff --git a/config.yaml b/config.yaml index d77f509..9441c2c 100644 --- a/config.yaml +++ b/config.yaml @@ -893,6 +893,12 @@ auto_update: require_healthy_providers: true max_unhealthy_providers: 0 min_release_age_hours: 0 + maintenance_window: + enabled: false + timezone: "UTC" + days: ["sat", "sun"] + start_hour: 2 + end_hour: 5 apply_command: "foundrygate-update" diff --git a/docs/PUBLISHING.md b/docs/PUBLISHING.md index 29badf1..d553291 100644 --- a/docs/PUBLISHING.md +++ b/docs/PUBLISHING.md @@ -66,6 +66,7 @@ If you want scheduled update application: - keep `allow_major: false` unless you are ready to absorb breaking changes automatically - keep `require_healthy_providers: true` unless you are intentionally allowing rollouts while the gateway is degraded - set `min_release_age_hours` above `0` if you want scheduled rollouts to wait before applying newly published releases +- add `maintenance_window` if scheduled updates should only run in explicit local maintenance hours - prefer the reviewed examples in [examples/foundrygate-auto-update.service](./examples/foundrygate-auto-update.service) and [examples/foundrygate-auto-update.timer](./examples/foundrygate-auto-update.timer) - use the cron example in [examples/foundrygate-auto-update.cron](./examples/foundrygate-auto-update.cron) only when `systemd` timers are not practical diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index cd66ab5..da3c230 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -183,4 +183,6 @@ If `foundrygate-auto-update --apply` refuses to run, inspect the `auto_update` b - the latest release is a major upgrade while `allow_major: false` - one or more providers are unhealthy while `require_healthy_providers: true` - the number of unhealthy providers exceeds `max_unhealthy_providers` +- the current time is outside the configured 
`maintenance_window.days` or `maintenance_window.start_hour` / `end_hour` +- `maintenance_window.timezone` is invalid for the host runtime - the release lookup itself is unavailable diff --git a/foundrygate/config.py b/foundrygate/config.py index c2136a2..81b0d99 100644 --- a/foundrygate/config.py +++ b/foundrygate/config.py @@ -63,6 +63,7 @@ _CLIENT_PROFILE_MATCH_KEYS = {"header_contains", "header_present", "any", "all"} _SUPPORTED_CLIENT_PROFILE_PRESETS = {"openclaw", "n8n", "cli"} _SUPPORTED_REQUEST_HOOKS = set(get_registered_request_hooks()) +_SUPPORTED_WINDOW_DAYS = {"mon", "tue", "wed", "thu", "fri", "sat", "sun"} _CLIENT_PROFILE_PRESET_SPECS: dict[str, dict[str, Any]] = { "openclaw": { @@ -917,6 +918,45 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: if min_release_age_hours < 0: raise ConfigError("'auto_update.min_release_age_hours' must be non-negative") + maintenance_window = raw.get("maintenance_window", {}) + if maintenance_window is None: + maintenance_window = {} + if not isinstance(maintenance_window, dict): + raise ConfigError("'auto_update.maintenance_window' must be a mapping") + + window_enabled = maintenance_window.get("enabled", False) + if not isinstance(window_enabled, bool): + raise ConfigError("'auto_update.maintenance_window.enabled' must be a boolean") + + timezone = maintenance_window.get("timezone", "UTC") + if not isinstance(timezone, str) or not timezone.strip(): + raise ConfigError("'auto_update.maintenance_window.timezone' must be a non-empty string") + + days = _normalize_string_list( + maintenance_window.get("days", []), + field_name="days", + rule_name="auto_update.maintenance_window", + allow_empty=True, + ) + unknown_days = sorted(set(days) - _SUPPORTED_WINDOW_DAYS) + if unknown_days: + raise ConfigError( + "'auto_update.maintenance_window.days' has unknown weekday values: " + + ", ".join(unknown_days) + ) + + start_hour = maintenance_window.get("start_hour", 0) + end_hour = 
maintenance_window.get("end_hour", 24) + for key, value in {"start_hour": start_hour, "end_hour": end_hour}.items(): + if isinstance(value, bool) or not isinstance(value, int): + raise ConfigError(f"'auto_update.maintenance_window.{key}' must be an integer") + if not 0 <= start_hour <= 23: + raise ConfigError("'auto_update.maintenance_window.start_hour' must be between 0 and 23") + if not 1 <= end_hour <= 24: + raise ConfigError("'auto_update.maintenance_window.end_hour' must be between 1 and 24") + if start_hour == end_hour: + raise ConfigError("'auto_update.maintenance_window' must not use the same start/end hour") + apply_command = raw.get("apply_command", "foundrygate-update") if not isinstance(apply_command, str) or not apply_command.strip(): raise ConfigError("'auto_update.apply_command' must be a non-empty string") @@ -929,6 +969,13 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]: "require_healthy_providers": require_healthy_providers, "max_unhealthy_providers": max_unhealthy_providers, "min_release_age_hours": min_release_age_hours, + "maintenance_window": { + "enabled": window_enabled, + "timezone": timezone.strip(), + "days": days, + "start_hour": start_hour, + "end_hour": end_hour, + }, "apply_command": apply_command.strip(), } return normalized @@ -1023,6 +1070,13 @@ def auto_update(self) -> dict: "require_healthy_providers": True, "max_unhealthy_providers": 0, "min_release_age_hours": 0, + "maintenance_window": { + "enabled": False, + "timezone": "UTC", + "days": [], + "start_hour": 0, + "end_hour": 24, + }, "apply_command": "foundrygate-update", }, ) diff --git a/foundrygate/main.py b/foundrygate/main.py index e8392a0..c3485c5 100644 --- a/foundrygate/main.py +++ b/foundrygate/main.py @@ -23,7 +23,11 @@ from .metrics import MetricsStore, calc_cost from .providers import ProviderBackend, ProviderError from .router import Router, RoutingDecision -from .updates import UpdateChecker, apply_auto_update_guardrails +from .updates import 
( + UpdateChecker, + apply_auto_update_guardrails, + apply_maintenance_window_guardrail, +) logger = logging.getLogger("foundrygate") @@ -832,6 +836,7 @@ async def update_status(request: Request, force: bool = False): providers_healthy=_health_summary()["providers_healthy"], providers_unhealthy=_health_summary()["providers_unhealthy"], ) + status.auto_update = apply_maintenance_window_guardrail(status.auto_update or {}) operator_action, client_tag = _collect_operator_context(headers) auto_update = status.auto_update or {} _metrics.log_operator_event( diff --git a/foundrygate/updates.py b/foundrygate/updates.py index c69a2a6..1a51482 100644 --- a/foundrygate/updates.py +++ b/foundrygate/updates.py @@ -4,8 +4,9 @@ import time from dataclasses import dataclass -from datetime import UTC, datetime +from datetime import datetime, timezone from typing import Any +from zoneinfo import ZoneInfo, ZoneInfoNotFoundError import httpx @@ -111,7 +112,7 @@ def release_age_hours(published_at: str, *, now: datetime | None = None) -> floa published = datetime.fromisoformat(published_at.replace("Z", "+00:00")) except ValueError: return None - current = now or datetime.now(UTC) + current = now or datetime.now(timezone.utc) return max(0.0, (current - published).total_seconds() / 3600) @@ -175,6 +176,63 @@ def apply_release_age_guardrail( return result +def apply_maintenance_window_guardrail( + auto_update: dict[str, Any], + *, + now: datetime | None = None, +) -> dict[str, Any]: + """Apply an optional maintenance-window guardrail to one auto-update status.""" + result = dict(auto_update or {}) + window = dict(result.get("maintenance_window") or {}) + if not result.get("enabled") or not result.get("eligible"): + result["maintenance_window"] = window + return result + + if not window.get("enabled", False): + window["open"] = True + result["maintenance_window"] = window + return result + + timezone_name = str(window.get("timezone", "UTC")) + try: + zone = ZoneInfo(timezone_name) + except 
ZoneInfoNotFoundError: + result["eligible"] = False + result["blocked_reason"] = f"Unknown maintenance-window timezone '{timezone_name}'" + window["open"] = False + result["maintenance_window"] = window + return result + + current = (now or datetime.now(timezone.utc)).astimezone(zone) + day_name = current.strftime("%a").lower()[:3] + allowed_days = list(window.get("days") or []) + start_hour = int(window.get("start_hour", 0)) + end_hour = int(window.get("end_hour", 24)) + current_hour = current.hour + + day_allowed = not allowed_days or day_name in allowed_days + if start_hour < end_hour: + hour_allowed = start_hour <= current_hour < end_hour + else: + hour_allowed = current_hour >= start_hour or current_hour < end_hour + + window["open"] = bool(day_allowed and hour_allowed) + window["current_day"] = day_name + window["current_hour"] = current_hour + result["maintenance_window"] = window + + if not day_allowed: + result["eligible"] = False + result["blocked_reason"] = f"Outside maintenance days ({day_name})" + return result + if not hour_allowed: + result["eligible"] = False + result["blocked_reason"] = ( + f"Outside maintenance window ({start_hour:02d}:00-{end_hour:02d}:00 {timezone_name})" + ) + return result + + @dataclass class UpdateStatus: """Structured update-check result.""" @@ -246,6 +304,7 @@ def __init__( ), "max_unhealthy_providers": int((auto_update or {}).get("max_unhealthy_providers", 0)), "min_release_age_hours": int((auto_update or {}).get("min_release_age_hours", 0)), + "maintenance_window": dict((auto_update or {}).get("maintenance_window") or {}), "apply_command": str((auto_update or {}).get("apply_command", "foundrygate-update")), } self._cached = UpdateStatus( @@ -306,6 +365,7 @@ def _auto_update_status( ), "max_unhealthy_providers": int(self.auto_update.get("max_unhealthy_providers", 0)), "min_release_age_hours": int(self.auto_update.get("min_release_age_hours", 0)), + "maintenance_window": dict(self.auto_update.get("maintenance_window") or 
{}), "eligible": eligible, "blocked_reason": blocked_reason, "apply_command": apply_command, diff --git a/tests/test_config.py b/tests/test_config.py index 3fa8dd3..8c1ce91 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -91,6 +91,11 @@ def test_auto_update_defaults_are_exposed(): assert cfg.auto_update["require_healthy_providers"] is True assert cfg.auto_update["max_unhealthy_providers"] == 0 assert cfg.auto_update["min_release_age_hours"] == 0 + assert cfg.auto_update["maintenance_window"]["enabled"] is False + assert cfg.auto_update["maintenance_window"]["timezone"] == "UTC" + assert cfg.auto_update["maintenance_window"]["days"] == ["sat", "sun"] + assert cfg.auto_update["maintenance_window"]["start_hour"] == 2 + assert cfg.auto_update["maintenance_window"]["end_hour"] == 5 assert cfg.auto_update["apply_command"] == "foundrygate-update" diff --git a/tests/test_updates.py b/tests/test_updates.py index 406d5c7..231ef22 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -2,7 +2,7 @@ from __future__ import annotations -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, timezone import pytest @@ -11,6 +11,7 @@ alert_level_for_update, allowed_update_types_for_ring, apply_auto_update_guardrails, + apply_maintenance_window_guardrail, apply_release_age_guardrail, classify_update, is_update_available, @@ -85,7 +86,7 @@ def test_select_release_payload_uses_first_preview_release(): def test_release_age_hours_reports_elapsed_time(): - now = datetime(2026, 3, 12, 18, 0, tzinfo=UTC) + now = datetime(2026, 3, 12, 18, 0, tzinfo=timezone.utc) published = (now - timedelta(hours=6)).isoformat().replace("+00:00", "Z") assert release_age_hours(published, now=now) == 6.0 @@ -98,7 +99,9 @@ def test_release_age_guardrail_blocks_new_releases(): "min_release_age_hours": 24, "blocked_reason": "", }, - published_at=(datetime.now(UTC) - timedelta(hours=2)).isoformat().replace("+00:00", "Z"), + 
# 2026-03-12 is a Thursday; every maintenance-window test evaluates this instant.
_THURSDAY_NOON_UTC = datetime(2026, 3, 12, 12, 0, tzinfo=timezone.utc)


def _guard_with_window(enabled, tz_name, days, start_hour, end_hour):
    """Run the guardrail on an enabled, eligible status carrying the given window."""
    status = {
        "enabled": True,
        "eligible": True,
        "blocked_reason": "",
        "maintenance_window": {
            "enabled": enabled,
            "timezone": tz_name,
            "days": days,
            "start_hour": start_hour,
            "end_hour": end_hour,
        },
    }
    return apply_maintenance_window_guardrail(status, now=_THURSDAY_NOON_UTC)


def test_maintenance_window_guardrail_allows_updates_when_window_is_disabled():
    """A disabled window never blocks and is reported as open."""
    outcome = _guard_with_window(False, "UTC", [], 0, 24)

    assert outcome["eligible"] is True
    assert outcome["maintenance_window"]["open"] is True


def test_maintenance_window_guardrail_blocks_outside_allowed_days():
    """A Thursday is rejected when only weekend days are allowed."""
    outcome = _guard_with_window(True, "UTC", ["sat", "sun"], 0, 24)

    assert outcome["eligible"] is False
    assert outcome["maintenance_window"]["open"] is False
    assert outcome["blocked_reason"] == "Outside maintenance days (thu)"


def test_maintenance_window_guardrail_blocks_outside_allowed_hours():
    """Noon is rejected by a 02:00-05:00 window on any day."""
    outcome = _guard_with_window(True, "UTC", [], 2, 5)

    assert outcome["eligible"] is False
    assert outcome["maintenance_window"]["open"] is False
    assert outcome["blocked_reason"] == "Outside maintenance window (02:00-05:00 UTC)"


def test_maintenance_window_guardrail_allows_inside_matching_window():
    """Matching day and hour keep the update eligible and expose the evaluated time."""
    outcome = _guard_with_window(True, "UTC", ["thu"], 10, 14)
    window = outcome["maintenance_window"]

    assert outcome["eligible"] is True
    assert window["open"] is True
    assert window["current_day"] == "thu"
    assert window["current_hour"] == 12


def test_maintenance_window_guardrail_blocks_unknown_timezone():
    """An unresolvable timezone name blocks the rollout with a clear reason."""
    outcome = _guard_with_window(True, "Mars/Olympus", [], 0, 24)

    assert outcome["eligible"] is False
    assert outcome["maintenance_window"]["open"] is False
    assert outcome["blocked_reason"] == "Unknown maintenance-window timezone 'Mars/Olympus'"