diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a96c1..b5efbd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # fusionAIze Gate Changelog +## v1.13.0 - Unreleased + +### Added + +- Expanded the provider source catalog scope beyond `blackbox`, `kilo`, and `openai` so Gate can also track mirrored official source data for `anthropic`, `deepseek`, and `google` +- Added local models-endpoint overlays per configured route, which lets Gate compare what a specific key can really see against the mirrored global provider catalog + +### Changed + +- Provider source alerts now distinguish more clearly between global catalog drift and key-specific route/model visibility drift +- Catalog summaries now include local route counts, local visible model counts, and route-vs-catalog mismatch hints instead of only source freshness and change counts + ## v1.12.0 - 2026-03-29 ### Added diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 4dea59d..3498a68 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -384,7 +384,10 @@ provider_source_refresh: timeout_seconds: 10.0 interval_seconds: 21600 providers: + - anthropic - blackbox + - deepseek + - google - kilo - openai ``` @@ -393,4 +396,5 @@ Notes: - startup refresh is best-effort and should not block the service if docs are unavailable - `interval_seconds` controls the conservative background refresh loop after startup - source snapshots live in the same local DB as metrics +- for providers with a usable local `models` endpoint, Gate also mirrors key-specific model visibility per configured route and compares that against the global source snapshot - local billing overlays such as subscription or quota windows belong in the local account profile layer, not in the global provider snapshot diff --git a/docs/FAIGATE-ROADMAP.md b/docs/FAIGATE-ROADMAP.md index c68e4fe..ffe6558 100644 --- a/docs/FAIGATE-ROADMAP.md +++ b/docs/FAIGATE-ROADMAP.md @@ -37,29 +37,31 @@ The detailed design lives in [Adaptive model orchestration](./ADAPTIVE-ORCHESTRA The next block should stay disciplined: build on the workstation baseline, keep packaging practical, and avoid turning fusionAIze Gate into a sprawling platform. -## Current release target: `v1.12.0` +## Current release target: `v1.13.0` -The next release should land as a clean operational release, not as another loose pile of runtime slices. +`v1.12.0` closed the first operator-facing catalog and release-hardening loop. The +next release should make that catalog meaningfully more alive instead of merely +more visible. -`v1.12.0` should close around three themes that now fit together: +`v1.13.0` should close around three tightly related themes: -- provider source cataloging and alerting as a first-class operator surface -- clearer aggregator behavior for Kilo and BLACKBOX, especially where "free", "budget", "wallet", and explicit paid lanes are easy to conflate -- hardened release automation after the `v1.11.x` release failures +- provider source catalog moves from mirrored docs pages to a more living operator dataset +- local key and route visibility are overlaid against global provider source snapshots +- provider drift gets classified more clearly as global docs drift, key-specific access drift, or route-level mismatch The release should feel coherent from an operator point of view: -- Quick Setup, Doctor, Provider Probe, Dashboard, and route preview all explain drift or route pressure using the same language -- Kilo explicit Sonnet/Opus lanes are visible as deliberate routing choices instead of hidden aggregator magic -- release prep, tag validation, and publish dry-runs are boring and repeatable again +- Doctor, Provider Probe, Dashboard, Quick Setup, and `/api/provider-catalog` tell the same story about what changed globally and what is only true for this key or route +- free-tier, paid-tier, wallet, and BYOK assumptions are treated as per-key operational facts instead of being inferred blindly from public pricing tables +- the provider source catalog becomes a trustworthy early-warning surface before route selection starts leaning on outdated assumptions -What is intentionally not in scope for `v1.12.0`: +What is intentionally not in scope for `v1.13.0`: - the virtual key layer - gateway-level response caching -- fully automated external provider-source crawling on a long-running schedule +- a large new bridge or client-surface expansion -Those stay as follow-on tracks once the operator surfaces, release path, and aggregator semantics are stable enough to trust. +Those stay as follow-on tracks once the provider catalog and route-availability overlay are stable enough to trust under real operator workflows. ## Shipped: `v1.8.0` – `v1.9.1` diff --git a/faigate/config.py b/faigate/config.py index b0c4ac1..209e494 100644 --- a/faigate/config.py +++ b/faigate/config.py @@ -1704,9 +1704,10 @@ def _normalize_provider_source_refresh(data: dict[str, Any]) -> dict[str, Any]: if interval_seconds <= 0: raise ConfigError("'provider_source_refresh.interval_seconds' must be positive") - providers = raw.get("providers", ["blackbox", "kilo", "openai"]) + default_providers = ["anthropic", "blackbox", "deepseek", "google", "kilo", "openai"] + providers = raw.get("providers", default_providers) if providers in (None, ""): - providers = ["blackbox", "kilo", "openai"] + providers = default_providers if not isinstance(providers, list) or any( not isinstance(item, str) or not item.strip() for item in providers ): @@ -1881,7 +1882,14 @@ def provider_source_refresh(self) -> dict: "on_startup": True, "timeout_seconds": 10.0, "interval_seconds": 21600, - "providers": ["blackbox", "kilo", "openai"], + "providers": [ + "anthropic", + "blackbox", + "deepseek", + "google", + "kilo", + "openai", + ], }, ) diff --git a/faigate/main.py b/faigate/main.py index 730b741..57b7e06 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -38,6 +38,10 @@ ) from .lane_registry import get_provider_lane_binding, get_route_add_recommendations from .metrics import MetricsStore, calc_cost +from .provider_availability import ( + record_availability_from_config, + refresh_local_model_availability, +) from .provider_catalog import ( build_provider_catalog_report, build_provider_discovery_view, @@ -74,6 +78,10 @@ _provider_catalog_refresh_task: asyncio.Task[None] | None = None +def _provider_catalog_config_path() -> str: + return str(os.environ.get("FAIGATE_CONFIG_FILE") or "config.yaml") + + class PayloadTooLargeError(ValueError): """Raised when one request or upload exceeds configured size limits.""" @@ -204,6 +212,21 @@ async def _refresh_provider_source_catalog(*, force: bool = False) -> list[dict[ provider_ids=target_ids, timeout_seconds=float(source_refresh_cfg.get("timeout_seconds") or 10.0), ) + await asyncio.to_thread( + record_availability_from_config, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + health_payload={ + "providers": {item["name"]: item for item in _build_provider_inventory()} + }, + ) + await asyncio.to_thread( + refresh_local_model_availability, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + provider_ids=target_ids, + timeout_seconds=float(source_refresh_cfg.get("timeout_seconds") or 10.0), + ) ok_count = sum(1 for item in refresh_results if item.ok) logger.info( "Provider source refresh completed: %s/%s source endpoints succeeded (%s)", @@ -1908,6 +1931,14 @@ async def provider_catalog(): "priority_next": {}, } if _provider_catalog_store is not None: + await asyncio.to_thread( + record_availability_from_config, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + health_payload={ + "providers": {item["name"]: item for item in _build_provider_inventory()} + }, + ) source_catalog = build_catalog_summary( _provider_catalog_store, provider_ids=list(_config.provider_source_refresh.get("providers") or []), diff --git a/faigate/provider_availability.py b/faigate/provider_availability.py index c59d07b..eb7cf6d 100644 --- a/faigate/provider_availability.py +++ b/faigate/provider_availability.py @@ -3,28 +3,181 @@ from __future__ import annotations import json -from typing import Any +from typing import Any, Protocol + +import httpx from .config import load_config from .provider_catalog_store import ProviderCatalogStore +from .provider_sources import get_provider_source, resolve_provider_source_id + + +class JsonFetcher(Protocol): + """Protocol for fetching provider models-endpoint payloads.""" + + def fetch_json( + self, + url: str, + *, + headers: dict[str, str], + timeout_seconds: float, + ) -> dict[str, Any]: ... + + +class HttpxJsonFetcher: + """Default JSON fetcher for provider models endpoints.""" + + def fetch_json( + self, + url: str, + *, + headers: dict[str, str], + timeout_seconds: float, + ) -> dict[str, Any]: + timeout = httpx.Timeout(timeout_seconds, connect=min(timeout_seconds, 5.0)) + with httpx.Client(timeout=timeout) as client: + response = client.get(url, headers=headers, follow_redirects=True) + response.raise_for_status() + return dict(response.json() or {}) + + +def _request_readiness_from_health( + health_payload: dict[str, Any] | None, + route_name: str, +) -> dict[str, Any]: + providers = dict((health_payload or {}).get("providers") or {}) + return dict((providers.get(route_name) or {}).get("request_readiness") or {}) + + +def _configured_provider_targets(config_path: str) -> list[dict[str, Any]]: + config = load_config(config_path) + targets: list[dict[str, Any]] = [] + for provider_name, provider in sorted(config.providers.items()): + targets.append( + { + "provider_name": provider_name, + "provider_id": resolve_provider_source_id(provider_name, provider), + "provider": provider, + } + ) + return targets + + +def _join_base_url(base_url: str, path: str) -> str: + base = str(base_url or "").rstrip("/") + suffix = str(path or "").strip() + if not base or not suffix: + return "" + if not suffix.startswith("/"): + suffix = "/" + suffix + if suffix.startswith("/v1/") and base.endswith("/v1"): + return base + suffix[len("/v1") :] + if base.endswith(suffix): + return base + return base + suffix + + +def _parse_models_payload(payload: dict[str, Any]) -> list[str]: + rows = payload.get("data") + if rows is None and isinstance(payload.get("models"), list): + rows = payload.get("models") + if rows is None and isinstance(payload.get("items"), list): + rows = payload.get("items") + if not isinstance(rows, list): + return [] + + visible_models: list[str] = [] + seen: set[str] = set() + for row in rows: + if isinstance(row, str): + token = row.strip() + elif isinstance(row, dict): + token = str( + row.get("id") or row.get("name") or row.get("model") or "" + ).strip() + else: + token = "" + if not token or token in seen: + continue + seen.add(token) + visible_models.append(token) + return sorted(visible_models) + + +def record_availability_from_config( + store: ProviderCatalogStore, + *, + config_path: str, + health_payload: dict[str, Any] | None, +) -> list[dict[str, Any]]: + """Persist one route-state snapshot per configured provider route.""" + rows: list[dict[str, Any]] = [] + for target in _configured_provider_targets(config_path): + provider_name = str(target["provider_name"]) + provider_id = str(target["provider_id"]) + provider = dict(target["provider"] or {}) + readiness = _request_readiness_from_health(health_payload, provider_name) + source = get_provider_source(provider_id) + ready = bool(readiness.get("ready")) + store.record_availability_snapshot( + provider_id, + provider_name, + source_name="route-state", + model_id=str(provider.get("model") or ""), + available_for_key=ready, + request_ready=ready, + verified_via=str(readiness.get("verified_via") or "health"), + last_issue_type=str(readiness.get("runtime_issue_type") or ""), + metadata={ + "status": readiness.get("status"), + "reason": readiness.get("reason"), + "compatibility": readiness.get("compatibility"), + "profile": readiness.get("profile"), + "base_url": str(provider.get("base_url") or ""), + "backend": str(provider.get("backend") or ""), + "catalog_provider_id": provider_id, + "supports_models_endpoint": bool( + (source.get("availability") or {}).get("supports_models_endpoint") + ), + }, + ) + rows.append( + { + "provider_id": provider_id, + "route_name": provider_name, + "model_id": str(provider.get("model") or ""), + "request_ready": ready, + "status": str(readiness.get("status") or ""), + } + ) + return rows def record_availability_from_health( store: ProviderCatalogStore, *, + config_path: str | None = None, health_payload: dict[str, Any] | None, ) -> list[dict[str, Any]]: - """Persist a light local availability overlay from the live /health payload.""" + """Persist a local availability overlay from the live /health payload.""" if not health_payload: return [] + if config_path: + return record_availability_from_config( + store, + config_path=config_path, + health_payload=health_payload, + ) + rows: list[dict[str, Any]] = [] for route_name, payload in sorted((health_payload.get("providers") or {}).items()): request_readiness = dict(payload.get("request_readiness") or {}) lane = dict(payload.get("lane") or {}) - provider_id = str(lane.get("family") or route_name.split("-", 1)[0] or route_name) + provider_id = resolve_provider_source_id(route_name, {"lane": lane}) store.record_availability_snapshot( provider_id, route_name, + source_name="route-state", model_id=str(payload.get("model") or ""), available_for_key=bool(request_readiness.get("ready")), request_ready=bool(request_readiness.get("ready")), @@ -49,6 +202,218 @@ def record_availability_from_health( return rows +def refresh_local_model_availability( + store: ProviderCatalogStore, + *, + config_path: str, + provider_ids: list[str] | None = None, + fetcher: JsonFetcher | None = None, + timeout_seconds: float = 10.0, +) -> list[dict[str, Any]]: + """Refresh local models-endpoint visibility for configured routes.""" + fetcher = fetcher or HttpxJsonFetcher() + allowed_provider_ids = set(provider_ids or []) + results: list[dict[str, Any]] = [] + + for target in _configured_provider_targets(config_path): + provider_name = str(target["provider_name"]) + provider_id = str(target["provider_id"]) + if allowed_provider_ids and provider_id not in allowed_provider_ids: + continue + + source = get_provider_source(provider_id) + availability = dict(source.get("availability") or {}) + if not availability.get("supports_models_endpoint"): + continue + + provider = dict(target["provider"] or {}) + base_url = str(provider.get("base_url") or "").strip() + api_key = str(provider.get("api_key") or "").strip() + if not base_url or not api_key: + continue + + configured_model = str(provider.get("model") or "").strip() + models_paths = list(availability.get("models_paths") or []) + visible_models: list[str] = [] + resolved_url = "" + last_error = "" + + for models_path in models_paths: + resolved_url = _join_base_url(base_url, str(models_path)) + if not resolved_url: + continue + try: + payload = fetcher.fetch_json( + resolved_url, + headers={ + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + }, + timeout_seconds=timeout_seconds, + ) + visible_models = _parse_models_payload(payload) + if visible_models: + last_error = "" + break + last_error = "empty models payload" + except Exception as exc: # pragma: no cover - defensive runtime path + last_error = str(exc) + + available_for_key = bool( + configured_model and configured_model in visible_models + ) + last_issue_type = "" + if configured_model and visible_models and not available_for_key: + last_issue_type = "model-unavailable" + elif last_error: + last_issue_type = "models-endpoint-error" + + store.record_availability_snapshot( + provider_id, + provider_name, + source_name="models-endpoint", + model_id=configured_model, + available_for_key=available_for_key, + request_ready=available_for_key, + verified_via=resolved_url or "models-endpoint", + last_issue_type=last_issue_type, + metadata={ + "catalog_provider_id": provider_id, + "base_url": base_url, + "models_endpoint_url": resolved_url, + "visible_models": visible_models, + "visible_model_count": len(visible_models), + "last_error": last_error, + }, + ) + results.append( + { + "provider_id": provider_id, + "route_name": provider_name, + "model_id": configured_model, + "available_for_key": available_for_key, + "visible_model_count": len(visible_models), + "last_error": last_error, + } + ) + return results + + +def build_provider_availability_overlay( + store: ProviderCatalogStore, + *, + provider_id: str, + global_model_ids: set[str] | None = None, + global_free_model_ids: set[str] | None = None, +) -> dict[str, Any]: + """Compare local route and key visibility against global catalog data.""" + route_rows = store.get_latest_availability( + provider_id=provider_id, + source_name="route-state", + ) + endpoint_rows = store.get_latest_availability( + provider_id=provider_id, + source_name="models-endpoint", + ) + endpoint_by_route = {str(row.get("route_name") or ""): row for row in endpoint_rows} + visible_models: set[str] = set() + key_model_mismatches: list[dict[str, Any]] = [] + + for row in endpoint_rows: + metadata = dict(row.get("metadata") or {}) + route_visible_models = { + str(item).strip() + for item in list(metadata.get("visible_models") or []) + if str(item).strip() + } + visible_models.update(route_visible_models) + configured_model = str(row.get("model_id") or "") + if ( + configured_model + and route_visible_models + and configured_model not in route_visible_models + ): + key_model_mismatches.append( + { + "route_name": str(row.get("route_name") or ""), + "model_id": configured_model, + "visible_model_count": len(route_visible_models), + } + ) + + configured_models = { + str(row.get("model_id") or "").strip() + for row in route_rows + if str(row.get("model_id") or "").strip() + } + global_models = set(global_model_ids or set()) + global_free_models = set(global_free_model_ids or set()) + + configured_models_missing_globally = sorted( + model_id + for model_id in configured_models + if global_models and model_id not in global_models + ) + local_only_models = sorted( + model_id + for model_id in visible_models + if global_models and model_id not in global_models + ) + free_models_missing_locally = sorted( + model_id + for model_id in global_free_models + if visible_models and model_id not in visible_models + ) + + status = "clear" + if key_model_mismatches: + status = "intervention-needed" + elif configured_models_missing_globally or free_models_missing_locally: + status = "review-needed" + elif local_only_models: + status = "informational" + + route_details: list[dict[str, Any]] = [] + for row in route_rows: + endpoint_row = endpoint_by_route.get(str(row.get("route_name") or "")) + endpoint_meta = dict((endpoint_row or {}).get("metadata") or {}) + route_meta = dict(row.get("metadata") or {}) + route_details.append( + { + "route_name": str(row.get("route_name") or ""), + "model_id": str(row.get("model_id") or ""), + "request_ready": bool(row.get("request_ready")), + "status": str(route_meta.get("status") or ""), + "available_for_key": bool( + (endpoint_row or {}).get("available_for_key") + ), + "visible_model_count": int( + endpoint_meta.get("visible_model_count") or 0 + ), + "models_endpoint_error": str(endpoint_meta.get("last_error") or ""), + } + ) + + return { + "status": status, + "local_routes": len(route_rows), + "request_ready_routes": sum( + 1 for row in route_rows if row.get("request_ready") + ), + "models_endpoint_routes": len(endpoint_rows), + "visible_model_count": len(visible_models), + "visible_models": sorted(visible_models), + "configured_models": sorted(configured_models), + "configured_models_missing_globally": configured_models_missing_globally, + "key_model_mismatches": key_model_mismatches, + "local_only_models": local_only_models, + "global_free_models": sorted(global_free_models), + "free_models_visible_locally": len(global_free_models & visible_models), + "free_models_missing_locally": free_models_missing_locally, + "route_details": route_details, + } + + def load_health_payload(raw: str) -> dict[str, Any] | None: """Decode a serialized /health payload from a script environment.""" token = str(raw or "").strip() @@ -58,11 +423,10 @@ def load_health_payload(raw: str) -> dict[str, Any] | None: def configured_provider_families(config_path: str) -> dict[str, list[str]]: - """Return configured provider names grouped by family-ish prefix.""" - config = load_config(config_path) + """Return configured provider names grouped by source-catalog family.""" rows: dict[str, list[str]] = {} - for provider_name, provider in sorted(config.providers.items()): - lane = dict(provider.get("lane") or {}) - family = str(lane.get("family") or provider_name.split("-", 1)[0] or "unknown") - rows.setdefault(family, []).append(provider_name) + for target in _configured_provider_targets(config_path): + rows.setdefault(str(target["provider_id"] or "unknown"), []).append( + str(target["provider_name"]) + ) return rows diff --git a/faigate/provider_catalog_refresh.py b/faigate/provider_catalog_refresh.py index e7934c2..2601bae 100644 --- a/faigate/provider_catalog_refresh.py +++ b/faigate/provider_catalog_refresh.py @@ -10,6 +10,7 @@ import httpx +from .provider_availability import build_provider_availability_overlay from .provider_catalog_store import ProviderCatalogStore from .provider_sources import list_provider_sources @@ -52,7 +53,10 @@ class RefreshResult: def _source_due_severity(item: dict[str, Any]) -> str: """Escalate overdue source drift when it has lingered well past refresh cadence.""" - refresh_interval_seconds = max(int(item.get("refresh_interval_seconds") or 21600), 1) + refresh_interval_seconds = max( + int(item.get("refresh_interval_seconds") or 21600), + 1, + ) seconds_since_success = item.get("seconds_since_success") last_success_at = float(item.get("last_success_at") or 0.0) @@ -93,7 +97,8 @@ def _source_refresh_suggestion(item: dict[str, Any]) -> str: "auth assumptions before trusting catalog data here." ) return ( - f"Refresh {provider_id} before relying on older model, pricing, or free-tier assumptions." + f"Refresh {provider_id} before relying on older model, pricing, " + "or free-tier assumptions." ) @@ -128,6 +133,7 @@ def build_catalog_alerts( for item in list(summary.get("items") or []): provider_id = str(item.get("provider_id") or "") status = str(item.get("status") or "") + local_availability = dict(item.get("local_availability") or {}) if status == "error": action = _catalog_alert_action( kind="source-refresh-error", @@ -174,6 +180,114 @@ def build_catalog_alerts( "source_kind": "source", } ) + if list(local_availability.get("key_model_mismatches") or []): + mismatches = list(local_availability.get("key_model_mismatches") or []) + mismatch = mismatches[0] + alerts.append( + { + "kind": "local-model-availability", + "severity": "warning", + "action": "fix-now", + "provider_id": provider_id, + "headline": ( + f"Configured route model not visible for local " + f"{provider_id} key" + ), + "detail": ( + f"{mismatch.get('route_name')} expects " + f"{mismatch.get('model_id')}, but the latest local " + f"models endpoint did not list it " + f"({mismatch.get('visible_model_count')} visible models)." + ), + "suggestion": ( + "Verify the configured model id and local key for " + f"{mismatch.get('route_name')} " + "before trusting this route as request-ready." + ), + "source_kind": "local-availability", + } + ) + if list(local_availability.get("configured_models_missing_globally") or []): + missing_model = str( + local_availability["configured_models_missing_globally"][0] + ) + alerts.append( + { + "kind": "catalog-route-mismatch", + "severity": "warning", + "action": "review-now", + "provider_id": provider_id, + "headline": ( + f"Configured {provider_id} model missing from mirrored " + "global catalog" + ), + "detail": ( + f"The configured model '{missing_model}' is not present " + "in the latest " + f"mirrored {provider_id} source snapshot." + ), + "suggestion": ( + f"Review whether {missing_model} is still the intended " + "model id or " + "whether the provider source mirror needs to be refreshed." + ), + "source_kind": "local-availability", + } + ) + if list(local_availability.get("local_only_models") or []): + local_only = str(local_availability["local_only_models"][0]) + alerts.append( + { + "kind": "local-model-drift", + "severity": "notice", + "action": "inspect", + "provider_id": provider_id, + "headline": ( + f"Local {provider_id} key exposes models missing from " + "mirrored docs" + ), + "detail": ( + f"The local models endpoint exposed '{local_only}', " + "which is not in the " + "latest mirrored global source snapshot." + ), + "suggestion": ( + f"Inspect whether {provider_id} docs are lagging or " + "whether the local key " + "is on a newer provider track." + ), + "source_kind": "local-availability", + } + ) + if ( + int(local_availability.get("models_endpoint_routes") or 0) > 0 + and int(local_availability.get("free_models_visible_locally") or 0) == 0 + and list(local_availability.get("global_free_models") or []) + ): + free_model = str(local_availability["global_free_models"][0]) + alerts.append( + { + "kind": "free-model-unavailable", + "severity": "notice", + "action": "review-now", + "provider_id": provider_id, + "headline": ( + f"Free {provider_id} catalog entries are not visible " + "for this key" + ), + "detail": ( + f"The mirrored global catalog still lists '{free_model}' " + "as free, but the latest local models endpoint did not " + "expose any mirrored free model." + ), + "suggestion": ( + f"Treat free-tier assumptions for {provider_id} as " + "key-specific and verify " + "whether this route should stay in low-cost fallback chains." + ), + "source_kind": "local-availability", + } + ) for event in list(summary.get("recent_events") or []): severity = str(event.get("severity") or "notice") change_type = str(event.get("change_type") or "") @@ -261,6 +375,22 @@ def build_catalog_summary( latest_models = store.get_latest_models(provider_id, "models") latest_pricing = store.get_latest_models(provider_id, "pricing") latest_docs_index = store.get_latest_models(provider_id, "docs-index") + global_catalog_model_ids = { + str(item.get("model_id") or "").strip() + for item in latest_models + latest_pricing + if str(item.get("model_id") or "").strip() + } + global_free_model_ids = { + str(item.get("model_id") or "").strip() + for item in latest_pricing + if bool(item.get("is_free")) and str(item.get("model_id") or "").strip() + } + local_availability = build_provider_availability_overlay( + store, + provider_id=provider_id, + global_model_ids=global_catalog_model_ids, + global_free_model_ids=global_free_model_ids, + ) last_success_at = float(source.get("last_success_at") or 0) last_checked_at = float(source.get("last_checked_at") or 0) refresh_interval_seconds = int(source.get("refresh_interval_seconds") or 21600) @@ -294,12 +424,15 @@ def build_catalog_summary( str(item.get("model_id") or "") for item in (latest_pricing or latest_models)[:5] ], + "local_availability": local_availability, "billing_notes": str(source.get("billing_notes") or ""), "account_profile": store.get_account_profile(provider_id), } ) - selected_provider_id = provider_ids[0] if provider_ids and len(provider_ids) == 1 else None + selected_provider_id = ( + provider_ids[0] if provider_ids and len(provider_ids) == 1 else None + ) recent_events = store.get_recent_change_events( provider_id=selected_provider_id, limit=20, @@ -328,7 +461,10 @@ def build_catalog_summary( elif recent_events: priority_next = { "path": "Provider Catalog Review", - "why": ("recent provider catalog changes were detected and should be reviewed."), + "why": ( + "recent provider catalog changes were detected and should " + "be reviewed." + ), } alerts = build_catalog_alerts( @@ -392,10 +528,13 @@ def render_catalog_summary_text( if item.get("billing_notes"): lines.append(f" billing: {item['billing_notes']}") if item.get("refresh_interval_seconds"): - lines.append(f" refresh interval: {int(item['refresh_interval_seconds'])}s") + lines.append( + f" refresh interval: {int(item['refresh_interval_seconds'])}s" + ) if item.get("seconds_since_success") is not None: lines.append( - f" age: {int(float(item['seconds_since_success']))}s since last success" + f" age: {int(float(item['seconds_since_success']))}s " + "since last success" ) profile = dict(item.get("account_profile") or {}) if profile: @@ -406,7 +545,41 @@ def render_catalog_summary_text( profile_bits.append(f"window={profile['quota_window']}") if profile.get("quota_remaining") is not None: profile_bits.append(f"remaining={profile['quota_remaining']}") - lines.append(" local account: " + " | ".join(bit for bit in profile_bits if bit)) + lines.append( + " local account: " + + " | ".join(bit for bit in profile_bits if bit) + ) + local_availability = dict(item.get("local_availability") or {}) + if local_availability: + lines.append( + " local availability: " + + f"routes={int(local_availability.get('local_routes') or 0)} | " + + f"ready={int(local_availability.get('request_ready_routes') or 0)} | " + + "models-endpoint=" + + f"{int(local_availability.get('models_endpoint_routes') or 0)} | " + + "visible-models=" + + f"{int(local_availability.get('visible_model_count') or 0)}" + ) + if local_availability.get("configured_models_missing_globally"): + lines.append( + " catalog mismatch: " + + ", ".join( + local_availability["configured_models_missing_globally"][:3] + ) + ) + if local_availability.get("key_model_mismatches"): + lines.append( + " key mismatch: " + + ", ".join( + f"{item['route_name']} -> {item['model_id']}" + for item in local_availability["key_model_mismatches"][:3] + ) + ) + if local_availability.get("local_only_models"): + lines.append( + " local-only models: " + + ", ".join(local_availability["local_only_models"][:3]) + ) if item.get("last_error"): lines.append(f" last error: {item['last_error']}") events = list(summary.get("recent_events") or []) @@ -633,7 +806,9 @@ def _diff_model_sets( "field_name": "model_id", "old_value": "", "new_value": model_id, - "message": (f"{provider_id}: model '{model_id}' appeared in {source_kind}."), + "message": ( + f"{provider_id}: model '{model_id}' appeared in {source_kind}." + ), } ) for model_id in sorted(previous_by_id.keys() - current_by_id.keys()): @@ -648,7 +823,9 @@ def _diff_model_sets( "field_name": "model_id", "old_value": model_id, "new_value": "", - "message": (f"{provider_id}: model '{model_id}' disappeared from {source_kind}."), + "message": ( + f"{provider_id}: model '{model_id}' disappeared from {source_kind}." + ), } ) for model_id in sorted(current_by_id.keys() & previous_by_id.keys()): diff --git a/faigate/provider_catalog_store.py b/faigate/provider_catalog_store.py index 2d29fcd..ea20d15 100644 --- a/faigate/provider_catalog_store.py +++ b/faigate/provider_catalog_store.py @@ -50,6 +50,7 @@ id INTEGER PRIMARY KEY AUTOINCREMENT, provider_id TEXT NOT NULL, route_name TEXT NOT NULL, + source_name TEXT DEFAULT 'route-state', checked_at REAL NOT NULL, model_id TEXT DEFAULT '', available_for_key INTEGER DEFAULT 0, @@ -106,8 +107,24 @@ def init(self) -> None: self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA synchronous=NORMAL") self._conn.executescript(_CREATE_SQL) + self._migrate_schema() self._conn.commit() + def _migrate_schema(self) -> None: + if not self._conn: + return + columns = { + row[1] + for row in self._conn.execute("PRAGMA table_info(provider_availability_snapshots)") + } + if "source_name" not in columns: + self._conn.execute( + """ + ALTER TABLE provider_availability_snapshots + ADD COLUMN source_name TEXT DEFAULT 'route-state' + """ + ) + def close(self) -> None: if self._conn: self._conn.close() @@ -270,6 +287,7 @@ def record_availability_snapshot( provider_id: str, route_name: str, *, + source_name: str = "route-state", model_id: str = "", available_for_key: bool = False, request_ready: bool = False, @@ -283,13 +301,14 @@ def record_availability_snapshot( self._conn.execute( """ INSERT INTO provider_availability_snapshots( - provider_id, route_name, checked_at, model_id, + provider_id, route_name, source_name, checked_at, model_id, available_for_key, request_ready, verified_via, last_issue_type, metadata_json - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( provider_id, route_name, + source_name, float(checked_at or time.time()), model_id, 1 if available_for_key else 0, @@ -301,6 +320,52 @@ def record_availability_snapshot( ) self._conn.commit() + def get_latest_availability( + self, + *, + provider_id: str | None = None, + source_name: str | None = None, + ) -> list[dict[str, Any]]: + if not self._conn: + return [] + + where_clauses: list[str] = [] + params: list[Any] = [] + if provider_id: + where_clauses.append("provider_id=?") + params.append(provider_id) + if source_name: + where_clauses.append("source_name=?") + params.append(source_name) + where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + cur = self._conn.execute( + f""" + SELECT snap.provider_id, snap.route_name, snap.source_name, snap.checked_at, + snap.model_id, snap.available_for_key, snap.request_ready, + snap.verified_via, snap.last_issue_type, snap.metadata_json + FROM provider_availability_snapshots AS snap + INNER JOIN ( + SELECT provider_id, route_name, source_name, MAX(checked_at) AS checked_at + FROM provider_availability_snapshots + {where_sql} + GROUP BY provider_id, route_name, source_name + ) AS latest + ON snap.provider_id = latest.provider_id + AND snap.route_name = latest.route_name + AND snap.source_name = latest.source_name + AND snap.checked_at = latest.checked_at + ORDER BY snap.provider_id, snap.route_name, snap.source_name + """, + params, + ) + cols = [item[0] for item in cur.description] + rows = [dict(zip(cols, row)) for row in cur.fetchall()] + for row in rows: + row["metadata"] = json.loads(str(row.pop("metadata_json") or "{}")) + row["available_for_key"] = bool(row.get("available_for_key")) + row["request_ready"] = bool(row.get("request_ready")) + return rows + def upsert_account_profile( self, provider_id: str, diff --git a/faigate/provider_sources.py b/faigate/provider_sources.py index 6766e04..3858084 100644 --- a/faigate/provider_sources.py +++ b/faigate/provider_sources.py @@ -6,6 +6,33 @@ from typing import Any _SOURCE_REGISTRY: dict[str, dict[str, Any]] = { + "anthropic": { + "provider_id": "anthropic", + "display_name": "Anthropic", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "Anthropic usage can combine direct API billing with operator-side " + "subscription or quota windows. Local route availability should be tracked " + "separately from the public model docs." + ), + "route_prefixes": ["anthropic", "claude"], + "provider_names": ["anthropic-claude", "anthropic-sonnet"], + "endpoints": [ + { + "kind": "models", + "url": "https://docs.anthropic.com/en/docs/about-claude/models", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bclaude-[a-z0-9.\-]+", + ], + } + ], + "availability": { + "supports_models_endpoint": False, + "models_paths": [], + "transport": "anthropic", + }, + }, "blackbox": { "provider_id": "blackbox", "display_name": "BLACKBOX", @@ -14,6 +41,8 @@ "BLACKBOX can expose both free and paid model variants. Local key availability " "must be checked separately from the global pricing catalog." ), + "route_prefixes": ["blackbox"], + "provider_names": ["blackbox-free"], "endpoints": [ { "kind": "docs-index", @@ -28,10 +57,63 @@ ], "availability": { "supports_models_endpoint": True, - "models_path": "/v1/models", + "models_paths": ["/v1/models", "/models"], "transport": "openai-compat", }, }, + "deepseek": { + "provider_id": "deepseek", + "display_name": "DeepSeek", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "DeepSeek route cost and quota behavior can differ between direct API billing " + "and operator-specific subscription or account limits." + ), + "route_prefixes": ["deepseek"], + "provider_names": ["deepseek-chat", "deepseek-reasoner"], + "endpoints": [ + { + "kind": "models", + "url": "https://api-docs.deepseek.com/", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bdeepseek-[a-z0-9.\-]+", + ], + } + ], + "availability": { + "supports_models_endpoint": True, + "models_paths": ["/v1/models", "/models"], + "transport": "openai-compat", + }, + }, + "google": { + "provider_id": "google", + "display_name": "Google", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "Google model access can sit behind AI Studio or platform-specific quotas. " + "Local availability and operator limits should be overlaid separately." + ), + "route_prefixes": ["google", "gemini"], + "provider_names": ["gemini-flash", "gemini-flash-lite"], + "endpoints": [ + { + "kind": "models", + "url": "https://ai.google.dev/gemini-api/docs/models", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bgemini-[a-z0-9.\-:]+", + r"\bgemma-[a-z0-9.\-:]+", + ], + } + ], + "availability": { + "supports_models_endpoint": False, + "models_paths": [], + "transport": "google", + }, + }, "kilo": { "provider_id": "kilo", "display_name": "Kilo", @@ -40,6 +122,8 @@ "Kilo mixes gateway wallet, free models, and BYOK-style execution paths. " "Local billing interpretation should be overlaid from account usage and route probes." ), + "route_prefixes": ["kilo"], + "provider_names": ["kilocode", "kilo-sonnet", "kilo-opus"], "endpoints": [ { "kind": "models", @@ -61,7 +145,7 @@ ], "availability": { "supports_models_endpoint": False, - "models_path": "", + "models_paths": [], "transport": "openai-compat", }, }, @@ -74,6 +158,8 @@ "limits outside the raw API pricing table. Local account state should be " "tracked separately." ), + "route_prefixes": ["openai", "gpt", "o1", "o3", "o4"], + "provider_names": ["openai-gpt4o", "openai-images"], "endpoints": [ { "kind": "models", @@ -91,7 +177,7 @@ ], "availability": { "supports_models_endpoint": True, - "models_path": "/v1/models", + "models_paths": ["/models", "/v1/models"], "transport": "openai-compat", }, }, @@ -117,3 +203,29 @@ def list_provider_sources(provider_ids: list[str] | None = None) -> list[dict[st if item: items.append(item) return items + + +def resolve_provider_source_id( + provider_name: str, + provider: dict[str, Any] | None = None, +) -> str: + """Map one configured route to a provider source family.""" + normalized_name = str(provider_name or "").strip().lower() + lane = dict((provider or {}).get("lane") or {}) + family = str(lane.get("family") or "").strip().lower() + + for provider_id, source in _SOURCE_REGISTRY.items(): + if family and family == provider_id: + return provider_id + for explicit_name in list(source.get("provider_names") or []): + if normalized_name == str(explicit_name or "").strip().lower(): + return provider_id + for prefix in list(source.get("route_prefixes") or []): + token = str(prefix or "").strip().lower() + if token and ( + normalized_name == token + or normalized_name.startswith(f"{token}-") + or normalized_name.startswith(f"{token}_") + ): + return provider_id + return family or normalized_name.split("-", 1)[0] or normalized_name diff --git a/faigate/wizard.py b/faigate/wizard.py index 6d0f44f..1fb764c 100644 --- a/faigate/wizard.py +++ b/faigate/wizard.py @@ -3249,7 +3249,7 @@ def build_initial_config( "on_startup": True, "timeout_seconds": 10.0, "interval_seconds": 21600, - "providers": ["blackbox", "kilo", "openai"], + "providers": ["anthropic", "blackbox", "deepseek", "google", "kilo", "openai"], }, "providers": providers, "fallback_chain": fallback_chain, diff --git a/scripts/faigate-doctor b/scripts/faigate-doctor index da6b4c1..3dc6361 100755 --- a/scripts/faigate-doctor +++ b/scripts/faigate-doctor @@ -90,7 +90,10 @@ from pathlib import Path import yaml from faigate.onboarding import collect_provider_env_requirements from faigate.provider_catalog import build_provider_catalog_report, build_provider_refresh_guidance -from faigate.provider_availability import record_availability_from_health +from faigate.provider_availability import ( + record_availability_from_health, + refresh_local_model_availability, +) from faigate.provider_catalog_refresh import ProviderCatalogRefresher, build_catalog_summary from faigate.provider_catalog_refresh import build_catalog_alert_summary from faigate.provider_catalog_store import ProviderCatalogStore @@ -177,7 +180,18 @@ for item in catalog.get("items", []): health_raw = os.environ.get("FAIGATE_HEALTH_PAYLOAD", "").strip() if store is not None and health_raw: health_payload = json.loads(health_raw) - record_availability_from_health(store, health_payload=health_payload) + record_availability_from_health( + store, + config_path=os.environ.get("FAIGATE_CONFIG_FILE", ""), + health_payload=health_payload, + ) + if os.environ.get("FAIGATE_DOCTOR_REFRESH_CATALOG") == "1": + refresh_local_model_availability( + store, + config_path=os.environ.get("FAIGATE_CONFIG_FILE", ""), + provider_ids=list(config.provider_source_refresh.get("providers") or []), + timeout_seconds=float(config.provider_source_refresh.get("timeout_seconds") or 10.0), + ) providers = (health_payload.get("providers") or {}) ready = 0 total = 0 diff --git a/scripts/faigate-provider-probe b/scripts/faigate-provider-probe index e90ea56..5026376 100755 --- a/scripts/faigate-provider-probe +++ b/scripts/faigate-provider-probe @@ -77,7 +77,10 @@ import json import os from faigate.config import load_config -from faigate.provider_availability import record_availability_from_health +from faigate.provider_availability import ( + record_availability_from_health, + refresh_local_model_availability, +) from faigate.provider_catalog_refresh import ( ProviderCatalogRefresher, build_catalog_alert_summary, @@ -108,7 +111,18 @@ if store is not None and os.environ.get("FAIGATE_PROVIDER_PROBE_REFRESH_CATALOG" timeout_seconds=float(os.environ.get("FAIGATE_PROVIDER_PROBE_TIMEOUT") or "2.0"), ) if store is not None: - record_availability_from_health(store, health_payload=health_payload) + record_availability_from_health( + store, + config_path=os.environ["FAIGATE_PROVIDER_PROBE_CONFIG"], + health_payload=health_payload, + ) + if os.environ.get("FAIGATE_PROVIDER_PROBE_REFRESH_CATALOG") == "1": + refresh_local_model_availability( + store, + config_path=os.environ["FAIGATE_PROVIDER_PROBE_CONFIG"], + provider_ids=list(config.provider_source_refresh.get("providers") or []), + timeout_seconds=float(os.environ.get("FAIGATE_PROVIDER_PROBE_TIMEOUT") or "2.0"), + ) catalog_summary = build_catalog_summary( store, provider_ids=list(config.provider_source_refresh.get("providers") or []), diff --git a/tests/test_config.py b/tests/test_config.py index 2c8aa88..c2d4cec 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -334,7 +334,7 @@ def test_provider_source_refresh_defaults_are_exposed(): "on_startup": True, "timeout_seconds": 10.0, "interval_seconds": 21600, - "providers": ["blackbox", "kilo", "openai"], + "providers": ["anthropic", "blackbox", "deepseek", "google", "kilo", "openai"], } diff --git a/tests/test_provider_availability.py b/tests/test_provider_availability.py new file mode 100644 index 0000000..0b0584c --- /dev/null +++ b/tests/test_provider_availability.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +from pathlib import Path + +from faigate.provider_availability import ( + build_provider_availability_overlay, + record_availability_from_config, + refresh_local_model_availability, +) +from faigate.provider_catalog_store import ProviderCatalogStore + + +class FakeJsonFetcher: + def __init__(self, payloads: dict[str, dict]): + self._payloads = payloads + + def fetch_json( + self, + url: str, + *, + headers: dict[str, str], + timeout_seconds: float, + ) -> dict: + return dict(self._payloads[url]) + + +def _write_config(tmp_path: Path) -> Path: + path = tmp_path / "config.yaml" + path.write_text( + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + blackbox-free: + backend: openai-compat + base_url: "https://api.blackbox.ai" + api_key: "secret" + model: "x-ai/grok-code-fast-1:free" + deepseek-chat: + backend: openai-compat + base_url: "https://api.deepseek.com/v1" + api_key: "secret" + model: "deepseek-chat" +fallback_chain: [] +metrics: + enabled: false +""".strip(), + encoding="utf-8", + ) + return path + + +def test_local_models_endpoint_overlay_detects_key_specific_mismatch(tmp_path: Path): + config_path = _write_config(tmp_path) + db_path = tmp_path / "faigate.db" + store = ProviderCatalogStore(str(db_path)) + store.init() + store.replace_model_snapshot( + "blackbox", + "pricing", + [ + { + "model_id": "x-ai/grok-code-fast-1:free", + "model_name": "Grok Code Fast 1 Free", + "input_cost": 0.0, + "output_cost": 0.0, + "context_length": 256000, + "is_free": True, + "raw_source_hash": "hash-blackbox", + } + ], + ) + store.replace_model_snapshot( + "deepseek", + "models", + [ + { + "model_id": "deepseek-chat", + "model_name": "DeepSeek Chat", + "input_cost": None, + "output_cost": None, + "context_length": None, + "is_free": False, + "raw_source_hash": "hash-deepseek", + } + ], + ) + + record_availability_from_config( + store, + config_path=str(config_path), + health_payload={ + "providers": { + "blackbox-free": { + "request_readiness": { + "ready": False, + "status": "degraded", + "reason": "last request failed", + } + }, + "deepseek-chat": { + "request_readiness": { + "ready": True, + "status": "ready", + "reason": "healthy", + } + }, + } + }, + ) + refresh_local_model_availability( + store, + config_path=str(config_path), + fetcher=FakeJsonFetcher( + { + "https://api.blackbox.ai/v1/models": { + "data": [{"id": "x-ai/grok-code-fast-1"}] + }, + "https://api.deepseek.com/v1/models": { + "data": [{"id": "deepseek-chat"}, {"id": "deepseek-reasoner"}] + }, + } + ), + ) + + blackbox_overlay = build_provider_availability_overlay( + store, + provider_id="blackbox", + global_model_ids={"x-ai/grok-code-fast-1:free"}, + global_free_model_ids={"x-ai/grok-code-fast-1:free"}, + ) + deepseek_overlay = build_provider_availability_overlay( + store, + provider_id="deepseek", + global_model_ids={"deepseek-chat", "deepseek-reasoner"}, + global_free_model_ids=set(), + ) + + assert blackbox_overlay["status"] == "intervention-needed" + assert blackbox_overlay["key_model_mismatches"][0]["route_name"] == "blackbox-free" + assert blackbox_overlay["local_only_models"] == ["x-ai/grok-code-fast-1"] + assert blackbox_overlay["free_models_missing_locally"] == ["x-ai/grok-code-fast-1:free"] + assert deepseek_overlay["status"] == "clear" + assert deepseek_overlay["visible_models"] == ["deepseek-chat", "deepseek-reasoner"] diff --git a/tests/test_provider_catalog_refresh.py b/tests/test_provider_catalog_refresh.py index 18f822f..989e47e 100644 --- a/tests/test_provider_catalog_refresh.py +++ b/tests/test_provider_catalog_refresh.py @@ -196,3 +196,40 @@ def test_due_provider_ids_returns_sources_without_recent_success(tmp_path): assert "blackbox" in due assert "kilo" in due + + +def test_build_catalog_alerts_include_local_availability_mismatches(): + summary = { + "items": [ + { + "provider_id": "blackbox", + "status": "current", + "last_error": "", + "seconds_since_success": 10, + "local_availability": { + "key_model_mismatches": [ + { + "route_name": "blackbox-free", + "model_id": "x-ai/grok-code-fast-1:free", + "visible_model_count": 1, + } + ], + "configured_models_missing_globally": ["x-ai/grok-code-fast-1:free"], + "local_only_models": ["x-ai/grok-code-fast-1"], + "models_endpoint_routes": 1, + "free_models_visible_locally": 0, + "global_free_models": ["x-ai/grok-code-fast-1:free"], + }, + } + ], + "recent_events": [], + } + + alerts = build_catalog_alerts(summary) + alert_summary = build_catalog_alert_summary(alerts) + + kinds = [alert["kind"] for alert in alerts] + assert "local-model-availability" in kinds + assert "catalog-route-mismatch" in kinds + assert "free-model-unavailable" in kinds + assert alert_summary["status"] == "intervention-needed" diff --git a/tests/test_provider_catalog_store.py b/tests/test_provider_catalog_store.py index 91efd84..0b1e8d2 100644 --- a/tests/test_provider_catalog_store.py +++ b/tests/test_provider_catalog_store.py @@ -86,3 +86,31 @@ def test_provider_catalog_store_persists_snapshots_and_events(tmp_path): events = store.get_recent_change_events(provider_id="blackbox") assert events[0]["change_type"] == "model-added" + + +def test_provider_catalog_store_returns_latest_availability_by_source(tmp_path): + db_path = tmp_path / "faigate.db" + store = ProviderCatalogStore(str(db_path)) + store.init() + store.record_availability_snapshot( + "blackbox", + "blackbox-free", + source_name="route-state", + model_id="x-ai/grok-code-fast-1:free", + request_ready=False, + checked_at=1.0, + ) + store.record_availability_snapshot( + "blackbox", + "blackbox-free", + source_name="models-endpoint", + model_id="x-ai/grok-code-fast-1:free", + available_for_key=False, + metadata={"visible_models": ["x-ai/grok-code-fast-1"]}, + checked_at=2.0, + ) + + rows = store.get_latest_availability(provider_id="blackbox") + + assert len(rows) == 2 + assert {row["source_name"] for row in rows} == {"route-state", "models-endpoint"}