From 9cbadb5a53dd23bb4be58d8b8a2424f805e53f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Sun, 29 Mar 2026 22:53:11 +0200 Subject: [PATCH 1/2] feat(router): add explicit kilo gateway lanes --- CHANGELOG.md | 16 ++ config.yaml | 14 +- docs/FAIGATE-ROADMAP.md | 24 +++ .../faigate-multi-provider-stack.yaml | 6 +- docs/examples/provider-blackbox.yaml | 5 +- docs/examples/provider-kilocode.env.example | 2 +- docs/examples/provider-kilocode.yaml | 25 ++- faigate/lane_registry.py | 103 ++++++++++- faigate/provider_catalog.py | 50 +++++- faigate/registry.py | 2 +- faigate/router.py | 115 +++++++++++++ tests/test_api_hardening.py | 162 ++++++++++++++++++ tests/test_provider_catalog.py | 6 +- tests/test_routing_dimensions.py | 87 ++++++++++ 14 files changed, 587 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46376d7..639bae9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # fusionAIze Gate Changelog +## v1.12.0 - Unreleased + +### Added + +- Added a provider source catalog line with startup refresh, dashboard/API summaries, and operator-facing alert actions across `faigate-doctor`, `faigate-provider-probe`, and Quick Setup so model/pricing drift is visible earlier instead of hiding behind stale curated assumptions +- Added explicit Kilo paid workhorse lanes for Sonnet and Opus plus Kilo-specific routing fit scoring, which lets Gate model premium, balanced, and free Kilo traffic without relying on opaque `kilo-auto/*` header behavior +- Added route-preview coverage for Kilo frontier lane selection so operators can see when Gate chose `kilo-opus`, `kilo-sonnet`, or `kilocode` and why +- Added stronger release automation dry-run coverage so the release helper itself is exercised in CI before a tagged release is cut + +### Changed + +- Hardened release automation end-to-end: local release scripts now validate versions more strictly, verify package metadata coherency, and point to the dedicated `fusionAIze/homebrew-tap` repository instead of assuming a local formula in this repo +- Release artifact publishing now validates tag/version alignment before publishing and reuses prebuilt Python artifacts for PyPI instead of rebuilding a second time in the publish job +- Reframed the legacy `blackbox-free` route as a low-cost burst path rather than a guaranteed free path, because the currently working curated model is not reliably the `:free` SKU for every key +- Updated Kilo defaults and examples to use the current gateway base URL without the stale `/v1` suffix + ## v1.11.2 - 2026-03-29 ### Changed diff --git a/config.yaml b/config.yaml index 606eb07..14440d0 100644 --- a/config.yaml +++ b/config.yaml @@ -385,7 +385,7 @@ model_shortcuts: aliases: - blackbox - bb - description: BLACKBOX free-tier route + description: BLACKBOX budget burst route (legacy id) target: blackbox-free deepseek-chat: aliases: @@ -557,10 +557,10 @@ providers: backend: openai-compat base_url: ${BLACKBOX_BASE_URL:-https://api.blackbox.ai} capabilities: - cost_tier: free + cost_tier: cheap latency_tier: fast lane: - benchmark_cluster: free-coding + benchmark_cluster: budget-chat canonical_model: aggregator/blackbox-grok-code-fast cluster: budget-general context_strength: mid @@ -571,15 +571,15 @@ providers: freshness_hint: benchmark and cost assumptions were reviewed recently freshness_status: fresh last_reviewed: '2026-03-22' - name: free-burst - quality_tier: free + name: burst + quality_tier: budget reasoning_strength: mid review_age_days: 1 route_type: aggregator same_model_group: aggregator/blackbox-grok-code-fast tool_strength: mid max_tokens: 8000 - model: blackboxai/x-ai/grok-code-fast-1:free + model: blackboxai/x-ai/grok-code-fast-1 tier: fallback timeout: connect_s: 10 @@ -797,7 +797,7 @@ providers: kilocode: api_key: ${KILOCODE_API_KEY} backend: openai-compat - base_url: ${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway/v1} + base_url: ${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway} capabilities: cost_tier: free latency_tier: balanced diff --git a/docs/FAIGATE-ROADMAP.md b/docs/FAIGATE-ROADMAP.md index 7f7f7c8..c68e4fe 100644 --- a/docs/FAIGATE-ROADMAP.md +++ b/docs/FAIGATE-ROADMAP.md @@ -37,6 +37,30 @@ The detailed design lives in [Adaptive model orchestration](./ADAPTIVE-ORCHESTRA The next block should stay disciplined: build on the workstation baseline, keep packaging practical, and avoid turning fusionAIze Gate into a sprawling platform. +## Current release target: `v1.12.0` + +The next release should land as a clean operational release, not as another loose pile of runtime slices. + +`v1.12.0` should close around three themes that now fit together: + +- provider source cataloging and alerting as a first-class operator surface +- clearer aggregator behavior for Kilo and BLACKBOX, especially where "free", "budget", "wallet", and explicit paid lanes are easy to conflate +- hardened release automation after the `v1.11.x` release failures + +The release should feel coherent from an operator point of view: + +- Quick Setup, Doctor, Provider Probe, Dashboard, and route preview all explain drift or route pressure using the same language +- Kilo explicit Sonnet/Opus lanes are visible as deliberate routing choices instead of hidden aggregator magic +- release prep, tag validation, and publish dry-runs are boring and repeatable again + +What is intentionally not in scope for `v1.12.0`: + +- the virtual key layer +- gateway-level response caching +- fully automated external provider-source crawling on a long-running schedule + +Those stay as follow-on tracks once the operator surfaces, release path, and aggregator semantics are stable enough to trust. + ## Shipped: `v1.8.0` – `v1.9.1` The adaptive model orchestration foundation is fully in place: diff --git a/docs/examples/faigate-multi-provider-stack.yaml b/docs/examples/faigate-multi-provider-stack.yaml index e440d2d..68425ef 100644 --- a/docs/examples/faigate-multi-provider-stack.yaml +++ b/docs/examples/faigate-multi-provider-stack.yaml @@ -108,12 +108,12 @@ routing_policies: select: prefer_tiers: ["cheap", "default"] -# Optional additional fallback providers with free-tier or free-model paths: +# Optional additional fallback providers with free-tier or low-cost model paths: # # providers: # kilocode: # backend: openai-compat -# base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway/v1}" +# base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway}" # api_key: "${KILOCODE_API_KEY}" # model: "z-ai/glm-5:free" # max_tokens: 8000 @@ -123,6 +123,6 @@ routing_policies: # backend: openai-compat # base_url: "${BLACKBOX_BASE_URL:-https://api.blackbox.ai}" # api_key: "${BLACKBOX_API_KEY}" -# model: "blackboxai/x-ai/grok-code-fast-1:free" +# model: "blackboxai/x-ai/grok-code-fast-1" # max_tokens: 8000 # tier: fallback diff --git a/docs/examples/provider-blackbox.yaml b/docs/examples/provider-blackbox.yaml index 3a7eae0..3bf40c9 100644 --- a/docs/examples/provider-blackbox.yaml +++ b/docs/examples/provider-blackbox.yaml @@ -3,9 +3,12 @@ providers: backend: openai-compat base_url: "${BLACKBOX_BASE_URL:-https://api.blackbox.ai}" api_key: "${BLACKBOX_API_KEY}" - model: "blackboxai/x-ai/grok-code-fast-1:free" + model: "blackboxai/x-ai/grok-code-fast-1" max_tokens: 8000 tier: fallback +# `blackbox-free` is kept as a legacy provider id for compatibility. +# The current curated route is low-cost, not guaranteed free. + fallback_chain: - blackbox-free diff --git a/docs/examples/provider-kilocode.env.example b/docs/examples/provider-kilocode.env.example index 9a430c6..2a5595c 100644 --- a/docs/examples/provider-kilocode.env.example +++ b/docs/examples/provider-kilocode.env.example @@ -3,4 +3,4 @@ # explicit key is the more stable operator path. KILOCODE_API_KEY=replace-me -KILOCODE_BASE_URL=https://api.kilo.ai/api/gateway/v1 +KILOCODE_BASE_URL=https://api.kilo.ai/api/gateway diff --git a/docs/examples/provider-kilocode.yaml b/docs/examples/provider-kilocode.yaml index e8e9122..2ddb738 100644 --- a/docs/examples/provider-kilocode.yaml +++ b/docs/examples/provider-kilocode.yaml @@ -1,11 +1,34 @@ providers: kilocode: backend: openai-compat - base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway/v1}" + base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway}" api_key: "${KILOCODE_API_KEY}" model: "z-ai/glm-5:free" max_tokens: 8000 tier: fallback + kilo-sonnet: + backend: openai-compat + base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway}" + api_key: "${KILOCODE_API_KEY}" + model: "anthropic/claude-sonnet-4.6" + max_tokens: 16000 + tier: mid + kilo-opus: + backend: openai-compat + base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway}" + api_key: "${KILOCODE_API_KEY}" + model: "anthropic/claude-opus-4.6" + max_tokens: 32000 + tier: mid fallback_chain: + - kilo-sonnet + - kilo-opus - kilocode + +# Notes +# - Kilo explicit paid lanes let Gate model Sonnet and Opus separately instead of relying on +# `kilo-auto/*` header hints. +# - If Kilo reports `usage.is_byok: true`, the request is not billed to your Kilo balance. +# In that case verify whether a user- or org-scoped BYOK provider key exists, or treat the +# flag as a Kilo-side billing-signal issue until proven otherwise. diff --git a/faigate/lane_registry.py b/faigate/lane_registry.py index cf17f5d..cc408b3 100644 --- a/faigate/lane_registry.py +++ b/faigate/lane_registry.py @@ -354,14 +354,42 @@ def get_active_model_label(canonical_id: str) -> str: "same_model_group": "aggregator/kilo-glm5-free", "degrade_to": ["aggregator/blackbox-grok-code-fast", "google/gemini-flash-lite"], }, + "kilo-sonnet": { + "family": "kilo", + "name": "sonnet", + "canonical_model": "anthropic/sonnet-4.6", + "route_type": "aggregator", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/sonnet-4.6", + "degrade_to": ["anthropic/haiku-4.5", "deepseek/chat", "google/gemini-flash"], + }, + "kilo-opus": { + "family": "kilo", + "name": "opus", + "canonical_model": "anthropic/opus-4.6", + "route_type": "aggregator", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/opus-4.6", + "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + }, "blackbox-free": { "family": "blackbox", - "name": "free-burst", + "name": "burst", "canonical_model": "aggregator/blackbox-grok-code-fast", "route_type": "aggregator", "cluster": "budget-general", - "benchmark_cluster": "free-coding", - "quality_tier": "free", + "benchmark_cluster": "budget-chat", + "quality_tier": "budget", "reasoning_strength": "mid", "context_strength": "mid", "tool_strength": "mid", @@ -519,6 +547,40 @@ def _lane_binding_with_freshness(binding: dict[str, Any]) -> dict[str, Any]: "free-tier model availability and path behavior should be revalidated regularly", ], }, + "kilo-sonnet": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "supports_models_probe": False, + "notes": [ + "aggregator route uses a shallow chat probe instead of assuming /models support", + "paid Kilo routes should be revalidated against current gateway behavior periodically", + ], + }, + "kilo-opus": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "supports_models_probe": False, + "notes": [ + "aggregator route uses a shallow chat probe instead of assuming /models support", + "paid Kilo routes should be revalidated against current gateway behavior periodically", + ], + }, "blackbox-free": { "profile": "blackbox-openai-compat", "compatibility": "aggregator", @@ -533,7 +595,10 @@ def _lane_binding_with_freshness(binding: dict[str, Any]) -> dict[str, Any]: "supports_models_probe": False, "notes": [ "aggregator route uses a shallow chat probe instead of assuming /models support", - "free-tier route volatility is high; auth and model availability can shift quickly", + ( + "low-cost BLACKBOX routes can shift quickly in pricing, auth behavior, " + "or model availability" + ), ], }, } @@ -558,10 +623,10 @@ def _lane_binding_with_freshness(binding: dict[str, Any]) -> dict[str, Any]: }, { "route_id": "kilocode/anthropic-opus-4.6", - "provider_name": "kilocode-anthropic-opus", + "provider_name": "kilo-opus", "provider_family": "kilo", "route_type": "aggregator", - "availability": "catalog", + "availability": "configured", "route_group": "same-lane", }, { @@ -573,6 +638,32 @@ def _lane_binding_with_freshness(binding: dict[str, Any]) -> dict[str, Any]: "route_group": "same-lane", }, ], + "anthropic/sonnet-4.6": [ + { + "route_id": "anthropic-direct/sonnet-4.6", + "provider_name": "anthropic-sonnet", + "provider_family": "anthropic", + "route_type": "direct", + "availability": "catalog", + "route_group": "same-lane", + }, + { + "route_id": "kilocode/anthropic-sonnet-4.6", + "provider_name": "kilo-sonnet", + "provider_family": "kilo", + "route_type": "aggregator", + "availability": "configured", + "route_group": "same-lane", + }, + { + "route_id": "openrouter/anthropic-sonnet-4.6", + "provider_name": "openrouter-anthropic-sonnet", + "provider_family": "openrouter", + "route_type": "aggregator", + "availability": "catalog", + "route_group": "same-lane", + }, + ], "openai/gpt-4o": [ { "route_id": "openai-direct/gpt-4o", diff --git a/faigate/provider_catalog.py b/faigate/provider_catalog.py index b571ae6..fbf09b2 100644 --- a/faigate/provider_catalog.py +++ b/faigate/provider_catalog.py @@ -116,11 +116,47 @@ "notes": "Current curated Kilo free-tier model; free and budget tracks can move quickly", "last_reviewed": "2026-03-19", }, + "kilo-sonnet": { + "recommended_model": "anthropic/claude-sonnet-4.6", + "aliases": ["anthropic/claude-sonnet-4.6", "kilo-auto/frontier", "kilo-auto/balanced"], + "track": "stable", + "offer_track": "gateway-paid", + "provider_type": "aggregator", + "auth_modes": ["api_key", "byok"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://kilo.ai/docs/gateway/models-and-providers", + "signup_url": "https://kilo.ai/", + "watch_sources": [], + "notes": ( + "Kilo paid Sonnet lane; useful as the workhorse path when you want " + "Kilo credits to absorb balanced coding traffic" + ), + "last_reviewed": "2026-03-29", + }, + "kilo-opus": { + "recommended_model": "anthropic/claude-opus-4.6", + "aliases": ["anthropic/claude-opus-4.6", "kilo-auto/frontier"], + "track": "stable", + "offer_track": "gateway-paid", + "provider_type": "aggregator", + "auth_modes": ["api_key", "byok"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://kilo.ai/docs/gateway/models-and-providers", + "signup_url": "https://kilo.ai/", + "watch_sources": [], + "notes": ( + "Kilo paid Opus lane; useful when expiring Kilo credits should absorb " + "premium reasoning traffic" + ), + "last_reviewed": "2026-03-29", + }, "blackbox-free": { - "recommended_model": "blackboxai/x-ai/grok-code-fast-1:free", - "aliases": ["blackboxai/x-ai/grok-code-fast-1:free"], - "track": "free", - "offer_track": "free", + "recommended_model": "blackboxai/x-ai/grok-code-fast-1", + "aliases": ["blackboxai/x-ai/grok-code-fast-1"], + "track": "cheap", + "offer_track": "credit", "provider_type": "aggregator", "auth_modes": ["api_key"], "volatility": "high", @@ -129,10 +165,10 @@ "signup_url": "https://cloud.blackbox.ai/", "watch_sources": [_COMMUNITY_WATCHLIST], "notes": ( - "Current curated BLACKBOX free-tier path; verify often because free " - "offerings can rotate" + "Legacy provider id for the current low-cost BLACKBOX Grok Code Fast route; " + "verify often because pricing and model availability can rotate" ), - "last_reviewed": "2026-03-19", + "last_reviewed": "2026-03-29", }, "openai-gpt4o": { "recommended_model": "gpt-4o", diff --git a/faigate/registry.py b/faigate/registry.py index 4f588f4..1521e60 100644 --- a/faigate/registry.py +++ b/faigate/registry.py @@ -134,7 +134,7 @@ class ProviderDef(TypedDict, total=False): # ── Kilo Gateway ────────────────────────────────────────────────────── "kilocode": ProviderDef( backend="openai-compat", - base_url="https://api.kilo.ai/api/gateway/v1", + base_url="https://api.kilo.ai/api/gateway", base_url_env="KILOCODE_BASE_URL", api_key_env="KILOCODE_API_KEY", tier="fallback", diff --git a/faigate/router.py b/faigate/router.py index 46713df..a9afa8a 100644 --- a/faigate/router.py +++ b/faigate/router.py @@ -800,6 +800,112 @@ def _freshness_posture_score(lane: dict[str, Any], routing_posture: str) -> int: ).get(freshness_status, 0) +def _kilo_gateway_fit( + lane: dict[str, Any], + ctx: _RoutingContext | None, + routing_posture: str, +) -> dict[str, Any]: + """Model a Kilo-specific frontier strategy using explicit Opus/Sonnet/Free lanes. + + Kilo currently exposes both explicit paid models and `kilo-auto/*` routes. Gate does not yet + attach provider-specific default headers like `x-kilocode-mode`, so we adapt that frontier + behavior by steering between explicit Kilo lanes: + - `kilo-opus` for higher-risk premium planning/review/debugging work + - `kilo-sonnet` for balanced day-to-day coding work + - `kilocode` for eco/free overflow + """ + if not lane or str(lane.get("family") or "").lower() != "kilo": + return {"score": 0, "mode": "", "reasons": []} + + request_insights = dict(getattr(ctx, "request_insights", {}) or {}) if ctx is not None else {} + client_profile = str(getattr(ctx, "client_profile", "") or "") + canonical_model = str(lane.get("canonical_model") or "") + complexity_profile = str(request_insights.get("complexity_profile") or "") + signal_groups = {str(item) for item in (request_insights.get("signal_groups") or []) if item} + planning_context = bool(request_insights.get("planning_context")) + short_complex = bool(request_insights.get("short_complex")) + architecture_risk = bool(request_insights.get("architecture_risk")) + + score = 0 + reasons: list[str] = [] + mode = "" + + if canonical_model == "anthropic/opus-4.6": + mode = "frontier-premium" + if routing_posture == "quality": + score += 6 + reasons.append("Quality posture keeps the premium Kilo Opus lane eligible.") + elif routing_posture == "balanced": + score += 2 + reasons.append("Balanced posture still allows Opus for harder coding work.") + else: + score -= 4 + reasons.append("Eco/free posture pushes Opus behind cheaper Kilo lanes.") + + if client_profile == "opencode" and complexity_profile in {"medium", "high"}: + score += 4 + reasons.append("Opencode complexity makes the premium Kilo frontier lane worthwhile.") + if architecture_risk or {"architecture", "change-risk", "concurrency"} & signal_groups: + score += 4 + reasons.append("Architecture or change-risk signals favored Opus over lighter lanes.") + if planning_context: + score += 2 + reasons.append("Planning language matched Kilo's premium frontier path.") + if short_complex: + score += 2 + reasons.append("Brief but risky request stayed on a stronger Kilo lane.") + + elif canonical_model == "anthropic/sonnet-4.6": + mode = "frontier-balanced" + if routing_posture == "balanced": + score += 6 + reasons.append("Balanced posture lined up with the Kilo Sonnet workhorse lane.") + elif routing_posture == "quality": + score += 3 + reasons.append("Quality posture kept Sonnet as the lower-cost premium-adjacent option.") + elif routing_posture == "eco": + score += 1 + reasons.append("Eco posture still allows Sonnet for moderate coding work.") + else: + score -= 2 + reasons.append("Free posture prefers cheaper Kilo routes over Sonnet.") + + if client_profile == "opencode" and complexity_profile in {"medium", "high"}: + score += 4 + reasons.append("Opencode coding work matched the Kilo Sonnet workhorse lane.") + if {"quality", "debugging", "testing", "security", "database", "devops"} & signal_groups: + score += 2 + reasons.append("Coding quality signals favored Sonnet over free overflow.") + if planning_context: + score += 1 + reasons.append("Planning language still fit the balanced Kilo workhorse.") + + elif canonical_model == "aggregator/kilo-glm5-free": + mode = "frontier-free" + if routing_posture in {"eco", "free"}: + score += 5 + reasons.append("Eco/free posture matched the Kilo free overflow lane.") + elif routing_posture == "balanced": + score += 1 + reasons.append("Balanced posture kept the Kilo free lane as overflow only.") + else: + score -= 5 + reasons.append("Quality posture demoted the Kilo free lane.") + + if client_profile == "opencode" and complexity_profile in {"medium", "high"}: + score -= 5 + reasons.append("Complex opencode work was kept off the free Kilo lane.") + if short_complex or architecture_risk: + score -= 3 + reasons.append("Risky short-form coding work was too sharp for the free Kilo lane.") + + return { + "score": score, + "mode": mode, + "reasons": reasons, + } + + def _merge_select_constraints(*selects: dict[str, Any]) -> dict[str, Any]: """Merge policy-like select mappings without dropping list/dict constraints.""" merged: dict[str, Any] = { @@ -1502,6 +1608,8 @@ def _provider_dimension_details( cost_tier=cost_tier, ) freshness_score = _freshness_posture_score(lane, routing_posture) + kilo_fit = _kilo_gateway_fit(lane, ctx, routing_posture) + kilo_score = int(kilo_fit.get("score") or 0) adaptation_penalty = int(runtime_state.get("penalty", 0) or 0) recovery_score = self._recovery_posture_score(lane, runtime_state, routing_posture) image_score = 0 @@ -1562,6 +1670,7 @@ def _provider_dimension_details( + benchmark_request_score + cost_score + freshness_score + + kilo_score + recovery_score + image_score + image_policy_score @@ -1588,6 +1697,9 @@ def _provider_dimension_details( "freshness_score": freshness_score, "freshness_status": str(lane.get("freshness_status") or ""), "review_age_days": int(lane.get("review_age_days") or -1), + "kilo_score": kilo_score, + "kilo_mode": str(kilo_fit.get("mode") or ""), + "kilo_reasons": list(kilo_fit.get("reasons") or []), "adaptation_penalty": adaptation_penalty, "recovery_score": recovery_score, "image_score": image_score, @@ -1930,6 +2042,9 @@ def _score_provider_candidates( "freshness_score": details["freshness_score"], "freshness_status": details["freshness_status"], "review_age_days": details["review_age_days"], + "kilo_score": details["kilo_score"], + "kilo_mode": details["kilo_mode"], + "kilo_reasons": details["kilo_reasons"], "runtime_penalty": details["runtime_penalty"], "runtime_issue_type": details["runtime_issue_type"], "runtime_recovered_recently": details["runtime_recovered_recently"], diff --git a/tests/test_api_hardening.py b/tests/test_api_hardening.py index 6dc994f..8bfbfa9 100644 --- a/tests/test_api_hardening.py +++ b/tests/test_api_hardening.py @@ -532,6 +532,168 @@ def test_route_preview_includes_route_summary_for_opencode_complexity( ) +def test_route_preview_explains_kilo_frontier_lane_choice(api_client, monkeypatch, tmp_path): + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + kilo-opus: + backend: openai-compat + base_url: "https://api.kilo.ai/api/gateway" + api_key: "secret" + model: "anthropic/claude-opus-4.6" + tier: mid + capabilities: + cost_tier: premium + kilo-sonnet: + backend: openai-compat + base_url: "https://api.kilo.ai/api/gateway" + api_key: "secret" + model: "anthropic/claude-sonnet-4.6" + tier: mid + capabilities: + cost_tier: standard + kilocode: + backend: openai-compat + base_url: "https://api.kilo.ai/api/gateway" + api_key: "secret" + model: "z-ai/glm-5:free" + tier: fallback + capabilities: + cost_tier: free +client_profiles: + enabled: true + default: generic + rules: + - profile: opencode + match: + header_contains: + x-faigate-client: ["opencode"] + profiles: + generic: {} + opencode: + routing_mode: premium +routing_modes: + enabled: true + default: premium + modes: + premium: + select: + prefer_tiers: ["mid"] +heuristic_rules: + enabled: false + rules: [] +fallback_chain: + - kilo-sonnet + - kilocode +metrics: + enabled: false +""", + ) + ) + kilo_opus = _ProviderStub() + kilo_opus.name = "kilo-opus" + kilo_opus.model = "anthropic/claude-opus-4.6" + kilo_opus.health = types.SimpleNamespace( + healthy=True, + last_check=1.0, + avg_latency_ms=160.0, + last_error="", + to_dict=lambda: { + "name": "kilo-opus", + "healthy": True, + "consecutive_failures": 0, + "avg_latency_ms": 160.0, + "last_error": "", + }, + ) + kilo_sonnet = _ProviderStub() + kilo_sonnet.name = "kilo-sonnet" + kilo_sonnet.model = "anthropic/claude-sonnet-4.6" + kilo_sonnet.health = types.SimpleNamespace( + healthy=True, + last_check=1.0, + avg_latency_ms=130.0, + last_error="", + to_dict=lambda: { + "name": "kilo-sonnet", + "healthy": True, + "consecutive_failures": 0, + "avg_latency_ms": 130.0, + "last_error": "", + }, + ) + kilo_free = _ProviderStub() + kilo_free.name = "kilocode" + kilo_free.model = "z-ai/glm-5:free" + kilo_free.tier = "fallback" + kilo_free.health = types.SimpleNamespace( + healthy=True, + last_check=1.0, + avg_latency_ms=100.0, + last_error="", + to_dict=lambda: { + "name": "kilocode", + "healthy": True, + "consecutive_failures": 0, + "avg_latency_ms": 100.0, + "last_error": "", + }, + ) + + monkeypatch.setattr(main_module, "_config", cfg, raising=False) + monkeypatch.setattr(main_module, "_router", Router(cfg), raising=False) + monkeypatch.setattr( + main_module, + "_providers", + { + "kilo-opus": kilo_opus, + "kilo-sonnet": kilo_sonnet, + "kilocode": kilo_free, + }, + raising=False, + ) + + response = api_client.post( + "/api/route", + headers={"X-faigate-Client": "opencode"}, + json={ + "model": "auto", + "messages": [ + { + "role": "user", + "content": ( + "Design a rollback-safe architecture plan for this refactor " + "under load." + ), + } + ], + }, + ) + + assert response.status_code == 200 + body = response.json() + assert body["decision"]["provider"] == "kilo-opus" + assert body["route_summary"]["selected"]["canonical_model"] == "anthropic/opus-4.6" + assert body["route_summary"]["selected"]["kilo_mode"] == "frontier-premium" + assert any( + "Kilo frontier fit favored frontier-premium" in item + for item in body["route_summary"]["why_selected"] + ) + assert any( + "premium Kilo Opus lane" in item for item in body["route_summary"]["why_selected"] + ) + assert body["route_summary"]["alternatives"] + assert any( + "Kilo lane strategy fit was weaker" in item + for item in body["route_summary"]["alternatives"][0]["why_not_selected"] + ) + + def test_image_edit_rejects_large_upload(api_client): response = api_client.post( "/v1/images/edits", diff --git a/tests/test_provider_catalog.py b/tests/test_provider_catalog.py index 0664514..525b699 100644 --- a/tests/test_provider_catalog.py +++ b/tests/test_provider_catalog.py @@ -117,7 +117,7 @@ def test_provider_catalog_report_warns_on_unofficial_and_volatile_tracks(tmp_pat backend: openai-compat base_url: "https://api.blackbox.ai" api_key: "secret" - model: "blackboxai/x-ai/grok-code-fast-1:free" + model: "blackboxai/x-ai/grok-code-fast-1" fallback_chain: [] metrics: enabled: false @@ -130,7 +130,7 @@ def test_provider_catalog_report_warns_on_unofficial_and_volatile_tracks(tmp_pat assert "catalog-source-unofficial" in codes assert "volatile-offer-configured" in codes - assert report["items"][0]["offer_track"] == "free" + assert report["items"][0]["offer_track"] == "credit" assert report["items"][0]["volatility"] == "high" @@ -258,7 +258,7 @@ def test_provider_discovery_view_supports_link_source_and_offer_track_filters( model: "openrouter/auto" kilocode: backend: openai-compat - base_url: "https://api.kilo.ai/api/gateway/v1" + base_url: "https://api.kilo.ai/api/gateway" api_key: "secret" model: "z-ai/glm-5:free" fallback_chain: [] diff --git a/tests/test_routing_dimensions.py b/tests/test_routing_dimensions.py index cd830f8..d39a534 100644 --- a/tests/test_routing_dimensions.py +++ b/tests/test_routing_dimensions.py @@ -1160,6 +1160,93 @@ async def test_opencode_medium_complexity_suppresses_simple_query_and_promotes_c assert opencode_decision.details["heuristic_match"]["opencode_bias_applied"] is True +@pytest.mark.asyncio +async def test_opencode_balanced_prefers_kilo_sonnet_workhorse_over_free_or_generic_lanes( + tmp_path, +): + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + kilo-sonnet: + backend: openai-compat + base_url: "https://api.kilo.ai/api/gateway" + api_key: "secret" + model: "anthropic/claude-sonnet-4.6" + tier: mid + capabilities: + cost_tier: standard + deepseek-chat: + backend: openai-compat + base_url: "https://deepseek.example.com/v1" + api_key: "secret" + model: "deepseek-chat" + tier: default + capabilities: + cost_tier: standard + kilocode: + backend: openai-compat + base_url: "https://api.kilo.ai/api/gateway" + api_key: "secret" + model: "z-ai/glm-5:free" + tier: fallback + capabilities: + cost_tier: free +client_profiles: + enabled: true + default: generic + profiles: + generic: {} + opencode: + routing_mode: auto +routing_modes: + enabled: true + default: auto + modes: + auto: + select: + prefer_tiers: ["mid"] +fallback_chain: + - deepseek-chat + - kilocode +metrics: + enabled: false +""", + ) + ) + router = Router(cfg) + + decision = await router.route( + [ + { + "role": "user", + "content": "Need a rollback-safe architecture review for this refactor under load.", + } + ], + model_requested="auto", + client_profile="opencode", + profile_hints=cfg.client_profiles["profiles"]["opencode"], + provider_health={ + "kilo-sonnet": {"healthy": True, "avg_latency_ms": 180, "consecutive_failures": 0}, + "deepseek-chat": {"healthy": True, "avg_latency_ms": 120, "consecutive_failures": 0}, + "kilocode": {"healthy": True, "avg_latency_ms": 110, "consecutive_failures": 0}, + }, + ) + + assert decision.provider_name == "kilo-sonnet" + ranking = decision.details["candidate_ranking"] + assert ranking[0]["provider"] == "kilo-sonnet" + assert ranking[0]["kilo_mode"] == "frontier-balanced" + assert ranking[0]["kilo_score"] > 0 + free_lane = next(item for item in ranking if item["provider"] == "kilocode") + assert free_lane["kilo_mode"] == "frontier-free" + assert free_lane["kilo_score"] < ranking[0]["kilo_score"] + + @pytest.mark.asyncio async def test_opencode_complexity_bias_promotes_single_strong_architecture_hit(tmp_path): cfg = load_config( From 36ddbc83f1de745753afd2e6a20b08c2f26ffbd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Sun, 29 Mar 2026 23:25:43 +0200 Subject: [PATCH 2/2] test(router): align kilo lane expectations --- tests/test_api_hardening.py | 7 ++----- tests/test_wizard.py | 6 ++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test_api_hardening.py b/tests/test_api_hardening.py index 8bfbfa9..71eddde 100644 --- a/tests/test_api_hardening.py +++ b/tests/test_api_hardening.py @@ -667,8 +667,7 @@ def test_route_preview_explains_kilo_frontier_lane_choice(api_client, monkeypatc { "role": "user", "content": ( - "Design a rollback-safe architecture plan for this refactor " - "under load." + "Design a rollback-safe architecture plan for this refactor under load." ), } ], @@ -684,9 +683,7 @@ def test_route_preview_explains_kilo_frontier_lane_choice(api_client, monkeypatc "Kilo frontier fit favored frontier-premium" in item for item in body["route_summary"]["why_selected"] ) - assert any( - "premium Kilo Opus lane" in item for item in body["route_summary"]["why_selected"] - ) + assert any("premium Kilo Opus lane" in item for item in body["route_summary"]["why_selected"]) assert body["route_summary"]["alternatives"] assert any( "Kilo lane strategy fit was weaker" in item diff --git a/tests/test_wizard.py b/tests/test_wizard.py index 789df92..8b4ae5f 100644 --- a/tests/test_wizard.py +++ b/tests/test_wizard.py @@ -1440,7 +1440,8 @@ def test_build_route_add_setup_plan_maps_catalog_route_to_setup_provider(tmp_pat assert "openrouter-fallback" in auto_apply_names assert actionable_names >= { "openrouter-fallback", - "kilocode", + "kilo-opus", + "kilo-sonnet", "blackbox-free", "openai-gpt4o", "deepseek-reasoner", @@ -1493,7 +1494,8 @@ def test_build_route_add_setup_plan_separates_ready_and_manual_additions(tmp_pat assert "openrouter-fallback" in auto_apply_names assert actionable_names >= { "openrouter-fallback", - "kilocode", + "kilo-opus", + "kilo-sonnet", "blackbox-free", "openai-gpt4o", "deepseek-chat",