From 42ab92ac95b1c086dae1a62e784c4176fe6e397c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 13:10:23 +0200 Subject: [PATCH 01/13] feat(bridge): add anthropic streaming sse adapter --- faigate/bridges/anthropic/__init__.py | 2 + faigate/bridges/anthropic/adapter.py | 261 +++++++++++++++++++++++++- faigate/main.py | 56 ++++-- tests/test_anthropic_api.py | 197 +++++++++++++++++-- 4 files changed, 481 insertions(+), 35 deletions(-) diff --git a/faigate/bridges/anthropic/__init__.py b/faigate/bridges/anthropic/__init__.py index 57bb9dd..4d7c63b 100644 --- a/faigate/bridges/anthropic/__init__.py +++ b/faigate/bridges/anthropic/__init__.py @@ -8,6 +8,7 @@ canonical_to_openai_body, dispatch_anthropic_count_tokens, dispatch_anthropic_messages, + openai_sse_to_anthropic, ) __all__ = [ @@ -18,4 +19,5 @@ "canonical_to_openai_body", "dispatch_anthropic_count_tokens", "dispatch_anthropic_messages", + "openai_sse_to_anthropic", ] diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index 03ccb4b..ef97a98 100644 --- a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -8,6 +8,8 @@ from __future__ import annotations import json +from collections.abc import AsyncIterator +from dataclasses import dataclass from typing import Any from uuid import uuid4 @@ -32,6 +34,16 @@ ) +@dataclass +class _AnthropicStreamToolState: + """Tracks one streamed tool block while OpenAI-style deltas arrive.""" + + index: int + tool_use_id: str | None = None + name: str | None = None + started: bool = False + + def anthropic_request_to_canonical( request: AnthropicMessagesRequest, *, @@ -39,7 +51,9 @@ def anthropic_request_to_canonical( ) -> CanonicalChatRequest: """Map an Anthropic messages request to the internal gateway model.""" - normalized_headers = {str(key): str(value) for key, value in (headers or {}).items()} + normalized_headers = { + str(key): str(value) for key, value in (headers or 
{}).items() + } source = ( normalized_headers.get("x-faigate-client") or normalized_headers.get("anthropic-client") @@ -164,7 +178,9 @@ def dispatch_anthropic_count_tokens( ) -def approximate_anthropic_input_tokens(request: CanonicalChatRequest) -> tuple[int, str]: +def approximate_anthropic_input_tokens( + request: CanonicalChatRequest, +) -> tuple[int, str]: """Return a lightweight token estimate for Anthropic bridge requests. The gateway does not yet maintain provider-specific tokenizers or a stable @@ -212,7 +228,8 @@ def _message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessage]: return _user_message_to_canonical(message) if any(block.type != "text" for block in message.content): raise AnthropicBridgeError( - f"Anthropic bridge v1 does not support '{message.role}' messages with non-text blocks" + "Anthropic bridge v1 does not support " + f"'{message.role}' messages with non-text blocks" ) return [ CanonicalMessage( @@ -279,12 +296,18 @@ def _anthropic_tool_use_to_openai_call(block: AnthropicContentBlock) -> dict[str "type": "function", "function": { "name": block.name, - "arguments": json.dumps(block.input or {}, separators=(",", ":"), sort_keys=True), + "arguments": json.dumps( + block.input or {}, + separators=(",", ":"), + sort_keys=True, + ), }, } -def _anthropic_tool_result_to_canonical_message(block: AnthropicContentBlock) -> CanonicalMessage: +def _anthropic_tool_result_to_canonical_message( + block: AnthropicContentBlock, +) -> CanonicalMessage: tool_use_id = block.tool_use_id if not tool_use_id: raise AnthropicBridgeError("Anthropic tool_result blocks require a tool_use_id") @@ -436,3 +459,231 @@ def map_stop_reason_to_anthropic( if normalized in {"length", "max_tokens"}: return "max_tokens" return normalized + + +def anthropic_sse_event(event_type: str, payload: dict[str, Any]) -> bytes: + """Encode one Anthropic-style SSE event.""" + + body = json.dumps(payload, separators=(",", ":")) + return f"event: {event_type}\ndata: 
{body}\n\n".encode() + + +async def openai_sse_to_anthropic( + stream: AsyncIterator[bytes], + *, + requested_model: str, + resolved_model: str | None = None, +) -> AsyncIterator[bytes]: + """Translate OpenAI-compatible SSE chunks into Anthropic-style message events. + + This intentionally supports the common bridge path first: + + - text deltas + - streamed tool calls represented as function-call deltas + - stop reasons and optional usage payloads + + Unknown or malformed upstream chunks are ignored conservatively instead of + terminating the client-visible stream abruptly. + """ + + message_id = f"msg_{uuid4().hex}" + output_tokens = 0 + usage: dict[str, int] = {"input_tokens": 0, "output_tokens": 0} + text_block_started = False + text_block_closed = False + tool_states: dict[int, _AnthropicStreamToolState] = {} + tool_blocks_closed = False + stop_reason: str | None = None + + yield anthropic_sse_event( + "message_start", + { + "type": "message_start", + "message": { + "id": message_id, + "type": "message", + "role": "assistant", + "model": resolved_model or requested_model, + "content": [], + "stop_reason": None, + "stop_sequence": None, + "usage": dict(usage), + }, + }, + ) + + async for raw_line in stream: + line = raw_line.decode("utf-8", errors="replace").strip() + if not line or not line.startswith("data:"): + continue + payload_text = line[5:].strip() + if not payload_text: + continue + if payload_text == "[DONE]": + break + + try: + payload = json.loads(payload_text) + except json.JSONDecodeError: + continue + + if isinstance(payload, dict) and "error" in payload: + yield anthropic_sse_event( + "error", + { + "type": "error", + "error": payload.get("error") + or {"type": "api_error", "message": "Upstream error"}, + }, + ) + return + + usage_payload = payload.get("usage") or {} + prompt_tokens = int(usage_payload.get("prompt_tokens") or 0) + completion_tokens = int(usage_payload.get("completion_tokens") or 0) + if prompt_tokens: + usage["input_tokens"] = 
prompt_tokens + if completion_tokens: + usage["output_tokens"] = completion_tokens + + choices = payload.get("choices") or [] + if not choices: + continue + choice = choices[0] or {} + delta = choice.get("delta") or {} + finish_reason = str(choice.get("finish_reason") or "").strip() or None + + text_delta = delta.get("content") + if isinstance(text_delta, str) and text_delta: + if tool_states and not tool_blocks_closed: + for tool_index in sorted(tool_states): + if tool_states[tool_index].started: + yield anthropic_sse_event( + "content_block_stop", + { + "type": "content_block_stop", + "index": _anthropic_tool_index( + tool_index, + text_block_started=True, + ), + }, + ) + tool_blocks_closed = True + if not text_block_started: + yield anthropic_sse_event( + "content_block_start", + { + "type": "content_block_start", + "index": 0, + "content_block": {"type": "text", "text": ""}, + }, + ) + text_block_started = True + output_tokens += _estimate_text_tokens(text_delta) + usage["output_tokens"] = max(usage["output_tokens"], output_tokens) + yield anthropic_sse_event( + "content_block_delta", + { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "text_delta", "text": text_delta}, + }, + ) + + delta_tool_calls = delta.get("tool_calls") or [] + if isinstance(delta_tool_calls, list) and delta_tool_calls: + if text_block_started and not text_block_closed: + yield anthropic_sse_event( + "content_block_stop", + {"type": "content_block_stop", "index": 0}, + ) + text_block_closed = True + for tool_delta in delta_tool_calls: + if not isinstance(tool_delta, dict): + continue + raw_index = int(tool_delta.get("index") or 0) + state = tool_states.setdefault( + raw_index, _AnthropicStreamToolState(index=raw_index) + ) + function = tool_delta.get("function") or {} + if tool_delta.get("id"): + state.tool_use_id = str(tool_delta["id"]) + if function.get("name"): + state.name = str(function["name"]) + if not state.started and state.name: + state.started = True + yield 
anthropic_sse_event( + "content_block_start", + { + "type": "content_block_start", + "index": _anthropic_tool_index( + raw_index, + text_block_started, + ), + "content_block": { + "type": "tool_use", + "id": state.tool_use_id or f"toolu_{uuid4().hex[:24]}", + "name": state.name, + "input": {}, + }, + }, + ) + raw_arguments = function.get("arguments") + if state.started and isinstance(raw_arguments, str) and raw_arguments: + yield anthropic_sse_event( + "content_block_delta", + { + "type": "content_block_delta", + "index": _anthropic_tool_index( + raw_index, + text_block_started, + ), + "delta": { + "type": "input_json_delta", + "partial_json": raw_arguments, + }, + }, + ) + + if finish_reason: + stop_reason = map_stop_reason_to_anthropic( + finish_reason, + has_tool_calls=bool(tool_states), + ) + + if text_block_started and not text_block_closed: + yield anthropic_sse_event( + "content_block_stop", + {"type": "content_block_stop", "index": 0}, + ) + for tool_index in sorted(tool_states): + state = tool_states[tool_index] + if state.started: + yield anthropic_sse_event( + "content_block_stop", + { + "type": "content_block_stop", + "index": _anthropic_tool_index(tool_index, text_block_started), + }, + ) + + yield anthropic_sse_event( + "message_delta", + { + "type": "message_delta", + "delta": { + "stop_reason": ( + stop_reason or ("tool_use" if tool_states else "end_turn") + ), + "stop_sequence": None, + }, + "usage": dict(usage), + }, + ) + yield anthropic_sse_event("message_stop", {"type": "message_stop"}) + + +def _anthropic_tool_index(raw_index: int, text_block_started: bool) -> int: + """Return the Anthropic content index for one streamed tool block.""" + + return raw_index + (1 if text_block_started else 0) diff --git a/faigate/main.py b/faigate/main.py index dcb7839..8d955ba 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -34,6 +34,7 @@ anthropic_request_to_canonical, canonical_response_to_anthropic, dispatch_anthropic_count_tokens, + 
openai_sse_to_anthropic, ) from .canonical import CanonicalChatRequest, CanonicalChatResponse, CanonicalResponseMessage from .config import Config, load_config @@ -3125,12 +3126,6 @@ async def anthropic_messages(request: Request): headers = _collect_anthropic_bridge_headers(request) try: wire_request = parse_anthropic_messages_request(body) - if wire_request.stream: - return _anthropic_error_response( - "Anthropic bridge v1 does not support streaming yet", - error_type="not_supported_error", - status_code=501, - ) canonical_request = anthropic_request_to_canonical(wire_request, headers=headers) canonical_request = _resolve_anthropic_requested_model(canonical_request) execution = await _execute_chat_completion_body(canonical_request.to_openai_body(), headers) @@ -3159,11 +3154,42 @@ async def anthropic_messages(request: Request): status_code=execution.status_code, ) - if execution.stream or not isinstance(execution.result, dict): + bridge_headers = _anthropic_bridge_response_headers( + source=str(canonical_request.metadata.get("source") or "claude-code"), + requested_model=str( + canonical_request.metadata.get("requested_model_original") or wire_request.model + ), + resolved_model=str(canonical_request.requested_model or wire_request.model), + anthropic_version=str(headers.get("anthropic-version") or "") or None, + anthropic_beta=str(headers.get("anthropic-beta") or "") or None, + ) + + if execution.stream: + return StreamingResponse( + openai_sse_to_anthropic( + execution.result, + requested_model=str( + canonical_request.metadata.get("requested_model_original") or wire_request.model + ), + resolved_model=str(canonical_request.requested_model or wire_request.model), + ), + media_type="text/event-stream", + headers={ + "X-faigate-Provider": execution.provider_name, + "X-faigate-Profile": execution.client_profile, + "X-faigate-Layer": execution.decision.layer, + "X-faigate-Rule": execution.decision.rule_name, + "X-faigate-Hooks": 
",".join(execution.hook_state.applied_hooks), + "X-faigate-Hook-Errors": str(len(execution.hook_state.errors)), + "x-faigate-trace-id": execution.trace_id or str(uuid.uuid4()), + **bridge_headers, + }, + ) + if not isinstance(execution.result, dict): return _anthropic_error_response( - "Anthropic bridge v1 does not support streaming responses", - error_type="not_supported_error", - status_code=501, + "Anthropic bridge returned an unsupported upstream response shape", + error_type="api_error", + status_code=502, ) canonical_response = _openai_result_to_canonical_response(execution.result) @@ -3182,15 +3208,7 @@ async def anthropic_messages(request: Request): response.headers["X-faigate-Hooks"] = ",".join(execution.hook_state.applied_hooks) response.headers["X-faigate-Hook-Errors"] = str(len(execution.hook_state.errors)) response.headers["x-faigate-trace-id"] = execution.trace_id or str(uuid.uuid4()) - for key, value in _anthropic_bridge_response_headers( - source=str(canonical_request.metadata.get("source") or "claude-code"), - requested_model=str( - canonical_request.metadata.get("requested_model_original") or wire_request.model - ), - resolved_model=str(canonical_request.requested_model or wire_request.model), - anthropic_version=str(headers.get("anthropic-version") or "") or None, - anthropic_beta=str(headers.get("anthropic-beta") or "") or None, - ).items(): + for key, value in bridge_headers.items(): response.headers[key] = value return response diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index d5f94a5..ae8f0b6 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -5,6 +5,7 @@ import importlib import sys import types +from collections.abc import AsyncIterator from contextlib import asynccontextmanager from pathlib import Path @@ -21,6 +22,7 @@ sys.modules.pop("faigate.main", None) import faigate.main as main_module # noqa: E402 +from faigate.bridges.anthropic import openai_sse_to_anthropic # noqa: E402 from 
faigate.config import load_config # noqa: E402 from faigate.providers import ProviderError # noqa: E402 from faigate.router import Router # noqa: E402 @@ -35,13 +37,23 @@ def _write_config(tmp_path: Path, body: str) -> Path: class _CapturingProviderStub: - def __init__(self, name: str = "cloud-default", *, transport: dict[str, object] | None = None): + def __init__( + self, + name: str = "cloud-default", + *, + transport: dict[str, object] | None = None, + ): self.name = name self.model = "chat-model" self.backend_type = "openai-compat" self.contract = "generic" self.tier = "default" - self.capabilities = {"chat": True, "local": False, "cloud": True, "network_zone": "public"} + self.capabilities = { + "chat": True, + "local": False, + "cloud": True, + "network_zone": "public", + } self.context_window = 128000 self.limits = {"max_input_tokens": 128000, "max_output_tokens": 4096} self.cache = {"mode": "none", "read_discount": False} @@ -116,6 +128,30 @@ async def complete(self, messages, **kwargs): raise ProviderError(self.name, self.status, self.detail) +class _StreamingProviderStub(_CapturingProviderStub): + async def complete(self, messages, **kwargs): + self.calls.append({"messages": messages, **kwargs}) + + async def _iter() -> AsyncIterator[bytes]: + yield ( + b'data: {"id":"chatcmpl-stream","object":"chat.completion.chunk",' + b'"model":"chat-model","choices":[{"index":0,"delta":{"role":"assistant",' + b'"content":"Hello"},"finish_reason":null}]}\n' + ) + yield b"\n" + yield ( + b'data: {"id":"chatcmpl-stream","object":"chat.completion.chunk",' + b'"model":"chat-model","choices":[{"index":0,"delta":{"content":" world"},' + b'"finish_reason":"stop"}],"usage":{"prompt_tokens":11,' + b'"completion_tokens":2,"total_tokens":13}}\n' + ) + yield b"\n" + yield b"data: [DONE]\n" + yield b"\n" + + return _iter() + + @pytest.fixture def anthropic_api_client(tmp_path, monkeypatch): cfg = load_config( @@ -155,9 +191,19 @@ async def _noop_lifespan(_app): 
monkeypatch.setattr(main_module, "_config", cfg, raising=False) monkeypatch.setattr(main_module, "_router", Router(cfg), raising=False) - monkeypatch.setattr(main_module, "_providers", {"cloud-default": provider}, raising=False) + monkeypatch.setattr( + main_module, + "_providers", + {"cloud-default": provider}, + raising=False, + ) monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) - monkeypatch.setattr(main_module.app.router, "lifespan_context", _noop_lifespan, raising=False) + monkeypatch.setattr( + main_module.app.router, + "lifespan_context", + _noop_lifespan, + raising=False, + ) with TestClient(main_module.app) as client: yield client, provider @@ -181,7 +227,10 @@ def test_anthropic_messages_returns_bridge_response(anthropic_api_client): assert body["content"][0]["type"] == "text" assert body["content"][0]["text"] == "anthropic ok" assert provider.calls[0]["extra_body"]["metadata"]["source"] == "claude-code" - assert provider.calls[0]["messages"][0] == {"role": "system", "content": "Use markdown"} + assert provider.calls[0]["messages"][0] == { + "role": "system", + "content": "Use markdown", + } assert response.headers["x-faigate-bridge-surface"] == "anthropic-messages" assert response.headers["x-faigate-bridge-source"] == "claude-code" assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet" @@ -238,7 +287,9 @@ def test_anthropic_messages_preserve_version_headers(anthropic_api_client): assert response.headers["x-faigate-bridge-anthropic-beta"] == "tools-2024-04-04" -def test_anthropic_messages_forward_tool_use_and_tool_result_blocks(anthropic_api_client): +def test_anthropic_messages_forward_tool_use_and_tool_result_blocks( + anthropic_api_client, +): client, provider = anthropic_api_client response = client.post( @@ -306,6 +357,75 @@ def test_anthropic_messages_rejects_non_text_blocks(anthropic_api_client): assert "text and tool_result blocks" in body["error"]["message"] +def 
test_anthropic_messages_support_streaming(tmp_path, monkeypatch): + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + cloud-default: + backend: openai-compat + base_url: "https://api.example.com/v1" + api_key: "secret" + model: "chat-model" +anthropic_bridge: + enabled: true +fallback_chain: + - cloud-default +metrics: + enabled: false +""", + ) + ) + + @asynccontextmanager + async def _noop_lifespan(_app): + yield + + provider = _StreamingProviderStub() + monkeypatch.setattr(main_module, "_config", cfg, raising=False) + monkeypatch.setattr(main_module, "_router", Router(cfg), raising=False) + monkeypatch.setattr( + main_module, + "_providers", + {"cloud-default": provider}, + raising=False, + ) + monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) + monkeypatch.setattr( + main_module.app.router, + "lifespan_context", + _noop_lifespan, + raising=False, + ) + + with TestClient(main_module.app) as client: + with client.stream( + "POST", + "/v1/messages", + json={ + "model": "claude-sonnet", + "stream": True, + "messages": [{"role": "user", "content": "hello"}], + }, + ) as response: + body = b"".join(response.iter_bytes()).decode("utf-8") + + assert response.status_code == 200 + assert response.headers["content-type"].startswith("text/event-stream") + assert response.headers["x-faigate-bridge-surface"] == "anthropic-messages" + assert "event: message_start" in body + assert "event: content_block_start" in body + assert '"type":"text_delta","text":"Hello"' in body + assert '"type":"text_delta","text":" world"' in body + assert '"stop_reason":"end_turn"' in body + assert "event: message_stop" in body + assert provider.calls[0]["stream"] is True + + def test_anthropic_count_tokens_returns_estimate_with_headers(anthropic_api_client): client, _provider = anthropic_api_client @@ -319,7 +439,10 @@ def test_anthropic_count_tokens_returns_estimate_with_headers(anthropic_api_clie { "name": 
"lookup_doc", "description": "Load one doc", - "input_schema": {"type": "object", "properties": {"id": {"type": "string"}}}, + "input_schema": { + "type": "object", + "properties": {"id": {"type": "string"}}, + }, } ], }, @@ -412,7 +535,12 @@ async def _noop_lifespan(_app): raising=False, ) monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) - monkeypatch.setattr(main_module.app.router, "lifespan_context", _noop_lifespan, raising=False) + monkeypatch.setattr( + main_module.app.router, + "lifespan_context", + _noop_lifespan, + raising=False, + ) with TestClient(main_module.app) as client: response = client.post( @@ -483,7 +611,10 @@ async def _noop_lifespan(_app): assert body["error"]["type"] == "rate_limit_error" -def test_anthropic_messages_skip_shared_quota_group_after_quota_failure(tmp_path, monkeypatch): +def test_anthropic_messages_skip_shared_quota_group_after_quota_failure( + tmp_path, + monkeypatch, +): cfg = load_config( _write_config( tmp_path, @@ -533,7 +664,10 @@ async def _noop_lifespan(_app): detail="insufficient_quota on upstream account", transport={"quota_group": "anthropic-main"}, ) - mirror = _CapturingProviderStub("kilo-mirror", transport={"quota_group": "anthropic-main"}) + mirror = _CapturingProviderStub( + "kilo-mirror", + transport={"quota_group": "anthropic-main"}, + ) local = _CapturingProviderStub("local-worker") monkeypatch.setattr(main_module, "_config", cfg, raising=False) @@ -549,7 +683,12 @@ async def _noop_lifespan(_app): raising=False, ) monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) - monkeypatch.setattr(main_module.app.router, "lifespan_context", _noop_lifespan, raising=False) + monkeypatch.setattr( + main_module.app.router, + "lifespan_context", + _noop_lifespan, + raising=False, + ) with TestClient(main_module.app) as client: response = client.post( @@ -565,3 +704,39 @@ async def _noop_lifespan(_app): assert mirror.calls == [] assert len(local.calls) == 1 assert 
response.headers["x-faigate-provider"] == "local-worker" + + +@pytest.mark.asyncio +async def test_openai_sse_to_anthropic_maps_tool_call_deltas(): + async def _iter() -> AsyncIterator[bytes]: + yield ( + b'data: {"id":"chatcmpl-stream","object":"chat.completion.chunk",' + b'"model":"chat-model","choices":[{"index":0,"delta":{"tool_calls":[{' + b'"index":0,"id":"call_1","type":"function","function":{"name":"lookup_doc",' + b'"arguments":"{\\"id\\":"}}]},"finish_reason":null}]}\n' + ) + yield b"\n" + yield ( + b'data: {"id":"chatcmpl-stream","object":"chat.completion.chunk",' + b'"model":"chat-model","choices":[{"index":0,"delta":{"tool_calls":[{' + b'"index":0,"function":{"arguments":"\\"design-note\\"}"}}]},' + b'"finish_reason":"tool_calls"}]}\n' + ) + yield b"\n" + yield b"data: [DONE]\n" + yield b"\n" + + chunks: list[str] = [] + async for chunk in openai_sse_to_anthropic( + _iter(), + requested_model="claude-code", + resolved_model="premium", + ): + chunks.append(chunk.decode("utf-8")) + + body = "".join(chunks) + assert "event: content_block_start" in body + assert '"type":"tool_use","id":"call_1","name":"lookup_doc","input":{}' in body + assert '"type":"input_json_delta","partial_json":"{\\"id\\":' in body + assert '"type":"input_json_delta","partial_json":"\\"design-note\\"}"' in body + assert '"stop_reason":"tool_use"' in body From 2c2a85696a590d657c5318c3e54caccfdb8cc764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 14:19:35 +0200 Subject: [PATCH 02/13] feat(bridge): accept claude code model ids --- docs/anthropic-bridge.md | 7 +++++++ faigate/config.py | 19 +++++++++++++++++-- faigate/main.py | 29 ++++++++++++++++++++++++++++- tests/test_anthropic_api.py | 26 ++++++++++++++++++++++++++ tests/test_config.py | 8 ++++++++ 5 files changed, 86 insertions(+), 3 deletions(-) diff --git a/docs/anthropic-bridge.md b/docs/anthropic-bridge.md index d2a35e3..c9eb43e 100644 --- a/docs/anthropic-bridge.md +++ 
b/docs/anthropic-bridge.md @@ -80,6 +80,13 @@ Good first aliases: - `claude-code-fast -> eco` - `claude-code-premium -> premium` +Built-in bridge defaults also recognize common Claude Code model ids such as: + +- `claude-sonnet-4-6[1m]` +- `claude-sonnet-4-6-20251001` +- `claude-opus-4-6[1m]` +- `claude-haiku-4-5-20251001` + That keeps Claude-oriented clients on stable logical targets while Gate can still adapt the real route underneath. ## Limits And Fallback Design diff --git a/faigate/config.py b/faigate/config.py index 424673f..cccb07c 100644 --- a/faigate/config.py +++ b/faigate/config.py @@ -110,6 +110,21 @@ _SUPPORTED_PROVIDER_TRANSPORT_PROBE_STRATEGIES = {"models", "chat", "models_or_chat", "none"} _SUPPORTED_PROVIDER_TRANSPORT_COMPATIBILITY = {"native", "aggregator", "compat-layer"} _SUPPORTED_PROVIDER_TRANSPORT_CONFIDENCE = {"high", "medium", "low"} +_DEFAULT_ANTHROPIC_BRIDGE_MODEL_ALIASES = { + "claude-code": "auto", + "claude-code-fast": "eco", + "claude-code-premium": "premium", + # Claude Code currently sends its own Anthropic model ids. These built-ins + # let the bridge accept them without per-machine operator tuning. 
+ "claude-sonnet-4-6": "anthropic-sonnet", + "claude-sonnet-4-6-20251001": "anthropic-sonnet", + "claude-sonnet-4-6[1m]": "anthropic-sonnet", + "claude-opus-4-6": "anthropic-claude", + "claude-opus-4-6-20251001": "anthropic-claude", + "claude-opus-4-6[1m]": "anthropic-claude", + "claude-haiku-4-5": "anthropic-haiku", + "claude-haiku-4-5-20251001": "anthropic-haiku", +} _CLIENT_PROFILE_PRESET_SPECS: dict[str, dict[str, Any]] = { "openclaw": { @@ -1792,7 +1807,7 @@ def _normalize_anthropic_bridge(data: dict[str, Any]) -> dict[str, Any]: if not isinstance(model_aliases, dict): raise ConfigError("'anthropic_bridge.model_aliases' must be a mapping") - normalized_aliases: dict[str, str] = {} + normalized_aliases: dict[str, str] = dict(_DEFAULT_ANTHROPIC_BRIDGE_MODEL_ALIASES) for key, value in model_aliases.items(): alias = str(key or "").strip() target = str(value or "").strip() @@ -1987,7 +2002,7 @@ def anthropic_bridge(self) -> dict: "enabled": False, "route_prefix": "/v1", "allow_claude_code_hints": True, - "model_aliases": {}, + "model_aliases": dict(_DEFAULT_ANTHROPIC_BRIDGE_MODEL_ALIASES), }, ) diff --git a/faigate/main.py b/faigate/main.py index 8d955ba..83d3cf5 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -421,7 +421,19 @@ def _resolve_anthropic_requested_model(request: CanonicalChatRequest) -> Canonic """Apply configured Anthropic bridge aliases without changing wire parsing.""" alias_map = _config.anthropic_bridge.get("model_aliases", {}) - requested_model = str(alias_map.get(request.requested_model, request.requested_model)) + requested_model_raw = str(request.requested_model or "").strip() + requested_model = str( + alias_map.get( + requested_model_raw, + alias_map.get( + requested_model_raw.lower(), + alias_map.get( + _normalize_anthropic_model_alias(requested_model_raw), + request.requested_model, + ), + ), + ) + ) if requested_model == request.requested_model: return request metadata = dict(request.metadata) @@ -439,6 +451,21 @@ def 
_resolve_anthropic_requested_model(request: CanonicalChatRequest) -> Canonic ) +def _normalize_anthropic_model_alias(model_id: str) -> str: + """Return a stable alias key for Claude-native model ids. + + Claude Code sometimes sends model ids with display-oriented suffixes like + ``[1m]``. The bridge should treat those as the same model family for alias + resolution instead of forcing operators to encode every formatting variant. + """ + + normalized = str(model_id or "").strip().lower() + if not normalized: + return "" + normalized = re.sub(r"\[[^\]]+]", "", normalized).strip() + return normalized + + def _collect_operator_context(headers: dict[str, str]) -> tuple[str, str]: """Return operator action and client tag hints from request headers.""" max_chars = int((_config.security or {}).get("max_header_value_chars", 160)) diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index ae8f0b6..a15cd6e 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -260,6 +260,32 @@ def test_anthropic_messages_applies_model_aliases(anthropic_api_client): assert response.headers["x-faigate-bridge-model-resolved"] == "premium" +def test_anthropic_messages_applies_builtin_claude_code_model_aliases( + anthropic_api_client, +): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet-4-6[1m]", + "messages": [ + { + "role": "user", + "content": "Use the Claude Code model name directly", + } + ], + }, + ) + + assert response.status_code == 200 + metadata = provider.calls[0]["extra_body"]["metadata"] + assert metadata["requested_model_original"] == "claude-sonnet-4-6[1m]" + assert metadata["requested_model_resolved"] == "anthropic-sonnet" + assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet-4-6-1m" + assert response.headers["x-faigate-bridge-model-resolved"] == "anthropic-sonnet" + + def 
test_anthropic_messages_preserve_version_headers(anthropic_api_client): client, provider = anthropic_api_client diff --git a/tests/test_config.py b/tests/test_config.py index 71a02fb..1862023 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -411,6 +411,14 @@ def test_anthropic_bridge_defaults_are_exposed(): "claude-code": "auto", "claude-code-fast": "eco", "claude-code-premium": "premium", + "claude-sonnet-4-6": "anthropic-sonnet", + "claude-sonnet-4-6-20251001": "anthropic-sonnet", + "claude-sonnet-4-6[1m]": "anthropic-sonnet", + "claude-opus-4-6": "anthropic-claude", + "claude-opus-4-6-20251001": "anthropic-claude", + "claude-opus-4-6[1m]": "anthropic-claude", + "claude-haiku-4-5": "anthropic-haiku", + "claude-haiku-4-5-20251001": "anthropic-haiku", }, } From 94aee790372c957438303b64e62eca8ed3468616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 14:27:42 +0200 Subject: [PATCH 03/13] feat(bridge): accept anthropic system text blocks --- faigate/api/anthropic/models.py | 45 ++++++++++++++++++++++++++------- tests/test_anthropic_api.py | 26 +++++++++++++++++++ tests/test_anthropic_bridge.py | 18 +++++++++++++ 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/faigate/api/anthropic/models.py b/faigate/api/anthropic/models.py index b03b858..ef395a4 100644 --- a/faigate/api/anthropic/models.py +++ b/faigate/api/anthropic/models.py @@ -96,15 +96,7 @@ def parse_anthropic_messages_request(payload: Mapping[str, Any]) -> AnthropicMes raise AnthropicBridgeError("Anthropic messages payload requires a model") raw_system = payload.get("system") - system: str | list[str] | None - if raw_system is None: - system = None - elif isinstance(raw_system, str): - system = raw_system - elif isinstance(raw_system, list) and all(isinstance(item, str) for item in raw_system): - system = list(raw_system) - else: - raise AnthropicBridgeError("'system' must be a string, a list of strings, or null") + system = 
_parse_system_prompt(raw_system) raw_messages = payload.get("messages", []) if not isinstance(raw_messages, list): @@ -149,6 +141,41 @@ def parse_anthropic_token_count_request(payload: Mapping[str, Any]) -> Anthropic ) +def _parse_system_prompt(raw: Any) -> str | list[str] | None: + """Normalize Anthropic system prompts into the narrow bridge shape. + + Claude-native clients can send system prompts either as a single string or + as a list of text blocks. The bridge keeps the internal representation + intentionally small by flattening text blocks to plain strings. + """ + + if raw is None: + return None + if isinstance(raw, str): + return raw + if not isinstance(raw, list): + raise AnthropicBridgeError( + "'system' must be a string, a list of strings, a list of text blocks, or null" + ) + + normalized: list[str] = [] + for item in raw: + if isinstance(item, str): + normalized.append(item) + continue + if not isinstance(item, Mapping): + raise AnthropicBridgeError( + "'system' blocks must be strings or text block mappings" + ) + block_type = str(item.get("type", "") or "").strip() + if block_type != "text": + raise AnthropicBridgeError( + "Anthropic bridge v1 supports only text blocks in 'system'" + ) + normalized.append(str(item.get("text", "") or "")) + return normalized + + def _parse_message(raw: Any) -> AnthropicMessage: if not isinstance(raw, Mapping): raise AnthropicBridgeError("Anthropic message entries must be mappings") diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index a15cd6e..e6e3ebf 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -236,6 +236,32 @@ def test_anthropic_messages_returns_bridge_response(anthropic_api_client): assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet" +def test_anthropic_messages_accept_system_text_blocks(anthropic_api_client): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", 
+ "system": [ + {"type": "text", "text": "Use markdown"}, + {"type": "text", "text": "Prefer concise patches"}, + ], + "messages": [{"role": "user", "content": "Summarize this"}], + }, + ) + + assert response.status_code == 200 + assert provider.calls[0]["messages"][0] == { + "role": "system", + "content": "Use markdown", + } + assert provider.calls[0]["messages"][1] == { + "role": "system", + "content": "Prefer concise patches", + } + + def test_anthropic_messages_applies_model_aliases(anthropic_api_client): client, provider = anthropic_api_client diff --git a/tests/test_anthropic_bridge.py b/tests/test_anthropic_bridge.py index 853b0aa..dd59c8a 100644 --- a/tests/test_anthropic_bridge.py +++ b/tests/test_anthropic_bridge.py @@ -48,6 +48,24 @@ def test_parse_anthropic_messages_request_accepts_string_content(): assert request.messages[0].content[0].text == "hello" +def test_parse_anthropic_messages_request_accepts_text_block_system_prompt(): + request = parse_anthropic_messages_request( + { + "model": "claude-sonnet", + "system": [ + {"type": "text", "text": "You are a coding assistant."}, + {"type": "text", "text": "Prefer concise diffs."}, + ], + "messages": [{"role": "user", "content": "hello"}], + } + ) + + assert request.system == [ + "You are a coding assistant.", + "Prefer concise diffs.", + ] + + def test_anthropic_request_maps_to_canonical_and_openai_body(): wire_request = parse_anthropic_messages_request( { From 7bee23e743ff0c290b16de8c7938b7bfdac45407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 14:53:12 +0200 Subject: [PATCH 04/13] feat(bridge): tolerate tool results without ids --- faigate/bridges/anthropic/adapter.py | 12 +++++++++++ tests/test_anthropic_api.py | 31 +++++++++++++++++++++++++++ tests/test_anthropic_bridge.py | 32 ++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index ef97a98..8922f47 100644 --- 
a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -269,6 +269,18 @@ def _user_message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessa raise AnthropicBridgeError( "Anthropic bridge v1 supports only text and tool_result blocks in user messages" ) + if not block.tool_use_id: + # Claude-native clients can emit tool_result-like user blocks without a + # stable tool_use_id. Falling back to user text keeps the session + # usable instead of hard-failing the whole turn. + pending_text.append( + AnthropicContentBlock( + type="text", + text=_anthropic_tool_result_to_string(block), + metadata={**dict(block.metadata), "tool_result_without_id": True}, + ) + ) + continue if pending_text: canonical_messages.append( CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text)) diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index e6e3ebf..0afdd50 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -386,6 +386,37 @@ def test_anthropic_messages_forward_tool_use_and_tool_result_blocks( } +def test_anthropic_messages_tolerate_tool_result_without_id(anthropic_api_client): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "content": "Detached tool result text", + } + ], + } + ], + }, + ) + + assert response.status_code == 200 + forwarded_messages = provider.calls[0]["messages"] + assert forwarded_messages == [ + { + "role": "user", + "content": "Detached tool result text", + } + ] + + def test_anthropic_messages_rejects_non_text_blocks(anthropic_api_client): client, _provider = anthropic_api_client diff --git a/tests/test_anthropic_bridge.py b/tests/test_anthropic_bridge.py index dd59c8a..5acd3d7 100644 --- a/tests/test_anthropic_bridge.py +++ b/tests/test_anthropic_bridge.py @@ -145,6 +145,38 @@ def 
test_anthropic_request_maps_tool_use_and_tool_result_blocks(): } +def test_anthropic_request_degrades_tool_result_without_id_to_user_text(): + wire_request = parse_anthropic_messages_request( + { + "model": "claude-sonnet", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "content": "Result text without a stable tool id", + } + ], + } + ], + } + ) + + canonical = anthropic_request_to_canonical( + wire_request, + headers={"x-faigate-client": "claude-code"}, + ) + openai_body = canonical.to_openai_body() + + assert openai_body["messages"] == [ + { + "role": "user", + "content": "Result text without a stable tool id", + } + ] + + def test_detached_router_runs_bridge_dispatch(): executor = _FakeExecutor() response = TestClient(_build_test_app(executor)).post( From f0736dd0da957e25d630ec8d6aac011dcc506ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 15:11:29 +0200 Subject: [PATCH 05/13] docs(ops): clarify quota domains in probe output --- faigate/wizard.py | 84 ++++++++++++++++ scripts/faigate-doctor | 77 +++++++++++++++ tests/test_menu_helpers.py | 194 +++++++++++++++++++++++++++++++++++++ 3 files changed, 355 insertions(+) diff --git a/faigate/wizard.py b/faigate/wizard.py index 6d0f44f..2c61567 100644 --- a/faigate/wizard.py +++ b/faigate/wizard.py @@ -585,6 +585,58 @@ def _extract_env_reference(value: str) -> str: return "" +def _describe_quota_domain( + *, + backend: str, + route_type: str, + compatibility: str, + billing_mode: str, + quota_group: str, + quota_isolated: bool, + api_key_env: str, +) -> tuple[str, str]: + summary_bits: list[str] = [] + note = "" + + if billing_mode: + summary_bits.append(f"billing={billing_mode}") + elif backend == "anthropic-compat": + summary_bits.append("billing=direct-api") + elif route_type == "aggregator" or compatibility == "aggregator": + summary_bits.append("billing=aggregator-unspecified") + + if quota_group: + 
summary_bits.append(f"group={quota_group}") + if quota_isolated: + summary_bits.append("isolated=yes") + elif quota_group: + summary_bits.append("isolated=no") + + if backend == "anthropic-compat" and api_key_env == "ANTHROPIC_API_KEY": + note = ( + "uses ANTHROPIC_API_KEY through the Anthropic API; this path is separate from " + "Claude app subscription meters." + ) + elif quota_group and not quota_isolated: + note = ( + f"shares quota domain '{quota_group}' with sibling routes; a 429 here can also " + "hold other routes in the same group." + ) + elif route_type == "aggregator" or compatibility == "aggregator": + if quota_isolated: + note = ( + "marked as quota-isolated; this route can stay in rotation when a separate " + "Anthropic API path is rate-limited." + ) + else: + note = ( + "aggregator route is not marked quota-isolated; do not assume it escapes the " + "same Anthropic account limits unless its billing path is independent." + ) + + return " | ".join(summary_bits), note + + _ENV_REF_RE = re.compile(r"\$\{([^}]+)}") @@ -1232,6 +1284,7 @@ def build_provider_probe_report( backend=str(provider.get("backend", "openai-compat") or "openai-compat"), contract=str(provider.get("contract", "generic") or "generic"), ) + backend = str(provider.get("backend", "openai-compat") or "openai-compat") lane_binding = get_provider_lane_binding(name) api_key = str(provider.get("api_key", "") or "").strip() env_name = _extract_env_reference(api_key) @@ -1351,6 +1404,24 @@ def build_provider_probe_report( or transport_defaults.get("probe_strategy") or "" ), + "billing_mode": str( + request_readiness.get("billing_mode") + or (provider.get("transport") or {}).get("billing_mode") + or "" + ), + "quota_group": str( + request_readiness.get("quota_group") + or (provider.get("transport") or {}).get("quota_group") + or "" + ), + "quota_isolated": bool( + request_readiness.get("quota_isolated") + or (provider.get("transport") or {}).get("quota_isolated") + or False + ), + "route_type": 
str(lane.get("route_type") or ""), + "backend": backend, + "api_key_env": env_name, "probe_payload": str(request_readiness.get("probe_payload") or ""), "verified_via": str(request_readiness.get("verified_via") or ""), "operator_hint": operator_hint, @@ -1613,6 +1684,19 @@ def render_provider_probe_text(report: dict[str, Any]) -> str: ) + (f" | strategy: {row.get('probe_strategy')}" if row.get("probe_strategy") else "") ) + quota_summary, quota_note = _describe_quota_domain( + backend=str(row.get("backend") or ""), + route_type=str(row.get("route_type") or ""), + compatibility=str(row.get("transport_compatibility") or ""), + billing_mode=str(row.get("billing_mode") or ""), + quota_group=str(row.get("quota_group") or ""), + quota_isolated=bool(row.get("quota_isolated")), + api_key_env=str(row.get("api_key_env") or ""), + ) + if quota_summary: + lines.append(" " + f"quota domain: {quota_summary}") + if quota_note: + lines.append(" " + f"quota note: {quota_note}") if row.get("verified_via"): lines.append(" " + f"verified via: {row['verified_via']}") if row.get("probe_payload"): diff --git a/scripts/faigate-doctor b/scripts/faigate-doctor index da6b4c1..156b264 100755 --- a/scripts/faigate-doctor +++ b/scripts/faigate-doctor @@ -227,6 +227,55 @@ if store is not None and health_raw: return "route" return "inspect" + def describe_quota_domain( + *, + backend: str, + route_type: str, + compatibility: str, + billing_mode: str, + quota_group: str, + quota_isolated: bool, + api_key_env: str, + ) -> tuple[str, str]: + summary_bits = [] + note = "" + if billing_mode: + summary_bits.append(f"billing={billing_mode}") + elif backend == "anthropic-compat": + summary_bits.append("billing=direct-api") + elif route_type == "aggregator" or compatibility == "aggregator": + summary_bits.append("billing=aggregator-unspecified") + if quota_group: + summary_bits.append(f"group={quota_group}") + if quota_isolated: + summary_bits.append("isolated=yes") + elif quota_group: + 
summary_bits.append("isolated=no") + + if backend == "anthropic-compat" and api_key_env == "ANTHROPIC_API_KEY": + note = ( + "uses ANTHROPIC_API_KEY through the Anthropic API; this path is separate from " + "Claude app subscription meters." + ) + elif quota_group and not quota_isolated: + note = ( + f"shares quota domain '{quota_group}' with sibling routes; a 429 here can also " + "hold other routes in the same group." + ) + elif route_type == "aggregator" or compatibility == "aggregator": + if quota_isolated: + note = ( + "marked as quota-isolated; this route can stay in rotation when a separate " + "Anthropic API path is rate-limited." + ) + else: + note = ( + "aggregator route is not marked quota-isolated; do not assume it escapes " + "the same Anthropic account limits unless its billing path is independent." + ) + + return " | ".join(summary_bits), note + for provider_name, payload in sorted(providers.items()): total += 1 request_readiness = payload.get("request_readiness") or {} @@ -237,6 +286,9 @@ if store is not None and health_raw: profile = str(request_readiness.get("profile") or "") compatibility = str(request_readiness.get("compatibility") or "") confidence = str(request_readiness.get("probe_confidence") or "") + billing_mode = str(request_readiness.get("billing_mode") or "") + quota_group = str(request_readiness.get("quota_group") or "") + quota_isolated = bool(request_readiness.get("quota_isolated")) verified_via = str(request_readiness.get("verified_via") or "") probe_payload = str(request_readiness.get("probe_payload") or "") operator_hint = str(request_readiness.get("operator_hint") or "") @@ -275,6 +327,12 @@ if store is not None and health_raw: mirror_gap_routes += 1 lane_cluster = str(lane.get("cluster") or "") degrade_to = [str(item) for item in (lane.get("degrade_to") or []) if str(item)] + route_type = str(lane.get("route_type") or "") + backend = str(payload.get("backend") or "") + api_key_env = "" + api_key = str(payload.get("api_key") or 
"").strip() + if api_key.startswith("${") and api_key.endswith("}"): + api_key_env = api_key[2:-1].split(":-", 1)[0].split(":", 1)[0] add_recommendations = get_route_add_recommendations( configured_provider_names=configured_provider_names, canonical_model=canonical_model, @@ -300,6 +358,12 @@ if store is not None and health_raw: "profile": profile, "compatibility": compatibility, "confidence": confidence, + "billing_mode": billing_mode, + "quota_group": quota_group, + "quota_isolated": quota_isolated, + "route_type": route_type, + "backend": backend, + "api_key_env": api_key_env, "verified_via": verified_via, "probe_payload": probe_payload, "operator_hint": operator_hint, @@ -381,6 +445,19 @@ if store is not None and health_raw: ) if row["probe_payload"]: print(f"[ok] request-ready payload: {row['provider']} -> {row['probe_payload']}") + quota_summary, quota_note = describe_quota_domain( + backend=row["backend"], + route_type=row["route_type"], + compatibility=row["compatibility"], + billing_mode=row["billing_mode"], + quota_group=row["quota_group"], + quota_isolated=row["quota_isolated"], + api_key_env=row["api_key_env"], + ) + if quota_summary: + print(f"[ok] request-ready quota: {row['provider']} -> {quota_summary}") + if quota_note: + print(f"[ok] request-ready quota note: {row['provider']} -> {quota_note}") if row["operator_hint"]: print(f"[ok] request-ready next step: {row['provider']} -> {row['operator_hint']}") if row["runtime_penalty"] or row["runtime_issue_type"]: diff --git a/tests/test_menu_helpers.py b/tests/test_menu_helpers.py index 6e898da..01e3c6b 100644 --- a/tests/test_menu_helpers.py +++ b/tests/test_menu_helpers.py @@ -2431,6 +2431,93 @@ def test_faigate_provider_probe_surfaces_catalog_alert_actions(tmp_path: Path): assert "Action summary: fix-now=1 | review-now=1 | inspect=0" in result.stdout +def test_faigate_provider_probe_surfaces_quota_domain_notes(tmp_path: Path): + config_file = tmp_path / "config.yaml" + env_file = tmp_path / 
"faigate.env" + config_file.write_text( + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + anthropic-sonnet: + backend: anthropic-compat + api_key: ${ANTHROPIC_API_KEY} + base_url: https://api.anthropic.com/v1 + model: claude-sonnet-4-6 + kilo-sonnet: + backend: openai-compat + api_key: ${KILOCODE_API_KEY} + base_url: https://api.kilo.ai/api/gateway + model: anthropic/claude-sonnet-4.6 +fallback_chain: [] +metrics: + enabled: false + db_path: ":memory:" +""".strip(), + encoding="utf-8", + ) + env_file.write_text( + "ANTHROPIC_API_KEY=test-ant\nKILOCODE_API_KEY=test-kilo\n", + encoding="utf-8", + ) + + fake_bin = _write_fake_curl( + tmp_path, + { + "/health": json.dumps( + { + "providers": { + "anthropic-sonnet": { + "healthy": False, + "request_readiness": { + "ready": False, + "status": "rate-limited", + "reason": "429 rate limited upstream", + "profile": "anthropic-native", + "compatibility": "native", + "probe_confidence": "high", + }, + }, + "kilo-sonnet": { + "healthy": True, + "request_readiness": { + "ready": True, + "status": "ready-compat", + "reason": "route looks request-ready", + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + }, + }, + } + } + ) + }, + ) + + env = os.environ.copy() + env["PATH"] = f"{fake_bin}:{env['PATH']}" + env["FAIGATE_CONFIG_FILE"] = str(config_file) + env["FAIGATE_ENV_FILE"] = str(env_file) + env["FAIGATE_PYTHON"] = sys.executable + env["PYTHONPATH"] = str(REPO_ROOT) + + result = subprocess.run( + ["bash", "scripts/faigate-provider-probe"], + cwd=REPO_ROOT, + env=env, + check=True, + capture_output=True, + text=True, + ) + + assert "quota domain: billing=direct-api" in result.stdout + assert "separate from Claude app subscription meters" in result.stdout + assert "quota domain: billing=aggregator-unspecified" in result.stdout + assert "not marked quota-isolated" in result.stdout + + def test_faigate_doctor_prefers_same_lane_route_before_cluster_degrade(tmp_path: 
Path): config_file = tmp_path / "config.yaml" env_file = tmp_path / "faigate.env" @@ -2533,6 +2620,113 @@ def test_faigate_doctor_prefers_same_lane_route_before_cluster_degrade(tmp_path: assert "request-ready fallback guidance: same-lane=1 | cluster=0 | family=0" in result.stdout +def test_faigate_doctor_surfaces_quota_domain_notes(tmp_path: Path): + config_file = tmp_path / "config.yaml" + env_file = tmp_path / "faigate.env" + config_file.write_text( + """ +server: {} +providers: + anthropic-sonnet: + backend: anthropic-compat + api_key: ${ANTHROPIC_API_KEY} + base_url: https://api.anthropic.com/v1 + model: claude-sonnet-4-6 + kilo-sonnet: + backend: openai-compat + api_key: ${KILOCODE_API_KEY} + base_url: https://api.kilo.ai/api/gateway + model: anthropic/claude-sonnet-4.6 +""".strip(), + encoding="utf-8", + ) + env_file.write_text( + "ANTHROPIC_API_KEY=test-ant\nKILOCODE_API_KEY=test-kilo\n", + encoding="utf-8", + ) + + fake_bin = _write_fake_curl( + tmp_path, + { + "/health": json.dumps( + { + "status": "ok", + "summary": { + "providers_total": 2, + "providers_healthy": 1, + "providers_unhealthy": 1, + }, + "request_readiness": { + "providers_total": 2, + "providers_ready": 1, + "providers_not_ready": 1, + }, + "providers": { + "anthropic-sonnet": { + "backend": "anthropic-compat", + "api_key": "${ANTHROPIC_API_KEY}", + "healthy": False, + "lane": { + "family": "anthropic", + "canonical_model": "anthropic/sonnet-4.6", + "route_type": "direct", + }, + "request_readiness": { + "ready": False, + "status": "rate-limited", + "reason": "429 rate limited upstream", + "profile": "anthropic-native", + "compatibility": "native", + "probe_confidence": "high", + }, + }, + "kilo-sonnet": { + "backend": "openai-compat", + "api_key": "${KILOCODE_API_KEY}", + "healthy": True, + "lane": { + "family": "kilo", + "canonical_model": "anthropic/claude-sonnet-4.6", + "route_type": "aggregator", + }, + "request_readiness": { + "ready": True, + "status": "ready-compat", + "reason": 
"route looks request-ready", + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + }, + }, + }, + } + ), + "/v1/models": json.dumps({"data": []}), + }, + ) + + env = os.environ.copy() + env["PATH"] = f"{fake_bin}:{env['PATH']}" + env["FAIGATE_CONFIG_FILE"] = str(config_file) + env["FAIGATE_ENV_FILE"] = str(env_file) + env["FAIGATE_PYTHON"] = sys.executable + env["PYTHONPATH"] = str(REPO_ROOT) + + result = subprocess.run( + ["bash", "scripts/faigate-doctor"], + cwd=REPO_ROOT, + env=env, + capture_output=True, + text=True, + check=True, + ) + + assert "request-ready quota: anthropic-sonnet -> billing=direct-api" in result.stdout + assert "separate from Claude app subscription meters" in result.stdout + assert "request-ready quota: kilo-sonnet -> billing=aggregator-unspecified" in result.stdout + assert "not marked quota-isolated" in result.stdout + + def test_faigate_doctor_surfaces_provider_source_priority_actions(tmp_path: Path): config_file = tmp_path / "config.yaml" db_path = tmp_path / "faigate.db" From a4be630a3c61bb8c2fa181ccf33d6281caae3619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 15:22:03 +0200 Subject: [PATCH 06/13] feat(bridge): preserve tool result continuity --- faigate/bridges/anthropic/adapter.py | 19 +++--- tests/test_anthropic_api.py | 89 ++++++++++++++++++++++++++++ tests/test_anthropic_bridge.py | 64 ++++++++++++++++++++ 3 files changed, 164 insertions(+), 8 deletions(-) diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index 8922f47..f7ea74a 100644 --- a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -259,7 +259,7 @@ def _assistant_message_to_canonical(message: AnthropicMessage) -> CanonicalMessa def _user_message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessage]: - canonical_messages: list[CanonicalMessage] = [] + tool_messages: list[CanonicalMessage] = [] 
pending_text: list[AnthropicContentBlock] = [] for block in message.content: if block.type == "text": @@ -281,13 +281,16 @@ def _user_message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessa ) ) continue - if pending_text: - canonical_messages.append( - CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text)) - ) - pending_text = [] - canonical_messages.append(_anthropic_tool_result_to_canonical_message(block)) - if pending_text or not canonical_messages: + tool_messages.append(_anthropic_tool_result_to_canonical_message(block)) + + if not tool_messages: + return [CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text))] + + canonical_messages = list(tool_messages) + if pending_text: + # OpenAI-style tool continuity requires tool messages to follow the + # assistant tool_calls immediately. Preserve any surrounding user text + # as a trailing user turn once all tool_result blocks are emitted. canonical_messages.append( CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text)) ) diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index 0afdd50..695300c 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -312,6 +312,32 @@ def test_anthropic_messages_applies_builtin_claude_code_model_aliases( assert response.headers["x-faigate-bridge-model-resolved"] == "anthropic-sonnet" +def test_anthropic_messages_can_redirect_claude_code_model_ids_to_gateway_routes( + anthropic_api_client, +): + client, provider = anthropic_api_client + main_module._config.anthropic_bridge["model_aliases"]["claude-sonnet-4-6[1m]"] = "kilo-sonnet" + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet-4-6[1m]", + "messages": [ + { + "role": "user", + "content": "Prefer the gateway-managed sonnet lane", + } + ], + }, + ) + + assert response.status_code == 200 + metadata = provider.calls[0]["extra_body"]["metadata"] + assert metadata["requested_model_original"] 
== "claude-sonnet-4-6[1m]" + assert metadata["requested_model_resolved"] == "kilo-sonnet" + assert response.headers["x-faigate-bridge-model-resolved"] == "kilo-sonnet" + + def test_anthropic_messages_preserve_version_headers(anthropic_api_client): client, provider = anthropic_api_client @@ -417,6 +443,69 @@ def test_anthropic_messages_tolerate_tool_result_without_id(anthropic_api_client ] +def test_anthropic_messages_keep_tool_result_adjacent_before_user_text(anthropic_api_client): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", + "messages": [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_lookup", + "name": "lookup_doc", + "input": {"id": "design-note"}, + } + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Use the most relevant snippet"}, + { + "type": "tool_result", + "tool_use_id": "toolu_lookup", + "content": "Design note loaded", + }, + ], + }, + ], + }, + ) + + assert response.status_code == 200 + forwarded_messages = provider.calls[0]["messages"] + assert forwarded_messages == [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "toolu_lookup", + "type": "function", + "function": { + "name": "lookup_doc", + "arguments": '{"id":"design-note"}', + }, + } + ], + }, + { + "role": "tool", + "content": "Design note loaded", + "tool_call_id": "toolu_lookup", + }, + { + "role": "user", + "content": "Use the most relevant snippet", + }, + ] + + def test_anthropic_messages_rejects_non_text_blocks(anthropic_api_client): client, _provider = anthropic_api_client diff --git a/tests/test_anthropic_bridge.py b/tests/test_anthropic_bridge.py index 5acd3d7..242bb2e 100644 --- a/tests/test_anthropic_bridge.py +++ b/tests/test_anthropic_bridge.py @@ -177,6 +177,70 @@ def test_anthropic_request_degrades_tool_result_without_id_to_user_text(): ] +def test_anthropic_request_keeps_tool_results_adjacent_to_tool_calls(): + 
wire_request = parse_anthropic_messages_request( + { + "model": "claude-sonnet", + "messages": [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_lookup", + "name": "lookup_doc", + "input": {"id": "spec"}, + } + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Here is the context you asked for"}, + { + "type": "tool_result", + "tool_use_id": "toolu_lookup", + "content": "Spec body", + }, + ], + }, + ], + } + ) + + canonical = anthropic_request_to_canonical( + wire_request, + headers={"x-faigate-client": "claude-code"}, + ) + openai_body = canonical.to_openai_body() + + assert openai_body["messages"] == [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "toolu_lookup", + "type": "function", + "function": { + "name": "lookup_doc", + "arguments": '{"id":"spec"}', + }, + } + ], + }, + { + "role": "tool", + "content": "Spec body", + "tool_call_id": "toolu_lookup", + }, + { + "role": "user", + "content": "Here is the context you asked for", + }, + ] + + def test_detached_router_runs_bridge_dispatch(): executor = _FakeExecutor() response = TestClient(_build_test_app(executor)).post( From 2aceb4fac72a596f9573b4cc9248510fc52d0e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 19:40:22 +0200 Subject: [PATCH 07/13] feat(dashboard): add operator cockpit and streaming safety --- docs/DASHBOARD-IA.md | 455 +++++++ docs/IMPLEMENTATION-PLAN.md | 340 +++-- docs/LP-BRIEFING.md | 536 ++++++++ faigate/bridges/anthropic/adapter.py | 282 ++-- faigate/dashboard_web.py | 1778 ++++++++++++++++++++++++++ faigate/main.py | 67 +- faigate/providers.py | 17 +- tests/test_anthropic_api.py | 58 +- 8 files changed, 3233 insertions(+), 300 deletions(-) create mode 100644 docs/DASHBOARD-IA.md create mode 100644 docs/LP-BRIEFING.md create mode 100644 faigate/dashboard_web.py diff --git a/docs/DASHBOARD-IA.md b/docs/DASHBOARD-IA.md new file mode 100644 index 0000000..cbf3732 --- 
/dev/null +++ b/docs/DASHBOARD-IA.md @@ -0,0 +1,455 @@ +# fusionAIze Gate Dashboard IA + +## Why this exists + +Gate already has more runtime substance than the current dashboard surface + communicates. + +We can already answer many operator questions through: + +- `/health` +- `/api/providers` +- `/api/stats` +- `/api/traces` +- `/api/recent` +- provider catalog and refresh guidance +- lane family and request-readiness metadata + +The gap is no longer "do we have data?". +The gap is "do we present the right information in the right order for the + operator's real jobs?". + +This document turns that into a product-surface plan. + +## What to borrow from LLM AIRouter + +The useful parts are not the hosted funnel or the copy. +The useful parts are the clear information architecture and the way the docs + and dashboard map to operator jobs. + +Worth adapting: + +- a clearer top-level split between overview, providers, analytics, stacks, + request history, and setup +- docs that explain one concept per page instead of burying everything in one + long reference +- explicit quickstart, CLI-tools, troubleshooting, cache, circuit-breaker, and + cost-management surfaces +- a product surface that feels like a coherent tool, not a pile of local + endpoints + +Not worth copying directly: + +- the hosted-router onboarding shape +- opaque "stack" abstractions that hide the actual route and YAML reality +- claims about secret storage or hosted key management that do not match Gate's + local-first design + +## What Gate should do differently + +Gate should not become "LLM AIRouter, but local". + +Gate's product advantage is different: + +- local-first +- operator-owned config +- agent-native +- direct, aggregator, and local-worker routes in one scoring core +- canonical lanes instead of provider-string roulette +- explainable routing, not black-box stack magic + +That means the dashboard should optimize for these questions: + +1. Is my gateway safe and request-ready right now? +2. 
Which clients are burning money or taking the slow path? +3. Which lane and route did Gate choose, and why? +4. Which providers, aggregators, or local workers are unhealthy, stale, or + quota-coupled? +5. What should I change next? + +## Design principles + +### 1. Jobs first, metrics second + +Each page should answer one operator job clearly. + +The operator should not need to mentally reconstruct: + +- where health lives +- where cost lives +- where route explainability lives +- where setup help lives + +### 2. Confidence before detail + +The first screen should answer: + +- service up? +- request-ready? +- fallback pressure? +- premium spend? +- top issue? + +The second step can show tables. + +### 3. Explainability by default + +If Gate used an expensive route, downgraded a lane, skipped a provider, or + protected a premium quota, the UI should say so plainly. + +### 4. Progressive disclosure + +Overview should stay compact. +Provider, route, and request detail pages can go deep. + +### 5. Read-only first, action-linked second + +The web surface should stay operationally safe. +In the near term it should link clearly to helper CLIs and config-edit flows + rather than pretending to be a full control plane. + +### 6. Distinctive but disciplined design + +The dashboard should feel intentionally designed, not like a default admin + table. + +That means: + +- stronger visual hierarchy +- clearer section identity +- better grouping +- more purposeful typography and color +- fewer undifferentiated tables + +It does not mean turning Gate into a heavy frontend app. + +## Recommended dashboard areas + +## 1. Overview + +Primary operator job: + +- "Tell me if Gate is healthy, trustworthy, and worth touching right now." 
+ +Should show: + +- request-ready summary +- healthy vs unhealthy routes +- premium spend and 24h spend +- fallback share +- top alert +- top client +- top lane family +- "priority next" block + +Why this matters: + +- current cards already expose many of these signals +- they are just not grouped around one first-run confidence story yet + +## 2. Providers + +Primary operator job: + +- "Which upstreams exist, which ones are really usable, and which ones are the + weak links?" + +Should show: + +- provider identity and route type +- canonical lane and lane family +- request-readiness +- billing mode +- quota group / quota isolation +- health and runtime penalties +- freshness status +- route-add recommendation when a family is fragile + +Why this matters: + +- this is where Gate differentiates from simple proxy routers +- route-aware aggregator handling has to be visible here, not hidden in traces + +## 3. Clients + +Primary operator job: + +- "Which tools are using Gate, and which of them need cheaper or safer + defaults?" + +Should show: + +- client profile +- client tag +- request / token / cost totals +- failure rate +- average latency +- recommended scenario or routing mode +- top expensive / slow / failure-heavy clients + +Why this matters: + +- Gate is not only provider-native, it is client-native +- Claude Code, opencode, openclaw, Codex, and automation clients should be + legible as distinct traffic shapes + +## 4. Routes + +Primary operator job: + +- "Why did Gate choose this path instead of another one?" + +Should show: + +- chosen canonical lane +- chosen execution route +- selection path +- same-lane fallback vs cluster downgrade +- route penalty / cooldown / recovery state +- why-not-selected summaries for important skipped candidates + +Why this matters: + +- this is the bridge between "smart router" and "trustworthy operator tool" + +## 5. Analytics + +Primary operator job: + +- "Where is my money going, and how is traffic shifting over time?" 
+ +Should show: + +- spend by client +- spend by lane family +- spend by provider / route type +- token trends +- fallback trend +- premium escalation share +- cache hit share +- projected monthly spend + +Why this matters: + +- the current raw stats are strong enough to build this now +- the UI should make cost-management a first-class narrative, not a derived + exercise + +## 6. Request Log + +Primary operator job: + +- "What just happened?" + +Should show: + +- recent requests +- route traces +- selected provider +- model / lane +- layer and rule +- success / failure +- latency +- trace id + +Why this matters: + +- request history is the fastest debugging entry point +- it should be easy to pivot from recent request -> trace -> provider detail + +## 7. Catalog + +Primary operator job: + +- "Are my assumptions about providers still fresh enough to trust?" + +Should show: + +- tracked sources +- due refreshes +- stale benchmark assumptions +- pricing drift alerts +- provider discovery guidance +- explicit next review action + +Why this matters: + +- Gate's provider-catalog and freshness model are already stronger than most + router surfaces +- this should be productized instead of buried + +## 8. Integrations + +Primary operator job: + +- "How do I wire my actual tools into Gate?" + +Should show: + +- Claude Code +- opencode +- openclaw +- Codex CLI +- Cline / Continue / Cursor-style OpenAI-compatible paths +- n8n and scripts + +Should include: + +- copy/paste env vars +- recommended model ids or routing modes +- note on when to use `auto` vs `coding-auto` vs `premium` + +Why this matters: + +- LLM AIRouter is right that tool setup deserves first-class visibility +- Gate already supports more surfaces than "just one CLI", so this should be a + stronger differentiator for us + +## 9. Troubleshooting + +Primary operator job: + +- "Something is wrong. What is the shortest path to the fix?" 
+ +Should show: + +- unauthorized / missing key +- provider unhealthy / request-not-ready +- quota-domain confusion +- slow responses +- model not found +- bridge-compatibility mismatch +- local-worker reachability + +Why this matters: + +- user-centered design is not only about happy-path polish +- it is also about fast recovery when things break + +## Suggested navigation model + +Recommended top-level web navigation: + +- Overview +- Providers +- Clients +- Routes +- Analytics +- Request Log +- Catalog +- Integrations +- Troubleshooting + +Recommended shell helper mapping: + +- `faigate-dashboard --overview` +- `faigate-dashboard --providers` +- `faigate-dashboard --clients` +- `faigate-dashboard --activity` +- `faigate-dashboard --alerts` +- future: + - `--routes` + - `--catalog` + - `--integrations` + - `--troubleshooting` + +## Near-term implementation shape + +### `v1.15.x` first slice + +Ship the information architecture before chasing fancy visuals. + +Minimum meaningful surface: + +- overview +- providers +- clients +- routes +- analytics +- request log + +These can still be read-only and no-build. + +### `v1.15.x` second slice + +Add stronger operator guidance: + +- priority-next cards +- clearer expensive-client and premium-escalation flags +- quota-domain and billing-mode visibility +- route drilldowns with same-lane vs downgraded explanation + +### `v1.15.x` third slice + +Add setup and docs integration: + +- integrations page +- troubleshooting page +- quick links into helper CLIs and relevant docs + +## Design direction + +The current dashboard should evolve from "dense local admin page" to "operator + cockpit". 
+ +Recommended visual moves: + +- stronger left-rail or top-nav sectioning +- overview cards grouped by confidence, spend, traffic, and actions +- more contrast between healthy, degraded, stale, and expensive states +- more deliberate typography pairing between headings and tabular detail +- fewer giant all-purpose tables +- compact detail panels that answer "why this matters" inline + +The target feeling: + +- serious +- technical +- calm under pressure +- more distinctive than default admin templates +- still lightweight enough to ship as part of Gate + +## Licensing boundary + +### Tier A — Apache 2.0 + +Should include: + +- local read-only dashboard +- provider, client, route, and request-log views +- local analytics from Gate's own metrics +- integrations and troubleshooting pages +- catalog freshness and route-readiness visibility + +These features strengthen adoption and product clarity. +They should stay in the open Gate surface. + +### Tier B — source-available or premium packs + +Reasonable later candidates: + +- saved custom views and operator alerts +- richer cost analytics overlays +- policy simulation and route what-if tools +- budget packs and team-aware dashboards + +### Tier C — commercial control plane + +Reasonable later candidates: + +- shared multi-instance dashboards +- org-wide governance and audit +- Grid-backed fleet visibility +- RBAC and centralized rollout controls + +## Success criteria + +The dashboard work is successful when a new operator can answer these questions + within a few minutes: + +1. Which client is costing me the most? +2. Which provider or route is currently the weakest link? +3. Are expensive lanes being used because they are needed, or because my + defaults are bad? +4. Can I explain the last major routing decision? +5. What is the next safest action to improve cost, reliability, or setup? 
diff --git a/docs/IMPLEMENTATION-PLAN.md b/docs/IMPLEMENTATION-PLAN.md index 0bf7d25..23c11af 100644 --- a/docs/IMPLEMENTATION-PLAN.md +++ b/docs/IMPLEMENTATION-PLAN.md @@ -1,10 +1,16 @@ # Implementation Plan +## Goal + +Turn Gate's existing routing intelligence into the default daily-use behavior +for Claude Code, opencode, openclaw, and similar clients, then expose that +intelligence through a stronger standalone product surface. + ## Scope -This document turns the current roadmap into the next concrete release lines. +This document turns the roadmap into the next concrete release lines. -It is intentionally biased toward the biggest product levers: +It stays biased toward the biggest product levers: - Claude-native daily usability - routing trust and operator explainability @@ -20,8 +26,6 @@ It is not a parking lot for every possible feature. - do routing explainability before stronger live adaptation - keep `v2.x` work behind clean product boundaries -## Release Sequence - ## Parity Definitions ### Full Anthropic parity @@ -51,7 +55,8 @@ Working definition: ### Full Claude Desktop parity -Daily-use parity for Claude Desktop against local Gate where endpoint override is supported. +Daily-use parity for Claude Desktop against local Gate where endpoint override +is supported. Working definition: @@ -61,192 +66,143 @@ Working definition: ## Release Sequence -### `v1.14.x` - Anthropic protocol hardening plus Claude Code daily-use parity - -Goal: - -- move the Anthropic bridge from early-adopter-safe to comfortable for everyday Claude Code use -- close the highest-value Anthropic protocol gaps at the same time - -Why this matters first: - -- it is the biggest remaining daily workflow gap -- it unlocks real Claude Code testing without client reshaping -- it sharpens the product story around one local endpoint for both OpenAI-native and Claude-native clients - -Target slices: - -1. SSE streaming parity for `/v1/messages` -2. 
stronger Anthropic block compatibility - - richer `tool_use` - - richer `tool_result` - - clearer unsupported block handling -3. stronger client-facing parity behaviors - - stop reasons - - version/beta handling - - error mapping consistency -4. expanded client-near validation - - Claude Code workflow validation - - Claude Desktop workflow notes and smoke steps +### `v1.14.x` - coding auto modes and Claude daily-use trust + +Primary outcome: + +- the cheapest capable route becomes the default for coding traffic instead of + hardwiring Sonnet or Opus too early + +Implementation slices: + +1. map Claude-native model ids to routing intent instead of direct frontier providers + - `claude-sonnet-* -> auto` + - `claude-opus-* -> premium` + - `claude-haiku-* -> eco` +2. add clear coding routing modes + - `coding-auto` + - `coding-fast` + - `coding-premium` +3. align default client profiles + - `claude` + - `opencode` + - `openclaw` + - `codex` +4. harden Anthropic streaming parity + - SSE streaming parity for `/v1/messages` + - mid-stream failure handling + - stop-reason correctness + - stronger `tool_use` / `tool_result` continuity across longer sessions +5. validate against real workflows + - Claude Code + - opencode + - openclaw Success bar: -- Claude Code can be pointed at local Gate and used for normal iterative coding flows with acceptable behavior +- Claude Code can be pointed at local Gate and used for normal iterative coding + flows with acceptable behavior - streaming and tool-oriented workflows do not immediately fall off the happy path - -Deliberately not required: - -- exact provider-side token counting -- full parity claims across every Anthropic client feature - -### `v1.15.x` - Claude Desktop parity or adaptive orchestration trust - -This release should be chosen by evidence, not by taste. 
- -Decision rule: - -- if Claude Desktop local usage validates as the next real operator lever, do the desktop parity line first -- otherwise take the routing-value line first - -#### Option A: Claude Desktop parity - -Goal: - -- make Claude Desktop a genuinely usable local client against Gate - -Target slices: - -1. endpoint-override and config-path validation for supported desktop flows -2. desktop-specific session and response compatibility hardening -3. clearer local testing and troubleshooting instructions -4. release-readiness validation for desktop workflows - -Success bar: - -- Claude Desktop can be used locally against Gate without feeling like a fragile workaround - -Current gating note: - -- only take this line next if [Claude Desktop feasibility](./CLAUDE-DESKTOP-FEASIBILITY.md) clears the endpoint-override and repeatable-local-workflow bar - -#### Option B: Adaptive orchestration trust - -Goal: - -- make route selection understandable and trustworthy enough that operators rely on it instead of overriding it by hand - -Why it is second: - -- the routing engine already does more than the docs and surfaces make obvious -- the biggest leverage now is visibility, structured lane semantics, and safer aggregator handling - -Target slices: - -1. canonical lane visibility - - route preview speaks in lanes first, transports second - - dashboard and provider views summarize lane families clearly -2. route-aware aggregator handling - - clearer mirror/same-lane semantics - - quota-isolated vs quota-coupled route handling - - stronger aggregator readiness language -3. benchmark and cost clusters - - structured cluster metadata - - freshness/review age - - operator-visible inputs -4. 
operator explainability - - why lane won - - why route won - - why same-lane mirror was skipped - - why downgrade happened - -Success bar: - -- operators can look at a route decision and understand it without reading source code -- aggregator handling feels intentional instead of “maybe a fallback” - -### `v1.16.x` - Remaining parity or live adaptation under pressure - -Goal: - -- adapt routing under quota, latency, and failure pressure without becoming opaque - -Why it is third: - -- dynamic adaptation is only worth shipping once lane and route semantics are already trusted -- any still-open Claude Desktop parity work should be resolved before promising a broader "full parity" story - -Target slices: - -1. live pressure scoring - - quota pressure - - latency inflation - - failure pressure - - fallback pressure -2. same-lane-first reactions - - mirror route before weaker cluster -3. operator controls - - conservative defaults - - visible adaptation state - - clear cooldown and recovery behavior -4. richer traces - - actual attempted route order - - same-lane fallback vs cluster degrade +- coding clients enter through clear auto modes instead of muddled provider-first + defaults + +Guardrails: + +- do not hide premium escalations +- do not bypass the scoring engine with provider aliases unless the operator + asked for an explicit concrete provider +- keep bridge routing inside the same core, not as a parallel router + +### `v1.15.x` - product surface and operator trust + +Primary outcome: + +- Gate becomes legible as a standalone product, not just a strong core hidden + behind config files +- the dashboard answers operator jobs in a sane order instead of dumping one + long admin page + +Implementation slices: + +1. overview dashboard + - request-readiness first + - provider health + - spend and token trend + - top alerts + - priority-next actions +2. providers surface + - route type + - lane family + - quota group + - billing mode + - readiness +3. 
clients surface + - cost by client + - latency by client + - profile recommendations + - premium-escalation hotspots +4. routes surface + - chosen lane + - chosen execution route + - same-lane fallback vs downgrade + - why selected / why not selected +5. analytics surface + - cost by client + - cost by stack + - cost by lane family + - routing posture distribution + - downgrade and fallback visibility +6. request-log and route drilldowns + - recent request stream + - trace-first debugging + - provider, client, and lane pivots +7. integrations and troubleshooting surface + - Claude Code + - opencode + - openclaw + - Codex / Cursor / Continue / automation setups + - common symptom-to-fix views + +Design constraints: + +- keep the web surface read-heavy and operationally safe first +- do not hide YAML, traces, or helper CLIs behind opaque UI abstractions +- borrow the clarity of LLM AIRouter's docs and page structure, not its + hosted-router assumptions +- make Gate feel more intentional and polished than a default admin panel + +Reference: + +- [Dashboard IA](./DASHBOARD-IA.md) + +### `v1.16.x` - adaptive orchestration trust + +Primary outcome: + +- richer route decisions without turning Gate into a black box + +Implementation slices: + +1. benchmark and cost cluster refinement +2. live pressure adaptation under quota, latency, and failure +3. stronger operator explainability per routing decision +4. same-lane-first reactions before weaker-cluster degrade +5. 
richer traces that show attempted route order and downgrade reasons Success bar: -- route switching under pressure is visible, understandable, and mostly unsurprising to operators - -## Deferred Lines - -### Exact provider-side token counting - -Recommendation: - -- defer until after `v1.14.x` -- implement first for the providers that expose reliable count endpoints or deterministic usage feedback -- keep the current bridge estimate until a route-specific exact path exists - -### Virtual keys and per-key budgets - -Recommendation: - -- likely next after the `v1.16.x` trust line if operator-scale controls become the top demand -- this is the prerequisite for later team/org budget hierarchy - -### OTEL trace-context forwarding - -Recommendation: - -- can move earlier than other `v2.x` items -- good candidate for a narrower cross-cutting observability release if demand is high - -### Team and org budget hierarchy - -Recommendation: - -- defer until virtual keys and spend ledger are genuinely stable - -### Grid shared-state coordination - -Recommendation: - -- design the contract early -- implement later -- keep Gate itself free of hard Redis/Postgres coupling - -### Semantic caching - -Recommendation: - -- do not start before exact caching plus usage evidence +- operators can look at a route decision and understand it without reading + source code +- route switching under pressure is visible, understandable, and mostly + unsurprising to operators ## Concrete Next Actions ### Immediate -1. merge docs cleanup and roadmap reset -2. open a focused `v1.14.x` feature branch for bridge streaming and Claude-native parity hardening -3. define the `v1.14.x` validation matrix before the implementation expands +1. finish the `v1.14.x` validation matrix +2. close remaining Claude-native daily-use gaps under real workflows +3. 
keep product-surface work operator-first and trace-friendly ### `v1.14.x` validation matrix @@ -279,6 +235,32 @@ Use this matrix when deciding whether a release truly moved parity forward: | Exact token counting | Strongly preferred | Helpful | Helpful | | Real client workflow validation | Not sufficient alone | Required | Required | +## Deferred Lines + +### Exact provider-side token counting + +Recommendation: + +- defer until after `v1.14.x` +- implement first for providers that expose reliable count endpoints or + deterministic usage feedback +- keep the current bridge estimate until a route-specific exact path exists + +### Virtual keys and per-key budgets + +Recommendation: + +- likely next after the `v1.16.x` trust line if operator-scale controls become + the top demand + +### OTEL trace-context forwarding + +Recommendation: + +- can move earlier than other `v2.x` items +- good candidate for a narrower cross-cutting observability release if demand + is high + ## Open Questions - which Claude Code workflows are still meaningfully blocked after streaming lands? diff --git a/docs/LP-BRIEFING.md b/docs/LP-BRIEFING.md new file mode 100644 index 0000000..c3651bd --- /dev/null +++ b/docs/LP-BRIEFING.md @@ -0,0 +1,536 @@ +# fusionAIze Gate Landing Page Briefing + +## Purpose + +This briefing turns Gate's current roadmap, dashboard direction, and licensing + logic into a clean landing-page concept for a public Gate product page under + the fusionAIze brand. + +It is written for: + +- ChatGPT or another creative assistant producing first-pass landing-page copy +- frontend design exploration +- future website implementation on `fusionaize.com` + +The goal is not to imitate hosted router products. +The goal is to present Gate as a serious, local-first, agent-native routing + product with stronger operator trust and a clearer product surface. 
+ +## Product truth + +These are the statements the page should be able to support without hype: + +- Gate gives operators one local endpoint for AI traffic +- Gate routes across direct providers, aggregators, and local workers +- Gate supports OpenAI-compatible clients today and an Anthropic bridge as an + opt-in early-adopter line +- Gate already has client profiles, route introspection, provider metadata, and + operational traces +- Gate is local-first and operator-owned +- Gate is moving toward cheapest-capable routing as the default coding posture +- Gate is designed for Claude Code, opencode, openclaw, automation clients, and + serious operator workflows + +Do not claim: + +- full Anthropic parity today +- full Claude Desktop parity today +- hosted control-plane features that do not exist +- team budgets, Grid, or semantic caching as shipped product features + +## Core positioning + +Primary positioning sentence: + +> fusionAIze Gate is the local-first AI gateway that routes every request to +> the cheapest capable path you can trust. + +Short version: + +> One local endpoint. Direct providers, aggregators, and local workers in one +> routing core. + +Expanded positioning: + +> fusionAIze Gate gives Claude-native, OpenAI-compatible, and agent-native +> clients one local endpoint, then routes requests across direct providers, +> aggregator paths, and local workers with explainable policy, health-aware +> fallback, and operator-owned control. 
+ +## Strategic angle + +The page should make one thing obvious: + +Gate is not trying to be: + +- a hosted black-box router +- a proxy that hides routing reality behind "stack" marketing +- a generic agent framework + +Gate is trying to be: + +- a trustworthy local routing plane +- a product operators can actually reason about +- the clean bridge between developer tools, AI clients, and provider reality + +## Audience + +Primary audiences: + +- technical solo operators running multiple AI tools locally +- AI-native developers using Claude Code, opencode, Codex CLI, openclaw, Cline, + Continue, Cursor-style surfaces, and automation tools +- small teams that want local control before they need a hosted control plane + +Secondary audiences: + +- consultancies and internal AI enablement teams +- engineering leaders who care about cost, fallback, and provider flexibility +- people evaluating alternatives to OpenRouter, LiteLLM, ClawRouter, and hosted + router products + +## What makes Gate memorable + +The page needs one unforgettable idea: + +> Gate does not just forward model traffic. It decides the safest and cheapest +> capable route, and shows you why. + +That memorable idea should show up in three layers: + +- routing intelligence +- operator trust +- local-first ownership + +## Competitive thesis + +### vs OpenRouter + +OpenRouter is hosted and black-box from the operator's point of view. + +Gate should contrast with: + +- local-first runtime +- operator-owned credentials and config +- explainable route choice +- direct provider traffic where possible + +Message: + +> Keep your keys. Keep your traffic local. Keep visibility into the route. + +### vs ClawRouter + +ClawRouter is closer philosophically because it is agent-native and routing + aware. 
+ +Gate should differentiate on: + +- deeper provider intelligence +- canonical lanes +- route-aware aggregator handling +- broader operator surface +- stronger local analytics and dashboard direction + +Message: + +> Agent-native routing, but with a fuller operator plane. + +### vs LLM AIRouter + +LLM AIRouter's useful signal is not its hosted model. +The useful signal is its surface clarity: + +- overview +- providers +- stacks/routes +- analytics +- CLI setup +- troubleshooting +- cache and circuit-breaker docs + +Gate should borrow that clarity while staying honest about its different + product boundary. + +Message: + +> Product-grade surface clarity, without hosted-router compromises. + +## Landing-page message hierarchy + +### Hero + +Primary headline options: + +- One Local Endpoint. Every AI Route Under Control. +- Route Every AI Request to the Cheapest Capable Path. +- The Local-First Gateway for Claude, OpenAI, and Agent-Native Workflows. + +Recommended subheadline: + +> fusionAIze Gate routes Claude Code, opencode, openclaw, scripts, and local +> automation across direct providers, aggregators, and local workers with +> explainable routing, health-aware fallback, and operator-owned control. + +Primary CTA: + +- Run Gate locally + +Secondary CTA: + +- Explore the operator dashboard + +Micro-proof line: + +> Local-first. Agent-native. Explainable by default. + +### Section 1: Why Gate exists + +This section should explain the real operator pain: + +- too many tools +- too many provider surfaces +- hidden cost spikes +- weak fallback behavior +- no trustworthy answer to "why did it use this model?" + +Suggested message: + +> Most routers give you one endpoint. +> Gate gives you one endpoint and one routing brain you can inspect. + +### Section 2: Cheapest capable routing + +This is the product promise the page should lean into hardest. 
+ +The page should explain that Gate is moving toward: + +- `eco` +- `auto` +- `premium` +- `coding-auto` +- `coding-fast` +- `coding-premium` + +These should be explained as routing intent, not provider lock-in. + +Suggested framing: + +- simple prompts go to cheaper capable lanes +- coding defaults stay cost-aware +- premium lanes are used when complexity or reliability justify them +- explicit client model picks still work, but resolve through routing intent + +### Section 3: One routing core across surfaces + +This section should make Gate feel broader than "just another CLI proxy". + +Show: + +- Claude Code +- OpenAI-compatible tools +- opencode +- openclaw +- Codex CLI +- scripts and automations +- local workers + +Suggested message: + +> One routing core for Claude-native, OpenAI-compatible, and agent-native +> clients. + +### Section 4: Operator dashboard + +This is where the current dashboard redesign matters. + +The page should show Gate's dashboard as a real product surface with distinct + operator jobs: + +- Overview +- Providers +- Clients +- Routes +- Analytics +- Catalog +- Integrations + +The pitch should not be "beautiful charts". +The pitch should be: + +> See health, spend, route choice, and integration status in one local cockpit. + +### Section 5: Why local-first matters + +This section should hit security and ownership clearly: + +- no hosted dependency required +- operator-owned configuration +- no black-box stack abstraction +- direct provider traffic when configured +- local observability + +Suggested message: + +> Gate is designed for operators who want control without giving up routing +> intelligence. 
+ +### Section 6: Explainability and trust + +This section should make the route-intelligence story concrete: + +- chosen lane +- chosen route +- same-lane fallback vs downgrade +- billing mode and quota domain visibility +- provider freshness and readiness + +Suggested message: + +> If Gate chose an expensive route, a fallback route, or a weaker route, the +> operator should be able to see why. + +### Section 7: Product stack and licensing + +This section should clarify the fusionAIze strategy without sounding defensive. + +Suggested framing: + +- Gate core stays open and adoption-friendly +- advanced policy, control-plane, and org-governance layers belong in higher + tiers +- the product boundary is deliberate, not accidental + +## Dashboard direction for the LP and product surface + +The new Gate dashboard should feel like: + +- a calm operator cockpit +- a financial or trading dashboard in discipline, not in hype +- stronger visual hierarchy than a default admin panel +- serious, trustworthy, and brand-aligned + +It should not feel like: + +- a neon gamer UI +- a startup toy +- a default Tailwind admin clone +- a fake enterprise dashboard with lots of empty chrome + +### Recommended visual direction + +Use the fusionAIze brand in a dark operator variant: + +- deep blue-black backgrounds derived from the dark/navy family +- `#0052CC` as the primary electric action color +- `#C4D900` as the sparing intelligence / success / progress accent +- `#FFAA19` only for action or alert emphasis +- restrained glass, glow, and gradient treatment +- sharp typography and dense-but-readable data layout + +The closest reference mood is: + +- financial dashboard +- trading terminal +- control room + +Not: + +- cyberpunk chaos +- purple SaaS gradient wallpaper + +### Recommended dashboard IA + +The LP and the real product surface should align around these areas: + +1. Overview +2. Providers +3. Clients +4. Routes +5. Analytics +6. Request Log +7. Catalog +8. Integrations +9. 
Troubleshooting + +This information architecture should also drive future docs and product copy. + +## What to adapt from LLM AIRouter + +Useful to adapt: + +- one concept per doc page +- visible quickstart +- visible CLI tools page +- visible providers page +- visible stacks/routes page +- visible cost-management, cache, circuit-breaker, troubleshooting, and API + reference surfaces +- dashboard broken into sane operator jobs + +Do not adapt directly: + +- hosted-router setup funnel +- claims around server-side key custody as the core story +- opaque "stack" abstraction if it hides the real route or YAML truth + +Gate should translate these into local-first equivalents: + +- Integrations instead of hosted "connect provider" +- Route views instead of black-box stack cards +- Troubleshooting that respects local runtime and helper CLIs +- Dashboard views that expose route, lane, and quota reality + +## Recommended LP structure + +1. Hero +2. Trusted by operators who need one endpoint and real control +3. Cheapest capable routing explained simply +4. Supported surfaces and clients +5. Dashboard / operator cockpit +6. Local-first security and ownership +7. Explainable routing and provider intelligence +8. Integration examples +9. Open-core product boundary +10. CTA and install path + +## Recommended proof points + +Use proof points that are operational and concrete: + +- one local endpoint +- direct + aggregator + local-worker routes +- cheapest-capable routing modes +- route introspection +- filtered traces and recent requests +- provider readiness and quota-domain visibility +- Anthropic bridge as opt-in early-adopter line +- dashboard surfaces for overview, providers, clients, routes, analytics, and + integrations + +Avoid proof points that overclaim: + +- "full parity" +- "perfect token counting" +- "multi-instance enterprise control plane" + +## Recommended screenshots or product visuals + +The most useful future LP visuals would be: + +1. 
Overview cockpit + - request-ready + - premium spend + - top client + - top issue + +2. Providers view + - route type + - billing mode + - quota group + - readiness + +3. Routes view + - chosen lane + - chosen route + - why selected + - why not selected + +4. Integrations view + - Claude Code + - OpenAI-compatible tools + - opencode / openclaw + - setup snippets + +5. Analytics view + - cost by client + - cost by lane family + - traffic trend + - fallback share + +## LP copy themes + +Themes worth repeating: + +- cheapest capable by default +- operator-owned routing +- local-first and secure +- agent-native +- explainable, not black-box +- direct, aggregator, and local in one core +- built for real tools, not only demos + +Themes to avoid overusing: + +- "revolutionary" +- "all-in-one AI platform" +- "autonomous everything" +- vague AI-consulting phrasing + +## Licensing and product stack boundary + +This should stay clean and easy to communicate. + +### Tier A — Apache 2.0 + +Open Gate should include: + +- local runtime +- routing core +- bridge surfaces +- local dashboard +- provider, client, route, and request-log views +- integrations and troubleshooting pages +- client profiles, routing modes, stacks, traces, helper CLIs + +### Tier B — source-available / premium packs + +Reasonable later premium layers: + +- advanced saved policies +- richer analytics overlays +- team-aware budget and retention packs +- advanced observability packs +- policy simulation and what-if tooling + +### Tier C — commercial control plane + +Reasonable later commercial layers: + +- multi-instance coordination +- org governance +- RBAC and audit +- Grid-backed shared-state features +- centralized rollout and fleet visibility + +Short product-stack phrasing: + +> Open what accelerates adoption. Protect what creates operational moat. 
+ +## Suggested prompt for ChatGPT or design exploration + +Use this prompt as the starting point: + +> Design a landing page for fusionAIze Gate, a local-first AI gateway for +> Claude-native, OpenAI-compatible, and agent-native clients. The page should +> communicate that Gate routes each request to the cheapest capable path across +> direct providers, aggregators, and local workers, while keeping operator +> control and explainability. The visual style should feel like a dark +> financial-dashboard cockpit: calm, precise, technical, premium, and +> trustworthy. Use the fusionAIze brand palette with deep blues, `#0052CC` as +> the primary action color, `#C4D900` as a restrained accent, and `#FFAA19` +> only for emphasis. Avoid generic AI SaaS aesthetics, purple gradients, and +> cookie-cutter admin UI. The page should include sections for hero, cheapest +> capable routing, supported clients, operator dashboard, local-first security, +> explainable routing, integrations, and open-core product boundary. + +## Success criteria + +The landing page is successful when a technical visitor understands within a + few seconds: + +1. Gate is local-first +2. Gate routes intelligently instead of just proxying blindly +3. Gate works across real coding and agent-native clients +4. Gate routes to the cheapest capable path and explains why +5. 
Gate has a real product surface, not just YAML and raw endpoints diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index f7ea74a..86e1606 100644 --- a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -42,6 +42,7 @@ class _AnthropicStreamToolState: tool_use_id: str | None = None name: str | None = None started: bool = False + closed: bool = False def anthropic_request_to_canonical( @@ -527,144 +528,201 @@ async def openai_sse_to_anthropic( }, ) - async for raw_line in stream: - line = raw_line.decode("utf-8", errors="replace").strip() - if not line or not line.startswith("data:"): - continue - payload_text = line[5:].strip() - if not payload_text: - continue - if payload_text == "[DONE]": - break - - try: - payload = json.loads(payload_text) - except json.JSONDecodeError: - continue + try: + async for raw_line in stream: + line = raw_line.decode("utf-8", errors="replace").strip() + if not line or not line.startswith("data:"): + continue + payload_text = line[5:].strip() + if not payload_text: + continue + if payload_text == "[DONE]": + break - if isinstance(payload, dict) and "error" in payload: - yield anthropic_sse_event( - "error", - { - "type": "error", - "error": payload.get("error") - or {"type": "api_error", "message": "Upstream error"}, - }, - ) - return - - usage_payload = payload.get("usage") or {} - prompt_tokens = int(usage_payload.get("prompt_tokens") or 0) - completion_tokens = int(usage_payload.get("completion_tokens") or 0) - if prompt_tokens: - usage["input_tokens"] = prompt_tokens - if completion_tokens: - usage["output_tokens"] = completion_tokens - - choices = payload.get("choices") or [] - if not choices: - continue - choice = choices[0] or {} - delta = choice.get("delta") or {} - finish_reason = str(choice.get("finish_reason") or "").strip() or None + try: + payload = json.loads(payload_text) + except json.JSONDecodeError: + continue - text_delta = delta.get("content") - 
if isinstance(text_delta, str) and text_delta: - if tool_states and not tool_blocks_closed: + if isinstance(payload, dict) and "error" in payload: + if text_block_started and not text_block_closed: + yield anthropic_sse_event( + "content_block_stop", + {"type": "content_block_stop", "index": 0}, + ) + text_block_closed = True for tool_index in sorted(tool_states): - if tool_states[tool_index].started: + state = tool_states[tool_index] + if state.started and not state.closed: yield anthropic_sse_event( "content_block_stop", { "type": "content_block_stop", "index": _anthropic_tool_index( tool_index, - text_block_started=True, + text_block_started, ), }, ) + state.closed = True tool_blocks_closed = True - if not text_block_started: yield anthropic_sse_event( - "content_block_start", + "error", { - "type": "content_block_start", - "index": 0, - "content_block": {"type": "text", "text": ""}, + "type": "error", + "error": payload.get("error") + or {"type": "api_error", "message": "Upstream error"}, }, ) - text_block_started = True - output_tokens += _estimate_text_tokens(text_delta) - usage["output_tokens"] = max(usage["output_tokens"], output_tokens) - yield anthropic_sse_event( - "content_block_delta", - { - "type": "content_block_delta", - "index": 0, - "delta": {"type": "text_delta", "text": text_delta}, - }, - ) - - delta_tool_calls = delta.get("tool_calls") or [] - if isinstance(delta_tool_calls, list) and delta_tool_calls: - if text_block_started and not text_block_closed: - yield anthropic_sse_event( - "content_block_stop", - {"type": "content_block_stop", "index": 0}, - ) - text_block_closed = True - for tool_delta in delta_tool_calls: - if not isinstance(tool_delta, dict): - continue - raw_index = int(tool_delta.get("index") or 0) - state = tool_states.setdefault( - raw_index, _AnthropicStreamToolState(index=raw_index) - ) - function = tool_delta.get("function") or {} - if tool_delta.get("id"): - state.tool_use_id = str(tool_delta["id"]) - if 
function.get("name"): - state.name = str(function["name"]) - if not state.started and state.name: - state.started = True + return + + usage_payload = payload.get("usage") or {} + prompt_tokens = int(usage_payload.get("prompt_tokens") or 0) + completion_tokens = int(usage_payload.get("completion_tokens") or 0) + if prompt_tokens: + usage["input_tokens"] = prompt_tokens + if completion_tokens: + usage["output_tokens"] = completion_tokens + + choices = payload.get("choices") or [] + if not choices: + continue + choice = choices[0] or {} + delta = choice.get("delta") or {} + finish_reason = str(choice.get("finish_reason") or "").strip() or None + + text_delta = delta.get("content") + if isinstance(text_delta, str) and text_delta: + if tool_states and not tool_blocks_closed: + for tool_index in sorted(tool_states): + state = tool_states[tool_index] + if state.started and not state.closed: + yield anthropic_sse_event( + "content_block_stop", + { + "type": "content_block_stop", + "index": _anthropic_tool_index( + tool_index, + text_block_started=True, + ), + }, + ) + state.closed = True + tool_blocks_closed = True + if not text_block_started: yield anthropic_sse_event( "content_block_start", { "type": "content_block_start", - "index": _anthropic_tool_index( - raw_index, - text_block_started, - ), - "content_block": { - "type": "tool_use", - "id": state.tool_use_id or f"toolu_{uuid4().hex[:24]}", - "name": state.name, - "input": {}, - }, + "index": 0, + "content_block": {"type": "text", "text": ""}, }, ) - raw_arguments = function.get("arguments") - if state.started and isinstance(raw_arguments, str) and raw_arguments: + text_block_started = True + output_tokens += _estimate_text_tokens(text_delta) + usage["output_tokens"] = max(usage["output_tokens"], output_tokens) + yield anthropic_sse_event( + "content_block_delta", + { + "type": "content_block_delta", + "index": 0, + "delta": {"type": "text_delta", "text": text_delta}, + }, + ) + + delta_tool_calls = 
delta.get("tool_calls") or [] + if isinstance(delta_tool_calls, list) and delta_tool_calls: + if text_block_started and not text_block_closed: yield anthropic_sse_event( - "content_block_delta", - { - "type": "content_block_delta", - "index": _anthropic_tool_index( - raw_index, - text_block_started, - ), - "delta": { - "type": "input_json_delta", - "partial_json": raw_arguments, - }, - }, + "content_block_stop", + {"type": "content_block_stop", "index": 0}, ) + text_block_closed = True + for tool_delta in delta_tool_calls: + if not isinstance(tool_delta, dict): + continue + raw_index = int(tool_delta.get("index") or 0) + state = tool_states.setdefault( + raw_index, _AnthropicStreamToolState(index=raw_index) + ) + function = tool_delta.get("function") or {} + if tool_delta.get("id"): + state.tool_use_id = str(tool_delta["id"]) + if function.get("name"): + state.name = str(function["name"]) + if not state.started and state.name: + state.started = True + state.tool_use_id = state.tool_use_id or f"toolu_{uuid4().hex[:24]}" + yield anthropic_sse_event( + "content_block_start", + { + "type": "content_block_start", + "index": _anthropic_tool_index( + raw_index, + text_block_started, + ), + "content_block": { + "type": "tool_use", + "id": state.tool_use_id, + "name": state.name, + "input": {}, + }, + }, + ) + raw_arguments = function.get("arguments") + if state.started and isinstance(raw_arguments, str) and raw_arguments: + yield anthropic_sse_event( + "content_block_delta", + { + "type": "content_block_delta", + "index": _anthropic_tool_index( + raw_index, + text_block_started, + ), + "delta": { + "type": "input_json_delta", + "partial_json": raw_arguments, + }, + }, + ) - if finish_reason: - stop_reason = map_stop_reason_to_anthropic( - finish_reason, - has_tool_calls=bool(tool_states), + if finish_reason: + stop_reason = map_stop_reason_to_anthropic( + finish_reason, + has_tool_calls=bool(tool_states), + ) + except Exception as exc: + if text_block_started and not 
text_block_closed: + yield anthropic_sse_event( + "content_block_stop", + {"type": "content_block_stop", "index": 0}, ) + text_block_closed = True + for tool_index in sorted(tool_states): + state = tool_states[tool_index] + if state.started and not state.closed: + yield anthropic_sse_event( + "content_block_stop", + { + "type": "content_block_stop", + "index": _anthropic_tool_index( + tool_index, + text_block_started, + ), + }, + ) + state.closed = True + yield anthropic_sse_event( + "error", + { + "type": "error", + "error": { + "type": "api_error", + "message": f"Streaming request failed unexpectedly: {exc}", + }, + }, + ) + return if text_block_started and not text_block_closed: yield anthropic_sse_event( @@ -673,7 +731,7 @@ async def openai_sse_to_anthropic( ) for tool_index in sorted(tool_states): state = tool_states[tool_index] - if state.started: + if state.started and not state.closed: yield anthropic_sse_event( "content_block_stop", { diff --git a/faigate/dashboard_web.py b/faigate/dashboard_web.py new file mode 100644 index 0000000..df40569 --- /dev/null +++ b/faigate/dashboard_web.py @@ -0,0 +1,1778 @@ +"""Built-in operator dashboard HTML for fusionAIze Gate.""" + +# ruff: noqa: E501 + +DASHBOARD_HTML = ''' + + + + +fusionAIze Gate + + + +
+ + +
+
+
+
+
Gateway health is loading
+

Dark-mode operator cockpit

+

fusionAIze Gate now reads more like a live routing desk than a local admin page: health confidence first, spend pressure second, explainability always visible, and setup guidance close to the traffic it affects.

+
+
+ + + + Updated +
+
+
+
+
+ Priority next + loading +
+
Loading priority path
+
The gateway is calculating the next safest operator move.
+
+
+
+
Snapshot
+
+
+
+
+ +
+
+
+ Filters and live scope +

Narrow the cockpit to one provider, one client family, one layer, or one success state. The same filter scope drives overview, tables, traces, and request history.

+
+ All traffic +
+
+ + + + + + +
+
+
No active filters
+
+ + +
+
+
+ +
+
+
+
+
+
+

What needs attention

+

High-signal guidance from health, route pressure, catalog freshness, and operator events.

+
+ Loading +
+
+
+
+
+
+

Spend and traffic

+

Lightweight built-in charts keep the overview tactile without introducing a frontend build stack.

+
+
+ +
+
+
+
+
+
+

Lane families

+

Which canonical families are carrying traffic, under cooldown pressure, or due for strengthening.

+
+
+
+
+
+
+
+

Recent request log

+

The shortest path from “something feels off” to a specific request, route, and status.

+
+
+
Time | Provider | Lane | Client | Latency | Cost | Status
+
+
+
+ +
+
+
+
+
+

Provider fleet

+

Request-readiness, billing mode, quota coupling, runtime penalties, and lane context in one table.

+
+ Inventory +
+
Provider | Status | Lane | Route | Billing + quota | Requests | Cost | Latency | Operator note
+
+
+ +
+
+
+
+
+

Client posture

+

See which tools are expensive, slow, or failure-heavy, then decide where `coding-auto`, `eco`, or `premium` should really land.

+
+
+
Client | Profile | Requests | Success | Tokens | Cost | Cost / req | Latency | Providers
+
+
+ +
+
+
+
+
+

Selection paths

+

Same-lane, same-cluster, and fallback-chain behavior should be legible, not magical.

+
+
+
+
+
+
+
+

Route pressure

+

Cooldown and recovery signals reveal where resilience is real and where it still depends on operator help.

+
+
+
+
+
+
+
+
+

Routing breakdown

+

Chosen rule, selected provider, canonical lane, and observed runtime state for successful traffic.

+
+
+
Layer | Rule | Provider | Lane family | Selection path | Requests | Cost | Latency
+
+
+ +
+
+
+
+
+

30-day cost trend

+

Financial-desk style long view of requests, spend, and failure pressure.

+
+
+
+
+
+
+
+

24-hour traffic pulse

+

Short-window request flow for detecting fallbacks, spikes, and cold periods.

+
+
+
+
+
+
+
+
+
+

Modality mix

+

How different request types distribute across providers and layers.

+
+
+
Modality | Provider | Layer | Requests | Cost | Latency
+
+
+
+
+

Operator actions

+

Update checks and local operator events, surfaced beside the traffic they affect.

+
+
+
Event | Action | Status | Target | Eligible | Events
+
+
+
+ +
+
+
+
+
+
+

Catalog alerts

+

Freshness drift, source issues, and model mismatches should stay visible before they become routing debt.

+
+
+
+
+
+
+
+

Refresh guidance

+

Where to review now, what to refresh soon, and which assumptions are still trustworthy.

+
+
+
+
+
+
+
+
+

Tracked provider assumptions

+

Configured model, recommended model, offer track, volatility, and evidence quality in one surface.

+
+
+
Provider | Status | Configured | Recommended | Offer track | Volatility | Reviewed | Why it matters
+
+
+ +
+
+
+ Claude Code +

Anthropic-compatible local endpoint for daily-use coding flows. Use `coding-auto` when you want cheapest-capable behavior, not a fixed premium provider.

+ export ANTHROPIC_BASE_URL=http://127.0.0.1:8090 +export ANTHROPIC_AUTH_TOKEN=dummy-local-token +claude --model coding-auto +
+
+ OpenAI-compatible tools +

Cursor-style tools, Continue, Cline, scripts, and agent shells should point at one local endpoint and use Gate modes instead of raw provider names.

+ export OPENAI_BASE_URL=http://127.0.0.1:8090/v1 +export OPENAI_API_KEY=dummy-local-token +# models: auto, coding-auto, coding-fast, coding-premium, eco, premium +
+
+ Agent-native clients +

Use client-aware entry points for `opencode`, `openclaw`, local automation, and future n8n flows. Keep the client identity visible so the router can treat them differently.

+ Recommended mental model + +light coding -> coding-auto +cheap background -> eco +high-trust coding -> coding-premium +manual hard task -> premium +
+
+
+
+
+
+

Why this surface is different

+

Not hosted-first. Not black-box stacks. Not provider-string roulette.

+
+
+
+
+ Local-first security +

Your keys, routes, and request traces stay on your machine unless you explicitly wire in remote services.

+
+
+ Agent-native routing +

Claude Code, opencode, openclaw, and OpenAI-compatible clients are traffic shapes with different economics, not identical callers.

+
+
+ Explainable by default +

Lane family, selection path, route type, quota coupling, and freshness are visible in the same operator surface.

+
+
+
+
+
+
+

Troubleshooting shortcuts

+

Short recovery paths for the things that break in the real world.

+
+
+
+

If a model is unexpectedly expensive, first check the client model id, then the selected lane, then the actual route type. Gate can only be “cheapest capable” if the client enters through the right intent surface.

+
+
+
+
+
+ + + + +''' diff --git a/faigate/main.py b/faigate/main.py index 83d3cf5..f2d20b7 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -38,6 +38,7 @@ ) from .canonical import CanonicalChatRequest, CanonicalChatResponse, CanonicalResponseMessage from .config import Config, load_config +from .dashboard_web import DASHBOARD_HTML from .hooks import ( AppliedHooks, HookExecutionError, @@ -118,6 +119,57 @@ def _client_error_response(message: str, *, error_type: str, status_code: int) - return JSONResponse({"error": message, "type": error_type}, status_code=status_code) +def _openai_sse_data(payload: dict[str, Any]) -> bytes: + """Return one OpenAI-style SSE data frame.""" + return f"data: {json.dumps(payload, separators=(',', ':'))}\n\n".encode() + + +async def _safe_openai_sse_stream( + stream: AsyncIterator[bytes], + *, + provider_name: str, + trace_id: str | None, +) -> AsyncIterator[bytes]: + """Keep streaming responses well-formed when the upstream fails mid-turn.""" + + try: + async for chunk in stream: + yield chunk + except ProviderError as exc: + logger.warning( + "Streaming response from %s failed after stream start: %s", + provider_name, + exc.detail[:200], + ) + yield _openai_sse_data( + { + "error": { + "message": str(exc.detail or "Streaming request failed"), + "type": classify_runtime_issue(status=exc.status, detail=exc.detail), + "provider": provider_name, + "trace_id": trace_id or "", + } + } + ) + yield b"data: [DONE]\n\n" + except Exception: + logger.exception( + "Streaming response from %s failed unexpectedly after stream start", + provider_name, + ) + yield _openai_sse_data( + { + "error": { + "message": "Streaming request failed unexpectedly", + "type": "provider_error", + "provider": provider_name, + "trace_id": trace_id or "", + } + } + ) + yield b"data: [DONE]\n\n" + + def _request_hook_error_response(exc: Exception) -> JSONResponse: """Return a sanitized request-hook failure response.""" logger.warning("Request hook processing failed: %s", 
exc) @@ -3098,7 +3150,11 @@ async def chat_completions(request: Request): if execution.stream: return StreamingResponse( - execution.result, + _safe_openai_sse_stream( + execution.result, + provider_name=execution.provider_name, + trace_id=execution.trace_id, + ), media_type="text/event-stream", headers={ "X-faigate-Provider": execution.provider_name, @@ -3194,7 +3250,11 @@ async def anthropic_messages(request: Request): if execution.stream: return StreamingResponse( openai_sse_to_anthropic( - execution.result, + _safe_openai_sse_stream( + execution.result, + provider_name=execution.provider_name, + trace_id=execution.trace_id, + ), requested_model=str( canonical_request.metadata.get("requested_model_original") or wire_request.model ), @@ -3780,3 +3840,6 @@ def _dashboard_csp() -> str: setInterval(load, 30000); """ + +# Keep the runtime wired to the extracted operator cockpit UI. +_DASHBOARD_HTML = DASHBOARD_HTML diff --git a/faigate/providers.py b/faigate/providers.py index 7adbaca..f6fdf4d 100644 --- a/faigate/providers.py +++ b/faigate/providers.py @@ -684,11 +684,18 @@ async def _stream_response( raise ProviderError(self.name, resp.status_code, error_text.decode()[:500]) first_chunk = True - async for line in resp.aiter_lines(): - if first_chunk: - self.health.record_success((time.time() - t0) * 1000) - first_chunk = False - yield (line + "\n").encode() + try: + async for line in resp.aiter_lines(): + if first_chunk: + self.health.record_success((time.time() - t0) * 1000) + first_chunk = False + yield (line + "\n").encode() + except httpx.HTTPError as e: + self.health.record_failure(f"Stream error: {e}") + raise ProviderError(self.name, 0, f"Stream error: {e}") from e + except Exception as e: + self.health.record_failure(f"Stream error: {e}") + raise ProviderError(self.name, 0, f"Stream error: {e}") from e # ── Google GenAI path ────────────────────────────────────── diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index 695300c..d88721e 
100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -307,9 +307,9 @@ def test_anthropic_messages_applies_builtin_claude_code_model_aliases( assert response.status_code == 200 metadata = provider.calls[0]["extra_body"]["metadata"] assert metadata["requested_model_original"] == "claude-sonnet-4-6[1m]" - assert metadata["requested_model_resolved"] == "anthropic-sonnet" + assert metadata["requested_model_resolved"] == "auto" assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet-4-6-1m" - assert response.headers["x-faigate-bridge-model-resolved"] == "anthropic-sonnet" + assert response.headers["x-faigate-bridge-model-resolved"] == "auto" def test_anthropic_messages_can_redirect_claude_code_model_ids_to_gateway_routes( @@ -912,3 +912,57 @@ async def _iter() -> AsyncIterator[bytes]: assert '"type":"input_json_delta","partial_json":"{\\"id\\":' in body assert '"type":"input_json_delta","partial_json":"\\"design-note\\"}"' in body assert '"stop_reason":"tool_use"' in body + + +@pytest.mark.asyncio +async def test_openai_sse_to_anthropic_closes_open_text_block_before_error(): + async def _iter() -> AsyncIterator[bytes]: + yield ( + b'data: {"id":"chatcmpl-stream","object":"chat.completion.chunk",' + b'"model":"chat-model","choices":[{"index":0,"delta":{"role":"assistant",' + b'"content":"Hello"},"finish_reason":null}]}\n' + ) + yield b"\n" + yield ( + b'data: {"error":{"type":"api_error","message":"upstream broke"}}\n' + ) + yield b"\n" + + chunks: list[str] = [] + async for chunk in openai_sse_to_anthropic( + _iter(), + requested_model="claude-code", + resolved_model="premium", + ): + chunks.append(chunk.decode("utf-8")) + + body = "".join(chunks) + assert "event: content_block_start" in body + assert '"type":"text_delta","text":"Hello"' in body + assert 'event: content_block_stop' in body + assert 'event: error' in body + assert body.index('event: content_block_stop') < body.index('event: error') + assert 
'"message":"upstream broke"' in body + + +@pytest.mark.asyncio +async def test_safe_openai_sse_stream_emits_error_frame_and_done(): + async def _iter() -> AsyncIterator[bytes]: + yield b'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n' + raise ProviderError("kilo-sonnet", 429, "rate limited mid-stream") + + chunks: list[bytes] = [] + async for chunk in main_module._safe_openai_sse_stream( + _iter(), + provider_name="kilo-sonnet", + trace_id="trace-stream-1", + ): + chunks.append(chunk) + + body = b"".join(chunks).decode("utf-8") + assert 'data: {"choices":[{"delta":{"content":"Hello"}}]}' in body + assert '"message":"rate limited mid-stream"' in body + assert '"type":"rate-limited"' in body + assert '"provider":"kilo-sonnet"' in body + assert '"trace_id":"trace-stream-1"' in body + assert body.rstrip().endswith("data: [DONE]") From 6cb250c27dc5c427f654a46c7675c879b1c5dcff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Mon, 30 Mar 2026 21:13:05 +0200 Subject: [PATCH 08/13] feat(dashboard): add lightweight uplot charts --- faigate/dashboard_web.py | 214 ++++++++++++++++++++++++------- faigate/vendor/uPlot.min.css | 1 + faigate/vendor/uplot.iife.min.js | 2 + pyproject.toml | 3 + 4 files changed, 175 insertions(+), 45 deletions(-) create mode 100644 faigate/vendor/uPlot.min.css create mode 100644 faigate/vendor/uplot.iife.min.js diff --git a/faigate/dashboard_web.py b/faigate/dashboard_web.py index df40569..e3d1f4c 100644 --- a/faigate/dashboard_web.py +++ b/faigate/dashboard_web.py @@ -1,7 +1,20 @@ """Built-in operator dashboard HTML for fusionAIze Gate.""" +from pathlib import Path + # ruff: noqa: E501 +_VENDOR_DIR = Path(__file__).resolve().parent / "vendor" + + +def _read_vendor_asset(name: str) -> str: + """Return one vendored dashboard asset as text.""" + + try: + return (_VENDOR_DIR / name).read_text(encoding="utf-8") + except OSError: + return "" + DASHBOARD_HTML = ''' @@ -9,6 +22,7 @@ fusionAIze Gate + + + + + + + + + + \ No newline 
at end of file diff --git a/faigate/assets/brand/fusionaize-logo-white.svg b/faigate/assets/brand/fusionaize-logo-white.svg new file mode 100644 index 0000000..7d7ec75 --- /dev/null +++ b/faigate/assets/brand/fusionaize-logo-white.svg @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/faigate/assets/brand/fusionaize-logo.svg b/faigate/assets/brand/fusionaize-logo.svg new file mode 100644 index 0000000..eaeac73 --- /dev/null +++ b/faigate/assets/brand/fusionaize-logo.svg @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/faigate/dashboard_web.py b/faigate/dashboard_web.py index e3d1f4c..f9d46b0 100644 --- a/faigate/dashboard_web.py +++ b/faigate/dashboard_web.py @@ -1,10 +1,12 @@ """Built-in operator dashboard HTML for fusionAIze Gate.""" +import re from pathlib import Path # ruff: noqa: E501 _VENDOR_DIR = Path(__file__).resolve().parent / "vendor" +_ASSET_DIR = Path(__file__).resolve().parent / "assets" / "brand" def _read_vendor_asset(name: str) -> str: @@ -15,6 +17,26 @@ def _read_vendor_asset(name: str) -> str: except OSError: return "" + +def _read_brand_asset(name: str) -> str: + """Return one brand asset as text.""" + + try: + return (_ASSET_DIR / name).read_text(encoding="utf-8") + except OSError: + return "" + + +def _inline_svg(name: str) -> str: + """Return one SVG asset sanitized for inline HTML embedding.""" + + svg = _read_brand_asset(name) + if not svg: + return "" + svg = svg.replace('', "").strip() + svg = re.sub(r"", "", svg, flags=re.DOTALL).strip() + return svg + DASHBOARD_HTML = ''' @@ -112,44 +134,42 @@ def _read_vendor_asset(name: str) -> str: background:linear-gradient(90deg, transparent, rgba(84,171,238,.35), transparent); } .brand{ + display:grid; + gap:12px; + margin-bottom:24px; +} +.brand-lockup{ display:flex; align-items:flex-start; - gap:14px; - margin-bottom:26px; -} -.brand-mark{ - width:48px; - height:48px; - 
border-radius:16px; - background: - radial-gradient(circle at 30% 30%, rgba(84,171,238,.95), rgba(0,82,204,.55) 42%, rgba(13,23,48,.2) 70%), - linear-gradient(135deg, rgba(196,217,0,.18), rgba(0,82,204,.1)); - box-shadow:0 0 0 1px rgba(84,171,238,.14), 0 0 26px rgba(0,82,204,.35); - position:relative; + gap:12px; } -.brand-mark::before, -.brand-mark::after{ - content:""; - position:absolute; - inset:9px; - border-radius:12px; - border:1px solid rgba(225,233,243,.18); +.brand-wordmark{ + display:flex; + align-items:center; + min-height:28px; } -.brand-mark::after{ - inset:17px; - border-color:rgba(196,217,0,.3); +.brand-wordmark svg{ + display:block; + width:166px; + height:auto; } -.brand-copy h1{ - margin:2px 0 6px; - font:700 1.32rem/1 var(--display); - letter-spacing:.04em; +.brand-gate{ + display:inline-flex; + align-items:center; + min-height:28px; + padding:0 10px; + border-radius:999px; + border:1px solid rgba(84,171,238,.16); + background:rgba(255,255,255,.03); + color:#f6fbff; + font:700 .74rem/1 var(--mono); + letter-spacing:.16em; text-transform:uppercase; } -.brand-copy h1 .accent{color:var(--lime)} .brand-copy p{ margin:0; color:var(--muted); - font-size:.88rem; + font-size:.82rem; line-height:1.45; } .rail-meta{ @@ -226,36 +246,60 @@ def _read_vendor_asset(name: str) -> str: .hero{ position:relative; overflow:hidden; - padding:24px 24px 22px; + padding:18px 20px; border:1px solid rgba(84,171,238,.16); border-radius:var(--radius-xl); background: linear-gradient(145deg, rgba(18,34,68,.96), rgba(9,17,31,.96)), - radial-gradient(circle at top right, rgba(84,171,238,.2), transparent 30%); + radial-gradient(circle at top right, rgba(84,171,238,.14), transparent 28%); box-shadow:var(--shadow); } .hero::before{ content:""; position:absolute; inset:0; - background:linear-gradient(90deg, rgba(84,171,238,.08), transparent 26%, transparent 74%, rgba(196,217,0,.06)); + background:linear-gradient(90deg, rgba(84,171,238,.06), transparent 24%, transparent 76%, 
rgba(196,217,0,.04)); pointer-events:none; } .hero-top{ display:flex; - align-items:flex-start; + align-items:flex-end; justify-content:space-between; - gap:20px; - margin-bottom:18px; + gap:16px; + margin-bottom:14px; flex-wrap:wrap; } +.hero-head{ + display:grid; + gap:8px; +} +.hero-headline{ + display:flex; + align-items:center; + gap:12px; + flex-wrap:wrap; +} +.hero-brand{ + display:inline-flex; + align-items:center; +} +.hero-brand svg{ + display:block; + width:150px; + height:auto; + opacity:.95; +} +.hero-brand-sep{ + width:1px; + height:22px; + background:rgba(84,171,238,.18); +} .eyebrow{ display:inline-flex; align-items:center; gap:10px; - margin-bottom:12px; color:var(--muted); - font:600 .73rem/1 var(--mono); + font:600 .68rem/1 var(--mono); letter-spacing:.16em; text-transform:uppercase; } @@ -274,24 +318,25 @@ def _read_vendor_asset(name: str) -> str: } .hero h2{ margin:0; - max-width:13ch; - font:700 clamp(2rem, 4vw, 3.3rem)/.96 var(--display); + max-width:none; + font:700 clamp(1.4rem, 2vw, 2rem)/1 var(--display); text-transform:uppercase; - letter-spacing:.03em; + letter-spacing:.05em; } .hero h2 .accent{color:var(--lime)} .hero p{ - margin:14px 0 0; - max-width:72ch; + margin:0; + max-width:58ch; color:var(--muted); - line-height:1.6; - font-size:1rem; + line-height:1.45; + font-size:.92rem; } .hero-actions{ display:flex; flex-wrap:wrap; gap:10px; - align-items:center; + align-items:flex-end; + justify-content:flex-end; } .btn{ display:inline-flex; @@ -317,16 +362,19 @@ def _read_vendor_asset(name: str) -> str: } .hero-ribbon{ display:grid; - grid-template-columns:1.35fr .9fr; + grid-template-columns:1.1fr .9fr; gap:16px; } .ribbon-panel{ - padding:18px; + padding:16px; border-radius:20px; border:1px solid rgba(84,171,238,.14); background:rgba(7,16,29,.48); backdrop-filter:blur(10px); } +.ribbon-panel.attention{ + background:linear-gradient(160deg, rgba(17,29,56,.72), rgba(9,17,31,.72)); +} .ribbon-title{ margin-bottom:12px; 
color:var(--muted-soft); @@ -355,7 +403,7 @@ def _read_vendor_asset(name: str) -> str: letter-spacing:.12em; } .priority-path{ - font:700 1.1rem/1.2 var(--display); + font:700 1rem/1.18 var(--display); text-transform:uppercase; letter-spacing:.05em; } @@ -385,9 +433,16 @@ def _read_vendor_asset(name: str) -> str: background:linear-gradient(135deg, var(--brand-2), var(--lime)); flex:0 0 auto; } +.priority-actions{ + display:flex; + flex-wrap:wrap; + gap:10px; + margin-top:14px; +} .stats-rack{ display:grid; gap:10px; + margin-top:14px; } .rack-row{ display:grid; @@ -407,36 +462,54 @@ def _read_vendor_asset(name: str) -> str: } .toolbar{ display:grid; - gap:16px; - padding:18px 20px; + gap:12px; + padding:16px 18px; border:1px solid rgba(84,171,238,.12); border-radius:var(--radius-lg); - background:var(--panel); + background:linear-gradient(180deg, rgba(13,24,48,.92), rgba(8,16,31,.92)); box-shadow:var(--shadow); } .toolbar-head{ display:flex; - align-items:flex-start; + align-items:flex-end; justify-content:space-between; gap:16px; flex-wrap:wrap; } +.toolbar-head-right{ + display:flex; + align-items:center; + gap:10px; + flex-wrap:wrap; +} .toolbar-copy strong{ display:block; - margin-bottom:6px; - font:700 1rem/1.1 var(--display); + margin-bottom:4px; + font:700 .76rem/1 var(--mono); text-transform:uppercase; - letter-spacing:.08em; + letter-spacing:.16em; + color:var(--muted-soft); } .toolbar-copy p{ margin:0; + color:var(--text); + font:700 1.05rem/1.2 var(--display); + letter-spacing:.05em; + text-transform:uppercase; +} +.toolbar-copy p .accent{ + color:var(--lime); +} +.toolbar-copy .toolbar-subline{ + margin-top:6px; color:var(--muted); - line-height:1.5; - max-width:72ch; + font:500 .88rem/1.45 var(--body); + text-transform:none; + letter-spacing:0; } .filters{ display:grid; - grid-template-columns:repeat(auto-fit,minmax(170px,1fr)); + grid-template-columns:repeat(auto-fit,minmax(146px,1fr)); gap:12px; } .field{ @@ -452,7 +525,7 @@ def 
_read_vendor_asset(name: str) -> str: .field input, .field select{ width:100%; - min-height:42px; + min-height:40px; padding:0 12px; color:var(--text); background:rgba(9,17,31,.58); @@ -476,11 +549,46 @@ def _read_vendor_asset(name: str) -> str: color:var(--muted); font-size:.9rem; } +.toolbar-summary strong{ + color:var(--text); +} .toolbar-actions{ display:flex; gap:10px; flex-wrap:wrap; } +.toolbar-chips{ + display:flex; + gap:8px; + flex-wrap:wrap; +} +.chip.active-filter{ + background:rgba(0,82,204,.12); + border-color:rgba(84,171,238,.22); + color:#dff0ff; +} +.saved-view{ + min-width:170px; +} +.saved-view select{ + min-height:42px; + padding:0 12px; + border-radius:999px; + border:1px solid rgba(84,171,238,.16); + background:rgba(255,255,255,.04); + color:var(--text); +} +.attention-grid{ + display:grid; + grid-template-columns:1.05fr .95fr; + gap:18px; +} +.overview-actions{ + display:flex; + flex-wrap:wrap; + gap:10px; + margin-top:14px; +} .view-panel{ display:none; gap:18px; @@ -588,6 +696,11 @@ def _read_vendor_asset(name: str) -> str: .cards-3{ grid-template-columns:repeat(3,minmax(0,1fr)); } +.cards-2{ + display:grid; + grid-template-columns:repeat(2,minmax(0,1fr)); + gap:12px; +} .focus-card,.integration-card,.catalog-card{ padding:16px; border-radius:18px; @@ -809,10 +922,18 @@ def _read_vendor_asset(name: str) -> str: } .empty{ display:grid; - place-items:center; - padding:28px 18px; + gap:6px; + justify-items:start; + padding:22px 18px; color:var(--muted-soft); font-size:.9rem; + border-radius:16px; + background:rgba(9,17,31,.4); + border:1px dashed rgba(84,171,238,.16); +} +.empty strong{ + color:var(--text); + font-size:.95rem; } .code{ display:block; @@ -839,7 +960,7 @@ def _read_vendor_asset(name: str) -> str: height:auto; } .metrics-grid{grid-template-columns:repeat(2,minmax(0,1fr))} - .columns,.hero-ribbon,.cards-3{grid-template-columns:1fr} + .columns,.hero-ribbon,.cards-3,.cards-2,.attention-grid{grid-template-columns:1fr} } @media 
(max-width: 720px){ .shell{padding:14px} @@ -847,6 +968,8 @@ def _read_vendor_asset(name: str) -> str: .toolbar,.panel{padding:16px} .metrics-grid{grid-template-columns:1fr} .nav{grid-template-columns:1fr 1fr} + .hero-headline{align-items:flex-start} + .hero-actions{justify-content:flex-start} } @@ -854,10 +977,12 @@ def _read_vendor_asset(name: str) -> str:
-
Time | Provider | Lane | Client | Latency | Cost | Status
+
@@ -2611,6 +2620,27 @@ def _inline_svg(name: str) -> str: `).join(''); } +function recentLogItems(rows, limit = 8) { + if (!rows.length) { + return '
No requests for the current scopeClear filters or switch to All traffic.
'; + } + return rows.slice(0, limit).map(row => ` +
+
+
+ ${esc(row.provider || '—')} + ${row.success ? pill('success', 'ready') : pill('failure', 'fail')} +
+
${esc(row.canonical_model || row.rule_name || '—')} · ${esc(row.client_tag || row.client_profile || 'generic')}
+
+
+
${esc(ago(row.timestamp))}
+
${fmtMs(row.latency_ms || 0)} · ${fmtUsd(row.cost_usd || 0)}
+
+
+ `).join(''); +} + function render(bundle) { latestBundle = bundle; const totals = bundle.stats.totals || {}; @@ -2736,7 +2766,7 @@ def _inline_svg(name: str) -> str: tone: 'blue', empty: 'No lane family data in this scope', }); - $('#overview-recent tbody').innerHTML = recentRows(recent); + $('#overview-recent').innerHTML = recentLogItems(recent); $('#providers-kpis').innerHTML = [ {kicker:'Healthy vs total', value:(bundle.health.summary ? bundle.health.summary.providers_healthy : providers.filter(row => row.healthy).length) + '/' + providers.length, detail:String(unhealthyProviders.length) + ' unhealthy', tone:unhealthyProviders.length ? 'orange' : 'green'}, From 83db7a8e77cf5f567d1ae353a8aa3f3a094fb517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Tue, 31 Mar 2026 03:49:32 +0200 Subject: [PATCH 12/13] style: format rebased python changes --- faigate/bridges/anthropic/adapter.py | 11 +++-------- faigate/dashboard_web.py | 5 +++-- tests/test_anthropic_api.py | 10 ++++------ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index 86e1606..1129959 100644 --- a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -52,9 +52,7 @@ def anthropic_request_to_canonical( ) -> CanonicalChatRequest: """Map an Anthropic messages request to the internal gateway model.""" - normalized_headers = { - str(key): str(value) for key, value in (headers or {}).items() - } + normalized_headers = {str(key): str(value) for key, value in (headers or {}).items()} source = ( normalized_headers.get("x-faigate-client") or normalized_headers.get("anthropic-client") @@ -229,8 +227,7 @@ def _message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessage]: return _user_message_to_canonical(message) if any(block.type != "text" for block in message.content): raise AnthropicBridgeError( - "Anthropic bridge v1 does not support " - f"'{message.role}' 
messages with non-text blocks" + f"Anthropic bridge v1 does not support '{message.role}' messages with non-text blocks" ) return [ CanonicalMessage( @@ -745,9 +742,7 @@ async def openai_sse_to_anthropic( { "type": "message_delta", "delta": { - "stop_reason": ( - stop_reason or ("tool_use" if tool_states else "end_turn") - ), + "stop_reason": (stop_reason or ("tool_use" if tool_states else "end_turn")), "stop_sequence": None, }, "usage": dict(usage), diff --git a/faigate/dashboard_web.py b/faigate/dashboard_web.py index 7a96087..5c19798 100644 --- a/faigate/dashboard_web.py +++ b/faigate/dashboard_web.py @@ -54,7 +54,8 @@ def _inline_svg(name: str) -> str: svg = re.sub(r"", "", svg, flags=re.DOTALL).strip() return svg -DASHBOARD_HTML = ''' + +DASHBOARD_HTML = """ @@ -3059,7 +3060,7 @@ def _inline_svg(name: str) -> str: -''' +""" DASHBOARD_HTML = ( DASHBOARD_HTML.replace("/*__UPLOT_CSS__*/", _read_vendor_asset("uPlot.min.css")) diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index d88721e..0b74857 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -923,9 +923,7 @@ async def _iter() -> AsyncIterator[bytes]: b'"content":"Hello"},"finish_reason":null}]}\n' ) yield b"\n" - yield ( - b'data: {"error":{"type":"api_error","message":"upstream broke"}}\n' - ) + yield (b'data: {"error":{"type":"api_error","message":"upstream broke"}}\n') yield b"\n" chunks: list[str] = [] @@ -939,9 +937,9 @@ async def _iter() -> AsyncIterator[bytes]: body = "".join(chunks) assert "event: content_block_start" in body assert '"type":"text_delta","text":"Hello"' in body - assert 'event: content_block_stop' in body - assert 'event: error' in body - assert body.index('event: content_block_stop') < body.index('event: error') + assert "event: content_block_stop" in body + assert "event: error" in body + assert body.index("event: content_block_stop") < body.index("event: error") assert '"message":"upstream broke"' in body From 
f54f84e0e90e1d5a55b1a5dc7a612f77bce3f59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Tue, 31 Mar 2026 03:54:30 +0200 Subject: [PATCH 13/13] style: format anthropic api models --- faigate/api/anthropic/models.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/faigate/api/anthropic/models.py b/faigate/api/anthropic/models.py index ef395a4..da5dc1c 100644 --- a/faigate/api/anthropic/models.py +++ b/faigate/api/anthropic/models.py @@ -164,14 +164,10 @@ def _parse_system_prompt(raw: Any) -> str | list[str] | None: normalized.append(item) continue if not isinstance(item, Mapping): - raise AnthropicBridgeError( - "'system' blocks must be strings or text block mappings" - ) + raise AnthropicBridgeError("'system' blocks must be strings or text block mappings") block_type = str(item.get("type", "") or "").strip() if block_type != "text": - raise AnthropicBridgeError( - "Anthropic bridge v1 supports only text blocks in 'system'" - ) + raise AnthropicBridgeError("Anthropic bridge v1 supports only text blocks in 'system'") normalized.append(str(item.get("text", "") or "")) return normalized