diff --git a/README.md b/README.md index 3d19471..c2abcae 100644 --- a/README.md +++ b/README.md @@ -314,7 +314,7 @@ anthropic_bridge: Known v1 limits: - non-streaming only -- text content blocks only +- text blocks plus basic `tool_use` / `tool_result` - `count_tokens` is a local estimate, not provider-exact accounting - the optional `claude-code-router` hook only adds routing hints; it is not the protocol bridge @@ -324,6 +324,12 @@ Local smoke test: ./docs/examples/anthropic-bridge-smoke.sh ``` +Client-near validation before release: + +```bash +./docs/examples/anthropic-bridge-validation.sh +``` + For a fuller operator view, see [docs/anthropic-bridge.md](./docs/anthropic-bridge.md) and [docs/API.md](./docs/API.md). ## API Surface diff --git a/RELEASES.md b/RELEASES.md index a4343b8..59dc707 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -18,6 +18,7 @@ This repo does not require a heavy release process. Use lightweight tags plus Gi 10. For hardening-heavy releases, keep the API functional tests green alongside unit and config coverage. 11. Publish the GitHub Release so [`notify-tap`](./.github/workflows/notify-tap.yml) can dispatch the Homebrew tap update automatically. 12. If the tap dispatch fails or the formula needs manual follow-up, bump `Formula/faigate.rb` in the separate [`fusionAIze/homebrew-tap`](https://github.com/fusionAIze/homebrew-tap) repo to the new release tag and update its `sha256`. +13. For Anthropic bridge releases, also run the client-near validation flow in [docs/anthropic-bridge-release-readiness.md](./docs/anthropic-bridge-release-readiness.md) before tagging. ## Example diff --git a/docs/API.md b/docs/API.md index f70d4d8..3a0cc97 100644 --- a/docs/API.md +++ b/docs/API.md @@ -111,7 +111,7 @@ Response headers make the approximation explicit: Known v1 bridge limits: - non-streaming only -- text content blocks only +- text blocks plus basic `tool_use` / `tool_result` - image or binary content blocks are rejected - `count_tokens` is an estimate, not provider-exact accounting diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 59acb62..f5e73f6 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -358,6 +358,9 @@ Recommended operational pattern: - keep direct Anthropic routes, Anthropic-capable aggregators, and local workers all probeable - be careful with aggregator routes that may still use a BYOK Anthropic key from the same quota domain - prefer health checks and fallback ordering over assuming every Anthropic-shaped route is independent +- when two routes can burn the same upstream quota, give them the same `transport.quota_group` +- set `transport.billing_mode: byok` on routes where wallet or aggregator billing may still collapse to your own upstream key +- only set `transport.quota_isolated: true` when you are confident that route is operationally independent For the end-to-end flow and local smoke example, see [Anthropic Bridge](./anthropic-bridge.md). diff --git a/docs/anthropic-bridge-release-readiness.md b/docs/anthropic-bridge-release-readiness.md new file mode 100644 index 0000000..63c327b --- /dev/null +++ b/docs/anthropic-bridge-release-readiness.md @@ -0,0 +1,146 @@ +# Anthropic Bridge Release Readiness + +This checklist is the release gate for shipping the Anthropic bridge as a production-facing feature inside `fusionAIze Gate`. + +The intended release position is: + +- optional and explicitly enabled +- safe for production early adopters +- not full Anthropic parity yet + +The bridge should only ship when the core protocol path, fallback behavior, and operator surfaces all agree. + +## Intended Release Scope + +Acceptable to release: + +- `POST /v1/messages` +- `POST /v1/messages/count_tokens` +- non-streaming text flows +- basic `tool_use` / `tool_result` +- Anthropic header/version tolerance +- shared-quota-aware fallback behavior + +Still explicitly out of scope: + +- streaming SSE parity +- image or binary content blocks +- provider-exact token counting +- claiming full Claude Desktop or Claude Code parity across all versions + +## Preflight Configuration + +Before release, verify that the target config has: + +- `api_surfaces.anthropic_messages: true` +- `anthropic_bridge.enabled: true` +- at least one stable Claude-facing alias such as `claude-code -> auto` +- at least one non-Anthropic fallback route or local worker for continuity +- shared Anthropic quota domains marked with `transport.quota_group` +- aggregator or wallet-style routes marked with `transport.billing_mode` when BYOK collapse is possible + +Recommended example: + +```yaml +anthropic_bridge: + enabled: true + model_aliases: + claude-code: auto + claude-code-fast: eco + claude-code-premium: premium + +providers: + anthropic-sonnet: + transport: + quota_group: anthropic-main + kilo-sonnet: + transport: + billing_mode: byok + quota_group: anthropic-main + local-worker: + transport: + quota_isolated: true +``` + +## Validation Sequence + +Run these in order on a product-like config: + +1. `./docs/examples/anthropic-bridge-smoke.sh` +2. `./docs/examples/anthropic-bridge-validation.sh` +3. `./scripts/faigate-doctor` +4. `./scripts/faigate-provider-probe --json` + +If you validate against a non-default config or env file, export those first so the script, doctor, and probe all inspect the same runtime: + +```bash +export FAIGATE_BASE_URL=http://127.0.0.1:18090 +export FAIGATE_CONFIG_FILE=/tmp/faigate-bridge-live.yaml +export FAIGATE_ENV_FILE=/opt/homebrew/etc/faigate/faigate.env +./docs/examples/anthropic-bridge-validation.sh +``` + +The second script is the more complete client-near validation path. It checks: + +- Anthropic messages with version/beta headers +- basic tool-use / tool-result bridge handling +- `count_tokens` +- doctor and provider-probe output after the same config is live + +## Required Test Baseline + +Before release, keep these green: + +```bash +env PYTHONPATH=. ./.venv-check-313/bin/pytest -q \ + tests/test_config.py \ + tests/test_providers.py \ + tests/test_anthropic_api.py \ + tests/test_anthropic_bridge.py \ + tests/test_request_hooks.py +``` + +and: + +```bash +rtk ruff check faigate/config.py faigate/providers.py faigate/main.py \ + faigate/canonical.py faigate/bridges/anthropic/adapter.py \ + tests/test_config.py tests/test_providers.py tests/test_anthropic_api.py \ + tests/test_anthropic_bridge.py tests/test_request_hooks.py +``` + +## Release Criteria + +Ship only if all of these are true: + +- Anthropic bridge requests succeed through the normal routing core +- tool-use / tool-result flows stay on the same execution path +- Anthropic error mapping stays coherent under direct provider failures +- version/beta headers survive roundtrip handling +- shared-quota routes are skipped when one route in the same quota group fails with quota, rate-limit, or auth pressure +- doctor and provider-probe still explain route readiness clearly +- docs match the real v1 limits + +## No-Go Signals + +Do not release if any of these are still observed: + +- a quota-exhausted Anthropic route still retries an aggregator route that shares the same upstream quota domain +- the bridge silently drops tool-use or tool-result semantics +- the bridge claims streaming support without a tested SSE implementation +- `count_tokens` is described as exact anywhere in the docs +- doctor or provider-probe make Anthropic-shaped routes look independent when they are actually BYOK-coupled + +## Release Call + +If the checks above pass, the bridge is reasonable to release as: + +- production-usable +- opt-in +- early-adopter safe + +It should not yet be marketed as: + +- full Anthropic API parity +- full Claude Code parity +- full Claude Desktop parity diff --git a/docs/anthropic-bridge.md b/docs/anthropic-bridge.md index b7fa2c5..21f746c 100644 --- a/docs/anthropic-bridge.md +++ b/docs/anthropic-bridge.md @@ -91,6 +91,8 @@ Recommended pattern: - keep direct Anthropic routes probeable and clearly named - keep Anthropic-capable aggregators as explicit mirrors or secondary routes - do not assume a premium Anthropic mirror is independent if it uses the same exhausted account +- mark routes that can burn the same upstream quota with a shared `transport.quota_group` +- use `transport.billing_mode: byok` on aggregator routes when the wallet path may still collapse to your own upstream account - use `faigate-doctor`, `faigate-provider-probe`, `/health`, and `/api/providers` to validate which routes are actually request-ready ## Claude Code / Claude Desktop @@ -131,10 +133,24 @@ This covers: - `POST /v1/messages` - `POST /v1/messages/count_tokens` +For a client-near validation pass before release, run: + +```bash +./docs/examples/anthropic-bridge-validation.sh +``` + +That broader check adds: + +- bridge headers and Anthropic version/beta tolerance +- basic `tool_use` / `tool_result` flow shape +- doctor and provider-probe output after the same config is live + +For the explicit release gate, see [Anthropic Bridge Release Readiness](./anthropic-bridge-release-readiness.md). + ## Known v1 Limits - non-streaming only -- text content blocks only +- text blocks plus basic `tool_use` / `tool_result` - `count_tokens` returns a deterministic local estimate - image or binary content blocks are not bridged yet - the optional `claude-code-router` hook only adds routing hints diff --git a/docs/examples/anthropic-bridge-validation.sh b/docs/examples/anthropic-bridge-validation.sh new file mode 100644 index 0000000..88b4536 --- /dev/null +++ b/docs/examples/anthropic-bridge-validation.sh @@ -0,0 +1,93 @@ +set -euo pipefail + +BASE_URL="${FAIGATE_BASE_URL:-http://127.0.0.1:8090}" +MODEL_ALIAS="${FAIGATE_ANTHROPIC_MODEL_ALIAS:-claude-code}" +CONFIG_FILE="${FAIGATE_CONFIG_FILE:-}" +ENV_FILE="${FAIGATE_ENV_FILE:-}" + +echo "==> Validation context" +echo "BASE_URL=${BASE_URL}" +echo "MODEL_ALIAS=${MODEL_ALIAS}" +if [ -n "${CONFIG_FILE}" ]; then + echo "FAIGATE_CONFIG_FILE=${CONFIG_FILE}" +fi +if [ -n "${ENV_FILE}" ]; then + echo "FAIGATE_ENV_FILE=${ENV_FILE}" +fi +printf '\n' + +echo "==> Health" +rtk curl -fsS "${BASE_URL}/health" +printf '\n\n' + +echo "==> Provider inventory" +rtk curl -fsS "${BASE_URL}/api/providers" +printf '\n\n' + +echo "==> Anthropic messages with bridge headers" +rtk curl -i -fsS "${BASE_URL}/v1/messages" \ + -H 'Content-Type: application/json' \ + -H 'anthropic-client: claude-code' \ + -H 'anthropic-version: 2023-06-01' \ + -H 'anthropic-beta: tools-2024-04-04' \ + -d "{ + \"model\": \"${MODEL_ALIAS}\", + \"system\": \"Respond as a concise operator helper.\", + \"messages\": [ + {\"role\": \"user\", \"content\": \"Summarize why one local gateway endpoint helps with Anthropic quota limits.\"} + ] + }" +printf '\n\n' + +echo "==> Anthropic tool roundtrip shape" +rtk curl -i -fsS "${BASE_URL}/v1/messages" \ + -H 'Content-Type: application/json' \ + -H 'anthropic-client: claude-code' \ + -d "{ + \"model\": \"${MODEL_ALIAS}\", + \"messages\": [ + {\"role\": \"user\", \"content\": \"Load the deployment guide.\"}, + { + \"role\": \"assistant\", + \"content\": [ + { + \"type\": \"tool_use\", + \"id\": \"toolu_demo\", + \"name\": \"lookup_doc\", + \"input\": {\"id\": \"deploy-guide\"} + } + ] + }, + { + \"role\": \"user\", + \"content\": [ + { + \"type\": \"tool_result\", + \"tool_use_id\": \"toolu_demo\", + \"content\": \"Deployment guide loaded successfully.\" + } + ] + } + ] + }" +printf '\n\n' + +echo "==> Anthropic count_tokens" +rtk curl -i -fsS "${BASE_URL}/v1/messages/count_tokens" \ + -H 'Content-Type: application/json' \ + -H 'anthropic-version: 2023-06-01' \ + -d "{ + \"model\": \"${MODEL_ALIAS}\", + \"messages\": [ + {\"role\": \"user\", \"content\": \"Count the bridge tokens for this request.\"} + ] + }" +printf '\n\n' + +echo "==> Doctor" +./scripts/faigate-doctor +printf '\n\n' + +echo "==> Provider probe" +./scripts/faigate-provider-probe --json +printf '\n' diff --git a/docs/examples/provider-kilocode.yaml b/docs/examples/provider-kilocode.yaml index 2ddb738..1f006db 100644 --- a/docs/examples/provider-kilocode.yaml +++ b/docs/examples/provider-kilocode.yaml @@ -13,6 +13,9 @@ providers: model: "anthropic/claude-sonnet-4.6" max_tokens: 16000 tier: mid + transport: + billing_mode: byok + quota_group: anthropic-main kilo-opus: backend: openai-compat base_url: "${KILOCODE_BASE_URL:-https://api.kilo.ai/api/gateway}" @@ -20,6 +23,9 @@ providers: model: "anthropic/claude-opus-4.6" max_tokens: 32000 tier: mid + transport: + billing_mode: byok + quota_group: anthropic-main fallback_chain: - kilo-sonnet diff --git a/faigate/bridges/anthropic/adapter.py b/faigate/bridges/anthropic/adapter.py index be3b5af..03ccb4b 100644 --- a/faigate/bridges/anthropic/adapter.py +++ b/faigate/bridges/anthropic/adapter.py @@ -57,7 +57,7 @@ def anthropic_request_to_canonical( surface="anthropic-messages", requested_model=request.model, system=request.system, - messages=[_message_to_canonical(message) for message in request.messages], + messages=_messages_to_canonical(request.messages), tools=[ CanonicalTool( name=tool.name, @@ -108,7 +108,10 @@ def canonical_response_to_anthropic( id=response.response_id or f"msg_{uuid4().hex}", model=response.model or requested_model, content=_canonical_content_to_anthropic_blocks(response.message), - stop_reason=response.stop_reason or response.message.stop_reason, + stop_reason=map_stop_reason_to_anthropic( + response.stop_reason or response.message.stop_reason, + has_tool_calls=bool(response.message.tool_calls), + ), usage=dict(response.usage), metadata={ **dict(response.metadata), @@ -193,16 +196,129 @@ def approximate_anthropic_input_tokens(request: CanonicalChatRequest) -> tuple[i return max(total, 1), "estimated-char-v1" -def _message_to_canonical(message: AnthropicMessage) -> CanonicalMessage: +def _messages_to_canonical(messages: list[AnthropicMessage]) -> list[CanonicalMessage]: + """Flatten Anthropic turns into the OpenAI-style sequence the core expects.""" + + canonical_messages: list[CanonicalMessage] = [] + for message in messages: + canonical_messages.extend(_message_to_canonical(message)) + return canonical_messages + + +def _message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessage]: + if message.role == "assistant": + return [_assistant_message_to_canonical(message)] + if message.role == "user": + return _user_message_to_canonical(message) if any(block.type != "text" for block in message.content): raise AnthropicBridgeError( - "Anthropic bridge v1 currently supports only text content blocks in messages" + f"Anthropic bridge v1 does not support '{message.role}' messages with non-text blocks" ) - if len(message.content) == 1 and message.content[0].type == "text": - content: Any = message.content[0].text or "" - else: - content = [_anthropic_block_to_payload(block) for block in message.content] - return CanonicalMessage(role=message.role, content=content) + return [ + CanonicalMessage( + role=message.role, + content=_text_blocks_to_string(message.content), + ) + ] + + +def _assistant_message_to_canonical(message: AnthropicMessage) -> CanonicalMessage: + text_blocks: list[AnthropicContentBlock] = [] + tool_calls: list[dict[str, Any]] = [] + for block in message.content: + if block.type == "text": + text_blocks.append(block) + continue + if block.type != "tool_use": + raise AnthropicBridgeError( + "Anthropic bridge v1 supports only text and tool_use blocks in assistant messages" + ) + tool_calls.append(_anthropic_tool_use_to_openai_call(block)) + return CanonicalMessage( + role="assistant", + content=_text_blocks_to_string(text_blocks), + tool_calls=tool_calls, + ) + + +def _user_message_to_canonical(message: AnthropicMessage) -> list[CanonicalMessage]: + canonical_messages: list[CanonicalMessage] = [] + pending_text: list[AnthropicContentBlock] = [] + for block in message.content: + if block.type == "text": + pending_text.append(block) + continue + if block.type != "tool_result": + raise AnthropicBridgeError( + "Anthropic bridge v1 supports only text and tool_result blocks in user messages" + ) + if pending_text: + canonical_messages.append( + CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text)) + ) + pending_text = [] + canonical_messages.append(_anthropic_tool_result_to_canonical_message(block)) + if pending_text or not canonical_messages: + canonical_messages.append( + CanonicalMessage(role="user", content=_text_blocks_to_string(pending_text)) + ) + return canonical_messages + + +def _text_blocks_to_string(blocks: list[AnthropicContentBlock]) -> str: + parts = [str(block.text or "") for block in blocks if block.type == "text"] + return "\n\n".join(part for part in parts if part) + + +def _anthropic_tool_use_to_openai_call(block: AnthropicContentBlock) -> dict[str, Any]: + if not block.name: + raise AnthropicBridgeError("Anthropic tool_use blocks require a name") + call_id = block.tool_use_id or f"toolu_{uuid4().hex[:24]}" + return { + "id": call_id, + "type": "function", + "function": { + "name": block.name, + "arguments": json.dumps(block.input or {}, separators=(",", ":"), sort_keys=True), + }, + } + + +def _anthropic_tool_result_to_canonical_message(block: AnthropicContentBlock) -> CanonicalMessage: + tool_use_id = block.tool_use_id + if not tool_use_id: + raise AnthropicBridgeError("Anthropic tool_result blocks require a tool_use_id") + metadata = {} + if "is_error" in block.metadata: + metadata["tool_result_is_error"] = bool(block.metadata.get("is_error")) + return CanonicalMessage( + role="tool", + content=_anthropic_tool_result_to_string(block), + tool_call_id=tool_use_id, + metadata=metadata, + ) + + +def _anthropic_tool_result_to_string(block: AnthropicContentBlock) -> str: + raw_content = block.metadata.get("content") + if raw_content is None and block.text is not None: + return block.text + if isinstance(raw_content, str): + return raw_content + if isinstance(raw_content, list): + text_parts: list[str] = [] + for item in raw_content: + if isinstance(item, str): + text_parts.append(item) + continue + if isinstance(item, dict) and str(item.get("type") or "") == "text": + text_parts.append(str(item.get("text") or "")) + continue + return json.dumps(raw_content, sort_keys=True) + return "\n\n".join(part for part in text_parts if part) + if raw_content is None: + return "" + return json.dumps(raw_content, sort_keys=True) def _anthropic_block_to_payload(block: AnthropicContentBlock) -> dict[str, Any]: @@ -250,12 +366,17 @@ def _canonical_content_to_anthropic_blocks( content = message.content blocks: list[AnthropicContentBlock] if isinstance(content, str): - blocks = [AnthropicContentBlock(type="text", text=content)] + blocks = ( + [] + if (not content and message.tool_calls) + else [AnthropicContentBlock(type="text", text=content)] + ) elif isinstance(content, list): blocks = [] for item in content: if isinstance(item, str): - blocks.append(AnthropicContentBlock(type="text", text=item)) + if item: + blocks.append(AnthropicContentBlock(type="text", text=item)) continue if not isinstance(item, dict): blocks.append(AnthropicContentBlock(type="text", text=str(item))) @@ -270,8 +391,10 @@ def _canonical_content_to_anthropic_blocks( metadata=dict(item.get("metadata", {}) or {}), ) ) + elif content: + blocks = [AnthropicContentBlock(type="text", text=str(content))] else: - blocks = [AnthropicContentBlock(type="text", text=str(content or ""))] + blocks = [] for tool_call in message.tool_calls: if not isinstance(tool_call, dict): @@ -296,3 +419,20 @@ def _canonical_content_to_anthropic_blocks( ) ) return blocks + + +def map_stop_reason_to_anthropic( + stop_reason: str | None, *, has_tool_calls: bool = False +) -> str | None: + """Translate OpenAI-style finish reasons into Anthropic stop reasons.""" + + normalized = str(stop_reason or "").strip().lower() + if not normalized: + return "tool_use" if has_tool_calls else None + if normalized in {"stop", "end_turn"}: + return "end_turn" + if normalized in {"tool_calls", "tool_use"}: + return "tool_use" + if normalized in {"length", "max_tokens"}: + return "max_tokens" + return normalized diff --git a/faigate/canonical.py b/faigate/canonical.py index 1ad558b..fa9fe0b 100644 --- a/faigate/canonical.py +++ b/faigate/canonical.py @@ -34,6 +34,7 @@ class CanonicalMessage: content: Any name: str | None = None tool_call_id: str | None = None + tool_calls: list[dict[str, Any]] = field(default_factory=list) metadata: dict[str, Any] = field(default_factory=dict) @@ -76,6 +77,8 @@ def to_openai_body(self) -> dict[str, Any]: payload["name"] = message.name if message.tool_call_id: payload["tool_call_id"] = message.tool_call_id + if message.tool_calls: + payload["tool_calls"] = list(message.tool_calls) if message.metadata: payload["metadata"] = dict(message.metadata) messages.append(payload) diff --git a/faigate/config.py b/faigate/config.py index c32184e..424673f 100644 --- a/faigate/config.py +++ b/faigate/config.py @@ -91,6 +91,7 @@ "compatibility", "probe_confidence", "auth_mode", + "billing_mode", "probe_strategy", "probe_payload_kind", "probe_payload_text", @@ -101,6 +102,8 @@ "image_edit_path", "requires_api_key", "supports_models_probe", + "quota_group", + "quota_isolated", "notes", } _SUPPORTED_PROVIDER_TRANSPORT_AUTH_MODES = {"bearer", "query", "none"} @@ -649,6 +652,9 @@ def _normalize_provider_transport(name: str, cfg: dict[str, Any]) -> dict[str, A ) normalized["auth_mode"] = auth_mode + billing_mode = str(transport.get("billing_mode", "") or "").strip().lower() + normalized["billing_mode"] = billing_mode + probe_strategy = str(transport.get("probe_strategy", "models") or "models").strip().lower() probe_strategy = probe_strategy.replace("-", "_") if probe_strategy not in _SUPPORTED_PROVIDER_TRANSPORT_PROBE_STRATEGIES: @@ -703,6 +709,14 @@ def _normalize_provider_transport(name: str, cfg: dict[str, Any]) -> dict[str, A raise ConfigError(f"Provider '{name}' transport.{field_name} must be a boolean") normalized[field_name] = value + quota_group = str(transport.get("quota_group", "") or "").strip() + normalized["quota_group"] = quota_group + + quota_isolated = transport.get("quota_isolated", False) + if not isinstance(quota_isolated, bool): + raise ConfigError(f"Provider '{name}' transport.quota_isolated must be a boolean") + normalized["quota_isolated"] = quota_isolated + notes = transport.get("notes", []) if notes in (None, ""): notes = [] diff --git a/faigate/main.py b/faigate/main.py index 0e16a1c..dcb7839 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -61,7 +61,7 @@ ) from .provider_catalog_store import ProviderCatalogStore from .provider_sources import list_provider_sources -from .providers import ProviderBackend, ProviderError +from .providers import ProviderBackend, ProviderError, classify_runtime_issue from .router import Router, RoutingDecision from .updates import ( UpdateChecker, @@ -142,6 +142,79 @@ def _anthropic_error_response(message: str, *, error_type: str, status_code: int ) +def _anthropic_error_type_for_status(status_code: int, error_type: str) -> str: + """Map generic gateway/provider failures onto Anthropic-style error types.""" + + known_types = { + "invalid_request_error", + "authentication_error", + "permission_error", + "not_found_error", + "rate_limit_error", + "request_too_large", + "api_error", + "overloaded_error", + "not_supported_error", + } + if error_type in known_types: + return error_type + if status_code == 400: + return "invalid_request_error" + if status_code == 401: + return "authentication_error" + if status_code == 403: + return "permission_error" + if status_code == 404: + return "not_found_error" + if status_code == 413: + return "request_too_large" + if status_code == 429: + return "rate_limit_error" + if status_code in {502, 503, 504}: + return "overloaded_error" + return "api_error" + + +def _anthropic_bridge_response_headers( + *, + source: str, + requested_model: str, + resolved_model: str | None = None, + anthropic_version: str | None = None, + anthropic_beta: str | None = None, +) -> dict[str, str]: + """Return bounded response headers that make bridge behavior visible.""" + + headers = { + "X-faigate-Bridge-Surface": "anthropic-messages", + "X-faigate-Bridge-Source": _sanitize_token(source, default="claude-code", max_chars=64), + "X-faigate-Bridge-Model-Requested": _sanitize_token( + requested_model, + default="unknown", + max_chars=96, + ), + } + if resolved_model and resolved_model != requested_model: + headers["X-faigate-Bridge-Model-Resolved"] = _sanitize_token( + resolved_model, + default="unknown", + max_chars=96, + ) + if anthropic_version: + headers["X-faigate-Bridge-Anthropic-Version"] = _sanitize_token( + anthropic_version, + default="unknown", + max_chars=64, + ) + if anthropic_beta: + headers["X-faigate-Bridge-Anthropic-Beta"] = _sanitize_token( + anthropic_beta, + default="unknown", + max_chars=96, + ) + return headers + + def _invalid_request_response(message: str, *, exc: Exception | None = None) -> JSONResponse: """Return a sanitized invalid-request response.""" if exc is not None: @@ -190,13 +263,28 @@ def _provider_error_category(status: int, detail: str) -> str: return "provider_error" -def _serialize_provider_attempt_error(provider_name: str, exc: ProviderError) -> dict[str, Any]: +def _serialize_provider_attempt_error( + provider_name: str, + exc: ProviderError, + *, + category_override: str | None = None, + extra: dict[str, Any] | None = None, +) -> dict[str, Any]: """Return a sanitized provider-attempt failure object for client responses.""" - return { + payload = { "provider": provider_name, "status": exc.status, - "category": _provider_error_category(exc.status, exc.detail), + "category": category_override or _provider_error_category(exc.status, exc.detail), } + if extra: + payload.update(extra) + return payload + + +def _provider_quota_group(provider: Any) -> str: + """Return the configured shared quota group for one route, if any.""" + + return str(getattr(provider, "transport", {}).get("quota_group", "") or "").strip() async def _refresh_local_worker_probes(force: bool = False) -> None: @@ -311,6 +399,10 @@ def _collect_anthropic_bridge_headers(request: Request) -> dict[str, str]: ) headers.setdefault("x-faigate-client", bridge_source) headers.setdefault("x-faigate-surface", "anthropic-messages") + for header_name in ("anthropic-version", "anthropic-beta", "user-agent"): + value = request.headers.get(header_name) + if value: + headers[header_name] = _sanitize_header_value(value, max_chars=max_chars) return headers @@ -1587,6 +1679,7 @@ async def _execute_chat_completion_body( ) errors: list[dict[str, Any]] = [] + blocked_quota_groups: dict[str, dict[str, str]] = {} for provider_name in attempt_order: provider = _providers.get(provider_name) @@ -1594,6 +1687,28 @@ async def _execute_chat_completion_body( continue if not provider.health.healthy and provider_name != attempt_order[0]: continue + quota_group = _provider_quota_group(provider) + quota_isolated = bool(getattr(provider, "transport", {}).get("quota_isolated", False)) + blocked_group = blocked_quota_groups.get(quota_group) if quota_group else None + if blocked_group and not quota_isolated: + logger.info( + "Skipping provider %s because shared quota group %s was blocked by %s (%s)", + provider_name, + quota_group, + blocked_group["provider"], + blocked_group["issue_type"], + ) + errors.append( + { + "provider": provider_name, + "status": 0, + "category": "shared-quota-skipped", + "shared_quota_group": quota_group, + "blocked_by": blocked_group["provider"], + "blocked_issue_type": blocked_group["issue_type"], + } + ) + continue try: result = await provider.complete( @@ -1663,7 +1778,28 @@ async def _execute_chat_completion_body( ) except ProviderError as e: _adaptive_state.record_failure(provider_name, error=e.detail[:500]) - errors.append(_serialize_provider_attempt_error(provider_name, e)) + classify_issue = getattr(provider, "classify_runtime_issue", None) + if callable(classify_issue): + issue_type = classify_issue(status=e.status, detail=e.detail) + else: + issue_type = classify_runtime_issue(status=e.status, detail=e.detail) + errors.append( + _serialize_provider_attempt_error( + provider_name, + e, + category_override=issue_type, + extra={"shared_quota_group": quota_group} if quota_group else None, + ) + ) + if ( + quota_group + and not quota_isolated + and issue_type in {"quota-exhausted", "rate-limited", "auth-invalid"} + ): + blocked_quota_groups[quota_group] = { + "provider": provider_name, + "issue_type": issue_type, + } logger.warning("Provider %s failed: %s, trying next...", provider_name, e.detail[:200]) if _config.metrics.get("enabled"): _metrics.log_request( @@ -1688,12 +1824,13 @@ async def _execute_chat_completion_body( ) continue + last_error = errors[-1] if errors else {} return _ChatExecutionFailure( - status_code=502, + status_code=int(last_error.get("status") or 502), body={ "error": { "message": "All providers failed", - "type": "provider_error", + "type": str(last_error.get("category") or "provider_error"), "attempts": errors, } }, @@ -3015,10 +3152,10 @@ async def anthropic_messages(request: Request): message = str( execution.body.get("error", {}).get("message", "Anthropic bridge request failed") ) - error_type = str(execution.body.get("error", {}).get("type", "api_error")) + raw_error_type = str(execution.body.get("error", {}).get("type", "api_error")) return _anthropic_error_response( message, - error_type=error_type, + error_type=_anthropic_error_type_for_status(execution.status_code, raw_error_type), status_code=execution.status_code, ) @@ -3045,6 +3182,16 @@ async def anthropic_messages(request: Request): response.headers["X-faigate-Hooks"] = ",".join(execution.hook_state.applied_hooks) response.headers["X-faigate-Hook-Errors"] = str(len(execution.hook_state.errors)) response.headers["x-faigate-trace-id"] = execution.trace_id or str(uuid.uuid4()) + for key, value in _anthropic_bridge_response_headers( + source=str(canonical_request.metadata.get("source") or "claude-code"), + requested_model=str( + canonical_request.metadata.get("requested_model_original") or wire_request.model + ), + resolved_model=str(canonical_request.requested_model or wire_request.model), + anthropic_version=str(headers.get("anthropic-version") or "") or None, + anthropic_beta=str(headers.get("anthropic-beta") or "") or None, + ).items(): + response.headers[key] = value return response @@ -3088,8 +3235,13 @@ async def anthropic_count_tokens(request: Request): error_type="invalid_request_error", status_code=400, ) - - return JSONResponse(asdict(result), headers=extra_headers) + bridge_headers = _anthropic_bridge_response_headers( + source=str(headers.get("x-faigate-client") or "claude-code"), + requested_model=str(body.get("model") or "unknown"), + anthropic_version=str(headers.get("anthropic-version") or "") or None, + anthropic_beta=str(headers.get("anthropic-beta") or "") or None, + ) + return JSONResponse(asdict(result), headers={**extra_headers, **bridge_headers}) # ── CLI entry point ──────────────────────────────────────────── diff --git a/faigate/providers.py b/faigate/providers.py index a104159..7adbaca 100644 --- a/faigate/providers.py +++ b/faigate/providers.py @@ -5,7 +5,7 @@ import logging import re import time -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field from typing import Any @@ -17,6 +17,38 @@ _UNRESOLVED_ENV_RE = re.compile(r"\$\{[^}]+}") +def classify_runtime_issue( + *, + status: int, + detail: str, + fallback: Callable[[str], tuple[str, str]] | None = None, +) -> str: + """Classify runtime failures without requiring a full ProviderBackend instance.""" + + lowered = str(detail or "").lower() + if status in {401, 403}: + return "auth-invalid" + if status == 429: + if any(token in lowered for token in ("quota", "insufficient_quota", "billing", "credit")): + return "quota-exhausted" + return "rate-limited" + if status == 404 and "model" in lowered: + return "model-unavailable" + if callable(fallback): + return str(fallback(detail)[0]) + if status == 0: + if "timeout" in lowered: + return "timeout" + if "connection error" in lowered: + return "connection_error" + return "transport_error" + if 400 <= status < 500: + return "upstream_client_error" + if status >= 500: + return "upstream_server_error" + return "degraded" + + @dataclass class ProviderHealth: """Tracks health state for a single provider.""" @@ -227,7 +259,10 @@ def request_readiness(self) -> dict[str, Any]: probe_strategy = probe_strategy.replace("-", "_") compatibility = str(self.transport.get("compatibility", "native") or "native") profile = str(self.transport.get("profile", "") or "") + billing_mode = str(self.transport.get("billing_mode", "") or "") probe_confidence = str(self.transport.get("probe_confidence", "medium") or "medium") + quota_group = str(self.transport.get("quota_group", "") or "") + quota_isolated = bool(self.transport.get("quota_isolated", False)) notes = list(self.transport.get("notes", []) or []) verified_via = self._last_probe_strategy or "" probe_payload = self._last_probe_payload or self._probe_payload_preview() @@ -241,7 +276,10 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": probe_confidence, + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via, @@ -256,7 +294,10 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": probe_confidence, + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via, @@ -273,7 +314,10 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": probe_confidence, + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via, @@ -288,7 +332,10 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": "high", + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via or probe_strategy, @@ -306,7 +353,10 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": probe_confidence, + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via, @@ -320,13 +370,25 @@ def request_readiness(self) -> dict[str, Any]: "probe_strategy": probe_strategy, "compatibility": compatibility, "profile": profile, + "billing_mode": billing_mode, "probe_confidence": probe_confidence, + "quota_group": quota_group, + "quota_isolated": quota_isolated, "notes": notes, "probe_payload": probe_payload, "verified_via": verified_via, "operator_hint": self._request_readiness_action(status), } + def classify_runtime_issue(self, *, status: int, detail: str) -> str: + """Classify one runtime failure into a readiness-style issue label.""" + + return classify_runtime_issue( + status=status, + detail=detail, + fallback=self._classify_request_readiness_issue, + ) + async def probe_health(self, timeout_seconds: float = 10.0) -> bool: """Probe a provider without sending a completion request. diff --git a/scripts/faigate-service-lib.sh b/scripts/faigate-service-lib.sh index 89f41da..523ca27 100644 --- a/scripts/faigate-service-lib.sh +++ b/scripts/faigate-service-lib.sh @@ -235,7 +235,15 @@ faigate_db_path() { env_db="$(faigate_env_value FAIGATE_DB_PATH 2>/dev/null || true)" if [ -n "$env_db" ]; then printf '%s\n' "$env_db" - elif [ "$(faigate_platform)" = "Darwin" ] && [ -n "${FAIGATE_MAC_CONFIG_DIR:-}" ]; then + else + local yaml_db + yaml_db="$(faigate_yaml_value metrics.db_path "")" + if [ -n "$yaml_db" ]; then + printf '%s\n' "$yaml_db" + return 0 + fi + fi + if [ "$(faigate_platform)" = "Darwin" ] && [ -n "${FAIGATE_MAC_CONFIG_DIR:-}" ]; then faigate_mac_db_path else printf '%s\n' "$(faigate_repo_root)/faigate.db" diff --git a/tests/test_anthropic_api.py b/tests/test_anthropic_api.py index ba7d2ed..d5f94a5 100644 --- a/tests/test_anthropic_api.py +++ b/tests/test_anthropic_api.py @@ -22,6 +22,7 @@ import faigate.main as main_module # noqa: E402 from faigate.config import load_config # noqa: E402 +from faigate.providers import ProviderError # noqa: E402 from faigate.router import Router # noqa: E402 importlib.reload(main_module) @@ -34,8 +35,8 @@ def _write_config(tmp_path: Path, body: str) -> Path: class _CapturingProviderStub: - def __init__(self): - self.name = "cloud-default" + def __init__(self, name: str = "cloud-default", *, transport: dict[str, object] | None = None): + self.name = name self.model = "chat-model" self.backend_type = "openai-compat" self.contract = "generic" @@ -45,6 +46,7 @@ def __init__(self): self.limits = {"max_input_tokens": 128000, "max_output_tokens": 4096} self.cache = {"mode": "none", "read_discount": False} self.image = {} + self.transport = dict(transport or {}) self.calls: list[dict[str, object]] = [] self.health = types.SimpleNamespace( healthy=True, @@ -77,7 +79,7 @@ async def complete(self, messages, **kwargs): } ], "usage": {"prompt_tokens": 12, "completion_tokens": 6, "total_tokens": 18}, - "_faigate": {"latency_ms": 12, "provider": "cloud-default"}, + "_faigate": {"latency_ms": 12, "provider": self.name}, } @@ -86,6 +88,34 @@ def log_request(self, **_kwargs): return None +class _FailingProviderStub(_CapturingProviderStub): + def __init__( + self, + name: str = "cloud-default", + *, + status: int = 429, + detail: str = "rate limited upstream", + transport: dict[str, object] | None = None, + ): + super().__init__(name=name, transport=transport) + self.status = status + self.detail = detail + + def classify_runtime_issue(self, *, status: int, detail: str) -> str: + lowered = detail.lower() + if status == 429 and "quota" in lowered: + return "quota-exhausted" + if status == 429: + return "rate-limited" + if status in {401, 403}: + return "auth-invalid" + return "degraded" + + async def complete(self, messages, **kwargs): + self.calls.append({"messages": messages, **kwargs}) + raise ProviderError(self.name, self.status, self.detail) + + @pytest.fixture def anthropic_api_client(tmp_path, monkeypatch): cfg = load_config( @@ -152,6 +182,9 @@ def test_anthropic_messages_returns_bridge_response(anthropic_api_client): assert body["content"][0]["text"] == "anthropic ok" assert provider.calls[0]["extra_body"]["metadata"]["source"] == "claude-code" assert provider.calls[0]["messages"][0] == {"role": "system", "content": "Use markdown"} + assert response.headers["x-faigate-bridge-surface"] == "anthropic-messages" + assert response.headers["x-faigate-bridge-source"] == "claude-code" + assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet" def test_anthropic_messages_applies_model_aliases(anthropic_api_client): @@ -174,6 +207,80 @@ def test_anthropic_messages_applies_model_aliases(anthropic_api_client): metadata = provider.calls[0]["extra_body"]["metadata"] assert metadata["requested_model_original"] == "claude-code-premium" assert metadata["requested_model_resolved"] == "premium" + assert response.headers["x-faigate-bridge-model-requested"] == "claude-code-premium" + assert response.headers["x-faigate-bridge-model-resolved"] == "premium" + + +def test_anthropic_messages_preserve_version_headers(anthropic_api_client): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + headers={ + "anthropic-client": "claude-desktop", + "anthropic-version": "2023-06-01", + "anthropic-beta": "tools-2024-04-04", + "user-agent": "Claude-Code/1.0", + }, + json={ + "model": "claude-sonnet", + "messages": [{"role": "user", "content": "hello"}], + }, + ) + + assert response.status_code == 200 + bridge_headers = provider.calls[0]["extra_body"]["metadata"]["bridge_headers"] + assert bridge_headers["anthropic-version"] == "2023-06-01" + assert bridge_headers["anthropic-beta"] == "tools-2024-04-04" + assert bridge_headers["user-agent"] == "Claude-Code/1.0" + assert response.headers["x-faigate-bridge-source"] == "claude-desktop" + assert response.headers["x-faigate-bridge-anthropic-version"] == "2023-06-01" + assert response.headers["x-faigate-bridge-anthropic-beta"] == "tools-2024-04-04" + + +def test_anthropic_messages_forward_tool_use_and_tool_result_blocks(anthropic_api_client): + client, provider = anthropic_api_client + + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", + "messages": [ + {"role": "user", "content": "Look up the design note"}, + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_lookup", + "name": "lookup_doc", + "input": {"id": "design-note"}, + } + ], + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_lookup", + "content": "Design note loaded", + } + ], + }, + ], + }, + ) + + assert response.status_code == 200 + forwarded_messages = provider.calls[0]["messages"] + assert forwarded_messages[1]["role"] == "assistant" + assert forwarded_messages[1]["tool_calls"][0]["function"]["name"] == "lookup_doc" + assert forwarded_messages[2] == { + "role": "tool", + "content": "Design note loaded", + "tool_call_id": "toolu_lookup", + } def test_anthropic_messages_rejects_non_text_blocks(anthropic_api_client): @@ -196,7 +303,7 @@ def test_anthropic_messages_rejects_non_text_blocks(anthropic_api_client): body = response.json() assert body["type"] == "error" assert body["error"]["type"] == "invalid_request_error" - assert "text content blocks" in body["error"]["message"] + assert "text and tool_result blocks" in body["error"]["message"] def test_anthropic_count_tokens_returns_estimate_with_headers(anthropic_api_client): @@ -224,6 +331,28 @@ def test_anthropic_count_tokens_returns_estimate_with_headers(anthropic_api_clie assert body["input_tokens"] > 0 assert response.headers["x-faigate-token-count-exact"] == "false" assert response.headers["x-faigate-token-count-method"] == "estimated-char-v1" + assert response.headers["x-faigate-bridge-surface"] == "anthropic-messages" + assert response.headers["x-faigate-bridge-model-requested"] == "claude-sonnet" + + +def test_anthropic_count_tokens_preserve_version_headers(anthropic_api_client): + client, _provider = anthropic_api_client + + response = client.post( + "/v1/messages/count_tokens", + headers={ + "anthropic-version": "2023-06-01", + "anthropic-beta": "tools-2024-04-04", + }, + json={ + "model": "claude-sonnet", + "messages": [{"role": "user", "content": "Count these tokens please"}], + }, + ) + + assert response.status_code == 200 + assert response.headers["x-faigate-bridge-anthropic-version"] == "2023-06-01" + assert response.headers["x-faigate-bridge-anthropic-beta"] == "tools-2024-04-04" def test_anthropic_count_tokens_rejects_invalid_payload(anthropic_api_client): @@ -298,3 +427,141 @@ async def _noop_lifespan(_app): body = response.json() assert body["type"] == "error" assert body["error"]["type"] == "not_found_error" + + +def test_anthropic_messages_maps_rate_limit_provider_errors(tmp_path, monkeypatch): + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + cloud-default: + backend: openai-compat + base_url: "https://api.example.com/v1" + api_key: "secret" + model: "chat-model" +anthropic_bridge: + enabled: true +fallback_chain: + - cloud-default +metrics: + enabled: false +""", + ) + ) + + @asynccontextmanager + async def _noop_lifespan(_app): + yield + + monkeypatch.setattr(main_module, "_config", cfg, raising=False) + monkeypatch.setattr(main_module, "_router", Router(cfg), raising=False) + monkeypatch.setattr( + main_module, + "_providers", + {"cloud-default": _FailingProviderStub()}, + raising=False, + ) + monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) + monkeypatch.setattr(main_module.app.router, "lifespan_context", _noop_lifespan, raising=False) + + with TestClient(main_module.app) as client: + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", + "messages": [{"role": "user", "content": "hello"}], + }, + ) + + assert response.status_code == 429 + body = response.json() + assert body["type"] == "error" + assert body["error"]["type"] == "rate_limit_error" + + +def test_anthropic_messages_skip_shared_quota_group_after_quota_failure(tmp_path, monkeypatch): + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + cloud-default: + backend: openai-compat + base_url: "https://api.example.com/v1" + api_key: "secret" + model: "chat-model" + transport: + quota_group: anthropic-main + kilo-mirror: + backend: openai-compat + base_url: "https://api.kilo.example/v1" + api_key: "secret" + model: "claude-sonnet" + transport: + quota_group: anthropic-main + local-worker: + backend: openai-compat + base_url: "http://127.0.0.1:11434/v1" + api_key: "local" + model: "llama3" +fallback_chain: + - kilo-mirror + - local-worker +anthropic_bridge: + enabled: true + model_aliases: + claude-sonnet: cloud-default +metrics: + enabled: false +""", + ) + ) + + @asynccontextmanager + async def _noop_lifespan(_app): + yield + + primary = _FailingProviderStub( + "cloud-default", + detail="insufficient_quota on upstream account", + transport={"quota_group": "anthropic-main"}, + ) + mirror = _CapturingProviderStub("kilo-mirror", transport={"quota_group": "anthropic-main"}) + local = _CapturingProviderStub("local-worker") + + monkeypatch.setattr(main_module, "_config", cfg, raising=False) + monkeypatch.setattr(main_module, "_router", Router(cfg), raising=False) + monkeypatch.setattr( + main_module, + "_providers", + { + "cloud-default": primary, + "kilo-mirror": mirror, + "local-worker": local, + }, + raising=False, + ) + monkeypatch.setattr(main_module, "_metrics", _MetricsStub(), raising=False) + monkeypatch.setattr(main_module.app.router, "lifespan_context", _noop_lifespan, raising=False) + + with TestClient(main_module.app) as client: + response = client.post( + "/v1/messages", + json={ + "model": "claude-sonnet", + "messages": [{"role": "user", "content": "hello"}], + }, + ) + + assert response.status_code == 200 + assert len(primary.calls) == 1 + assert mirror.calls == [] + assert len(local.calls) == 1 + assert response.headers["x-faigate-provider"] == "local-worker" diff --git a/tests/test_anthropic_bridge.py b/tests/test_anthropic_bridge.py index 08c324a..853b0aa 100644 --- a/tests/test_anthropic_bridge.py +++ b/tests/test_anthropic_bridge.py @@ -79,6 +79,54 @@ def test_anthropic_request_maps_to_canonical_and_openai_body(): assert openai_body["messages"][1]["content"] == "Explain the diff" +def test_anthropic_request_maps_tool_use_and_tool_result_blocks(): + wire_request = parse_anthropic_messages_request( + { + "model": "claude-sonnet", + "messages": [ + {"role": "user", "content": "Find the spec"}, + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_lookup", + "name": "lookup_doc", + "input": {"id": "spec-1"}, + } + ], + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_lookup", + "content": "Spec text", + } + ], + }, + ], + } + ) + + canonical = anthropic_request_to_canonical( + wire_request, + headers={"x-faigate-client": "claude-code"}, + ) + openai_body = canonical.to_openai_body() + + assert openai_body["messages"][0] == {"role": "user", "content": "Find the spec"} + assert openai_body["messages"][1]["role"] == "assistant" + assert openai_body["messages"][1]["tool_calls"][0]["id"] == "toolu_lookup" + assert openai_body["messages"][1]["tool_calls"][0]["function"]["name"] == "lookup_doc" + assert openai_body["messages"][2] == { + "role": "tool", + "content": "Spec text", + "tool_call_id": "toolu_lookup", + } + + def test_detached_router_runs_bridge_dispatch(): executor = _FakeExecutor() response = TestClient(_build_test_app(executor)).post( @@ -119,6 +167,44 @@ def test_canonical_response_maps_back_to_anthropic_blocks(): assert payload["metadata"]["provider"] == "kilo-opus" +def test_canonical_response_maps_tool_calls_to_tool_use_stop_reason(): + response = canonical_response_to_anthropic( + CanonicalChatResponse( + response_id="msg_tool", + model="anthropic/claude-sonnet-4.6", + provider="kilo-sonnet", + message=CanonicalResponseMessage( + content="", + tool_calls=[ + { + "id": "call_lookup", + "type": "function", + "function": { + "name": "lookup_doc", + "arguments": '{"id":"abc"}', + }, + } + ], + ), + stop_reason="tool_calls", + ), + requested_model="claude-sonnet", + ) + + payload = asdict(response) + assert payload["stop_reason"] == "tool_use" + assert payload["content"] == [ + { + "type": "tool_use", + "text": None, + "tool_use_id": "call_lookup", + "name": "lookup_doc", + "input": {"id": "abc"}, + "metadata": {}, + } + ] + + def _build_test_app(executor: _FakeExecutor) -> FastAPI: app = FastAPI() app.include_router(build_anthropic_router(executor=executor)) diff --git a/tests/test_config.py b/tests/test_config.py index b22069c..71a02fb 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -262,8 +262,42 @@ def test_provider_transport_metadata_is_normalized(tmp_path): ) assert cfg.providers["cloud-default"]["transport"]["probe_payload_text"] == "ping" assert cfg.providers["cloud-default"]["transport"]["probe_payload_max_tokens"] == 1 + assert cfg.providers["cloud-default"]["transport"]["billing_mode"] == "" assert cfg.providers["cloud-default"]["transport"]["models_path"] == "/models" assert cfg.providers["cloud-default"]["transport"]["chat_path"] == "/responses/chat" + assert cfg.providers["cloud-default"]["transport"]["quota_group"] == "" + assert cfg.providers["cloud-default"]["transport"]["quota_isolated"] is False + + +def test_provider_transport_quota_metadata_is_normalized(tmp_path): + path = tmp_path / "config.yaml" + path.write_text( + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + cloud-default: + backend: openai-compat + base_url: "https://api.example.com/v1" + api_key: "secret" + model: "chat-model" + transport: + billing_mode: byok + quota_group: anthropic-main + quota_isolated: true +fallback_chain: [] +metrics: + enabled: false +""", + encoding="utf-8", + ) + + cfg = load_config(path) + + assert cfg.providers["cloud-default"]["transport"]["billing_mode"] == "byok" + assert cfg.providers["cloud-default"]["transport"]["quota_group"] == "anthropic-main" + assert cfg.providers["cloud-default"]["transport"]["quota_isolated"] is True def test_client_profile_rejects_unknown_routing_mode(tmp_path): diff --git a/tests/test_menu_helpers.py b/tests/test_menu_helpers.py index 47ca769..6e898da 100644 --- a/tests/test_menu_helpers.py +++ b/tests/test_menu_helpers.py @@ -730,6 +730,37 @@ def test_faigate_service_lib_detects_homebrew_runtime_paths(tmp_path: Path): assert lines[3] == "brew services (launchd)" +def test_faigate_service_lib_prefers_metrics_db_path_from_config(tmp_path: Path): + config_file = tmp_path / "config.yaml" + db_path = tmp_path / "state" / "faigate.db" + config_file.write_text( + f""" +metrics: + db_path: "{db_path}" +""".strip(), + encoding="utf-8", + ) + + env = os.environ.copy() + env["FAIGATE_CONFIG_FILE"] = str(config_file) + env["FAIGATE_PYTHON"] = sys.executable + + result = subprocess.run( + [ + "bash", + "-lc", + "source scripts/faigate-service-lib.sh && faigate_db_path", + ], + cwd=REPO_ROOT, + env=env, + capture_output=True, + text=True, + check=True, + ) + + assert result.stdout.strip() == str(db_path) + + def test_faigate_client_integrations_json_filters_one_client(tmp_path: Path): config_file = tmp_path / "config.yaml" config_file.write_text( diff --git a/tests/test_providers.py b/tests/test_providers.py index bf8584a..2cc5d75 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -232,6 +232,44 @@ async def test_aggregator_request_readiness_reports_compatibility_profile(self): assert readiness["probe_confidence"] == "medium" assert "kilo-chat-minimal" in readiness["probe_payload"] + @pytest.mark.asyncio + async def test_request_readiness_surfaces_quota_group_metadata(self): + backend = ProviderBackend( + "kilo-sonnet", + { + "backend": "openai-compat", + "base_url": "https://api.kilo.example/v1", + "api_key": "secret", + "model": "claude-sonnet", + "transport": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "billing_mode": "byok", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "image_generation_path": "/images/generations", + "image_edit_path": "/images/edits", + "requires_api_key": True, + "supports_models_probe": False, + "quota_group": "anthropic-main", + "quota_isolated": False, + "notes": ["aggregator route may share Anthropic quota through BYOK"], + }, + }, + ) + + readiness = backend.request_readiness() + + assert readiness["billing_mode"] == "byok" + assert readiness["quota_group"] == "anthropic-main" + assert readiness["quota_isolated"] is False + @pytest.mark.asyncio async def test_chat_probe_marks_provider_ready_verified(self): backend = ProviderBackend(