From c5d2f48d504a1d6c10171eea3f195acbcbc6367d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Lange?= Date: Tue, 31 Mar 2026 23:34:50 +0200 Subject: [PATCH] feat(catalog): support shared provider metadata --- docs/FAIGATE-ROADMAP.md | 1098 +++-------------- docs/FUSIONAIZE-SHARED-METADATA.md | 178 +++ docs/IMPLEMENTATION-PLAN.md | 137 ++ .../fusionaize-metadata-repo/README.md | 49 + .../products/gate/overlays.v1.json | 80 ++ .../providers/catalog.v1.json | 28 + .../providers/sources.v1.json | 11 + .../schemas/provider-catalog.v1.schema.json | 25 + .../provider-catalog.snapshot.v1.json | 28 + faigate/provider_catalog.py | 130 +- scripts/faigate-provider-metadata-sync | 93 ++ scripts/faigate-restart | 6 + scripts/faigate-service-lib.sh | 58 + scripts/faigate-update | 12 + tests/test_provider_catalog.py | 196 +++ 15 files changed, 1185 insertions(+), 944 deletions(-) create mode 100644 docs/FUSIONAIZE-SHARED-METADATA.md create mode 100644 docs/examples/fusionaize-metadata-repo/README.md create mode 100644 docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json create mode 100644 docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json create mode 100644 docs/examples/fusionaize-metadata-repo/providers/sources.v1.json create mode 100644 docs/examples/fusionaize-metadata-repo/schemas/provider-catalog.v1.schema.json create mode 100644 docs/examples/provider-catalog.snapshot.v1.json create mode 100755 scripts/faigate-provider-metadata-sync diff --git a/docs/FAIGATE-ROADMAP.md b/docs/FAIGATE-ROADMAP.md index f3076f1..3b2c256 100644 --- a/docs/FAIGATE-ROADMAP.md +++ b/docs/FAIGATE-ROADMAP.md @@ -2,1017 +2,235 @@ ## Status -`v1.13.0` is shipped. +`v1.14.1` is shipped. -The current product shape is now clear: +Gate is no longer just a routing core with helper scripts around it. 
The +current product baseline is now clear: -- one local gateway -- one OpenAI-compatible surface plus an optional Anthropic-compatible bridge +- one local gateway runtime +- one OpenAI-compatible surface +- one optional Anthropic-compatible bridge - direct providers, aggregators, and local workers under one routing core -- operator-facing health, probe, catalog, and release tooling +- an operator shell made up of dashboard, doctor, catalog, probe, and guided setup -The roadmap should now stay disciplined. The next releases should deepen routing trust and Claude-native compatibility, not sprawl into a second platform. +The roadmap should now stay disciplined. The next release lines should deepen +operator trust, routing explainability, and daily-use client confidence instead +of expanding sideways into a second platform. -## Current Product Baseline +## Architecture Readout -Already in place: +The refreshed `Understand-Anything` pass confirms four high-value themes: -- canonical lane-aware routing foundations -- route-aware handling for direct, aggregator, wallet-router, and local-worker paths -- client profiles, routing modes, and request hooks -- provider-source catalog mirroring and local route visibility overlays -- quota-group-aware fallback guardrails for Anthropic-shaped traffic -- optional Anthropic bridge with `/v1/messages` and `/v1/messages/count_tokens` -- shell-native operator surfaces: doctor, provider probe, dashboard, quick setup +1. the gateway core is still healthy and understandable +2. the operator surface is now a first-class product surface +3. the Anthropic bridge is part of the real runtime contract +4. the next trust gap is metadata truth, not raw routing breadth -This means the roadmap no longer needs to ask whether Gate should become a multi-provider local gateway. It already is one. 
+The practical implication is simple: -## Release Direction +- Gate does not need a bigger feature list first +- Gate needs clearer truth about cost, freshness, route choice, and operator controls -## Parity Targets - -The roadmap treats three parity goals as distinct targets, not one fuzzy promise. - -### Full Anthropic parity - -Meaning: - -- protocol-level parity for the Anthropic-compatible surface -- clients that speak Anthropic `messages` should not need special-case awareness of Gate - -Includes: - -- `POST /v1/messages` request and response shape -- SSE streaming parity -- content-block parity beyond basic text and tool flow -- header, version, and beta compatibility -- compatible error envelopes and stop reasons -- trustworthy token counting behavior - -### Full Claude Code parity - -Meaning: - -- Claude Code should be comfortable to use against local Gate in real daily coding workflows - -Includes: - -- iterative coding sessions -- streaming and tool-oriented flows -- stable aliasing and route continuity -- fallback behavior that does not break the working session unnecessarily -- enough protocol parity that Claude Code does not feel like it is on a fragile compatibility layer - -### Full Claude Desktop parity - -Meaning: - -- Claude Desktop should be a viable local daily-use client against Gate where endpoint override is supported - -Includes: - -- stable local endpoint configuration -- good session behavior for the feature set Claude Desktop actually uses -- no recurring “almost compatible but annoying in practice” gaps - -Strategically, this matters beyond personal convenience. If Gate can serve Claude Desktop cleanly, it proves the local Claude-native gateway story much more strongly than API compatibility alone. - -## Current release target: `v1.14.x` - -`v1.13.0` shipped the Anthropic bridge as an opt-in early-adopter line. - -The next release should not chase more protocol breadth first. 
It should make -the existing gateway meaningfully cheaper and more trustworthy for daily coding -traffic across Claude Code, opencode, openclaw, and similar clients. - -### `v1.14.x`: coding auto modes plus Claude-native daily-use hardening - -This is the highest-leverage next line. - -Primary goals: - -- make the cheapest capable route the default for coding traffic instead of - burning Sonnet or Opus too early -- align client profiles and named routing modes around the same routing intent -- make the Anthropic bridge comfortable for real Claude Code workflows -- close the highest-value Anthropic protocol gaps -- prepare the bridge for a serious Claude Desktop parity track immediately after -- close the biggest protocol-parity gaps before expanding scope again -- keep the bridge opt-in and explicit while improving day-to-day reliability - -Expected slices: - -1. map Claude-native ids to routing intent instead of direct frontier providers - - `claude-sonnet-* -> auto` - - `claude-opus-* -> premium` - - `claude-haiku-* -> eco` -2. add and align coding routing modes - - `coding-auto` - - `coding-fast` - - `coding-premium` -3. stronger client defaults for - - `claude` - - `opencode` - - `openclaw` - - `codex` -4. SSE streaming parity for `/v1/messages` -5. fuller Anthropic block compatibility beyond the current text plus basic tool flow -6. stronger Claude-client validation fixtures and operator troubleshooting -7. sharper error and stop-reason compatibility - -Non-goals: - -- exact provider-side token counting for every backend -- "full parity" marketing language before live client coverage proves it -- hosted or multi-user control-plane features - -### `v1.15.x`: Claude Desktop parity or adaptive orchestration trust - -This should be chosen by evidence after `v1.14.x`, not by preference. - -If Claude Desktop local usage proves to be the next real operator lever, take the desktop-parity line first. Otherwise, take the routing-value line first. 
- -#### Option A: Claude Desktop parity - -Primary goals: - -- make Claude Desktop a genuinely usable local client against Gate -- validate supported local endpoint-override paths -- remove recurring desktop-specific compatibility friction - -Expected slices: - -1. endpoint-override and config-path validation for supported desktop flows -2. desktop-specific session and response compatibility hardening -3. clearer local testing and troubleshooting instructions -4. release-readiness validation for desktop workflows - -The current feasibility gate for this option is tracked in [Claude Desktop feasibility](./CLAUDE-DESKTOP-FEASIBILITY.md). - -#### Option B: adaptive orchestration trust - -Primary goals: - -- make canonical lanes more visible and more legible to operators -- tighten route-aware aggregator handling under real quota and latency pressure -- make benchmark and cost assumptions auditable and fresh enough to trust -- explain every meaningful routing decision in operator-facing terms - -Expected slices: - -1. canonical lane cards and route-family summaries in operator surfaces -2. route-aware aggregator handling with clearer quota isolation and mirror semantics -3. benchmark and cost clusters that are structured, reviewable, and freshness-aware -4. operator explainability for lane choice, same-lane fallback, and cluster downgrade - -### `v1.16.x`: remaining parity or live adaptation under pressure - -Only pursue the live-adaptation line once the decision model is trustworthy and the most valuable Claude-native parity gaps are no longer the dominant operator pain. - -Primary goals: - -- adapt route choice under quota, latency, and failure pressure -- keep same-lane substitutions ahead of weaker-cluster downgrades -- make live routing pressure visible enough that operators can trust it - -Expected slices: - -1. live route pressure and cooldown scoring -2. family- and lane-level adaptation signals -3. 
fallback pressure reporting in dashboard, route preview, and traces -4. conservative operator controls for adaptation posture - -## Challenge The Backlog - -This section is the reality check: what is already there, what is partially there, and what is actually worth the next release slots. - -| Theme | Current reality | Biggest gap | Recommendation | -| --- | --- | --- | --- | -| Canonical model lanes | Already present in routing foundations and catalog surfaces | Too hidden in operator UX; not yet the default mental model | Double down in `v1.15.x`, not later | -| Route-aware aggregator handling | Partly present: route types, Kilo lanes, BLACKBOX handling, quota groups | Mirror semantics and quota isolation are still too implicit | Make this a first-class `v1.15.x` line | -| Benchmark and cost clusters | Present as curated metadata, but still coarse | Freshness, explainability, and structured ranking are not strong enough yet | Build reviewable cluster metadata in `v1.15.x` | -| Live adaptation under quota, latency, and failure pressure | Early adaptation exists, but still conservative | Needs stronger operator trust and clearer lane/route semantics first | Keep for `v1.16.x` after orchestration trust work | -| Operator explainability for major routing decisions | Partly present in traces and previews | Still not compact or decisive enough for day-2 operations | Make this a headline outcome of `v1.15.x` | -| Full Anthropic parity | Not there | Streaming and deeper block coverage are missing | Treat as staged protocol parity, beginning in `v1.14.x` | -| Full Claude Code parity | Not there | Real client workflow coverage is still partial | Focus `v1.14.x` on practical daily-use parity | -| Full Claude Desktop parity | Not there | Desktop-specific override paths and real workflow validation are still thin | Make this an explicit follow-on track right after `v1.14.x` | -| SSE streaming parity | Not shipped for the bridge yet | Missing bridge streaming path | 
Highest-priority bridge gap for `v1.14.x` | -| Exact provider-side token counting | Not shipped | Needs backend-aware counting per route or provider API support | Useful, but not a blocker for `v1.14.x`; likely `v1.15.x` or later | - -## What Should Not Drive The Next Releases - -Some ideas are valid, but they are not the best next lever. - -### Full parity as a release slogan - -Avoid release lines built around vague parity claims. - -Better framing: - -- `v1.14.x`: Anthropic protocol hardening plus Claude Code daily-use parity -- `v1.15.x`: adaptive orchestration trust -- `v1.16.x`: Claude Desktop parity or live adaptation under pressure, depending on validated client demand - -### Exact token counting before streaming parity - -Exact provider-side token counting is valuable, but it is not the next operational blocker. Streaming parity and workflow continuity matter more first. - -### Semantic caching before exact caching or usage evidence - -Semantic caching is expensive, operationally heavier, and easy to romanticize. It should remain explicitly later than: +## Product Direction -- exact request/response caching -- virtual keys and budgets -- a stable multi-instance contract -- observed workload evidence that semantic similarity would actually pay off - -## `v2.x`: budgets, coordination, and higher-cost intelligence - -These are valid directions, but they should be sequenced honestly. - -### Team and org budget hierarchy - -Worth doing only after a solid virtual-key layer exists. - -Recommendation: - -- first ship per-key budget controls and spend ledgers -- then extend to `user -> team -> org` - -This is not the next highest-leverage line for the current product. It is a later operator-scale feature. - -### Multi-instance shared state via Grid - -Still the right product boundary. 
- -Recommendation: - -- keep Gate single-instance-friendly -- define a clean shared-state contract that Grid can consume later -- do not pull Redis/Postgres complexity into Gate just to fake clustering - -This should remain a `v2.x` line and should follow virtual keys, not precede them. - -### Semantic caching - -Still a late bet. - -Recommendation: - -- do exact caching first -- measure hit patterns -- only build semantic caching when prompt homogeneity is proven and the vector-store cost is justified - -### OTEL-compatible trace context - -This one is different: it is lower risk and more operator-useful than semantic caching. - -Recommendation: - -- move OTEL trace-context forwarding forward in priority -- it can plausibly land before other `v2.x` ideas - -If one item from the old `v2.x` bucket should move earlier, it is OTEL glue, not semantic caching. - -## Competitive Positioning: Where To Double Down - -The strongest differentiators to compound are: - -1. canonical lane abstraction -2. route-aware transport handling across direct, aggregator, and local paths -3. local-first operator control -4. explainable routing and fallback decisions -5. hybrid cloud-plus-local execution +Gate remains gateway-first. That means: -- do not copy hosted-router black-box behavior -- do not turn Gate into a distributed platform runtime -- do not bury routing logic in one-off client adapters - -The right move is to make Gate more legible, more adaptive, and more trustworthy as a gateway. 
- -## Historical Baseline - -Recent shipped lines, newest first: - -- `v1.13.0`: optional Anthropic bridge and Claude-oriented routing hints -- `v1.12.0`: live provider-source catalog surfaces, Kilo lane clarity, release automation hardening -- `v1.8.0` to `v1.11.x`: canonical lane foundations, route-aware scoring, signal-group expansion, and operator explainability groundwork - -Detailed design notes for the orchestration track still live in [Adaptive model orchestration](./ADAPTIVE-ORCHESTRATION.md). - -<<<<<<< HEAD -The next concrete execution line is tracked in [Implementation plan](./IMPLEMENTATION-PLAN.md). -======= -ClawRouter's transport binding model (`direct`, `wallet-router`, `aggregator`) is well-designed and faigate should adopt its vocabulary in `lane_registry.py` — this is already partly done (`route_type: direct / aggregator / wallet-router`). The area where faigate leads is the full provider-intelligence layer: ClawRouter does not model benchmark clusters, cache semantics, or per-client signal scoring. - -What faigate can learn from ClawRouter: deeper agent-native transport contracts, richer `x-openclaw-*` header semantics for multi-agent delegation flows. - -### Product surface priorities from LLM AIRouter and ClawRouter - -ClawRouter is strongest at framing the routing promise clearly: cheapest capable -model, explicit policies, and a legible routing pipeline. - -LLM AIRouter is strongest at framing the operating surface clearly: overview, -providers, analytics, stacks, routes, request log, provider limits, CLI tools, -and settings in one coherent dashboard story. 
- -The product goal for Gate is to combine both advantages without inheriting their -hosted-first or wallet-first assumptions: - -- local-first and operator-owned by default -- agent-native, not just app-dashboard-native -- one runtime that works for Claude Code, opencode, openclaw, n8n, curl, and custom apps -- explicit route intelligence, not black-box “AI chooses for you” marketing - -That means the next product-surface slices should be: - -1. overview dashboard that makes provider health, spend, lane families, and recent routing visible in one glance -2. providers view that exposes route type, quota domain, billing mode, lane family, and current readiness -3. analytics view that ties cost, token usage, and routing posture back to concrete clients and stacks -4. stacks view for named route bundles such as coding-default, coding-premium, local-only, or Claude-safe mirrors -5. routes and request-log views that explain why one route won and why cheaper alternatives lost -6. CLI and helper-tool surface as a first-class product feature, not a fallback for when the dashboard is missing something - -That should now be read more explicitly through operator jobs: - -1. `Overview` - - "is Gate safe and request-ready right now?" -2. `Providers` - - "which routes are usable, degraded, stale, or quota-coupled?" -3. `Clients` - - "which tools are expensive, slow, or misprofiled?" -4. `Routes` - - "why did Gate choose this lane and route?" -5. `Analytics` - - "where is the spend and fallback pressure?" -6. `Request Log` - - "what just happened?" -7. `Catalog` - - "are my provider assumptions still fresh enough to trust?" -8. `Integrations` - - "how do I wire Claude Code, opencode, openclaw, Codex, automation clients, and custom apps quickly?" -9. `Troubleshooting` - - "what is the shortest path from symptom to fix?" 
- -### Licensing and product-boundary read on those surfaces - -These surface expansions should follow the existing fusionAIze stack boundary: - -**Tier A — Apache 2.0 core** - -- local dashboard views over Gate's own runtime state -- provider inventory, lane metadata, route readiness, and request traces -- stack definitions and route explainability -- helper CLIs and exportable local reports - -**Tier B — source-available operator packs** - -- advanced alerts, saved routing policies, and heavier analytics overlays -- longer retention, richer usage forensics, and external callback packs -- team-aware budget controls and higher-level stack templates - -**Tier C — commercial control plane** - -- multi-instance shared state -- hosted or managed control-plane views -- org RBAC, audit trails, and enterprise governance overlays -- Grid/OS coordination features that should not bloat the local Gate runtime - ---- - -## `v1.8.0` to `v1.11.x`: adaptive model orchestration (original sequence for reference) - -Primary goals: - -- treat providers, aggregators, and direct routes as execution paths to canonical model lanes rather than as one flat list of alternatives -- let scenarios such as `quality`, `balanced`, `eco`, and `free` choose the right lane threshold and degradation path instead of only choosing a provider tier -- preserve same-lane quality when direct quota is exhausted by trying equivalent aggregator routes before dropping to a weaker model cluster -- keep benchmark and cost assumptions visible, curated, and refreshable so "magical" routing still stays explainable - -Release sequence: - -1. `v1.8.0` ✅ lane registry, provider lane metadata, and route-aware catalog surfaces -2. `v1.9.0` ✅ lane-aware router scoring and "why this lane?" traces -3. `v1.9.1` ✅ routing bug fixes, signal group expansion, mode-override hook -4. `v1.9.2`: pre-failure RPM/TPM headroom, trace-id header -5. 
`v1.10.x`: provider intelligence layer (capability tags, benchmark ranks, cache TTL, TTFT, pricing freshness) -6. `v1.11.x`: virtual key layer, gateway-level response caching, webhook observability, guardrail hooks -7. `v2.x`: team/org budget hierarchy, multi-instance Grid coordination, semantic caching, OTEL - -Non-negotiable guardrails: - -- never hide a downgrade from operators -- prefer same-lane route substitution before weaker-model degradation -- keep old configs compatible while lane metadata is introduced -- treat benchmarks and cost heuristics as curated operational inputs, not as magic constants - -## `v1.5.0`: guided control-center UX - -Primary goals: - -- make the standalone Gate shell feel like the first serious product surface instead of a loose set of helper scripts -- introduce one obvious happy path for first setup, validation, restart, and client connection -- replace raw JSON-first operator views with compact human summaries plus drill-downs where needed -- keep the Gate UX aligned with the later Grid orchestration direction so the products feel like one family - -Recommended minimal slices: - -1. `Quick Setup` happy path inside `faigate-menu` -2. compact summary cards for gateway, config, providers, and clients in the main operational menus -3. shorter, recommendation-first client quickstarts with per-client drilldown instead of long first-contact dumps -4. 
explicit next-step receipts after wizard, validation, restart, and client-setup actions - -Guardrails: - -- keep the shell UX scriptable and helper-driven; do not turn `faigate-menu` into a full-screen TUI yet -- prefer compact default output plus optional detail/raw views over large payload dumps -- keep wording calm and operational, especially when health, service-manager state, and bound port state disagree - -Post-`1.5.0` UX items already worth bookmarking: - -- readiness score and richer setup progress scoring -- port/runtime conflict auto-detection with one-step recovery suggestions -- client route previews that show where a given client would land right now -- richer action receipts and broader `what to do next` guidance -- more compact client cards before the long quickstart text - -## Licensing strategy - -The fusionAIze stack uses a three-tier open-core model. The tier boundaries are defined here before the features exist so there are no retroactive surprises for the community. - -**Non-negotiable rule**: a feature that ships as Tier A will never be moved to Tier B or Tier C. Only newly-built features can be Tier B or Tier C from day one. +- request routing stays the product center +- provider contracts stay explicit +- operator visibility stays close to the runtime +- shell, dashboard, and config must describe the same system -This is the lesson from LiteLLM's BSL transition: moving the proxy from Apache 2.0 to BSL 1.1 after the community had adopted it created lasting distrust and reputational damage. faigate will not repeat that mistake. 
+It does **not** mean: -### Tier A — Apache 2.0 (permanent) +- turning Gate into a generic agent platform +- hiding routing logic behind opaque UI magic +- introducing hosted-only assumptions into a local-first product -The full local gateway runtime, as it exists and as it will continue to evolve through routine improvements: - -- baseline gateway core: routing engine, heuristic rules, hook pipeline, fallback chains -- all provider adapters: direct, aggregator, wallet-router -- all built-in request hooks: locality, prefer-provider, profile-override, mode-override -- client profile system and opencode / openclaw / n8n / cli profiles -- config schema and YAML format -- SQLite metrics store and trace recording -- operator dashboard (read-only) -- `/api/route`, `/api/stats`, `/api/traces`, `/api/providers` endpoints -- all helper scripts: `faigate-menu`, `faigate-doctor`, `faigate-status`, `faigate-update`, etc. -- Homebrew formula and packaging -- everything shipped through v1.9.x and all future routine routing improvements - -### Tier B — Source-available (open-core) - -Features built for operators who run faigate at team or production scale. 
Defined as Tier B before they are built: - -- virtual key layer (`max_budget`, `budget_duration`, `rpm_limit`, `allowed_models`, key lifecycle) -- per-key budget enforcement and spend ledger -- webhook / callback observability output to external sinks (Langfuse, Helicone, Datadog) -- advanced guardrail hook implementations (PII detection via Presidio, prompt injection via Lakera) -- named routing strategy weight presets as a commercial operator convenience -- gateway-level response caching with Redis backend -- team and org budget hierarchy - -### Tier C — Proprietary / commercial (fusionAIze OS) - -Control-plane features that belong with the broader fusionAIze stack, not with the local gateway runtime: - -- managed control plane (fusionAIze Grid / OS) -- SSO / SAML / OIDC authentication for the operator UI -- RBAC and audit logs for team and org management -- multi-instance shared state and distributed rate-limit coordination (Grid) -- enterprise SLAs and priority support - -### Product stack and tier mapping - -| Product | Role | Tier | -|---|---|---| -| **Gate** | Local-first routing runtime | A core + selective B | -| **Lens** | Observability and spend analytics consuming Gate `/api/stats`, `/api/traces`, webhook events | B–C | -| **Grid** | Multi-instance coordination: distributed rate limits, shared virtual key registry, cross-instance cache | C | -| **OS** | SSO, RBAC, audit logs, team management — LiteLLM Enterprise's territory | C | -| **Fabric** | Content policy and guardrail enforcement via Gate's hook seam | B–C | - -## `v1.3.0`: guided setup and catalog-assisted updates - -Primary goals: - -- make first setup and later provider updates realistic without turning `config.yaml` into hand-edited drift bait -- keep routing modes, client defaults, and provider selection understandable across many clients -- improve provider-catalog freshness and update suggestions without silently rewriting operator intent -- start the provider-discovery and 
recommendation-link line only in a transparency-first, metadata-first shape - -Recommended minimal slices: - -1. wizard candidate selection, update suggestions, dry-run summaries, and backup-aware writes -2. provider-catalog source metadata, offer-track volatility flags, and freshness alerts -3. wizard and CLI usage polish so the guided flow is self-explanatory from `--help` -4. optional provider recommendation-link metadata with explicit disclosure, but still no ranking changes based on provider-link metadata - -Guardrails for any recommendation-link work in this line: - -- recommendation ranking must never use provider-link metadata as an input and must stay performance-led, preferring fit, quality, health, capability, and cost behavior -- provider-link metadata should stay operator-owned and secret-backed, not embedded in user-editable client configs -- docs and CLI output should disclose clearly when a shown signup link is informational only -- the first slice should be metadata and display only; managed short links, browser control-center surfaces, and richer landing-page flows can come later - -## `v1.2.0`: workstation operations baseline - -Primary goals: - -- add a dedicated workstation operations guide -- document macOS `launchd` as a first-class local-runtime path -- document Windows Task Scheduler / PowerShell as the baseline Windows path -- keep development checkouts and runtime installs clearly separated -- add a project-owned Homebrew packaging path for macOS workstations - -Recommended minimal slices: - -1. workstation baseline docs and path layout -2. macOS `launchd` example and instructions -3. Windows startup examples and documentation -4. optional lightweight install helpers only if the docs prove insufficient -5. Homebrew formula and `brew services` guidance for the packaged macOS path - -## Post-1.0 direction - -The first post-`1.0` block should stay narrow enough to ship as `v1.1.0`. 
- -Primary goals: - -- double-check and extend AI-native client support beyond the current OpenClaw, n8n, and CLI baseline -- ship the next wave of integration starters for requested and high-signal agent frameworks -- expose more useful per-client token and usage metrics in the operator surface -- audit the routing-stage stack so the responsibility of each layer stays clear -- keep a structured watch on ClawRouter-style product evolution without copying features blindly - -The current framework prioritization lives in [AI-NATIVE-MATRIX.md](./AI-NATIVE-MATRIX.md). - -## Big Picture - -The opportunity is not to build another thin router. - -The opportunity is to build a reusable AI gateway plane that works across: - -- local model workers -- direct provider APIs -- proxy providers -- OpenClaw -- workflow systems such as n8n -- CLI-native development environments -- agent tools -- future AI-native SaaS products - -If the core stays disciplined, fusionAIze Gate can become the common routing and policy layer shared by several products without collapsing into a bloated platform. - -That is the target shape: - -- one gateway core -- many providers -- many clients -- optional context and optimization layers -- clear operational boundaries - -## Design principles - -### 1. Gateway first - -fusionAIze Gate should stay a gateway and control plane, not a monolithic platform. - -### 2. Standard protocols first - -If a client can use the OpenAI-compatible API cleanly, keep it on that path before building a custom adapter. - -### 3. Multi-dimensional routing - -The design target is to exceed simpler router behavior by making routing explicitly multi-dimensional. 
- -That means fusionAIze Gate should increasingly consider: - -- capability support -- health and latency -- cost tier -- local vs cloud locality -- context window size -- cache behavior and cache pricing -- tool usage -- client identity -- modality requirements -- compliance or tenancy constraints - -The intent is not to claim that this is fully implemented today. The intent is to make this the guiding routing architecture. - -### 4. Optional extension layers - -Context, memory, optimization, and sidecar adapters should plug into the gateway cleanly, not become mandatory core behavior. - -## Current runtime baseline - -Today the runtime already supports: - -- one OpenAI-compatible endpoint -- multiple providers behind a single local base URL -- policy, static, heuristic, client-profile, and optional LLM-assisted routing stages -- direct model pinning and fallback chains -- local worker contracts and health probes -- route introspection and traces -- client-aware routing defaults for OpenClaw, n8n, and CLI callers - -The next runtime gap to close is not “more core abstraction”. It is “more real clients with less glue”. - -## `v1.1.0`: AI-native client expansion and operator visibility - -Primary goals: - -- add the first post-`1.0` starter wave for requested and high-signal AI-native clients -- add a curated framework matrix so external users can quickly see where fusionAIze Gate fits -- deepen client and token reporting in API and dashboard surfaces -- review policy, static, heuristic, hook, client-profile, and classifier boundaries with clearer ownership and tests - -Recommended minimal slices: - -1. AI-native client matrix plus roadmap update -2. first-wave starter templates for `SWE-AF`, `paperclip`, `ship-faster`, and the highest-fit external frameworks -3. per-client token and usage reporting in stats and dashboard views -4. 
routing-layer review plus targeted rule/test cleanup - -The plugin question should stay explicitly out of scope for `v1.1.0` and be revisited only after this release line lands. - -## OpenClaw direction - -OpenClaw remains a first-class integration surface. - -Current coverage: - -- one-agent traffic through the normal OpenAI-compatible path -- many-agent or delegated traffic through the same path with `x-openclaw-source` -- OpenClaw-side model aliases and profile defaults - -Near-term direction: - -- document one-agent and many-agent behavior explicitly -- keep the integration header-based and OpenAI-compatible -- avoid forking the core gateway logic just for OpenClaw - -## Modality expansion - -Inspired by the value of image-router patterns in other gateways, fusionAIze Gate should eventually support modality-aware routing beyond chat. - -Planned direction: - -- add a provider contract for image-generation-capable backends -- add modality-aware request classification -- route image tasks to the right backend without polluting the chat path - -This is a roadmap item, not a current runtime claim. - -## Architecture direction - -### Gateway core - -Responsibilities: - -- request normalization -- route selection -- fallback handling -- timeout boundaries -- usage and trace recording -- operational endpoints - -### Provider layer - -Responsibilities: - -- cloud providers -- OpenAI-compatible proxies -- local workers -- future modality-specific providers - -### Client layer - -Responsibilities: - -- OpenClaw -- n8n and workflow clients -- CLI wrappers and proxy clients -- future AI-native app integrations - -### Optional extension layer - -Responsibilities: - -- request hooks -- context or memory enrichment -- optimization hooks -- policy overlays - -## Release path to v1.0.0 - -`v0.3.0` is the first public fusionAIze Gate release. The path to `v1.0.0` should stay incremental and reviewable. 
- -### `v0.4.x`: deeper routing and extension hardening - -Primary goals: - -- deepen multi-dimensional scoring beyond simple fit checks for cache behavior, context windows, provider limits, locality, latency, and recent failures -- keep refining the simple dashboard around traces, provider/client breakdowns, route visibility, and safe operator ergonomics -- keep OpenClaw one-agent and many-agent flows on the same OpenAI-compatible path with clearer defaults -- harden the request hook seam for context, memory, and optimization layers, including fail-closed behavior and input sanitization - -This release line should deepen the gateway core without turning it into a monolith. - -### `v0.5.0`: operator distribution baseline - -Primary goals: - -- add the first modality-aware provider contract, starting with image generation -- publish an official Docker release path -- publish fusionAIze Gate to PyPI -- add provider and client onboarding helpers for many-provider and many-client deployments -- add a publish dry-run path for Python package and GHCR validation before real release tags -- add validation workflows so operators can catch config mistakes before rollout -- complete the public community-health baseline and security-overview baseline for the repo - -This is the first release line where installation and upgrade paths should feel productized for external users. - -### `v0.6.x`: modality expansion - -Primary goals: - -- add modality-aware provider contracts, starting with image generation -- extend that contract toward image editing where the provider surface supports it -- keep chat and image paths explicit instead of mixing modality-specific behavior into one opaque route -- expose modality-aware health, provider inventory, and routing visibility in the dashboard and operational endpoints - -This should borrow the useful parts of image-router patterns without copying another gateway's product shape. 
- -### `v0.7.x`: operations polish - -Primary goals: - -- expand the release-check baseline into stronger update alerts so operators can see when a newer release is available -- add an optional automatic update enabler for controlled deployments -- improve route traces, metrics, and dashboard filters for providers, clients, and profiles -- keep the dashboard simple, read-heavy, and operationally safe - -This release line is about day-2 operations rather than new routing concepts. - -The first small slice in this line is to turn `GET /api/update` from a plain boolean check into an operator-facing alert surface with update type, alert level, and recommended action. - -The next small slice is to keep auto-update conservative: - -- disabled by default -- no checkout mutation over HTTP -- helper-driven and operator-triggered only -- major upgrades still manual unless explicitly allowed - -### `v0.8.x`: many-provider and many-client onboarding - -Primary goals: - -- make onboarding repeatable for many providers and many clients on one gateway -- ship clearer presets and validation for OpenClaw, n8n, CLI wrappers, and future AI-native applications -- reduce manual config editing for common deployment shapes -- tighten integration coverage for delegated or many-agent traffic where headers identify sub-clients - -The target is faster adoption without custom glue for every client. 
- -Current `v0.8.x` baseline already includes: - -- onboarding report plus validation helpers -- staged provider rollout reporting -- client matrix reporting -- starter templates for OpenClaw, n8n, CLI, cloud providers, local workers, and image providers -- matching provider `.env` starter files -- delegated OpenClaw request examples -- starter custom-profile examples for future AI-native applications -- doctor checks for missing provider env placeholders -- JSON and Markdown onboarding exports - -### `v0.9.x`: pre-1.0 hardening - -Primary goals: - -- stabilize request hook boundaries and extension contracts -- expand integration and functional test coverage across real client flows -- complete documentation review across README, onboarding, integrations, troubleshooting, and release docs -- close obvious operational gaps discovered during earlier releases - -This release line should leave `v1.0.0` focused on stability and security gates, not backlog cleanup. - -Current `v0.9.x` baseline is aimed at: - -- conservative response headers and dashboard CSP defaults -- explicit JSON and multipart size guardrails -- bounded routing and operator header handling -- broader functional API tests around dashboard, routing, and upload surfaces -- documentation updates that make the hardened defaults visible to operators - -### `v1.0.0`: stable gateway baseline - -Primary goals: - -- declare a stable fusionAIze Gate gateway baseline for local-first, multi-provider routing -- publish the first separate npm CLI package for fusionAIze Gate-adjacent CLI usage -- complete a comprehensive security review before release - -The `v1.0.0` security review should explicitly cover: - -- cross-site scripting and HTML or CSS injection risks in the dashboard -- request, header, and parameter injection risks in proxy and routing paths -- dependency vulnerabilities and unsafe defaults -- local-worker and upstream proxy trust boundaries -- auth, secret-handling, and writable-path assumptions - 
-`v1.0.0` should only ship after those review results are addressed or documented with a clear mitigation plan. - -Current `v1.0.0` baseline is aimed at: - -- dashboard CSP hardening without turning the no-build UI into a separate frontend app -- reduced leakage of upstream provider failure details in client responses -- clearer trust-boundary validation for provider base URLs -- a documented release-gate security review with explicit residual risks -- a separate npm CLI package that complements the Python gateway instead of replacing it - -## Updated near-term PR sequence - -The next sequence should ladder directly into the release path above: - -1. `feat(provider): add modality-aware provider contracts, starting with image generation` -2. `feat(provider): extend modality contracts toward image editing where supported` -3. `feat(onboarding): add provider/client onboarding helpers and validation workflows` -4. `feat(dist): add Docker release path and PyPI publishing baseline` -5. `feat(ops): add update alerts and an optional auto-update enabler for controlled deployments` -6. `feat(cli): define the separate npm or TypeScript CLI package path for the v1.0.0 line` - -## Check on the earlier sequence - -The earlier near-term sequence is now effectively complete up through the routing and observability foundation: - -1. `docs: add fusionAIze Gate roadmap and rename note` -> done -2. `feat(config): add provider capability schema` -> done -3. `feat(router): add policy-based provider selection` -> done -4. `feat(provider): add local worker provider contract` -> done -5. `feat(api): add client profile support` -> done -6. `feat(obs): add route introspection and policy metrics` -> done, and now extended with traces and local worker probing -7. `feat(ext): add optional request hook interfaces` -> done -8. `feat(router): add first multi-dimensional route-fit inputs for cache, context windows, provider limits, and locality` -> done -9. 
`feat(obs): harden the simple dashboard around traces, provider/client filters, and route visibility` -> done - -## Detailed near-term backlog - -### 1. Optional request hook interfaces - -Why: +## Parity Targets -- this creates the seam for context, memory, and optimization layers without hard-coupling them +The roadmap keeps three parity goals separate. -Examples: +### Full Anthropic parity -- optional memory or context enrichment before routing -- request-shaping hooks for RTK-like CLI optimization -- operator-controlled extension points that can stay disabled by default +Working definition: -### 2. Multi-dimensional routing inputs +- `POST /v1/messages` request and response compatibility +- SSE streaming parity +- content-block compatibility +- header, version, and beta compatibility +- compatible error envelopes and stop reasons +- trustworthy token-count semantics -Why: +### Full Claude Code parity -- routing should understand more than keywords and simple tier preferences +Working definition: -Examples: +- daily coding sessions feel normal against local Gate +- streaming and tool flows work +- aliases and fallback do not constantly disrupt the session +- routing remains inside Gate instead of being pushed into client config -- cache-read vs cache-miss economics -- context window fit -- locality and policy constraints -- latency/health tradeoffs -- provider-specific max context and cache behavior +### Full Claude Desktop parity -### 3. 
Simple dashboard hardening +Working definition: -Why: +- stable local endpoint configuration where override is supported +- acceptable session behavior for the desktop feature set that actually matters +- no recurring compatibility papercuts that keep the setup feeling experimental -- fusionAIze Gate already exposes a dashboard endpoint, but operators need a clearer read-only control surface +## Release Sequence -Examples: +### `v1.15.x` - operator trust and metadata truth -- route trace table with provider and client filters -- provider health panel with capabilities and contract type -- quick links to dry-run routing and recent failure context +Primary outcome: -### 4. Image generation and editing routing +- Gate becomes more trustworthy as an operator product +- dashboard, shell, and config tell the same story +- cost and catalog signals become reviewable instead of hand-wavy -Why: +Implementation slices: -- multi-modal routing is a natural next expansion for a gateway plane +1. cost truth and catalog freshness + - explicit tracked / stale / untracked state + - stronger provider pricing provenance + - refresh visibility in dashboard and shell +2. route and lane explainability + - why this lane + - why this route + - same-lane fallback vs downgrade + - clearer lane-family summaries +3. command bar intelligence and shell parity + - shell-backed scope suggestions + - parity between dashboard pivots and CLI/YAML terms + - safe preview/diff/apply config actions +4. 
shared metadata-source foundation + - fusionAIze-internal JSON metadata boundary + - reusable across Gate and future fusionAIze products only -Examples: +Success bar: -- image-generation-capable provider contracts -- image-editing-capable provider contracts -- explicit modality routing so chat, image generation, and image editing stay understandable +- operators can trust the dashboard without treating it as a decorative shell +- cost and freshness signals are explainable +- route choice is easier to reason about from UI, CLI, and config -### 5. Provider and client onboarding helpers +### `v1.16.x` - adaptive routing trust -Why: +Primary outcome: -- many-provider, many-client deployments need a clearer adoption path than manual config editing alone +- richer live routing behavior without turning Gate into a black box -Examples: +Implementation slices: -- bootstrap helpers for provider credentials and base URLs -- starter profiles for OpenClaw, n8n, CLI, and future AI-native applications -- preflight config validation before a rollout or restart +1. route pressure and cooldown visibility +2. same-lane-first adaptation before weaker downgrade paths +3. clearer route maps and trace-level route narratives +4. more explicit premium drift, fallback pressure, and quota coupling signals -### 6. Update alerts and optional automatic update enablers +Success bar: -Why: +- adaptation under pressure is visible and mostly unsurprising +- operators can explain route changes after the fact without reading source code -- operators need a safer path than only ad hoc manual updates +### Later `v1.x` line - Claude Desktop parity if demand justifies it -Current baseline: +This should be validated by real operator demand, not assumed. 
-- cached release checks via `GET /api/update` -- dashboard visibility for current vs latest known release -- local helper access via `faigate-update-check` -- opt-in eligibility reporting and helper-driven apply flow via `faigate-auto-update` +If the client demand is real, the next parity-focused slices should cover: -This should remain opt-in and operationally conservative as it expands toward scheduled helper use, stronger rollout controls, clearer operator approval boundaries, and small rollout-ring/channel distinctions. +1. supported endpoint override flows +2. desktop-specific compatibility hardening +3. clearer troubleshooting and real local workflow validation -### 7. Distribution channels +## Shared Metadata Repository Direction -Why: +The provider metadata line should be designed from the start as a reusable +fusionAIze capability, not a Gate-only sidecar. -- the project should become easier to adopt without coupling packaging strategy to one runtime +Scope guardrail: -Examples: +- this shared metadata line is for fusionAIze products only +- it is not intended to become a generic shared metadata service for unrelated repositories -- GitHub Releases as the default channel now -- Docker images and PyPI packages by `v0.5.0` -- a separate npm or TypeScript CLI package by `v1.0.0`, not a Node rewrite of the core gateway +### Working shape -### 8. 
Security review as a release gate +- versioned JSON documents, not a mandatory hosted database +- static-hostable and cacheable +- reviewable in Git +- publishable on a fixed cadence by automation +- consumable locally without requiring fusionAIze-operated hosting -Why: +### What it should eventually serve -- `v1.0.0` needs a credible stability and security bar, not just a larger feature list +- Gate +- Grid +- Lens +- Fabric +- future fusionAIze operator products that need provider, model, offer, or pricing truth -Examples: +### What belongs in that source -- dashboard rendering review for XSS and HTML or CSS injection paths -- request routing review for injection, header abuse, and unsafe forwarding behavior -- dependency and configuration review for known vulnerabilities and insecure defaults -- documentation review so security expectations and deployment assumptions are explicit +- provider identity and aliases +- model and offer identifiers +- modality and capability metadata +- pricing metadata +- provenance metadata +- freshness metadata +- operator-reviewed overrides -## Documentation direction +### Provenance requirements -fusionAIze Gate should be understandable from the outside in under a few minutes. +Every meaningful cost or offer field should be able to answer: -That means keeping these docs current: +- where did this value come from? +- when was it last refreshed? +- what kind of source is it? +- is it tracked, stale, or untracked? 
-- README for the landing page -- architecture for technical orientation -- integrations for OpenClaw, n8n, CLI, and future clients -- onboarding for many-provider and many-client adoption -- troubleshooting for operators -- process docs for contributors +Example source types: -## Review cadence +- `provider-docs` +- `aggregator-offer` +- `manual-review` +- `observed-usage` -Every 4 or 5 merged PRs, run a broader review pass: +### Delivery model -- review unit tests -- review integration tests -- review functional coverage against real workflows -- update every relevant doc -- refresh the roadmap and process docs if the direction changed +Recommended first delivery model: -This is necessary because fusionAIze Gate is evolving quickly and the docs can drift even when individual PRs are clean. +1. dedicated versioned metadata repo +2. JSON snapshots published from that repo +3. scheduled refresh job outside Gate +4. Gate-side refresh/update mechanism tied to restart and normal update flow -## Provider discovery and recommendation links +This keeps the truth source inspectable and shared, while avoiding a premature +hosted control-plane dependency. -fusionAIze Gate should be able to help operators and end users discover suitable providers, but it should not turn recommendation output into a monetized marketplace. +## Immediate Near-Term Order -That means the future recommendation-link line should stay deliberately staged: +1. cost truth and catalog freshness +2. route and lane explainability +3. command bar intelligence and shell/config parity -### First slices that make sense soon +This order matters. 
-- add optional provider-catalog fields for signup URLs, disclosure labels, and source ownership -- surface those links in CLI or later browser-based control-center output only when they are available and disclosed -- allow operator-managed secret or env-backed provider-link overrides rather than baking them into normal client-visible config +First make the truth source believable. Then make route choice legible. Then +add smarter operator controls on top of a clearer model. -### Later slices that make sense after that +## Anti-Goals -- optional managed short-link or landing-page wrappers -- richer provider discovery views in a small browser control center -- trust/performance signals derived from historical provider behavior, so recommendations can explain quality and reliability more concretely +- no second routing runtime just for Anthropic traffic +- no opaque “smart routing” layer that cannot explain itself +- no hosted-only metadata dependency for basic local use +- no control-plane sprawl before operator trust is earned +- no product claims that outrun live workflow validation -The non-negotiable rule is simple: recommendation quality must stay fully independent from provider-link metadata, and signup links may only follow from a recommendation rather than shaping it. 
+## Review Rule -## Assumptions +After every 4 or 5 merged PRs: -- OpenAI-compatible HTTP remains the default interoperability surface in the near term -- OpenClaw, n8n, and CLI tools should keep sharing one gateway unless a client truly requires a dedicated adapter -- modality expansion should stay contract-driven instead of adding ad hoc special cases -- context, memory, and optimization remain optional layers around the gateway core ->>>>>>> b0b5a2e (feat: refine routing defaults and operator dashboard) +- review unit and integration coverage +- review real operator workflows +- refresh docs across README, roadmap, architecture, integrations, onboarding, and troubleshooting +- check whether current release priorities still match the product direction diff --git a/docs/FUSIONAIZE-SHARED-METADATA.md b/docs/FUSIONAIZE-SHARED-METADATA.md new file mode 100644 index 0000000..ee41498 --- /dev/null +++ b/docs/FUSIONAIZE-SHARED-METADATA.md @@ -0,0 +1,178 @@ +# fusionAIze Shared Metadata + +## Purpose + +This document defines the intended shape of the shared metadata line for +fusionAIze products. + +It is deliberately **not** a generic cross-repo metadata platform. + +Scope: + +- `fusionAIze Gate` +- future fusionAIze products such as `Grid`, `Lens`, and `Fabric` + +Out of scope: + +- unrelated repositories +- general-purpose metadata hosting +- a mandatory hosted control plane + +## Why this exists + +Gate already needs stronger truth around: + +- provider identity +- model and offer aliases +- modality and capability metadata +- pricing provenance +- freshness and review state + +Those same concerns can later matter for other fusionAIze products. The shared +metadata line should therefore be designed once as a reusable fusionAIze +capability instead of re-invented inside each product. + +## Working model + +Recommended first model: + +1. a dedicated Git repo for fusionAIze product metadata +2. versioned JSON documents +3. static-hostable snapshots +4. 
optional scheduled refresh jobs outside product runtimes +5. product-side refresh or import hooks + +This gives us: + +- reviewability in Git +- easy local mirroring +- no database requirement +- no forced fusionAIze-operated hosting + +## Proposed repo shape + +```text +fusionaize-metadata/ + README.md + schemas/ + provider-catalog.v1.schema.json + providers/ + catalog.v1.json + sources.v1.json + snapshots/ + providers/ + 2026-03-31T18-00-00Z.catalog.v1.json + products/ + gate/ + overlays.v1.json + grid/ + overlays.v1.json + lens/ + overlays.v1.json + fabric/ + overlays.v1.json +``` + +## Provider catalog snapshot shape + +Recommended top-level form: + +```json +{ + "schema_version": "fusionaize-provider-catalog/v1", + "generated_at": "2026-03-31T18:00:00Z", + "source_repo": "fusionaize-metadata", + "providers": { + "deepseek-chat": { + "recommended_model": "deepseek-chat", + "aliases": ["deepseek-chat", "ds-v3"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://api-docs.deepseek.com/", + "signup_url": "https://platform.deepseek.com/", + "watch_sources": [], + "notes": "Balanced DeepSeek direct route", + "last_reviewed": "2026-03-31", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://api-docs.deepseek.com/pricing", + "refreshed_at": "2026-03-31T17:45:00Z", + "freshness_status": "fresh" + } + } + } +} +``` + +## Required metadata principles + +Every price or offer-oriented field should be able to answer: + +- where did this value come from? +- when was it refreshed? +- how fresh is it? +- is it official, mixed, manual, or observed? 
+ +Recommended provenance fields: + +- `source_type` +- `source_url` +- `refreshed_at` +- `freshness_status` + +## Gate integration path + +The first Gate slice should stay intentionally small: + +- Gate keeps its embedded curated catalog as a fallback baseline +- Gate can optionally load an external JSON snapshot +- the external snapshot can add new providers or override embedded fields + +That gives us a clean migration path: + +1. embedded Python catalog only +2. optional JSON snapshot overrides +3. JSON snapshot becomes preferred truth source +4. embedded catalog shrinks to bootstrap fallback only + +## Current Gate hooks + +Gate now supports two operator-side import hooks: + +- `FAIGATE_PROVIDER_METADATA_FILE=/path/to/provider-catalog.snapshot.v1.json` +- `FAIGATE_PROVIDER_METADATA_DIR=/path/to/fusionaize-metadata` + +If `FAIGATE_PROVIDER_METADATA_FILE` is set, Gate loads that JSON snapshot +directly and merges it into the embedded provider catalog. + +If `FAIGATE_PROVIDER_METADATA_DIR` is set, Gate loads: + +- `providers/catalog.v1.json` +- `products/gate/overlays.v1.json` + +and materializes an effective Gate catalog in memory before merging it into the +embedded provider catalog. + +For runtime use, Gate also ships a small helper that materializes a repo +checkout into one snapshot file: + +```bash +./scripts/faigate-provider-metadata-sync \ + --repo /path/to/fusionaize-metadata \ + --product gate +``` + +The output snapshot can then be pointed to with +`FAIGATE_PROVIDER_METADATA_FILE` and refreshed alongside restart or +repo-update flows. 
+ +For the first tracked gaps in Gate, the example `products/gate/overlays.v1.json` +already includes: + +- `anthropic-haiku` +- `anthropic-sonnet` +- `gemini-pro` diff --git a/docs/IMPLEMENTATION-PLAN.md b/docs/IMPLEMENTATION-PLAN.md index 23c11af..5ea9c74 100644 --- a/docs/IMPLEMENTATION-PLAN.md +++ b/docs/IMPLEMENTATION-PLAN.md @@ -175,6 +175,143 @@ Reference: - [Dashboard IA](./DASHBOARD-IA.md) +### Immediate operator-trust slices after dashboard v1 + +These are the next high-signal follow-ups now that the first dashboard surface +exists and exposes the real gaps. + +They should be treated as short operator-trust slices, not as a second broad UI +redesign. + +#### Cluster A - cost truth and catalog freshness + +Observed gap: + +- cost data is not yet trustworthy enough to explain spend posture per provider +- several providers still show as untracked in the catalog layer + +Challenge: + +- provider "price" is not one thing +- direct-provider list pricing, aggregator marketplace pricing, free-tier offers, + and effective billed usage can diverge +- the product should not claim false precision where only a stale public price + table exists + +Recommendation: + +- introduce a versioned provider-metadata source of truth that can live as JSON + in a repo rather than as a hosted database +- design that metadata source as a shared fusionAIze boundary from day one so + Gate is only the first consumer, not the only one +- keep that scope explicitly limited to fusionAIze products rather than turning + it into a generic cross-repo metadata service +- keep cost provenance explicit per field: + - source price + - source timestamp + - freshness status + - source type (`provider-docs`, `aggregator-offer`, `manual-review`, + `observed-usage`) +- add a small refresh job outside Gate that updates that metadata on a fixed + rhythm +- pull that metadata into Gate through a conservative update path tied to normal + catalog refresh and restart flows + +Working shape: + +- versioned 
JSON documents +- statically hostable if desired, but not dependent on a dedicated hosted + database +- reusable later for `fusionAIze Grid`, `Lens`, `Fabric`, and similar products +- not intended for unrelated repositories outside the fusionAIze product line +- reviewable in Git with clear operator override paths + +Immediate slices: + +1. catalog schema for price provenance and freshness +2. tracked assumptions for `anthropic-haiku`, `anthropic-sonnet`, and + `gemini-pro` +3. dashboard surfacing for `tracked`, `stale`, `untracked`, and `source age` +4. post-update metadata refresh hook tied to Gate's normal update cadence + +#### Cluster B - route and lane explainability + +Observed gap: + +- operators can see routes and lanes, but not yet in a way that feels obvious + at a glance + +Challenge: + +- a graphic by itself will not fix this if the underlying route explanation is + still too implicit +- visual route maps should follow clearer route-decision semantics, not replace + them + +Recommendation: + +- make route choice legible in layers: + - requested intent + - chosen lane + - chosen execution route + - same-lane fallback candidates + - downgrade path if fallback crossed clusters +- then add a light visual route map once the textual explanation is already + operator-trustworthy + +Immediate slices: + +1. "why this lane / why this route" drilldown in Routes and Request Log +2. explicit same-lane fallback vs downgrade markers +3. lane-family summary cards in Overview and Routes +4. 
lightweight visual route map once route trace semantics are stable + +#### Cluster C - intelligent command bar and shell parity + +Observed gap: + +- the command bar filters well enough, but it is not yet intelligent +- the dashboard does not yet move in lockstep with shell capabilities and + config workflows + +Challenge: + +- "intelligent" should not become another black box +- if the dashboard can suggest scopes or edits, the same logic must stay + inspectable and reproducible from CLI and YAML + +Recommendation: + +- keep the command bar operator-first: + - saved scopes + - recommended pivots + - next useful drilldowns +- build shell and dashboard against the same capability layer rather than + inventing separate UX-only semantics +- add config actions only through safe preview/diff/apply flows, not direct + opaque mutation + +Immediate slices: + +1. shell-backed scope suggestions (`high spend`, `fallback active`, + `premium drift`, `untracked catalog`) +2. deep links from dashboard panels to equivalent shell or API views +3. dashboard config actions with preview, diff, backup, and explicit apply +4. parity review so dashboard filters, shell helpers, and YAML names stay + aligned + +#### Recommended near-term order + +1. cost truth and tracked-catalog freshness +2. route and lane explainability +3. 
command bar intelligence and shell/config parity + +This order keeps the next product gains grounded in trust: + +- first make the cost and catalog story believable +- then make route choice more legible +- then add smarter operator controls on top of that clearer model + ### `v1.16.x` - adaptive orchestration trust Primary outcome: diff --git a/docs/examples/fusionaize-metadata-repo/README.md b/docs/examples/fusionaize-metadata-repo/README.md new file mode 100644 index 0000000..2eb455b --- /dev/null +++ b/docs/examples/fusionaize-metadata-repo/README.md @@ -0,0 +1,49 @@ +# fusionAIze Metadata Repo Skeleton + +This directory is a starter skeleton for a future dedicated +`fusionaize-metadata` repository. + +It is intentionally scoped to fusionAIze products only: + +- Gate +- Grid +- Lens +- Fabric + +It is not intended as a shared metadata platform for unrelated repositories. + +## Layout + +```text +fusionaize-metadata/ + README.md + schemas/ + provider-catalog.v1.schema.json + providers/ + catalog.v1.json + sources.v1.json + products/ + gate/ + overlays.v1.json +``` + +## Gate integration + +Gate supports two input modes: + +1. direct snapshot file + - `FAIGATE_PROVIDER_METADATA_FILE=/path/to/provider-catalog.snapshot.v1.json` +2. metadata repo checkout with product overlay + - `FAIGATE_PROVIDER_METADATA_DIR=/path/to/fusionaize-metadata` + - optional `FAIGATE_PROVIDER_METADATA_PRODUCT=gate` + +To materialize a snapshot from a repo checkout for runtime use: + +```bash +./scripts/faigate-provider-metadata-sync \ + --repo /path/to/fusionaize-metadata \ + --product gate +``` + +Restart and managed update flows can call the same helper automatically when +`FAIGATE_PROVIDER_METADATA_DIR` is set in the runtime environment.
diff --git a/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json b/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json new file mode 100644 index 0000000..6bc6547 --- /dev/null +++ b/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json @@ -0,0 +1,80 @@ +{ + "schema_version": "fusionaize-provider-overlays/v1", + "product": "gate", + "providers": { + "anthropic-haiku": { + "recommended_model": "claude-3-5-haiku-latest", + "aliases": ["claude-3-5-haiku-latest", "anthropic:haiku"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "signup_url": "https://console.anthropic.com/", + "watch_sources": [], + "notes": "Gate keeps a direct Anthropic Haiku route tracked here.", + "last_reviewed": "2026-03-31", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://www.anthropic.com/pricing#api", + "refreshed_at": "2026-03-31T18:15:00Z", + "freshness_status": "fresh", + "input_cost_per_1m": 0.8, + "output_cost_per_1m": 4.0 + } + }, + "anthropic-sonnet": { + "recommended_model": "claude-sonnet-4-6", + "aliases": ["claude-sonnet-4-6", "anthropic:sonnet"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "signup_url": "https://console.anthropic.com/", + "watch_sources": [], + "notes": "Gate keeps a direct Anthropic Sonnet route tracked here.", + "last_reviewed": "2026-03-31", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://www.anthropic.com/pricing#api", + "refreshed_at": "2026-03-31T18:15:00Z", + "freshness_status": "fresh", + "input_cost_per_1m": 3.0, + "output_cost_per_1m": 15.0 + } + }, + 
"gemini-pro": { + "recommended_model": "gemini-2.5-pro", + "aliases": ["gemini-2.5-pro", "gemini-pro-high", "google:gemini-pro"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://ai.google.dev/gemini-api/docs/models", + "signup_url": "https://aistudio.google.com/", + "watch_sources": [], + "notes": "Gate keeps the Gemini Pro lane tracked here, including the higher-tier provider alias.", + "last_reviewed": "2026-03-31", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://ai.google.dev/gemini-api/docs/pricing", + "refreshed_at": "2026-03-31T18:15:00Z", + "freshness_status": "fresh", + "pricing_notes": "Model pricing can vary by modality and token band; verify context-specific rates during refresh." + } + }, + "deepseek-chat": { + "notes": "Gate overlay can tighten route notes or freshness without rewriting the base catalog.", + "pricing": { + "freshness_status": "fresh" + } + } + } +} diff --git a/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json b/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json new file mode 100644 index 0000000..bda073b --- /dev/null +++ b/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json @@ -0,0 +1,28 @@ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "generated_at": "2026-03-31T18:00:00Z", + "source_repo": "fusionaize-metadata", + "providers": { + "deepseek-chat": { + "recommended_model": "deepseek-chat", + "aliases": ["deepseek-chat", "ds-v3"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://api-docs.deepseek.com/", + "signup_url": "https://platform.deepseek.com/", + "watch_sources": [], + "notes": "Balanced DeepSeek direct route.", + "last_reviewed": "2026-03-31", + "pricing": { 
+ "source_type": "provider-docs", + "source_url": "https://api-docs.deepseek.com/pricing", + "refreshed_at": "2026-03-31T17:45:00Z", + "freshness_status": "fresh" + } + } + } +} diff --git a/docs/examples/fusionaize-metadata-repo/providers/sources.v1.json b/docs/examples/fusionaize-metadata-repo/providers/sources.v1.json new file mode 100644 index 0000000..493e464 --- /dev/null +++ b/docs/examples/fusionaize-metadata-repo/providers/sources.v1.json @@ -0,0 +1,11 @@ +{ + "schema_version": "fusionaize-provider-sources/v1", + "sources": { + "deepseek-chat": { + "owner_product": "gate", + "refresh_interval_seconds": 10800, + "official_models_url": "https://api-docs.deepseek.com/", + "official_pricing_url": "https://api-docs.deepseek.com/pricing" + } + } +} diff --git a/docs/examples/fusionaize-metadata-repo/schemas/provider-catalog.v1.schema.json b/docs/examples/fusionaize-metadata-repo/schemas/provider-catalog.v1.schema.json new file mode 100644 index 0000000..8888add --- /dev/null +++ b/docs/examples/fusionaize-metadata-repo/schemas/provider-catalog.v1.schema.json @@ -0,0 +1,25 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://fusionaize.local/schemas/provider-catalog.v1.schema.json", + "title": "fusionAIze Provider Catalog v1", + "type": "object", + "required": ["schema_version", "providers"], + "properties": { + "schema_version": { + "type": "string" + }, + "generated_at": { + "type": "string" + }, + "source_repo": { + "type": "string" + }, + "providers": { + "type": "object", + "additionalProperties": { + "type": "object" + } + } + }, + "additionalProperties": true +} diff --git a/docs/examples/provider-catalog.snapshot.v1.json b/docs/examples/provider-catalog.snapshot.v1.json new file mode 100644 index 0000000..838e2b7 --- /dev/null +++ b/docs/examples/provider-catalog.snapshot.v1.json @@ -0,0 +1,28 @@ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "generated_at": "2026-03-31T18:00:00Z", + "source_repo": 
"fusionaize-metadata", + "providers": { + "anthropic-haiku": { + "recommended_model": "claude-3-5-haiku-latest", + "aliases": ["claude-3-5-haiku-latest", "anthropic:haiku"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "signup_url": "https://console.anthropic.com/", + "watch_sources": [], + "notes": "Example snapshot entry for external provider metadata import.", + "last_reviewed": "2026-03-31", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "refreshed_at": "2026-03-31T17:55:00Z", + "freshness_status": "fresh" + } + } + } +} diff --git a/faigate/provider_catalog.py b/faigate/provider_catalog.py index fbf09b2..7030905 100644 --- a/faigate/provider_catalog.py +++ b/faigate/provider_catalog.py @@ -2,9 +2,11 @@ from __future__ import annotations +import json import os import re from datetime import date +from pathlib import Path from typing import Any from .config import Config @@ -25,6 +27,12 @@ "informational only and do not affect ranking." 
) +_EXTERNAL_CATALOG_ENV = "FAIGATE_PROVIDER_METADATA_FILE" +_EXTERNAL_CATALOG_DIR_ENV = "FAIGATE_PROVIDER_METADATA_DIR" +_EXTERNAL_CATALOG_PRODUCT_ENV = "FAIGATE_PROVIDER_METADATA_PRODUCT" +_DEFAULT_METADATA_PRODUCT = "gate" +_METADATA_CATALOG_RELATIVE_PATH = Path("providers") / "catalog.v1.json" + _CATALOG: dict[str, dict[str, Any]] = { "deepseek-chat": { "recommended_model": get_active_model_id("deepseek/chat"), @@ -233,6 +241,120 @@ } +def _normalize_catalog_entry(entry: Any) -> dict[str, Any]: + if not isinstance(entry, dict): + return {} + return {str(key): value for key, value in entry.items()} + + +def _merge_catalog_entry(base: dict[str, Any], overlay: dict[str, Any]) -> dict[str, Any]: + merged = dict(base) + for key, value in overlay.items(): + if isinstance(value, dict) and isinstance(merged.get(key), dict): + merged[key] = _merge_catalog_entry( + _normalize_catalog_entry(merged[key]), + _normalize_catalog_entry(value), + ) + continue + merged[key] = value + return merged + + +def _normalize_catalog_payload(payload: Any) -> dict[str, dict[str, Any]]: + raw_catalog = payload.get("providers") if isinstance(payload, dict) else payload + if not isinstance(raw_catalog, dict): + return {} + + catalog: dict[str, dict[str, Any]] = {} + for provider_name, entry in raw_catalog.items(): + normalized_name = str(provider_name or "").strip() + normalized_entry = _normalize_catalog_entry(entry) + if not normalized_name or not normalized_entry: + continue + catalog[normalized_name] = normalized_entry + return catalog + + +def _load_catalog_payload(path: str | Path) -> dict[str, Any]: + try: + with open(path, encoding="utf-8") as handle: + payload = json.load(handle) + except (OSError, json.JSONDecodeError): + return {} + return payload if isinstance(payload, dict) else {} + + +def build_provider_metadata_snapshot( + metadata_dir: str | Path, + *, + product: str = _DEFAULT_METADATA_PRODUCT, +) -> dict[str, Any]: + root = Path(metadata_dir).expanduser() + catalog_payload 
= _load_catalog_payload(root / _METADATA_CATALOG_RELATIVE_PATH) + catalog = _normalize_catalog_payload(catalog_payload) + + product_name = str(product or _DEFAULT_METADATA_PRODUCT).strip() or _DEFAULT_METADATA_PRODUCT + overlay_payload = _load_catalog_payload(root / "products" / product_name / "overlays.v1.json") + overlay = _normalize_catalog_payload(overlay_payload) + + merged_catalog = dict(catalog) + for provider_name, entry in overlay.items(): + merged_catalog[provider_name] = _merge_catalog_entry( + merged_catalog.get(provider_name, {}), + entry, + ) + + return { + "schema_version": str( + catalog_payload.get("schema_version") or "fusionaize-provider-catalog/v1" + ), + "generated_at": str(catalog_payload.get("generated_at") or ""), + "source_repo": str(catalog_payload.get("source_repo") or ""), + "product": product_name, + "providers": merged_catalog, + } + + +def materialize_provider_metadata_snapshot( + metadata_dir: str | Path, + output_path: str | Path, + *, + product: str = _DEFAULT_METADATA_PRODUCT, +) -> dict[str, Any]: + snapshot = build_provider_metadata_snapshot(metadata_dir, product=product) + destination = Path(output_path).expanduser() + destination.parent.mkdir(parents=True, exist_ok=True) + destination.write_text( + json.dumps(snapshot, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + return snapshot + + +def _load_external_provider_catalog() -> dict[str, dict[str, Any]]: + metadata_path = str(os.environ.get(_EXTERNAL_CATALOG_ENV, "") or "").strip() + if metadata_path: + payload = _load_catalog_payload(metadata_path) + return _normalize_catalog_payload(payload) + + metadata_dir = str(os.environ.get(_EXTERNAL_CATALOG_DIR_ENV, "") or "").strip() + if not metadata_dir: + return {} + product = str(os.environ.get(_EXTERNAL_CATALOG_PRODUCT_ENV, _DEFAULT_METADATA_PRODUCT) or "") + return _normalize_catalog_payload( + build_provider_metadata_snapshot(metadata_dir, product=product) + ) + + +def _get_catalog_source() -> dict[str, dict[str, 
Any]]: + catalog = {name: dict(entry) for name, entry in _CATALOG.items()} + for name, entry in _load_external_provider_catalog().items(): + merged = dict(catalog.get(name, {})) + merged.update(entry) + catalog[name] = merged + return catalog + + def _slugify_provider_name(provider_name: str) -> str: return re.sub(r"[^A-Z0-9]+", "_", provider_name.upper()).strip("_") @@ -263,7 +385,7 @@ def _build_discovery_metadata(provider_name: str, catalog_entry: dict[str, Any]) def get_provider_catalog() -> dict[str, dict[str, Any]]: """Return a shallow copy of the curated provider catalog.""" payload: dict[str, dict[str, Any]] = {} - for name, entry in _CATALOG.items(): + for name, entry in _get_catalog_source().items(): item = dict(entry) item["discovery"] = _build_discovery_metadata(name, entry) payload[name] = item @@ -272,7 +394,7 @@ def get_provider_catalog() -> dict[str, dict[str, Any]]: def get_provider_catalog_entry(provider_name: str) -> dict[str, Any]: """Return one curated provider catalog entry with discovery metadata.""" - entry = _CATALOG.get(provider_name) + entry = _get_catalog_source().get(provider_name) if not entry: return {} item = dict(entry) @@ -450,11 +572,11 @@ def build_provider_catalog_report(config: Config) -> dict[str, Any]: for provider_name, provider in sorted(config.providers.items()): model = str(provider.get("model", "") or "").strip() - catalog_entry = _CATALOG.get(provider_name) + catalog_entry = get_provider_catalog_entry(provider_name) item: dict[str, Any] = { "provider": provider_name, "configured_model": model, - "tracked": catalog_entry is not None, + "tracked": bool(catalog_entry), } if not catalog_entry: diff --git a/scripts/faigate-provider-metadata-sync b/scripts/faigate-provider-metadata-sync new file mode 100755 index 0000000..545010f --- /dev/null +++ b/scripts/faigate-provider-metadata-sync @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +set -euo pipefail + +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/faigate-service-lib.sh" + 
+python_bin="$(faigate_python_bin)" +repo_dir="${FAIGATE_PROVIDER_METADATA_DIR:-}" +product="gate" +output_path="" + +usage() { + cat <<'EOF' +Usage: + ./scripts/faigate-provider-metadata-sync --repo DIR [--product gate] [--out FILE] + +Materialize an effective provider catalog snapshot from a fusionAIze metadata +repo checkout plus product overlays. The output file can be consumed by Gate +through FAIGATE_PROVIDER_METADATA_FILE. +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo) + repo_dir="${2:-}" + shift 2 + ;; + --product) + product="${2:-}" + shift 2 + ;; + --out) + output_path="${2:-}" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + usage >&2 + exit 2 + ;; + esac +done + +if [[ -z "$repo_dir" ]]; then + printf 'error: metadata repo path required (--repo or FAIGATE_PROVIDER_METADATA_DIR)\n' >&2 + exit 2 +fi + +if [[ -z "$output_path" ]]; then + db_path="$(faigate_db_path)" + output_path="$(dirname "$db_path")/provider-catalog.snapshot.v1.json" +fi + +FAIGATE_PROVIDER_METADATA_SYNC_REPO="$repo_dir" \ +FAIGATE_PROVIDER_METADATA_SYNC_PRODUCT="$product" \ +FAIGATE_PROVIDER_METADATA_SYNC_OUTPUT="$output_path" \ +"$python_bin" - <<'PY' +import json +import os + +from faigate.provider_catalog import materialize_provider_metadata_snapshot + +repo = os.environ["FAIGATE_PROVIDER_METADATA_SYNC_REPO"] +product = os.environ["FAIGATE_PROVIDER_METADATA_SYNC_PRODUCT"] +output = os.environ["FAIGATE_PROVIDER_METADATA_SYNC_OUTPUT"] + +snapshot = materialize_provider_metadata_snapshot(repo, output, product=product) +providers = snapshot.get("providers") or {} + +print("fusionAIze Gate Provider Metadata Sync") +print() +print(f"Repo: {repo}") +print(f"Product: {product}") +print(f"Output: {output}") +print(f"Providers: {len(providers)}") +print() +print( + json.dumps( + { + "schema_version": snapshot.get("schema_version"), + "generated_at": snapshot.get("generated_at"), + "source_repo": snapshot.get("source_repo"), + "product": snapshot.get("product"), + 
"providers": sorted(providers.keys()), + }, + indent=2, + sort_keys=True, + ) +) +PY diff --git a/scripts/faigate-restart b/scripts/faigate-restart index e38049d..ed80f60 100755 --- a/scripts/faigate-restart +++ b/scripts/faigate-restart @@ -44,6 +44,12 @@ faigate_ui_header "fusionAIze Gate Restart" "Restarting through $(faigate_servic faigate_ui_info "Target: $(faigate_service_target)" faigate_ui_info "Bind: $(faigate_host):$(faigate_port)" +if metadata_dir="$(faigate_provider_metadata_dir)" && [ -n "$metadata_dir" ]; then + faigate_ui_info "Syncing provider metadata from: ${metadata_dir}" + faigate_sync_provider_metadata_if_configured + faigate_ui_success "Provider metadata snapshot refreshed." +fi + case "$(faigate_platform)" in Darwin) faigate_launchctl_start diff --git a/scripts/faigate-service-lib.sh b/scripts/faigate-service-lib.sh index 523ca27..860657c 100644 --- a/scripts/faigate-service-lib.sh +++ b/scripts/faigate-service-lib.sh @@ -144,6 +144,64 @@ faigate_env_value() { ' "$env_file" | tail -n 1 } +faigate_provider_metadata_dir() { + if [ -n "${FAIGATE_PROVIDER_METADATA_DIR:-}" ]; then + printf '%s\n' "$FAIGATE_PROVIDER_METADATA_DIR" + return 0 + fi + local env_dir + env_dir="$(faigate_env_value FAIGATE_PROVIDER_METADATA_DIR 2>/dev/null || true)" + if [ -n "$env_dir" ]; then + printf '%s\n' "$env_dir" + fi +} + +faigate_provider_metadata_product() { + if [ -n "${FAIGATE_PROVIDER_METADATA_PRODUCT:-}" ]; then + printf '%s\n' "$FAIGATE_PROVIDER_METADATA_PRODUCT" + return 0 + fi + local env_product + env_product="$(faigate_env_value FAIGATE_PROVIDER_METADATA_PRODUCT 2>/dev/null || true)" + if [ -n "$env_product" ]; then + printf '%s\n' "$env_product" + else + printf '%s\n' "gate" + fi +} + +faigate_provider_metadata_snapshot_path() { + if [ -n "${FAIGATE_PROVIDER_METADATA_FILE:-}" ]; then + printf '%s\n' "$FAIGATE_PROVIDER_METADATA_FILE" + return 0 + fi + local env_file + env_file="$(faigate_env_value FAIGATE_PROVIDER_METADATA_FILE 2>/dev/null || true)" + 
if [ -n "$env_file" ]; then + printf '%s\n' "$env_file" + return 0 + fi + local db_path + db_path="$(faigate_db_path)" + printf '%s/provider-catalog.snapshot.v1.json\n' "$(dirname "$db_path")" +} + +faigate_sync_provider_metadata_if_configured() { + local metadata_dir metadata_product snapshot_path repo_root + metadata_dir="$(faigate_provider_metadata_dir)" + if [ -z "$metadata_dir" ]; then + return 0 + fi + metadata_product="$(faigate_provider_metadata_product)" + snapshot_path="$(faigate_provider_metadata_snapshot_path)" + repo_root="$(faigate_repo_root)" + FAIGATE_PROVIDER_METADATA_DIR="$metadata_dir" \ + "$repo_root/scripts/faigate-provider-metadata-sync" \ + --repo "$metadata_dir" \ + --product "$metadata_product" \ + --out "$snapshot_path" +} + faigate_yaml_value() { local dotted_key="$1" local default="${2:-}" diff --git a/scripts/faigate-update b/scripts/faigate-update index 8984320..c773749 100755 --- a/scripts/faigate-update +++ b/scripts/faigate-update @@ -1,6 +1,9 @@ #!/usr/bin/env bash set -euo pipefail +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${script_dir}/faigate-service-lib.sh" + # update code from repo + restart service # # Git operations run as the faigate service user (owner of /opt/faigate). 
@@ -59,6 +62,15 @@ if [ -f "$FAIGATE_DIR/venv/bin/pip" ]; then fi fi +metadata_dir="$(faigate_provider_metadata_dir)" +if [ -n "$metadata_dir" ]; then + echo "syncing provider metadata from ${metadata_dir}" + "$FAIGATE_DIR/scripts/faigate-provider-metadata-sync" \ + --repo "$metadata_dir" \ + --product "$(faigate_provider_metadata_product)" \ + --out "$(faigate_provider_metadata_snapshot_path)" +fi + # ── systemd ─────────────────────────────────────────────────────────────────── sudo install -m 644 "$FAIGATE_DIR/faigate.service" /etc/systemd/system/faigate.service sudo systemctl daemon-reload diff --git a/tests/test_provider_catalog.py b/tests/test_provider_catalog.py index 525b699..6344d73 100644 --- a/tests/test_provider_catalog.py +++ b/tests/test_provider_catalog.py @@ -6,7 +6,10 @@ from faigate.provider_catalog import ( build_provider_catalog_report, build_provider_discovery_view, + build_provider_metadata_snapshot, build_provider_refresh_guidance, + get_provider_catalog_entry, + materialize_provider_metadata_snapshot, ) @@ -299,3 +302,196 @@ def test_build_provider_refresh_guidance_prefers_stale_entries(): assert guidance[0]["action"] == "refresh-now" assert guidance[0]["refresh_url"].startswith("https://") assert guidance[1]["action"] == "review-soon" + + +def test_provider_catalog_report_can_track_provider_from_external_snapshot( + tmp_path: Path, monkeypatch +): + snapshot = tmp_path / "provider-catalog.json" + snapshot.write_text( + """ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "providers": { + "anthropic-haiku": { + "recommended_model": "claude-3-5-haiku-latest", + "aliases": ["claude-3-5-haiku-latest", "anthropic:haiku"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "signup_url": "https://console.anthropic.com/", + "watch_sources": [], + 
"notes": "External snapshot entry", + "last_reviewed": "2026-03-31" + } + } +} +""", + encoding="utf-8", + ) + monkeypatch.setenv("FAIGATE_PROVIDER_METADATA_FILE", str(snapshot)) + + cfg = load_config( + _write_config( + tmp_path, + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + anthropic-haiku: + backend: openai-compat + base_url: "https://api.anthropic.com/v1" + api_key: "secret" + model: "claude-3-5-haiku-latest" +fallback_chain: [] +metrics: + enabled: false +""", + ) + ) + + report = build_provider_catalog_report(cfg) + + assert report["tracked_providers"] == 1 + assert report["alert_count"] == 0 + assert report["items"][0]["provider"] == "anthropic-haiku" + assert report["items"][0]["tracked"] is True + assert report["items"][0]["recommended_model"] == "claude-3-5-haiku-latest" + + +def test_provider_catalog_external_snapshot_can_override_embedded_entry( + tmp_path: Path, monkeypatch +): + snapshot = tmp_path / "provider-catalog.json" + snapshot.write_text( + """ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "providers": { + "deepseek-chat": { + "notes": "External override note", + "last_reviewed": "2026-03-31" + } + } +} +""", + encoding="utf-8", + ) + monkeypatch.setenv("FAIGATE_PROVIDER_METADATA_FILE", str(snapshot)) + + entry = get_provider_catalog_entry("deepseek-chat") + + assert entry["notes"] == "External override note" + assert entry["last_reviewed"] == "2026-03-31" + + +def test_provider_catalog_can_load_repo_catalog_with_gate_overlay(tmp_path: Path, monkeypatch): + repo_dir = tmp_path / "fusionaize-metadata" + (repo_dir / "providers").mkdir(parents=True) + (repo_dir / "products" / "gate").mkdir(parents=True) + (repo_dir / "providers" / "catalog.v1.json").write_text( + """ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "providers": { + "deepseek-chat": { + "notes": "Base note", + "pricing": { + "source_type": "provider-docs", + "source_url": "https://example.test/pricing" + } + } + } +} +""", + encoding="utf-8", 
+ ) + (repo_dir / "products" / "gate" / "overlays.v1.json").write_text( + """ +{ + "schema_version": "fusionaize-provider-overlays/v1", + "providers": { + "deepseek-chat": { + "notes": "Gate note", + "pricing": { + "freshness_status": "fresh" + } + }, + "anthropic-haiku": { + "recommended_model": "claude-3-5-haiku-latest", + "aliases": ["anthropic:haiku"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.anthropic.com/en/docs/about-claude/models", + "signup_url": "https://console.anthropic.com/", + "watch_sources": [], + "notes": "Added by Gate overlay", + "last_reviewed": "2026-03-31" + } + } +} +""", + encoding="utf-8", + ) + monkeypatch.delenv("FAIGATE_PROVIDER_METADATA_FILE", raising=False) + monkeypatch.setenv("FAIGATE_PROVIDER_METADATA_DIR", str(repo_dir)) + + entry = get_provider_catalog_entry("deepseek-chat") + added = get_provider_catalog_entry("anthropic-haiku") + + assert entry["notes"] == "Gate note" + assert entry["pricing"]["source_type"] == "provider-docs" + assert entry["pricing"]["freshness_status"] == "fresh" + assert added["notes"] == "Added by Gate overlay" + + +def test_materialize_provider_metadata_snapshot_writes_effective_catalog(tmp_path: Path): + repo_dir = tmp_path / "fusionaize-metadata" + output_path = tmp_path / "state" / "provider-catalog.snapshot.v1.json" + (repo_dir / "providers").mkdir(parents=True) + (repo_dir / "products" / "gate").mkdir(parents=True) + (repo_dir / "providers" / "catalog.v1.json").write_text( + """ +{ + "schema_version": "fusionaize-provider-catalog/v1", + "generated_at": "2026-03-31T18:00:00Z", + "source_repo": "fusionaize-metadata", + "providers": { + "deepseek-chat": { + "notes": "Base note" + } + } +} +""", + encoding="utf-8", + ) + (repo_dir / "products" / "gate" / "overlays.v1.json").write_text( + """ +{ + "schema_version": "fusionaize-provider-overlays/v1", 
+ "providers": { + "deepseek-chat": { + "notes": "Gate note" + } + } +} +""", + encoding="utf-8", + ) + + snapshot = build_provider_metadata_snapshot(repo_dir) + written = materialize_provider_metadata_snapshot(repo_dir, output_path) + + assert snapshot["providers"]["deepseek-chat"]["notes"] == "Gate note" + assert written["providers"]["deepseek-chat"]["notes"] == "Gate note" + assert output_path.exists() is True + assert "Gate note" in output_path.read_text(encoding="utf-8")