diff --git a/.cliff.toml b/.cliff.toml new file mode 100644 index 0000000..8cf29ec --- /dev/null +++ b/.cliff.toml @@ -0,0 +1,79 @@ +# git-cliff configuration for fusionAIze Gate +# https://git-cliff.org/docs/configuration + +[changelog] +# changelog header +header = "# Changelog\n" +# template for the changelog body +body = """ +{% if version %} + ## v{{ version }} - {{ timestamp | date(format="%Y-%m-%d") }} +{% else %} + ## Unreleased +{% endif %} + +{% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | upper_first }} + {% for commit in commits %} + - {{ commit.message | upper_first }}\ + {% if commit.scope %} *({{commit.scope}})*{% endif %} + {% endfor %} +{% endfor %} +""" +# remove leading and trailing whitespace from the rendered template +trim = true +# post-processing hooks +postprocessors = [ + # Collapse runs of three or more newlines; strip carriage returns + { pattern = "\n{3,}", replace = "\n\n" }, + { pattern = '\r', replace = "" }, +] +# sort the commits inside sections +sort_commits = "newest" + +[git] +# parse the commits based on https://www.conventionalcommits.org +conventional_commits = true +# filter out the commits that are not conventional +filter_unconventional = true +# process each line of a commit as an individual commit +split_commits = false +# regex for parsing and grouping commits +commit_parsers = [ + { message = "^feat", group = "Added" }, + { message = "^fix", group = "Fixed" }, + { message = "^docs", group = "Documentation" }, + { message = "^style", group = "Style" }, + { message = "^refactor", group = "Refactored" }, + { message = "^perf", group = "Performance" }, + { message = "^test", group = "Tests" }, + { message = "^build", group = "Build" }, + { message = "^ci", group = "CI" }, + { message = "^chore", group = "Chore" }, + { message = "^revert", group = "Reverted" }, +] +# protect breaking changes from being skipped due to matching a commit_parser +protect_breaking_commits = true +# filter out the commits that are not matched by commit parsers 
+filter_commits = true +# regex for matching and skipping commits +ignore_commits = [ + "^Merge", + "^Revert", + "^Release", +] + +[bump] +# bump mappings for version increment based on commit types +mapping = [ + { break = "major" }, + { type = "feat", bump = "minor" }, + { type = "fix", bump = "patch" }, + { type = "perf", bump = "patch" }, + { type = "refactor", bump = "patch" }, + { type = "docs", bump = "patch" }, +] + +[tag] +# regex for matching and parsing the version from a tag +pattern = "^v([0-9]+\\.[0-9]+\\.[0-9]+)$" \ No newline at end of file diff --git a/.codenomad/background_processes/mnh8wrrb/index.json b/.codenomad/background_processes/mnh8wrrb/index.json new file mode 100644 index 0000000..35e8bfe --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/index.json @@ -0,0 +1,79 @@ +[ + { + "id": "proc_2026-04-02T0911_f38886", + "workspaceId": "mnh8wrrb", + "title": "faigate server on port 8092", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "stopped", + "pid": 65532, + "startedAt": "2026-04-02T09:11:51.759Z", + "outputSizeBytes": 1572, + "stoppedAt": "2026-04-02T09:23:03.877Z" + }, + { + "id": "proc_2026-04-02T0919_daa525", + "workspaceId": "mnh8wrrb", + "title": "faigate server with metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 73920, + "startedAt": "2026-04-02T09:19:22.138Z", + "outputSizeBytes": 2134, + "exitCode": 3, + "stoppedAt": "2026-04-02T09:19:23.453Z" + }, + { + "id": "proc_2026-04-02T0921_e1a02b", + "workspaceId": "mnh8wrrb", + "title": "faigate server with metadata on port 8092", + "command": "cd 
/Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 80702, + "startedAt": "2026-04-02T09:21:57.695Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:21:58.598Z" + }, + { + "id": "proc_2026-04-02T0922_6b1c2e", + "workspaceId": "mnh8wrrb", + "title": "faigate server port 8092", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 81311, + "startedAt": "2026-04-02T09:22:08.700Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:22:09.483Z" + }, + { + "id": "proc_2026-04-02T0923_af21c4", + "workspaceId": "mnh8wrrb", + "title": "faigate server with metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 85481, + "startedAt": "2026-04-02T09:23:17.951Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:23:18.719Z" + }, + { + "id": "proc_2026-04-02T0926_ddc210", + "workspaceId": "mnh8wrrb", + "title": "faigate server with merged external metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": 
"/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 95038, + "startedAt": "2026-04-02T09:26:11.156Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:26:12.658Z" + } +] \ No newline at end of file diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0911_f38886/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0911_f38886/output.txt new file mode 100644 index 0000000..2742dd5 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0911_f38886/output.txt @@ -0,0 +1,22 @@ +INFO: Started server process [65532] +INFO: Waiting for application startup. +11:11:52 [faigate] INFO Loaded config with 14 providers +11:11:52 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:11:52 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:11:52 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:11:52 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:11:52 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:11:52 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:11:52 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:11:52 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:11:52 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:11:52 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:11:52 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:11:52 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:11:52 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:11:52 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:11:52 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:11:52 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. 
+INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:58497 - "GET /api/analytics/provider-mix HTTP/1.1" 200 OK diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0919_daa525/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0919_daa525/output.txt new file mode 100644 index 0000000..f56a155 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0919_daa525/output.txt @@ -0,0 +1,33 @@ +INFO: Started server process [73920] +INFO: Waiting for application startup. +11:19:23 [faigate] INFO Loaded config with 14 providers +11:19:23 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:19:23 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:19:23 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:19:23 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:19:23 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:19:23 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:19:23 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:19:23 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:19:23 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:19:23 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:19:23 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:19:23 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:19:23 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:19:23 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +ERROR: Traceback (most recent call last): + File "/opt/homebrew/lib/python3.14/site-packages/starlette/routing.py", line 694, in lifespan + async with self.lifespan_context(app) as maybe_state: + ~~~~~~~~~~~~~~~~~~~~~^^^^^ + File "/opt/homebrew/Cellar/python@3.14/3.14.3_1/Frameworks/Python.framework/Versions/3.14/lib/python3.14/contextlib.py", line 214, in __aenter__ + return await anext(self.gen) + 
^^^^^^^^^^^^^^^^^^^^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/main.py", line 2250, in lifespan + _metrics.init() + ~~~~~~~~~~~~~^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/metrics.py", line 120, in init + self._conn = sqlite3.connect(self._db_path, check_same_thread=False) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +sqlite3.OperationalError: unable to open database file + +ERROR: Application startup failed. Exiting. diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0921_e1a02b/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0921_e1a02b/output.txt new file mode 100644 index 0000000..1cd3f33 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0921_e1a02b/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [80702] +INFO: Waiting for application startup. +11:21:58 [faigate] INFO Loaded config with 14 providers +11:21:58 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:21:58 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:21:58 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:21:58 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:21:58 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:21:58 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:21:58 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:21:58 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:21:58 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:21:58 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:21:58 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:21:58 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:21:58 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:21:58 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:21:58 [faigate] WARNING Provider source catalog 
startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:21:58 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:21:58 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0922_6b1c2e/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0922_6b1c2e/output.txt new file mode 100644 index 0000000..3b359e4 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0922_6b1c2e/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [81311] +INFO: Waiting for application startup. +11:22:09 [faigate] INFO Loaded config with 14 providers +11:22:09 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:22:09 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:22:09 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:22:09 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:22:09 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:22:09 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:22:09 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:22:09 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:22:09 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:22:09 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:22:09 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:22:09 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:22:09 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:22:09 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:22:09 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' 
+11:22:09 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:22:09 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0923_af21c4/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0923_af21c4/output.txt new file mode 100644 index 0000000..4943f7e --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0923_af21c4/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [85481] +INFO: Waiting for application startup. +11:23:18 [faigate] INFO Loaded config with 14 providers +11:23:18 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:23:18 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:23:18 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:23:18 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:23:18 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:23:18 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:23:18 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:23:18 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:23:18 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:23:18 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:23:18 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:23:18 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:23:18 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:23:18 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:23:18 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:23:18 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application 
startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:23:18 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0926_ddc210/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0926_ddc210/output.txt new file mode 100644 index 0000000..0807778 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T0926_ddc210/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [95038] +INFO: Waiting for application startup. +11:26:12 [faigate] INFO Loaded config with 14 providers +11:26:12 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:26:12 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:26:12 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:26:12 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:26:12 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:26:12 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:26:12 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:26:12 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:26:12 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:26:12 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:26:12 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:26:12 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:26:12 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:26:12 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:26:12 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:26:12 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. 
+ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:26:12 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. diff --git a/.codenomad/background_processes/mnh9cz26/index.json b/.codenomad/background_processes/mnh9cz26/index.json new file mode 100644 index 0000000..c360b95 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/index.json @@ -0,0 +1,64 @@ +[ + { + "id": "proc_2026-04-02T0919_9e2eb7", + "workspaceId": "mnh9cz26", + "title": "faigate server with metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 73561, + "startedAt": "2026-04-02T09:19:08.642Z", + "outputSizeBytes": 2134, + "exitCode": 3, + "stoppedAt": "2026-04-02T09:19:09.588Z" + }, + { + "id": "proc_2026-04-02T0920_ab4f1c", + "workspaceId": "mnh9cz26", + "title": "faigate server with external metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "error", + "pid": 76934, + "startedAt": "2026-04-02T09:20:47.024Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:20:47.712Z" + }, + { + "id": "proc_2026-04-02T0922_5bbbd8", + "workspaceId": "mnh9cz26", + "title": "faigate server with metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", 
+ "status": "error", + "pid": 82785, + "startedAt": "2026-04-02T09:22:30.566Z", + "outputSizeBytes": 1662, + "exitCode": 1, + "stoppedAt": "2026-04-02T09:22:31.306Z" + }, + { + "id": "proc_2026-04-02T0923_292185", + "workspaceId": "mnh9cz26", + "title": "faigate server with metadata", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "stopped", + "pid": 85184, + "startedAt": "2026-04-02T09:23:14.490Z", + "outputSizeBytes": 0, + "stoppedAt": "2026-04-02T09:25:30.974Z" + }, + { + "id": "proc_2026-04-02T0925_431e2a", + "workspaceId": "mnh9cz26", + "title": "faigate server with merged changes", + "command": "cd /Users/andrelange/Documents/repositories/github/faigate && FAIGATE_PROVIDER_METADATA_DIR=/Users/andrelange/Documents/repositories/github/faigate/docs/examples/fusionaize-metadata-repo python3 -m faigate", + "cwd": "/Users/andrelange/Documents/repositories/github/faigate", + "status": "running", + "pid": 94627, + "startedAt": "2026-04-02T09:25:59.367Z", + "outputSizeBytes": 0 + } +] \ No newline at end of file diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0919_9e2eb7/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0919_9e2eb7/output.txt new file mode 100644 index 0000000..7985afd --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0919_9e2eb7/output.txt @@ -0,0 +1,33 @@ +INFO: Started server process [73561] +INFO: Waiting for application startup. 
+11:19:09 [faigate] INFO Loaded config with 14 providers +11:19:09 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:19:09 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:19:09 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:19:09 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:19:09 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:19:09 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:19:09 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:19:09 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:19:09 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:19:09 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:19:09 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:19:09 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:19:09 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:19:09 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +ERROR: Traceback (most recent call last): + File "/opt/homebrew/lib/python3.14/site-packages/starlette/routing.py", line 694, in lifespan + async with self.lifespan_context(app) as maybe_state: + ~~~~~~~~~~~~~~~~~~~~~^^^^^ + File "/opt/homebrew/Cellar/python@3.14/3.14.3_1/Frameworks/Python.framework/Versions/3.14/lib/python3.14/contextlib.py", line 214, in __aenter__ + return await anext(self.gen) + ^^^^^^^^^^^^^^^^^^^^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/main.py", line 2250, in lifespan + _metrics.init() + ~~~~~~~~~~~~~^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/metrics.py", line 120, in init + self._conn = sqlite3.connect(self._db_path, check_same_thread=False) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +sqlite3.OperationalError: unable to open database file + +ERROR: Application startup failed. Exiting. 
diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0920_ab4f1c/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0920_ab4f1c/output.txt new file mode 100644 index 0000000..7661f08 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0920_ab4f1c/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [76934] +INFO: Waiting for application startup. +11:20:47 [faigate] INFO Loaded config with 14 providers +11:20:47 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:20:47 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:20:47 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:20:47 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:20:47 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:20:47 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:20:47 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:20:47 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:20:47 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:20:47 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:20:47 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:20:47 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:20:47 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:20:47 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:20:47 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:20:47 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8090 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8090): address already in use +INFO: Waiting for application shutdown. +11:20:47 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. 
diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0922_5bbbd8/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0922_5bbbd8/output.txt new file mode 100644 index 0000000..8f6b34c --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0922_5bbbd8/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [82785] +INFO: Waiting for application startup. +11:22:31 [faigate] INFO Loaded config with 14 providers +11:22:31 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:22:31 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:22:31 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:22:31 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:22:31 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:22:31 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:22:31 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:22:31 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:22:31 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:22:31 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:22:31 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:22:31 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:22:31 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:22:31 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:22:31 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:22:31 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:22:31 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. 
diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0923_292185/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0923_292185/output.txt new file mode 100644 index 0000000..cf49204 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0923_292185/output.txt @@ -0,0 +1,31 @@ +INFO: Started server process [85184] +INFO: Waiting for application startup. +11:23:15 [faigate] INFO Loaded config with 14 providers +11:23:15 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:23:15 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:23:15 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:23:15 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:23:15 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:23:15 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:23:15 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:23:15 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:23:15 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:23:15 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:23:15 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:23:15 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:23:15 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:23:15 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:23:15 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:23:15 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. 
+INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:58964 - "GET /api/analytics/provider-mix HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:58970 - "GET /api/analytics/provider-mix HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:58975 - "GET /api/analytics/provider-mix HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:58988 - "GET /health HTTP/1.1" 200 OK +INFO: 127.0.0.1:59004 - "GET /health HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +11:25:30 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. +INFO: Finished server process [85184] diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0925_431e2a/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0925_431e2a/output.txt new file mode 100644 index 0000000..465fca6 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0925_431e2a/output.txt @@ -0,0 +1,29 @@ +INFO: Started server process [94627] +INFO: Waiting for application startup. 
+11:26:01 [faigate] INFO Loaded config with 14 providers +11:26:01 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:26:01 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:26:01 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:26:01 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:26:01 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:26:01 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:26:01 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:26:01 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:26:01 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:26:01 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:26:01 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:26:01 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:26:01 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:26:01 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:26:01 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:26:01 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. 
+INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:59150 - "GET /api/analytics/provider-mix HTTP/1.1" 200 OK +INFO: 127.0.0.1:59160 - "GET /api/analytics/provider-mix HTTP/1.1" 200 OK +INFO: 127.0.0.1:59175 - "GET /api/provider-catalog HTTP/1.1" 200 OK +INFO: 127.0.0.1:59206 - "GET /api/provider-catalog HTTP/1.1" 200 OK +INFO: 127.0.0.1:59207 - "GET /dashboard HTTP/1.1" 200 OK +INFO: 127.0.0.1:59228 - "GET / HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:59232 - "GET /dashboard HTTP/1.1" 200 OK +INFO: 127.0.0.1:59245 - "GET /dashboard HTTP/1.1" 200 OK diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..548712b --- /dev/null +++ b/.coveragerc @@ -0,0 +1,27 @@ +[run] +source = faigate +omit = + */tests/* + */__pycache__/* + */vendor/* + */assets/* +branch = true + +[report] +exclude_lines = + pragma: no cover + def __repr__ + if self.debug: + if settings.DEBUG + raise AssertionError + raise NotImplementedError + if 0: + if __name__ == .__main__.: + @property + @abstractmethod +ignore_errors = true +show_missing = true + +[html] +directory = htmlcov +title = fusionAIze Gate Coverage Report \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..85efc2a --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,54 @@ +{ + "name": "fusionAIze Gate", + "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye", + "features": { + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/devcontainers/features/docker-in-docker:2": {} + }, + "postCreateCommand": "pip install -e .[dev] && pre-commit install", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.black-formatter", + "charliermarsh.ruff", + "tamasfe.even-better-toml", + "ms-azuretools.vscode-docker", + "github.vscode-github-actions", + "bierner.markdown-preview-github-styles", + 
"redhat.vscode-yaml", + "ms-vscode.hexeditor" + ], + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "python.testing.pytestArgs": ["tests"], + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true + }, + "[json]": { + "editor.defaultFormatter": "vscode.json-language-features" + }, + "[yaml]": { + "editor.defaultFormatter": "redhat.vscode-yaml" + }, + "ruff.path": ["/usr/local/py-utils/bin/ruff"] + } + } + }, + "remoteUser": "vscode", + "forwardPorts": [8080], + "portsAttributes": { + "8080": { + "label": "faigate dashboard", + "onAutoForward": "notify" + } + } +} \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9b135a..78ddad6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,8 +28,14 @@ jobs: - name: Lint run: ruff check . - - name: Test - run: pytest tests/ -v + - name: Test with coverage + run: pytest tests/ -v --cov=faigate --cov-report=term --cov-report=xml:coverage.xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./coverage.xml + fail_ci_if_error: false package: runs-on: ubuntu-latest @@ -51,7 +57,113 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.12" - - run: pip install ruff + - run: pip install ruff pre-commit - run: ruff check . - run: ruff format --check . 
- run: bash -n scripts/* + - run: pre-commit run --all-files --show-diff-on-failure + - name: Validate version consistency + run: python -c " +import re +import sys +from pathlib import Path +root = Path('.') +pyproject = root / 'pyproject.toml' +package = root / 'faigate' / '__init__.py' +pyproject_content = pyproject.read_text() +package_content = package.read_text() +pyproject_match = re.search(r'^version = \"([^\"]+)\"$', pyproject_content, flags=re.MULTILINE) +package_match = re.search(r'^__version__ = \"([^\"]+)\"$', package_content, flags=re.MULTILINE) +if not pyproject_match: + print('ERROR: Could not find version in pyproject.toml') + sys.exit(1) +if not package_match: + print('ERROR: Could not find __version__ in faigate/__init__.py') + sys.exit(1) +if pyproject_match.group(1) != package_match.group(1): + print('ERROR: Version mismatch') + print(f'pyproject.toml: {pyproject_match.group(1)}') + print(f'faigate/__init__.py: {package_match.group(1)}') + sys.exit(1) +print(f'Version OK: {pyproject_match.group(1)}') +" + + security: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install bandit[toml] + - run: bandit -c pyproject.toml -r faigate -f html -o bandit-report.html || true + - run: bandit -c pyproject.toml -r faigate -f json -o bandit-report.json || true + - name: Upload Bandit report + uses: actions/upload-artifact@v4 + if: always() + with: + name: bandit-security-report + path: | + bandit-report.html + bandit-report.json + + benchmarks: + runs-on: ubuntu-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install -e .[dev] + - name: Run performance benchmarks + run: pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json + - name: Upload benchmark results + uses: actions/upload-artifact@v4 
+ with: + name: benchmark-results + path: benchmark-results.json + + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install -e .[dev] + - name: Generate API documentation + run: python scripts/generate-api-docs.py + - name: Check if API.md changed + run: | + if git diff --name-only docs/API.md | grep -q "API.md"; then + echo "API.md is out of date. Please run 'python scripts/generate-api-docs.py' and commit the changes." + git diff docs/API.md + exit 1 + else + echo "API.md is up to date." + fi + + changelog: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install git-cliff + run: | + curl -LsSf https://github.com/orhun/git-cliff/releases/latest/download/git-cliff-x86_64-unknown-linux-gnu.tar.gz | tar xz -C /tmp + sudo mv /tmp/git-cliff-*/git-cliff /usr/local/bin/ + - name: Generate changelog + run: git-cliff --config .cliff.toml --unreleased --strip header -o /tmp/generated-changelog.md + - name: Check if CHANGELOG.md is up to date + run: | + if ! diff -u CHANGELOG.md /tmp/generated-changelog.md; then + echo "CHANGELOG.md is out of date. Please run 'git-cliff --unreleased --strip header -o CHANGELOG.md' and commit the changes." + exit 1 + else + echo "CHANGELOG.md is up to date." + fi diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000..bf83bec --- /dev/null +++ b/.mailmap @@ -0,0 +1,8 @@ +# Map commit authors for consistent contributor attribution +# Format: Proper Name Commit Name + +# Map André Lange to typelicious for GitHub contributor recognition +typelicious André Lange + +# Note: AI model contributions (e.g., Claude, GPT) are not considered human contributors +# and should not appear in contributor lists. Their commits are considered automated tooling. 
\ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..bfe5328 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,40 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks + +repos: + # General hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + args: ['--maxkb=1024'] + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: forbid-new-submodules + + # Python formatting and linting + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.4 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + # Python security scanning + - repo: https://github.com/pycqa/bandit + rev: '1.8.0' + hooks: + - id: bandit + args: ['-c', 'pyproject.toml'] + additional_dependencies: ['bandit[toml]'] + + # Conventional commits + - repo: https://github.com/qoomon/git-conventional-commits + rev: v2.6.3 + hooks: + - id: conventional-commits + stages: [commit-msg] \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 865c16e..366e755 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,6 +105,8 @@ - Added an internal canonical request and response layer for bridge traffic, which keeps Anthropic-shaped ingress mapping separate from the existing routing and completion core instead of adding one-off protocol logic directly in the router - Added a community `claude-code-router` hook that can prefer coding-strong, tool-capable, and larger-context routes for Claude Code traffic without making the bridge itself depend on any one routing policy - Added bridge-specific validation and release-readiness helpers, including a client-near validation script and an explicit bridge release checklist for opt-in production rollouts +- Expanded the 
provider source catalog scope beyond `blackbox`, `kilo`, and `openai` so Gate can also track mirrored official source data for `anthropic`, `deepseek`, and `google` +- Added local models-endpoint overlays per configured route, which lets Gate compare what a specific key can really see against the mirrored global provider catalog ### Changed @@ -112,6 +114,8 @@ - Improved quota-aware fallback behavior for Anthropic-shaped traffic by introducing shared quota metadata on routes, which lets Gate avoid blindly retrying another path that is still backed by the same exhausted Anthropic or BYOK quota domain - Clarified the Bridge release position across docs: `v1.13.0` ships the Anthropic surface as opt-in and production-usable for early adopters, but does not claim full Anthropic, Claude Code, or Claude Desktop parity yet - Aligned the doctor and bridge validation tooling with non-default live configs so release validation runs against the same configured DB, env file, and runtime instance instead of silently falling back to repo-local defaults +- Provider source alerts now distinguish more clearly between global catalog drift and key-specific route/model visibility drift +- Catalog summaries now include local route counts, local visible model counts, and route-vs-catalog mismatch hints instead of only source freshness and change counts ## v1.12.0 - 2026-03-29 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 333a2f7..6a696ce 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -58,3 +58,7 @@ All complaints will be reviewed and investigated promptly and fairly. This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1, available at [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). 
+ +## AI‑Generated Contributions + +Contributions generated by AI models (such as Claude, GPT, or other LLMs) are considered automated tooling and do not qualify as human contributors. While AI‑assisted code may be submitted, the human author remains solely responsible for the content and must ensure it complies with this Code of Conduct and the project's licensing terms. AI models are not listed as contributors in project attribution. diff --git a/RELEASES.md b/RELEASES.md index 6e24b43..8b229a2 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -18,7 +18,7 @@ This repo does not require a heavy release process. Use lightweight tags plus Gi 10. For hardening-heavy releases, keep the API functional tests green alongside unit and config coverage. 11. Publish the GitHub Release so [`notify-tap`](./.github/workflows/notify-tap.yml) can dispatch the Homebrew tap update automatically. 12. If the tap dispatch fails or the formula needs manual follow-up, bump `Formula/faigate.rb` in the separate [`fusionAIze/homebrew-tap`](https://github.com/fusionAIze/homebrew-tap) repo to the new release tag and update its `sha256`. -13. For Anthropic bridge releases, run the client-near validation flow in [docs/anthropic-bridge-release-readiness.md](./docs/anthropic-bridge-release-readiness.md) before tagging, and keep the feature positioned as opt-in unless a later release closes the documented parity gaps. +13. For Anthropic bridge releases, run the client-near validation flow in [docs/bridge/anthropic-bridge-release-readiness.md](./docs/bridge/anthropic-bridge-release-readiness.md) before tagging, and keep the feature positioned as opt-in unless a later release closes the documented parity gaps. 
## Example diff --git a/catalog_output.json b/catalog_output.json new file mode 100644 index 0000000..5f731c0 --- /dev/null +++ b/catalog_output.json @@ -0,0 +1,207 @@ +{ + alert_count: int, + alerts: + [{ + code: string, + message: string[114], + official_source_url: string[53], + provider: string, + severity: string + }] (4) + cost_truth: + { + missing_pricing: int, + pricing_freshness: + { + aging: int, + fresh: int, + stale: int, + unknown: int + } + tracked_with_numeric_rates: int, + tracked_with_pricing: int + } + enabled: bool, + items: + [{ + auth_modes: + [ + string + ] + benchmark_cluster: string, + canonical_model: string, + catalog_age_days: int, + configured_model: string, + discovery: + { + disclosure: string[128], + disclosure_required: bool, + link_source: string, + operator_env_var: string, + resolved_url: url, + signup_url: url + } + evidence_level: string, + has_numeric_rates: bool, + lane: + { + benchmark_cluster: string, + canonical_model: string, + cluster: string, + context_strength: string, + degrade_to: + [string] (3) + family: string, + freshness_hint: string[53], + freshness_status: string, + last_reviewed: date?, + name: string, + quality_tier: string, + reasoning_strength: string, + review_age_days: int, + route_type: string, + same_model_group: string, + tool_strength: string + ... +0 more keys + } + lane_cluster: string, + lane_family: string, + lane_name: string, + last_reviewed: date?, + model_matches_recommendation: bool, + notes: string, + offer_track: string, + ... 
+14 more keys + }] (14) + offerings_count: int, + packages_count: int, + priority_clusters: + [{ + description: string[52], + id: string, + item_count: int, + name: string, + priority: string, + total_items: int + }] (6) + priority_next: string, + recommendation_policy: + { + disclosure: string[128], + provider_links_affect_ranking: bool, + ranking_basis: + [string] (5) + } + recommendations: + [{ + action: string[74], + cluster_id: string, + description: string, + id: string, + priority: string, + title: string + }] (3) + source_alert_summary: + { + fix_now: int, + inspect: int, + review_now: int, + severity: + { + critical: int, + info: int, + notice: int, + warning: int + } + status: string, + top_headline: string, + top_suggestion: string[146], + total: int + } + source_alerts: + [{ + action: string, + detail: string[236], + headline: string, + kind: string, + provider_id: string, + severity: string, + source_kind: string, + suggestion: string[146] + }] (8) + source_catalog: + { + alert_summary: + { + fix_now: int, + inspect: int, + review_now: int, + severity: + { + critical: int, + info: int, + notice: int, + warning: int + } + status: string, + top_headline: string, + top_suggestion: string[146], + total: int + } + alerts: + [{ + action: string, + detail: string[236], + headline: string, + kind: string, + provider_id: string, + severity: string, + source_kind: string, + suggestion: string[146] + }] (8) + due_sources: int, + error_sources: int, + items: + [{ + account_profile: + {} + billing_notes: string[137], + display_name: string, + docs_index_count: int, + last_checked_at: float, + last_error: string, + last_success_at: float, + models_count: int, + pricing_count: int, + provider_id: string, + refresh_interval_seconds: int, + sample_models: + [] + seconds_since_success: float, + status: string + }] (3) + priority_next: + { + path: string, + why: string[63] + } + recent_changes: int, + recent_events: + [{ + change_type: string, + detected_at: float, + 
field_name: string, + message: string, + model_id: string, + new_value: string, + old_value: string, + provider_id: string, + severity: string, + source_kind: string + }] (11) + tracked_sources: int + } + total_providers: int, + tracked_providers: int + ... +0 more keys +} diff --git a/config.yaml b/config.yaml index 63464d3..d73a071 100644 --- a/config.yaml +++ b/config.yaml @@ -303,10 +303,10 @@ llm_classifier: ' timeout_ms: 3000 metrics: - db_path: ${FAIGATE_DB_PATH:-/var/lib/faigate/faigate.db} + db_path: faigate.db enabled: true - log_requests: true - log_routing_decisions: true + log_requests: false + log_routing_decisions: false model_shortcuts: enabled: false shortcuts: @@ -871,6 +871,90 @@ providers: timeout: connect_s: 10 read_s: 90 +client_profiles: + enabled: true + default: generic + presets: ["openclaw", "n8n", "cli"] + profiles: + generic: {} + cli: + routing_mode: auto + local-only: + capability_values: + local: true + n8n: + routing_mode: eco + prefer_tiers: ["cheap", "default"] + openclaw: + routing_mode: auto + prefer_tiers: ["default", "reasoning"] + opencode: + routing_mode: auto + prefer_tiers: ["default", "mid", "high", "reasoning"] + # ── faigrid CLI integrations ────────────────────────────────────────── + claude: + routing_mode: auto + prefer_tiers: ["default", "mid", "high", "reasoning"] + codex: + routing_mode: auto + prefer_providers: ["deepseek-chat", "anthropic-haiku", "gemini-flash"] + prefer_tiers: ["default", "mid"] + deepseek-cli: + routing_mode: auto + prefer_providers: ["deepseek-chat", "deepseek-reasoner", "anthropic-haiku"] + prefer_tiers: ["default", "reasoning"] + kilocode: + routing_mode: auto + prefer_tiers: ["default", "mid", "high", "reasoning"] + gemini-cli: + routing_mode: auto + prefer_providers: ["gemini-flash", "gemini-flash-lite", "gemini-pro"] + prefer_tiers: ["cheap", "default", "mid"] + antigravity: + routing_mode: eco + prefer_providers: ["gemini-flash-lite", "gemini-flash", "gemini-pro"] + prefer_tiers: 
["cheap", "default"] + rules: + - profile: opencode + match: + header_contains: + x-faigate-client: ["opencode"] + - profile: claude + match: + header_contains: + x-faigate-client: ["claude", "claude-code"] + - profile: codex + match: + header_contains: + x-faigate-client: ["codex"] + - profile: deepseek-cli + match: + header_contains: + x-faigate-client: ["deepseek-cli"] + - profile: kilocode + match: + header_contains: + x-faigate-client: ["kilocode", "kilo"] + - profile: gemini-cli + match: + header_contains: + x-faigate-client: ["gemini-cli"] + - profile: antigravity + match: + header_contains: + x-faigate-client: ["antigravity"] + - profile: openclaw + match: + header_present: ["x-openclaw-source"] + - profile: n8n + match: + header_contains: + x-faigate-client: ["n8n"] + # - profile: local-only + # match: + # header_contains: + # x-faigate-profile: ["local-only", "private"] + request_hooks: enabled: true hooks: @@ -880,9 +964,9 @@ request_hooks: - mode-override-header on_error: continue api_surfaces: - anthropic_messages: false + anthropic_messages: true anthropic_bridge: - enabled: false + enabled: true allow_claude_code_hints: true model_aliases: claude-code: auto @@ -896,6 +980,13 @@ anthropic_bridge: claude-opus-4-6[1m]: premium claude-haiku-4-5: eco claude-haiku-4-5-20251001: eco + # Claude Desktop model aliases + claude-3-5-sonnet-20241022: auto + claude-3-5-sonnet: auto + claude-3-opus-20240229: premium + claude-3-opus: premium + claude-3-haiku-20240307: eco + claude-3-haiku: eco routing_modes: default: auto enabled: true @@ -1054,7 +1145,7 @@ security: server: host: 127.0.0.1 log_level: info - port: 8090 + port: 8092 static_rules: enabled: true rules: diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f5e73f6..c7af4c0 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -450,7 +450,10 @@ provider_source_refresh: timeout_seconds: 10.0 interval_seconds: 21600 providers: + - anthropic - blackbox + - deepseek + - google - kilo - 
openai ``` @@ -459,4 +462,5 @@ Notes: - startup refresh is best-effort and should not block the service if docs are unavailable - `interval_seconds` controls the conservative background refresh loop after startup - source snapshots live in the same local DB as metrics +- for providers with a usable local `models` endpoint, Gate also mirrors key-specific model visibility per configured route and compares that against the global source snapshot - local billing overlays such as subscription or quota windows belong in the local account profile layer, not in the global provider snapshot diff --git a/docs/FAIGATE-ROADMAP.md b/docs/FAIGATE-ROADMAP.md index 3b2c256..6e37ce1 100644 --- a/docs/FAIGATE-ROADMAP.md +++ b/docs/FAIGATE-ROADMAP.md @@ -2,20 +2,27 @@ ## Status -`v1.14.1` is shipped. +`v1.18.0` is shipped. Gate is no longer just a routing core with helper scripts around it. The current product baseline is now clear: - one local gateway runtime - one OpenAI-compatible surface -- one optional Anthropic-compatible bridge +- one optional Anthropic-compatible bridge (SSE streaming, tool continuity, Claude Code aliases) - direct providers, aggregators, and local workers under one routing core - an operator shell made up of dashboard, doctor, catalog, probe, and guided setup +- package renewal alerts and cost projection wizard -The roadmap should now stay disciplined. The next release lines should deepen -operator trust, routing explainability, and daily-use client confidence instead -of expanding sideways into a second platform. 
+### Recent Achievements (v1.15.0 - v1.18.0) +- **Anthropic bridge production-ready**: SSE streaming adapter, tool result continuity, Claude Code model ID mapping +- **Dashboard enhancements**: Package renewal alerts, cost trends CLI, uPlot charts integration +- **Operator tools**: Branch management guidelines, model shortcut alias conflict detection +- **Provider catalog live**: Local route visibility overlays, operator alert summaries + +The roadmap should now stay disciplined. The next release lines should finalize +Claude Desktop parity, then deepen operator trust through metadata truth and +routing explainability. ## Architecture Readout @@ -48,31 +55,42 @@ It does **not** mean: - hiding routing logic behind opaque UI magic - introducing hosted-only assumptions into a local-first product -## Parity Targets +## Parity Status & Targets + +### Current Parity Status (v1.18.0) -The roadmap keeps three parity goals separate. +| Capability | Anthropic Bridge | Claude Code | Claude Desktop | +|------------|------------------|-------------|----------------| +| `POST /v1/messages` non-streaming | ✅ Production-ready | ✅ Production-ready | ✅ Supported | +| SSE streaming parity | ✅ Implemented | ✅ Working | ⚠️ Needs validation | +| `tool_use` / `tool_result` continuity | ✅ Implemented | ✅ Working | ⚠️ Needs validation | +| Claude model ID aliasing | ✅ Built-in mappings | ✅ Working | ⚠️ Needs validation | +| Header/version/beta compatibility | ✅ Basic support | ✅ Working | ⚠️ Needs validation | +| Exact token counting | ⚠️ Char-based estimates | ⚠️ Estimates okay | ⚠️ Estimates okay | +| Desktop endpoint override flows | N/A | N/A | ⚠️ Needs implementation | +| Session continuity under fallback | ✅ Working | ✅ Working | ⚠️ Needs validation | -### Full Anthropic parity +### Full Anthropic parity (Target) Working definition: - `POST /v1/messages` request and response compatibility -- SSE streaming parity +- SSE streaming parity (✅ achieved) - content-block compatibility - 
header, version, and beta compatibility - compatible error envelopes and stop reasons -- trustworthy token-count semantics +- **trustworthy token-count semantics** (remaining gap) -### Full Claude Code parity +### Full Claude Code parity (✅ Mostly achieved) Working definition: -- daily coding sessions feel normal against local Gate -- streaming and tool flows work -- aliases and fallback do not constantly disrupt the session -- routing remains inside Gate instead of being pushed into client config +- daily coding sessions feel normal against local Gate (✅) +- streaming and tool flows work (✅) +- aliases and fallback do not constantly disrupt the session (✅) +- routing remains inside Gate instead of being pushed into client config (✅) -### Full Claude Desktop parity +### Full Claude Desktop parity (Next priority) Working definition: @@ -80,68 +98,85 @@ Working definition: - acceptable session behavior for the desktop feature set that actually matters - no recurring compatibility papercuts that keep the setup feeling experimental -## Release Sequence - -### `v1.15.x` - operator trust and metadata truth - -Primary outcome: - -- Gate becomes more trustworthy as an operator product -- dashboard, shell, and config tell the same story -- cost and catalog signals become reviewable instead of hand-wavy - -Implementation slices: - -1. cost truth and catalog freshness - - explicit tracked / stale / untracked state - - stronger provider pricing provenance - - refresh visibility in dashboard and shell -2. route and lane explainability - - why this lane - - why this route - - same-lane fallback vs downgrade - - clearer lane-family summaries -3. command bar intelligence and shell parity - - shell-backed scope suggestions - - parity between dashboard pivots and CLI/YAML terms - - safe preview/diff/apply config actions -4. 
shared metadata-source foundation - - fusionAIze-internal JSON metadata boundary - - reusable across Gate and future fusionAIze products only - -Success bar: - -- operators can trust the dashboard without treating it as a decorative shell -- cost and freshness signals are explainable -- route choice is easier to reason about from UI, CLI, and config - -### `v1.16.x` - adaptive routing trust - -Primary outcome: - -- richer live routing behavior without turning Gate into a black box - -Implementation slices: - -1. route pressure and cooldown visibility -2. same-lane-first adaptation before weaker downgrade paths -3. clearer route maps and trace-level route narratives -4. more explicit premium drift, fallback pressure, and quota coupling signals - -Success bar: - -- adaptation under pressure is visible and mostly unsurprising -- operators can explain route changes after the fact without reading source code - -### Later `v1.x` line - Claude Desktop parity if demand justifies it - -This should be validated by real operator demand, not assumed. - -If the client demand is real, the next parity-focused slices should cover: - -1. supported endpoint override flows -2. desktop-specific compatibility hardening -3. clearer troubleshooting and real local workflow validation +## Release Sequence (v1.19.x - v1.21.x) + +### `v1.19.x` - Claude Desktop Parity Finalization + +**Primary outcome:** +- Claude Desktop becomes a first-class client with stable local endpoint configuration +- Desktop-specific workflows work reliably without recurring compatibility issues +- Bridge hardening completes the Anthropic parity line + +**Implementation slices:** +1. **Desktop endpoint override flows** + - Stable local endpoint configuration support + - Clear troubleshooting guides for desktop setup + - Validation against real Claude Desktop workflows +2. 
**Bridge hardening for desktop use** + - Enhanced header/version/beta compatibility + - Session continuity validation under desktop usage patterns + - Error mapping improvements for desktop-specific error cases +3. **Desktop workflow validation** + - Real workflow testing with Claude Desktop + - Common papercut identification and fixes + - Performance and stability validation + +**Success bar:** +- Operators can configure Claude Desktop to use local Gate without recurring issues +- Desktop sessions feel stable and production-ready +- Bridge parity gaps are documented and addressed + +### `v1.20.x` - External Metadata Integration (#186) + +**Primary outcome:** +- Gate integrates with external metadata repository for provider/model/pricing truth +- Cost-aware routing uses real pricing data from trusted sources +- Operators gain visibility into pricing provenance and freshness + +**Implementation slices:** +1. **Git-based metadata sync** (Phase 2a from #186) + - External metadata repository integration + - Background update daemon (2-3 hour intervals) + - Offline fallback and cache management +2. **Model/provider/price mapping** + - Canonical model definitions with multi-provider offerings + - Pricing provenance tracking (source, timestamp, freshness) + - Router integration for price-aware routing decisions +3. 
**Dashboard integration** + - Cost truth visualization with source indicators + - Promotion tracking and expiration alerts + - Provider mix analytics and cost savings reporting + +**Success bar:** +- Gate uses external metadata for accurate pricing and model mappings +- Operators can trust cost reporting with clear provenance +- Routing decisions consider real prices and promotions + +### `v1.21.x` - Route Explainability & Operator Trust + +**Primary outcome:** +- Route decisions become transparent and explainable to operators +- Dashboard provides clear "why this route/why this lane" explanations +- Operators gain confidence in Gate's routing intelligence + +**Implementation slices:** +1. **Route decision explainability** + - "Why this lane / why this route" drilldowns in dashboard + - Same-lane fallback vs downgrade visual indicators + - Lane-family summary cards with decision factors +2. **Operator trust tooling** + - Route trace narratives with decision context + - Pressure and cooldown visibility in real-time + - Premium drift and fallback pressure indicators +3. **Shell parity and intelligence** + - Shell-backed scope suggestions matching dashboard + - Deep links between dashboard panels and CLI views + - Safe config preview/diff/apply workflows + +**Success bar:** +- Operators can understand and explain route decisions without reading source code +- Dashboard and shell tell the same story about routing behavior +- Route adaptation under pressure is visible and understandable ## Shared Metadata Repository Direction @@ -207,16 +242,28 @@ Recommended first delivery model: This keeps the truth source inspectable and shared, while avoiding a premature hosted control-plane dependency. -## Immediate Near-Term Order +## Immediate Near-Term Order (v1.19.x) + +1. **Claude Desktop Parity Finalization** + - Desktop endpoint override flows + - Bridge hardening for desktop usage + - Real workflow validation + +2. 
**External Metadata Integration** (v1.20.x) + - Git-based metadata sync implementation + - Model/provider/price mapping foundation + - Dashboard cost truth visualization -1. cost truth and catalog freshness -2. route and lane explainability -3. command bar intelligence and shell/config parity +3. **Route Explainability** (v1.21.x) + - Route decision drilldowns and explanations + - Operator trust tooling and visibility + - Shell parity and intelligent suggestions This order matters. -First make the truth source believable. Then make route choice legible. Then -add smarter operator controls on top of a clearer model. +First complete the client parity line with Claude Desktop. Then build metadata +truth for trustworthy cost routing. Finally add explainability so operators +understand and trust the routing decisions. ## Anti-Goals diff --git a/docs/anthropic-bridge.md b/docs/anthropic-bridge.md index ffe96e0..cd7cc7b 100644 --- a/docs/anthropic-bridge.md +++ b/docs/anthropic-bridge.md @@ -187,7 +187,7 @@ That broader check adds: - basic `tool_use` / `tool_result` flow shape - doctor and provider-probe output after the same config is live -For the explicit release gate, see [Anthropic Bridge Release Readiness](./anthropic-bridge-release-readiness.md). +For the explicit release gate, see [Anthropic Bridge Release Readiness](./bridge/anthropic-bridge-release-readiness.md). 
## Known v1 Limits diff --git a/docs/anthropic-bridge-plan.md b/docs/bridge/anthropic-bridge-plan.md similarity index 100% rename from docs/anthropic-bridge-plan.md rename to docs/bridge/anthropic-bridge-plan.md diff --git a/docs/anthropic-bridge-release-readiness.md b/docs/bridge/anthropic-bridge-release-readiness.md similarity index 100% rename from docs/anthropic-bridge-release-readiness.md rename to docs/bridge/anthropic-bridge-release-readiness.md diff --git a/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json b/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json index 6bc6547..f13af9a 100644 --- a/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json +++ b/docs/examples/fusionaize-metadata-repo/products/gate/overlays.v1.json @@ -22,7 +22,11 @@ "refreshed_at": "2026-03-31T18:15:00Z", "freshness_status": "fresh", "input_cost_per_1m": 0.8, - "output_cost_per_1m": 4.0 + "output_cost_per_1m": 4.0, + "promotion": "Spring 2026 Discount", + "discount_percentage": 15.0, + "expires_at": "2026-04-30T23:59:59Z", + "promotion_source": "provider-announcement" } }, "anthropic-sonnet": { @@ -45,7 +49,11 @@ "refreshed_at": "2026-03-31T18:15:00Z", "freshness_status": "fresh", "input_cost_per_1m": 3.0, - "output_cost_per_1m": 15.0 + "output_cost_per_1m": 15.0, + "promotion": "Q1 2026 Launch Offer", + "discount_percentage": 10.0, + "expires_at": "2026-03-15T23:59:59Z", + "promotion_source": "provider-announcement" } }, "gemini-pro": { diff --git a/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json b/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json index bda073b..b52443c 100644 --- a/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json +++ b/docs/examples/fusionaize-metadata-repo/providers/catalog.v1.json @@ -21,7 +21,11 @@ "source_type": "provider-docs", "source_url": "https://api-docs.deepseek.com/pricing", "refreshed_at": "2026-03-31T17:45:00Z", - 
"freshness_status": "fresh" + "freshness_status": "fresh", + "promotion": "introductory-discount", + "discount_percentage": 20, + "expires_at": "2026-06-30T23:59:59Z", + "promotion_source": "provider-announcement" } } } diff --git a/docs/fusionAIze-project-template.md b/docs/fusionAIze-project-template.md new file mode 100644 index 0000000..f132b9b --- /dev/null +++ b/docs/fusionAIze-project-template.md @@ -0,0 +1,254 @@ +# fusionAIze Project Template + +This template defines the professional software‑development benchmark for fusionAIze projects (Lens, Fabric, Grid, Browser, OS). It captures the tooling, automation, and quality gates established in fusionAIze Gate. + +## Core Architecture Principles + +1. **Gateway‑first architecture** – Keep the core small, focused, and portable. +2. **Clear provider boundaries** – Use client adapters, not one‑off integrations. +3. **Standard API surfaces first** – Prefer OpenAI‑compatible endpoints before custom adapters. +4. **Operational simplicity** – Avoid platform sprawl; keep failure modes visible. +5. **Local‑first, cloud‑portable** – Design for local operation with optional cloud scaling. + +## Required Tooling & Dependencies + +### Development Dependencies (pyproject.toml) + +```toml +[project.optional-dependencies] +dev = [ + "build>=1.2", + "pytest>=8.0", + "pytest-asyncio>=0.24", + "pytest-cov>=5.0", + "pytest-benchmark>=4.0.0", + "httpx", # for TestClient + "ruff>=0.8", + "twine>=6.1", + "pre-commit>=3.0", + "bandit[toml]>=1.8.0", + "jinja2>=3.1.0", +] +``` + +### Pre‑commit Configuration (.pre‑commit‑config.yaml) + +Include hooks for: +- Ruff linting and formatting +- Bandit security scanning +- Conventional‑commit validation +- File hygiene (trailing whitespace, end‑of‑file fixer, etc.) + +### Git‑cliff Configuration (.cliff.toml) + +Configure conventional‑commit parsing and automated changelog generation. + +### Coverage Configuration (.coveragerc) + +Define coverage sources, exclusions, and reporting options. 
+ +### DevContainer Configuration (.devcontainer/devcontainer.json) + +Provide a consistent development environment with VS Code extensions and post‑create commands. + +## CI/CD Pipeline (GitHub Actions) + +### Core Jobs + +1. **Test** – Multi‑Python version testing with coverage reporting and Codecov upload. +2. **Lint** – Ruff checks, format validation, shell‑script linting, pre‑commit hooks, version‑consistency validation. +3. **Security** – Bandit scanning with HTML/JSON report artifacts. +4. **Package** – Python package build and Twine validation. +5. **Benchmarks** – Performance benchmark suite (runs on main pushes). +6. **Docs** – API documentation generation and validation. +7. **Changelog** – git‑cliff validation to ensure CHANGELOG.md is up‑to‑date. + +### Additional Workflows + +- **codeql.yml** – GitHub CodeQL security scanning. +- **repo‑safety.yml** – Repository‑hygiene enforcement (no secrets, forbidden files). +- **release‑artifacts.yml** – Release packaging for PyPI, Docker, and Homebrew. +- **publish‑dry‑run.yml** – Pre‑release validation. +- **notify‑tap.yml** – Homebrew tap integration. + +## Development Workflow + +### Onboarding + +1. Clone the repository. +2. Open in VS Code with DevContainers (recommended) or set up a local Python 3.12+ environment. +3. Run `pip install -e .[dev]` to install development dependencies. +4. Run `pre‑commit install` to install git hooks. + +### Daily Development + +- Write tests for new features. +- Run `pytest` locally before pushing. +- Use `ruff check .` and `ruff format .` to maintain code style. +- Commit messages must follow [Conventional Commits](https://www.conventionalcommits.org/). + +### Pre‑Commit Hooks + +The following hooks run automatically on `git commit`: + +- **trailing‑whitespace** – Removes trailing whitespace. +- **end‑of‑file‑fixer** – Ensures files end with a newline. +- **check‑yaml** – Validates YAML files. +- **detect‑private‑key** – Prevents accidental commits of private keys. 
+- **ruff** – Lints and fixes Python code. +- **ruff‑format** – Formats Python code. +- **bandit** – Runs security scanning. +- **conventional‑commits** – Validates commit messages. + +## Testing Strategy + +### Unit Tests + +- Place tests in `tests/` directory. +- Use `pytest‑asyncio` for async tests. +- Mock external dependencies (HTTP calls, file system, environment variables). + +### Coverage Requirements + +- Aim for ≥80% line coverage. +- Coverage reports are generated in CI and uploaded to Codecov. +- Exclude vendor files, assets, and test directories from coverage. + +### Performance Benchmarks + +- Place benchmark tests in `tests/benchmarks/`. +- Use `pytest‑benchmark` to track performance over time. +- Benchmarks run automatically on pushes to `main`. + +## Documentation + +### API Documentation + +- Use FastAPI’s automatic OpenAPI generation. +- Maintain a `docs/API.md` file that is auto‑generated from the OpenAPI spec. +- The CI validates that `docs/API.md` matches the current API. + +### Project Documentation + +- `README.md` – Primary landing page with badges, quick start, and navigation. +- `docs/` – Detailed architecture, integration, onboarding, and troubleshooting guides. +- `CHANGELOG.md` – Auto‑generated from git history using git‑cliff. +- `ROADMAP.md` – Project roadmap and release planning. + +## Release Process + +### Versioning + +- Use [Semantic Versioning](https://semver.org/) (`MAJOR.MINOR.PATCH`). +- Versions are tracked in `pyproject.toml` and `__init__.py`. +- CI validates that both files are in sync. + +### Release Script + +- Use `scripts/faigate‑release` (or equivalent) to prepare releases. +- The script: + - Validates version consistency. + - Updates version files. + - Updates the changelog. + - Outputs the next steps for tagging and pushing. + +### Automated Changelog + +- `git‑cliff` generates the changelog from conventional commits. +- The `changelog` CI job ensures the changelog is up‑to‑date. 
+ +## Security & Compliance + +### Scanning Tools + +- **Bandit** – Python‑specific security issues. +- **CodeQL** – GitHub’s advanced semantic code analysis. +- **Repository safety** – Blocks commits of secrets and forbidden files. + +### Dependency Management + +- Dependabot is configured for automatic dependency updates. +- Security vulnerabilities are automatically flagged and patched. + +## Issue & PR Workflow + +### Issue Creation + +- Use GitHub Issues for all feature requests, bugs, and tasks. +- Apply labels: `roadmap:vX.Y`, `priority:high|medium|low`, `component:*`, `parity:*`. +- Reference the relevant roadmap milestone. + +### Pull Requests + +- Branch naming: `feature/`, `review/`, `hotfix/`. +- PR description must include: + - Summary of changes. + - Link to related issue(s). + - Testing performed. + - Screenshots (if UI changes). +- All CI jobs must pass before merge. +- At least one review required for non‑trivial changes. + +### Branch Management + +- `main` is always stable and release‑ready. +- Feature branches are deleted after merge. +- Use `git worktree` for parallel development contexts. + +## Monitoring & Observability + +### Health Endpoints + +- Expose a `/health` endpoint with service status, provider summary, and capability coverage. +- Include metrics for request counts, token usage, and error rates. + +### Logging + +- Use structured logging (JSON) for production deployments. +- Log levels: DEBUG (development), INFO (normal operation), WARNING (unexpected but handled), ERROR (failures). + +### Metrics + +- Expose Prometheus metrics (optional) for advanced monitoring. +- Track request latency, error rates, and provider health. + +## Optimization Opportunities + +### High Priority + +1. Test coverage reporting with `pytest‑cov`. +2. Pre‑commit hooks for code quality. +3. Security scanning with Bandit. +4. Version‑bump automation. + +### Medium Priority + +5. DevContainer configuration. +6. Performance benchmark suite. +7. 
API documentation automation. +8. Changelog automation with git‑cliff. + +### Low Priority + +9. Advanced monitoring (Prometheus, structured logging). +10. Multi‑environment testing (macOS, Windows). +11. Dependency license compliance. +12. Code quality metrics dashboard. + +## Template Adoption + +To apply this template to a new fusionAIze project: + +1. Copy the `.github/workflows/` directory. +2. Copy `.pre‑commit‑config.yaml`, `.cliff.toml`, `.coveragerc`, `.devcontainer/`. +3. Update `pyproject.toml` with project‑specific metadata. +4. Adjust the CI jobs as needed (e.g., remove Python multi‑version testing if not applicable). +5. Update this document with project‑specific details. + +## License + +fusionAIze projects are licensed under the Apache‑2.0 license unless otherwise specified. + +--- + +*This template is derived from the fusionAIze Gate project and serves as the benchmark for all fusionAIze repositories.* \ No newline at end of file diff --git a/docs/process/git-workflow.md b/docs/process/git-workflow.md index b0e0cbf..00f4e72 100644 --- a/docs/process/git-workflow.md +++ b/docs/process/git-workflow.md @@ -103,3 +103,70 @@ If you use multiple Git worktrees, remember that: - `origin/main` may advance before every worktree has been fast-forwarded locally Keep branch cleanup explicit when multiple tools or agents share the same repository. 
+ +## Branch Management and Cleanup + +To prevent branch sprawl and maintain repository hygiene, follow these cleanup guidelines: + +### Active Branch Limits +- **Maximum 15 active branches** total (feature, review, hotfix combined) +- **Maximum 10 unmerged feature branches** at any time +- Delete branches immediately after merging into `main` + +### Age-Based Cleanup +- **30-day rule**: Branches older than 30 days without commits should be considered for deletion +- **Release-triggered cleanup**: During each release process, review and delete stale branches +- **Hotfix branches**: Delete within 7 days after merging, unless kept for tracking + +### Cleanup Process +1. **Before each release**, run branch audit: + ```bash + # List merged branches + git branch --merged main --format='%(refname:short)' | grep -E "^(feature|review|hotfix)/" + + # List branches by last commit date, oldest first (look for dates older than 30 days) + git for-each-ref --sort=committerdate refs/heads/ \ + --format='%(committerdate:short) %(refname:short)' | \ + grep -E " (feature|review|hotfix)/" + ``` + +2. **Delete merged branches**: + ```bash + # Safe deletion of merged branches + git branch --merged main --format='%(refname:short)' | grep -E "^(feature|review|hotfix)/" | xargs -n1 git branch -d + + # Force deletion of stale unmerged branches (with caution) + git branch --format='%(refname:short)' | grep -E "^(feature|review|hotfix)/" | \ + while read -r branch; do + if [ "$(git log -1 --since='30 days ago' "$branch")" = "" ]; then + echo "Consider deleting stale branch: $branch" + # git branch -D "$branch" # Uncomment after review + fi + done + ``` + +3. **Clean remote-tracking references**: + ```bash + git fetch --prune + git remote prune origin + ``` + +4. 
**Clean up worktrees**: + ```bash + git worktree list + git worktree prune + ``` + +### Exceptions +- **Documentation branches**: Can be kept longer if actively maintained +- **Long-running feature branches**: Should be rebased regularly and justified in PR description +- **Release branches**: Hotfix branches for specific releases can be kept until next minor release + +### Automation Recommendation +Consider adding a periodic cleanup script (e.g., `scripts/branch-cleanup`) that: +- Lists stale branches +- Provides dry-run option +- Integrates with release checklist +- Logs cleanup actions for audit + +Remember: A clean repository is easier to navigate, reduces merge conflicts, and improves team productivity. diff --git a/docs/process/issue-workflow.md b/docs/process/issue-workflow.md new file mode 100644 index 0000000..935cd0e --- /dev/null +++ b/docs/process/issue-workflow.md @@ -0,0 +1,209 @@ +# GitHub Issue Workflow + +## Overview +This workflow aligns Roadmap priorities with GitHub Issues and provides a lean, predictable process for implementing features from issue creation to merge and cleanup. + +## Workflow Phases + +``` +Roadmap Priorities → GitHub Issues → Feature Branch → PR → Review → Merge → Cleanup +``` + +## 1. Issue Creation & Triage + +### From Roadmap to Issues +Every roadmap item should be converted into one or more GitHub issues before implementation. + +**Issue Creation Template:** +```bash +gh issue create --title "feat: [brief description] ([target release])" \ + --body-file - << 'EOF' +## Objective +[Clear, concise objective] + +## Context +[Link to roadmap section, related issues, current status] + +## Implementation Slices +1. [First deliverable slice] +2. [Second deliverable slice] +3. 
[Optional stretch goals] + +## Success Criteria +- [Measurable success criterion 1] +- [Measurable success criterion 2] + +## Related Issues +- #[number]: [Related issue title] + +## Labels +- `roadmap:vX.Y` (target release) +- `priority:[high|medium|low]` +- `component:[bridge|dashboard|metadata|router|...]` +- `parity:[desktop|anthropic|...]` (if applicable) +EOF +``` + +### Labeling Guidelines +- **`roadmap:vX.Y`**: Target release version from roadmap (e.g., `roadmap:v1.19`, `roadmap:v1.20`) +- **`priority:`**: `high` (next release), `medium` (next 1-2 releases), `low` (backlog) +- **`component:`**: Primary component affected: `bridge`, `dashboard`, `metadata`, `router`, `cli`, `docs` +- **`parity:`**: For parity-focused work: `desktop`, `anthropic`, `claude-code` +- **Standard labels**: `bug`, `documentation`, `enhancement`, `question` + +### Issue Triage Process +- **Weekly sync**: Review open issues against roadmap priorities +- **New roadmap items**: Convert to issues within 1 week of roadmap update +- **Stale issues**: Close or update issues older than 30 days without activity +- **Priority updates**: Adjust labels based on roadmap changes + +## 2. 
Development Phase + +### Branch Creation +Create feature branch directly from issue: +```bash +# Option 1: Using gh CLI (recommended) +gh issue develop [issue-number] --name feature/[topic]-[date] --checkout + +# Option 2: Manual branch naming +git checkout -b feature/[component]-[brief-description]-[date] +``` + +**Branch naming conventions:** +- `feature/claude-desktop-endpoints-2026-04-01` +- `feature/dashboard-route-explainability-2026-04-01` +- `feature/metadata-git-sync-2026-04-01` + +### Commit Guidelines +- Reference issue number in commit message: `feat(bridge): exact token counting for Anthropic (#187)` +- Keep commits small and focused (1 logical change per commit) +- Write clear commit messages with "why" not just "what" +- Follow existing code style and conventions + +### Development Checklist +- [ ] Read and understand the issue requirements +- [ ] Check for related issues and dependencies +- [ ] Write tests for new functionality +- [ ] Update documentation if needed +- [ ] Run linting and tests locally before pushing + +## 3. Pull Request & Review + +### PR Creation +```bash +gh pr create --title "feat: [descriptive title]" \ + --body "Closes #[issue-number]. Implements [brief description]..." 
\ + --reviewer @[reviewer] \ + --label "component:[component]" \ + --assignee @[assignee] +``` + +**PR Title Format:** +- `feat: [component] [description]` +- `fix: [component] [description]` +- `docs: [description]` +- `refactor: [component] [description]` + +**PR Body Template:** +``` +## Changes +- [Bullet list of key changes] + +## Testing +- [ ] Unit tests added/updated +- [ ] Integration tests pass +- [ ] Manual testing performed [describe] + +## Documentation +- [ ] README/docs updated if needed +- [ ] Changelog entry added (for user-facing changes) + +## Related Issues +Closes #[issue-number] + +## Checklist +- [ ] Code follows project conventions +- [ ] Tests pass locally +- [ ] Linting passes (`ruff check --fix`) +- [ ] No new warnings introduced +``` + +### Review Process +**Reviewer Responsibilities:** +- Verify implementation matches issue requirements +- Check code quality and adherence to conventions +- Ensure tests are adequate and pass +- Confirm documentation is updated if needed +- Validate no breaking changes (unless intentional) + +**Author Responsibilities:** +- Address review comments promptly +- Update PR based on feedback +- Keep PR focused on the issue scope +- Rebase if needed to resolve conflicts + +**Review Labels:** +- `ready-for-review`: PR is ready for review +- `needs-changes`: PR requires updates before merge +- `approved`: PR approved for merge + +## 4. Merge & Post-Merge + +### Merge Criteria +- [ ] All tests pass (CI green) +- [ ] At least one approval +- [ ] No unresolved review comments +- [ ] Code coverage maintained or improved +- [ ] Documentation updated if needed + +### Merge Strategy +- Prefer **squash and merge** for feature branches +- Keep commit history clean and logical +- Use descriptive merge commit message referencing issue + +### Post-Merge Actions +1. **Close issue**: Automatically via PR closure ("Closes #[number]") +2. **Delete branch**: Immediately after merge (follow branch management guidelines) +3. 
**Update changelog**: Add entry to `CHANGELOG.md` for user-facing changes +4. **Verify deployment**: If applicable, verify changes work in target environment + +## 5. Issue & Branch Cleanup + +### Branch Management +- Delete feature branches immediately after merge +- Follow branch limits (max 15 active branches total) +- Clean up stale branches older than 30 days + +### Issue Cleanup +- Close issues when implementation complete +- Move incomplete items to new issues if scope changed +- Archive resolved issues (keep for reference) + +### Regular Maintenance +**Weekly:** +- Review open issues against roadmap +- Update priorities based on latest roadmap +- Close stale issues without activity + +**Pre-release:** +- Verify all issues for target milestone are closed or moved +- Update roadmap with completed items +- Create issues for next release priorities + +## Automation & Tools + +### GitHub Actions +- Auto-label PRs based on branch name +- Auto-close issues on merge (via "Closes #[number]") +- Weekly issue triage reminder + +### Local Scripts +Consider creating helper scripts: +- `scripts/issue-create-from-roadmap`: Convert roadmap items to issues +- `scripts/branch-cleanup`: Clean up merged/stale branches +- `scripts/pre-release-check`: Verify issue completion before release + +## Related Documents +- [Git Workflow](./git-workflow.md) - Branch management and cleanup +- [Roadmap](../FAIGATE-ROADMAP.md) - Product direction and release sequence +- [RELEASES.md](../../RELEASES.md) - Release process and versioning \ No newline at end of file diff --git a/faigate/config.py b/faigate/config.py index b68a468..689f8e5 100644 --- a/faigate/config.py +++ b/faigate/config.py @@ -1260,6 +1260,120 @@ def _normalize_routing_modes(data: dict[str, Any]) -> dict[str, Any]: return normalized +def _shortcut_alias_tokens(shortcut_name: str, spec: Any) -> list[str]: + """Return the normalized alias tokens that participate in uniqueness checks.""" + if not isinstance(shortcut_name, 
str): + return [] + normalized_name = shortcut_name.strip() + if not normalized_name: + return [] + + tokens = [normalized_name] + if not isinstance(spec, dict): + return tokens + + for alias in spec.get("aliases", []): + if not isinstance(alias, str): + continue + normalized_alias = alias.strip() + if normalized_alias: + tokens.append(normalized_alias) + return tokens + + +def find_model_shortcut_alias_conflicts(data: dict[str, Any]) -> list[dict[str, str]]: + """Return duplicate shortcut-alias conflicts without raising ConfigError.""" + raw = data.get("model_shortcuts", {"enabled": False, "shortcuts": {}}) + if raw in (None, "") or not isinstance(raw, dict): + return [] + + raw_shortcuts = raw.get("shortcuts", {}) + if raw_shortcuts is None or not isinstance(raw_shortcuts, dict): + return [] + + seen_aliases: dict[str, str] = {} + conflicts: list[dict[str, str]] = [] + for shortcut_name, spec in raw_shortcuts.items(): + normalized_name = str(shortcut_name).strip() + if not normalized_name: + continue + for alias in _shortcut_alias_tokens(normalized_name, spec): + owner = seen_aliases.get(alias) + if owner and owner != normalized_name: + conflicts.append( + { + "alias": alias, + "owner": owner, + "conflict": normalized_name, + } + ) + continue + seen_aliases[alias] = normalized_name + return conflicts + + +def dedupe_model_shortcut_aliases( + data: dict[str, Any], +) -> tuple[dict[str, Any], list[dict[str, str]]]: + """Drop duplicate aliases conservatively, keeping the first owner in shortcut order.""" + raw = data.get("model_shortcuts", {"enabled": False, "shortcuts": {}}) + if raw in (None, "") or not isinstance(raw, dict): + return dict(data), [] + + raw_shortcuts = raw.get("shortcuts", {}) + if raw_shortcuts is None or not isinstance(raw_shortcuts, dict): + return dict(data), [] + + normalized = dict(data) + normalized_model_shortcuts = dict(raw) + normalized_shortcuts: dict[str, Any] = {} + seen_aliases: dict[str, str] = {} + removed: list[dict[str, str]] = 
[] + + for shortcut_name, spec in raw_shortcuts.items(): + if not isinstance(shortcut_name, str) or not shortcut_name.strip(): + normalized_shortcuts[shortcut_name] = spec + continue + normalized_name = shortcut_name.strip() + if not isinstance(spec, dict): + normalized_shortcuts[normalized_name] = spec + continue + + shortcut_copy = dict(spec) + deduped_aliases: list[str] = [] + local_seen: set[str] = set() + for alias in spec.get("aliases", []): + if not isinstance(alias, str): + continue + normalized_alias = alias.strip() + if not normalized_alias: + continue + if normalized_alias == normalized_name or normalized_alias in local_seen: + continue + owner = seen_aliases.get(normalized_alias) + if owner and owner != normalized_name: + removed.append( + { + "alias": normalized_alias, + "owner": owner, + "conflict": normalized_name, + } + ) + continue + seen_aliases[normalized_alias] = normalized_name + deduped_aliases.append(normalized_alias) + local_seen.add(normalized_alias) + + normalized_shortcuts[normalized_name] = { + **shortcut_copy, + "aliases": deduped_aliases, + } + + normalized_model_shortcuts["shortcuts"] = normalized_shortcuts + normalized["model_shortcuts"] = normalized_model_shortcuts + return normalized, removed + + def _normalize_model_shortcuts(data: dict[str, Any]) -> dict[str, Any]: """Validate explicit shortcut names that map to concrete providers.""" raw = data.get("model_shortcuts", {"enabled": False, "shortcuts": {}}) diff --git a/faigate/dashboard_web.py b/faigate/dashboard_web.py index a5eef1c..9f75e63 100644 --- a/faigate/dashboard_web.py +++ b/faigate/dashboard_web.py @@ -2052,6 +2052,15 @@ def _inline_svg(name: str) -> str:
LayerRuleProviderLane familySelection pathRequestsCostLatency
+
+
+
+

Route decision history

+

Recent routing decisions with explanations.

+
+
+
TimeProviderModelWhy selectedAlternativesCostLatency
+
@@ -2146,7 +2155,7 @@ def _inline_svg(name: str) -> str:

Configured vs recommended model, freshness, volatility, and notes.

-
ProviderStatusConfiguredRecommendedOffer trackVolatilityReviewedWhy it matters
+
ProviderStatusConfiguredRecommendedOffer trackVolatilitySourceReviewedWhy it matters
@@ -2531,6 +2540,32 @@ def _inline_svg(name: str) -> str: suggestion: alert.recommended_model ? 'Check recommended model ' + alert.recommended_model + '.' : 'Refresh the catalog or review the source trail.', }); }); + // Package renewal alerts + const packagesSummary = bundle.stats.packages_summary || {}; + const packagesDetail = bundle.stats.packages_detail || []; + if (packagesSummary.expiring_soon > 0) { + alerts.push({ + level: 'warning', + headline: packagesSummary.expiring_soon + ' package' + (packagesSummary.expiring_soon === 1 ? '' : 's') + ' expiring soon', + detail: 'Package credits will expire within 7 days.', + suggestion: 'Review packages and consider renewal before expiry.', + }); + } + if (packagesDetail.length > 0) { + const lowCreditPackages = packagesDetail.filter(pkg => { + const remaining = pkg.remaining_credits; + const total = pkg.total_credits; + return remaining !== null && total !== null && total > 0 && remaining / total < 0.1; + }); + if (lowCreditPackages.length > 0) { + alerts.push({ + level: 'warning', + headline: lowCreditPackages.length + ' package' + (lowCreditPackages.length === 1 ? 
'' : 's') + ' running low on credits', + detail: 'Packages have less than 10% credits remaining.', + suggestion: 'Monitor usage or purchase additional credits.', + }); + } + } const unhealthy = (bundle.inventory.providers || []).filter(row => row.healthy === false); if (unhealthy.length) { const top = unhealthy[0]; @@ -2664,6 +2699,8 @@ def _inline_svg(name: str) -> str: latestBundle = bundle; const totals = bundle.stats.totals || {}; const providers = bundle.inventory.providers || []; + const packagesSummary = bundle.stats.packages_summary || {}; + const packagesDetail = bundle.stats.packages_detail || []; const providerMetrics = Object.fromEntries((bundle.stats.providers || []).map(row => [row.provider, row])); const clientTotals = bundle.stats.client_totals || []; const routing = bundle.stats.routing || []; @@ -2702,6 +2739,7 @@ def _inline_svg(name: str) -> str: }); const sortedClients = [...clientTotals].sort((a, b) => (Number(b.cost_usd || 0) - Number(a.cost_usd || 0)) || (Number(b.failures || 0) - Number(a.failures || 0)) || (Number(b.requests || 0) - Number(a.requests || 0))); const sortedRouting = [...routing].sort((a, b) => (Number(b.cost_usd || 0) - Number(a.cost_usd || 0)) || (Number(b.requests || 0) - Number(a.requests || 0))); + const sortedTraces = [...traces].sort((a, b) => (b.timestamp || 0) - (a.timestamp || 0)); let primaryAction = {target: 'providers', label: 'Open providers'}; let secondaryAction = {target: 'routes', label: 'Inspect routes'}; @@ -2737,6 +2775,7 @@ def _inline_svg(name: str) -> str: ['Top lane family', laneFamilies.length ? (laneFamilies[0].lane_family || 'unclassified') : '—'], ['Top cost client', topCost ? (topCost.client_tag || topCost.client_profile || 'generic') : '—'], ['Catalog due', String(sourceCatalog.due_sources || 0)], + ['Packages', String(packagesSummary.total || 0) + (packagesSummary.expiring_soon > 0 ? ' (' + String(packagesSummary.expiring_soon) + ' expiring)' : '')], ].map(([label, value]) => `
${esc(label)}
${esc(value)}
`).join(''); $('#overview-cards').innerHTML = [ @@ -2747,6 +2786,7 @@ def _inline_svg(name: str) -> str: {kicker:'Estimated spend', value:fmtUsd(totals.total_cost_usd || 0), detail:fmtTok((totals.total_prompt_tokens || 0) + (totals.total_compl_tokens || 0)) + ' tokens', tone:'orange'}, {kicker:'Avg latency', value:fmtMs(totals.avg_latency_ms || 0), detail:'Last request ' + ago(totals.last_request), tone:'green'}, {kicker:'Catalog drift', value:String(bundle.catalog.alert_count || 0), detail:String(sourceCatalog.due_sources || 0) + ' reviews due', tone:'orange'}, + {kicker:'Packages', value:String(packagesSummary.total || 0), detail:String(packagesSummary.expiring_soon || 0) + ' expiring soon', tone:packagesSummary.expiring_soon > 0 ? 'warning' : 'blue'}, {kicker:'Top client', value:esc(bundle.stats.client_highlights && bundle.stats.client_highlights.top_requests ? (bundle.stats.client_highlights.top_requests.client_tag || bundle.stats.client_highlights.top_requests.client_profile || 'generic') : '—'), detail:'Highest request volume', tone:'lime'}, ].map(metricCard).join(''); @@ -2885,6 +2925,18 @@ def _inline_svg(name: str) -> str: `).join('') : tableEmpty(8, 'No routing rows in this scope', 'Clear filters or switch to All traffic.'); + $('#route-decisions-table tbody').innerHTML = sortedTraces.length ? sortedTraces.map(row => ` + + ${esc(ago(row.timestamp || 0))} + ${esc(row.provider || '—')} + ${esc(row.model || '—')} + ${esc((row.route_summary?.why_selected || []).slice(0, 2).join(', ') || '—')} + ${esc((row.route_summary?.alternatives || []).length ? 
`${row.route_summary.alternatives.length} alternatives` : '—')} + ${fmtUsd(row.cost_usd || 0)} + ${fmtMs(row.latency_ms || 0)} + + `).join('') : tableEmpty(7, 'No recent route decisions in this scope', 'Clear filters or wait for requests.'); + const analyticsDailyLabels = (bundle.stats.daily || []).map(row => row.day || ''); const analyticsHourlyLabels = (bundle.stats.hourly || []).map(row => row.hour_offset || ''); $('#analytics-kpis').innerHTML = [ @@ -3013,10 +3065,11 @@ def _inline_svg(name: str) -> str: ${esc(row.recommended_model || '—')} ${esc(row.offer_track || '—')} ${esc(row.volatility || '—')} + ${esc(row.pricing?.source_type || '—')} ${esc(row.last_reviewed || '—')} ${esc(row.notes || ((row.model_matches_recommendation === false) ? 'Configured model differs from the curated recommendation.' : 'Catalog guidance is aligned.')).slice(0, 180)} - `).join('') : tableEmpty(8, 'No tracked provider assumptions in this scope', 'Widen the scope or check whether provider catalog coverage is enabled.'); + `).join('') : tableEmpty(9, 'No tracked provider assumptions in this scope', 'Widen the scope or check whether provider catalog coverage is enabled.'); $('#integrations-kpis').innerHTML = [ {kicker:'Claude-ready', value:(readiness.providers_ready || 0) ? 'Yes' : 'No', detail:'Anthropic endpoint reachable', tone:(readiness.providers_ready || 0) ? 
'green' : 'orange'}, diff --git a/faigate/main.py b/faigate/main.py index e6ee84e..57a1996 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -40,7 +40,9 @@ ) from .canonical import CanonicalChatRequest, CanonicalChatResponse, CanonicalResponseMessage from .config import Config, load_config +from .dashboard import _metadata_catalogs_summary, _metadata_packages_detail from .dashboard_web import DASHBOARD_HTML +from .dashboard import _metadata_catalogs_summary, _metadata_packages_detail from .hooks import ( AppliedHooks, HookExecutionError, @@ -51,6 +53,10 @@ ) from .lane_registry import get_provider_lane_binding, get_route_add_recommendations from .metrics import MetricsStore, calc_cost +from .provider_availability import ( + record_availability_from_config, + refresh_local_model_availability, +) from .provider_catalog import ( build_provider_catalog_report, build_provider_discovery_view, @@ -88,6 +94,10 @@ _provider_catalog_refresh_task: asyncio.Task[None] | None = None +def _provider_catalog_config_path() -> str: + return str(os.environ.get("FAIGATE_CONFIG_FILE") or "config.yaml") + + class PayloadTooLargeError(ValueError): """Raised when one request or upload exceeds configured size limits.""" @@ -389,6 +399,19 @@ async def _refresh_provider_source_catalog(*, force: bool = False) -> list[dict[ provider_ids=target_ids, timeout_seconds=float(source_refresh_cfg.get("timeout_seconds") or 10.0), ) + await asyncio.to_thread( + record_availability_from_config, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + health_payload={"providers": {item["name"]: item for item in _build_provider_inventory()}}, + ) + await asyncio.to_thread( + refresh_local_model_availability, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + provider_ids=target_ids, + timeout_seconds=float(source_refresh_cfg.get("timeout_seconds") or 10.0), + ) ok_count = sum(1 for item in refresh_results if item.ok) logger.info( "Provider source refresh 
completed: %s/%s source endpoints succeeded (%s)", @@ -1782,6 +1805,7 @@ async def _execute_chat_completion_body( attempt_order=attempt_order, ), attempt_order=attempt_order, + route_summary=_build_route_summary(decision), ) trace_id = str(row_id) if row_id is not None else str(uuid.uuid4()) @@ -1839,6 +1863,7 @@ async def _execute_chat_completion_body( attempt_order=attempt_order, ), attempt_order=attempt_order, + route_summary=_build_route_summary(decision), ) continue @@ -2367,6 +2392,12 @@ async def provider_catalog(): "priority_next": {}, } if _provider_catalog_store is not None: + await asyncio.to_thread( + record_availability_from_config, + _provider_catalog_store, + config_path=_provider_catalog_config_path(), + health_payload={"providers": {item["name"]: item for item in _build_provider_inventory()}}, + ) source_catalog = build_catalog_summary( _provider_catalog_store, provider_ids=list(_config.provider_source_refresh.get("providers") or []), @@ -2396,6 +2427,101 @@ async def provider_discovery( ) +@app.get("/api/analytics/provider-mix") +async def provider_mix_analytics(): + """Analyze provider mix for cost savings opportunities.""" + from .lane_registry import get_canonical_model_catalog, get_provider_lane_binding + from .provider_catalog import _get_pricing_for_provider_and_model + # Health check uses global _providers + + canonical_catalog = get_canonical_model_catalog() + analytics = [] + + for canonical_model, model_info in canonical_catalog.items(): + providers_for_model = [] + + # Find all providers that serve this canonical model + for provider_name, provider_config in _config.providers.items(): + lane = dict(provider_config.get("lane") or get_provider_lane_binding(provider_name)) + if lane.get("canonical_model") == canonical_model: + # Get pricing for this provider + pricing = _get_pricing_for_provider_and_model(provider_name, canonical_model) + if not pricing: + continue + + # Calculate estimated cost per 1k tokens (input + output) + input_rate 
= float(pricing.get("input", 0) or 0) + output_rate = float(pricing.get("output", 0) or 0) + cost_per_1k = (input_rate + output_rate) / 1000 # Convert from per 1M to per 1K + + # Check provider health + health = {} + if provider_name in _providers: + health = _providers[provider_name].health.to_dict() + + providers_for_model.append( + { + "provider": provider_name, + "cost_per_1k_tokens": round(cost_per_1k, 6), + "input_rate": input_rate, + "output_rate": output_rate, + "healthy": health.get("healthy", False), + "pricing_source": pricing.get("source_type", "unknown"), + "freshness_status": pricing.get("freshness_status", "unknown"), + "promotion": pricing.get("promotion"), + "discount_percentage": pricing.get("discount_percentage"), + "expires_at": pricing.get("expires_at"), + } + ) + + if len(providers_for_model) < 2: + continue # Need at least 2 providers for comparison + + # Sort by cost + sorted_providers = sorted(providers_for_model, key=lambda x: x["cost_per_1k_tokens"]) + cheapest = sorted_providers[0] + most_expensive = sorted_providers[-1] + + # Calculate potential savings + if most_expensive["cost_per_1k_tokens"] > 0: + savings_percent = ( + (most_expensive["cost_per_1k_tokens"] - cheapest["cost_per_1k_tokens"]) + / most_expensive["cost_per_1k_tokens"] + * 100 + ) + else: + savings_percent = 0 + + analytics.append( + { + "canonical_model": canonical_model, + "model_label": model_info.get("label", canonical_model), + "provider_count": len(providers_for_model), + "providers": providers_for_model, + "cheapest_provider": cheapest["provider"], + "cheapest_cost_per_1k": cheapest["cost_per_1k_tokens"], + "most_expensive_provider": most_expensive["provider"], + "most_expensive_cost_per_1k": most_expensive["cost_per_1k_tokens"], + "potential_savings_percent": round(savings_percent, 1), + "potential_savings_per_1k": round( + most_expensive["cost_per_1k_tokens"] - cheapest["cost_per_1k_tokens"], 6 + ), + "recommendation": f"Use {cheapest['provider']} instead of 
{most_expensive['provider']} for {round(savings_percent, 1)}% savings" + if savings_percent > 5 + else "Cost differences are minimal", + } + ) + + # Sort by potential savings (descending) + analytics.sort(key=lambda x: x["potential_savings_percent"], reverse=True) + + return { + "total_opportunities": len(analytics), + "total_savings_percent_avg": sum(a["potential_savings_percent"] for a in analytics) / max(1, len(analytics)), + "analytics": analytics, + } + + @app.get("/v1/models") async def list_models(): """OpenAI-compatible model listing.""" @@ -2492,6 +2618,8 @@ async def stats( "operator_actions": _metrics.get_operator_breakdown(**operator_filters), "hourly": _metrics.get_hourly_series(24), "daily": _metrics.get_daily_totals(30), + "packages_summary": _metadata_catalogs_summary()["packages"], + "packages_detail": _metadata_packages_detail(), } @@ -2831,6 +2959,7 @@ async def image_generations(request: Request): attempt_order=attempt_order, ), attempt_order=attempt_order, + route_summary=_build_route_summary(decision), ) trace_id = str(row_id) if row_id is not None else str(uuid.uuid4()) @@ -2875,6 +3004,7 @@ async def image_generations(request: Request): attempt_order=attempt_order, ), attempt_order=attempt_order, + route_summary=_build_route_summary(decision), ) return JSONResponse( @@ -2979,6 +3109,7 @@ async def image_edits(request: Request): attempt_order=attempt_order, ), attempt_order=attempt_order, + route_summary=_build_route_summary(decision), ) trace_id = str(row_id) if row_id is not None else str(uuid.uuid4()) diff --git a/faigate/metrics.py b/faigate/metrics.py index 901e6f6..f35154d 100644 --- a/faigate/metrics.py +++ b/faigate/metrics.py @@ -102,6 +102,7 @@ def calc_cost( "last_recovered_issue_type": "TEXT DEFAULT ''", "decision_details": "TEXT DEFAULT '{}'", "attempt_order": "TEXT DEFAULT '[]'", + "route_summary": "TEXT DEFAULT '{}'", } @@ -169,6 +170,7 @@ def log_request( last_recovered_issue_type: str = "", decision_details: dict[str, Any] | 
None = None, attempt_order: list[str] | None = None, + route_summary: dict[str, Any] | None = None, ) -> int | None: if not self._conn: return None @@ -181,8 +183,8 @@ def log_request( requested_model,modality,client_profile,client_tag, decision_reason,confidence,canonical_model,lane_family,route_type,lane_cluster, selection_path,runtime_window_state,recovered_recently,last_recovered_issue_type, - decision_details,attempt_order) - VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", + decision_details,attempt_order,route_summary) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", ( time.time(), provider, @@ -213,6 +215,7 @@ def log_request( last_recovered_issue_type, json.dumps(decision_details or {}, sort_keys=True), json.dumps(attempt_order or []), + json.dumps(route_summary or {}, sort_keys=True), ), ) self._conn.commit() @@ -495,6 +498,12 @@ def get_recent(self, limit: int = 50, **filters: Any) -> list[dict]: row["decision_details"] = json.loads(decision_details) except json.JSONDecodeError: row["decision_details"] = {} + route_summary = row.get("route_summary") + if isinstance(route_summary, str) and route_summary: + try: + row["route_summary"] = json.loads(route_summary) + except json.JSONDecodeError: + row["route_summary"] = {} return rows def get_totals(self, **filters: Any) -> dict: diff --git a/faigate/provider_availability.py b/faigate/provider_availability.py index c59d07b..eb7cf6d 100644 --- a/faigate/provider_availability.py +++ b/faigate/provider_availability.py @@ -3,28 +3,181 @@ from __future__ import annotations import json -from typing import Any +from typing import Any, Protocol + +import httpx from .config import load_config from .provider_catalog_store import ProviderCatalogStore +from .provider_sources import get_provider_source, resolve_provider_source_id + + +class JsonFetcher(Protocol): + """Protocol for fetching provider models-endpoint payloads.""" + + def fetch_json( + self, + url: str, + *, + 
headers: dict[str, str], + timeout_seconds: float, + ) -> dict[str, Any]: ... + + +class HttpxJsonFetcher: + """Default JSON fetcher for provider models endpoints.""" + + def fetch_json( + self, + url: str, + *, + headers: dict[str, str], + timeout_seconds: float, + ) -> dict[str, Any]: + timeout = httpx.Timeout(timeout_seconds, connect=min(timeout_seconds, 5.0)) + with httpx.Client(timeout=timeout) as client: + response = client.get(url, headers=headers, follow_redirects=True) + response.raise_for_status() + return dict(response.json() or {}) + + +def _request_readiness_from_health( + health_payload: dict[str, Any] | None, + route_name: str, +) -> dict[str, Any]: + providers = dict((health_payload or {}).get("providers") or {}) + return dict((providers.get(route_name) or {}).get("request_readiness") or {}) + + +def _configured_provider_targets(config_path: str) -> list[dict[str, Any]]: + config = load_config(config_path) + targets: list[dict[str, Any]] = [] + for provider_name, provider in sorted(config.providers.items()): + targets.append( + { + "provider_name": provider_name, + "provider_id": resolve_provider_source_id(provider_name, provider), + "provider": provider, + } + ) + return targets + + +def _join_base_url(base_url: str, path: str) -> str: + base = str(base_url or "").rstrip("/") + suffix = str(path or "").strip() + if not base or not suffix: + return "" + if not suffix.startswith("/"): + suffix = "/" + suffix + if suffix.startswith("/v1/") and base.endswith("/v1"): + return base + suffix[len("/v1") :] + if base.endswith(suffix): + return base + return base + suffix + + +def _parse_models_payload(payload: dict[str, Any]) -> list[str]: + rows = payload.get("data") + if rows is None and isinstance(payload.get("models"), list): + rows = payload.get("models") + if rows is None and isinstance(payload.get("items"), list): + rows = payload.get("items") + if not isinstance(rows, list): + return [] + + visible_models: list[str] = [] + seen: set[str] = set() + 
for row in rows: + if isinstance(row, str): + token = row.strip() + elif isinstance(row, dict): + token = str( + row.get("id") or row.get("name") or row.get("model") or "" + ).strip() + else: + token = "" + if not token or token in seen: + continue + seen.add(token) + visible_models.append(token) + return sorted(visible_models) + + +def record_availability_from_config( + store: ProviderCatalogStore, + *, + config_path: str, + health_payload: dict[str, Any] | None, +) -> list[dict[str, Any]]: + """Persist one route-state snapshot per configured provider route.""" + rows: list[dict[str, Any]] = [] + for target in _configured_provider_targets(config_path): + provider_name = str(target["provider_name"]) + provider_id = str(target["provider_id"]) + provider = dict(target["provider"] or {}) + readiness = _request_readiness_from_health(health_payload, provider_name) + source = get_provider_source(provider_id) + ready = bool(readiness.get("ready")) + store.record_availability_snapshot( + provider_id, + provider_name, + source_name="route-state", + model_id=str(provider.get("model") or ""), + available_for_key=ready, + request_ready=ready, + verified_via=str(readiness.get("verified_via") or "health"), + last_issue_type=str(readiness.get("runtime_issue_type") or ""), + metadata={ + "status": readiness.get("status"), + "reason": readiness.get("reason"), + "compatibility": readiness.get("compatibility"), + "profile": readiness.get("profile"), + "base_url": str(provider.get("base_url") or ""), + "backend": str(provider.get("backend") or ""), + "catalog_provider_id": provider_id, + "supports_models_endpoint": bool( + (source.get("availability") or {}).get("supports_models_endpoint") + ), + }, + ) + rows.append( + { + "provider_id": provider_id, + "route_name": provider_name, + "model_id": str(provider.get("model") or ""), + "request_ready": ready, + "status": str(readiness.get("status") or ""), + } + ) + return rows def record_availability_from_health( store: 
ProviderCatalogStore, *, + config_path: str | None = None, health_payload: dict[str, Any] | None, ) -> list[dict[str, Any]]: - """Persist a light local availability overlay from the live /health payload.""" + """Persist a local availability overlay from the live /health payload.""" if not health_payload: return [] + if config_path: + return record_availability_from_config( + store, + config_path=config_path, + health_payload=health_payload, + ) + rows: list[dict[str, Any]] = [] for route_name, payload in sorted((health_payload.get("providers") or {}).items()): request_readiness = dict(payload.get("request_readiness") or {}) lane = dict(payload.get("lane") or {}) - provider_id = str(lane.get("family") or route_name.split("-", 1)[0] or route_name) + provider_id = resolve_provider_source_id(route_name, {"lane": lane}) store.record_availability_snapshot( provider_id, route_name, + source_name="route-state", model_id=str(payload.get("model") or ""), available_for_key=bool(request_readiness.get("ready")), request_ready=bool(request_readiness.get("ready")), @@ -49,6 +202,218 @@ def record_availability_from_health( return rows +def refresh_local_model_availability( + store: ProviderCatalogStore, + *, + config_path: str, + provider_ids: list[str] | None = None, + fetcher: JsonFetcher | None = None, + timeout_seconds: float = 10.0, +) -> list[dict[str, Any]]: + """Refresh local models-endpoint visibility for configured routes.""" + fetcher = fetcher or HttpxJsonFetcher() + allowed_provider_ids = set(provider_ids or []) + results: list[dict[str, Any]] = [] + + for target in _configured_provider_targets(config_path): + provider_name = str(target["provider_name"]) + provider_id = str(target["provider_id"]) + if allowed_provider_ids and provider_id not in allowed_provider_ids: + continue + + source = get_provider_source(provider_id) + availability = dict(source.get("availability") or {}) + if not availability.get("supports_models_endpoint"): + continue + + provider = 
dict(target["provider"] or {}) + base_url = str(provider.get("base_url") or "").strip() + api_key = str(provider.get("api_key") or "").strip() + if not base_url or not api_key: + continue + + configured_model = str(provider.get("model") or "").strip() + models_paths = list(availability.get("models_paths") or []) + visible_models: list[str] = [] + resolved_url = "" + last_error = "" + + for models_path in models_paths: + resolved_url = _join_base_url(base_url, str(models_path)) + if not resolved_url: + continue + try: + payload = fetcher.fetch_json( + resolved_url, + headers={ + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + }, + timeout_seconds=timeout_seconds, + ) + visible_models = _parse_models_payload(payload) + if visible_models: + last_error = "" + break + last_error = "empty models payload" + except Exception as exc: # pragma: no cover - defensive runtime path + last_error = str(exc) + + available_for_key = bool( + configured_model and configured_model in visible_models + ) + last_issue_type = "" + if configured_model and visible_models and not available_for_key: + last_issue_type = "model-unavailable" + elif last_error: + last_issue_type = "models-endpoint-error" + + store.record_availability_snapshot( + provider_id, + provider_name, + source_name="models-endpoint", + model_id=configured_model, + available_for_key=available_for_key, + request_ready=available_for_key, + verified_via=resolved_url or "models-endpoint", + last_issue_type=last_issue_type, + metadata={ + "catalog_provider_id": provider_id, + "base_url": base_url, + "models_endpoint_url": resolved_url, + "visible_models": visible_models, + "visible_model_count": len(visible_models), + "last_error": last_error, + }, + ) + results.append( + { + "provider_id": provider_id, + "route_name": provider_name, + "model_id": configured_model, + "available_for_key": available_for_key, + "visible_model_count": len(visible_models), + "last_error": last_error, + } + ) + return results + 
+ +def build_provider_availability_overlay( + store: ProviderCatalogStore, + *, + provider_id: str, + global_model_ids: set[str] | None = None, + global_free_model_ids: set[str] | None = None, +) -> dict[str, Any]: + """Compare local route and key visibility against global catalog data.""" + route_rows = store.get_latest_availability( + provider_id=provider_id, + source_name="route-state", + ) + endpoint_rows = store.get_latest_availability( + provider_id=provider_id, + source_name="models-endpoint", + ) + endpoint_by_route = {str(row.get("route_name") or ""): row for row in endpoint_rows} + visible_models: set[str] = set() + key_model_mismatches: list[dict[str, Any]] = [] + + for row in endpoint_rows: + metadata = dict(row.get("metadata") or {}) + route_visible_models = { + str(item).strip() + for item in list(metadata.get("visible_models") or []) + if str(item).strip() + } + visible_models.update(route_visible_models) + configured_model = str(row.get("model_id") or "") + if ( + configured_model + and route_visible_models + and configured_model not in route_visible_models + ): + key_model_mismatches.append( + { + "route_name": str(row.get("route_name") or ""), + "model_id": configured_model, + "visible_model_count": len(route_visible_models), + } + ) + + configured_models = { + str(row.get("model_id") or "").strip() + for row in route_rows + if str(row.get("model_id") or "").strip() + } + global_models = set(global_model_ids or set()) + global_free_models = set(global_free_model_ids or set()) + + configured_models_missing_globally = sorted( + model_id + for model_id in configured_models + if global_models and model_id not in global_models + ) + local_only_models = sorted( + model_id + for model_id in visible_models + if global_models and model_id not in global_models + ) + free_models_missing_locally = sorted( + model_id + for model_id in global_free_models + if visible_models and model_id not in visible_models + ) + + status = "clear" + if key_model_mismatches: + 
status = "intervention-needed" + elif configured_models_missing_globally or free_models_missing_locally: + status = "review-needed" + elif local_only_models: + status = "informational" + + route_details: list[dict[str, Any]] = [] + for row in route_rows: + endpoint_row = endpoint_by_route.get(str(row.get("route_name") or "")) + endpoint_meta = dict((endpoint_row or {}).get("metadata") or {}) + route_meta = dict(row.get("metadata") or {}) + route_details.append( + { + "route_name": str(row.get("route_name") or ""), + "model_id": str(row.get("model_id") or ""), + "request_ready": bool(row.get("request_ready")), + "status": str(route_meta.get("status") or ""), + "available_for_key": bool( + (endpoint_row or {}).get("available_for_key") + ), + "visible_model_count": int( + endpoint_meta.get("visible_model_count") or 0 + ), + "models_endpoint_error": str(endpoint_meta.get("last_error") or ""), + } + ) + + return { + "status": status, + "local_routes": len(route_rows), + "request_ready_routes": sum( + 1 for row in route_rows if row.get("request_ready") + ), + "models_endpoint_routes": len(endpoint_rows), + "visible_model_count": len(visible_models), + "visible_models": sorted(visible_models), + "configured_models": sorted(configured_models), + "configured_models_missing_globally": configured_models_missing_globally, + "key_model_mismatches": key_model_mismatches, + "local_only_models": local_only_models, + "global_free_models": sorted(global_free_models), + "free_models_visible_locally": len(global_free_models & visible_models), + "free_models_missing_locally": free_models_missing_locally, + "route_details": route_details, + } + + def load_health_payload(raw: str) -> dict[str, Any] | None: """Decode a serialized /health payload from a script environment.""" token = str(raw or "").strip() @@ -58,11 +423,10 @@ def load_health_payload(raw: str) -> dict[str, Any] | None: def configured_provider_families(config_path: str) -> dict[str, list[str]]: - """Return configured 
provider names grouped by family-ish prefix.""" - config = load_config(config_path) + """Return configured provider names grouped by source-catalog family.""" rows: dict[str, list[str]] = {} - for provider_name, provider in sorted(config.providers.items()): - lane = dict(provider.get("lane") or {}) - family = str(lane.get("family") or provider_name.split("-", 1)[0] or "unknown") - rows.setdefault(family, []).append(provider_name) + for target in _configured_provider_targets(config_path): + rows.setdefault(str(target["provider_id"] or "unknown"), []).append( + str(target["provider_name"]) + ) return rows diff --git a/faigate/provider_catalog.py b/faigate/provider_catalog.py index 0fe2292..eee11d3 100644 --- a/faigate/provider_catalog.py +++ b/faigate/provider_catalog.py @@ -18,7 +18,7 @@ import logging import os import re -from datetime import date +from datetime import date, datetime from pathlib import Path from typing import Any @@ -931,6 +931,47 @@ def _alert( return payload +def _check_promotion_expiry(pricing: dict[str, Any], provider: str) -> dict[str, Any] | None: + """Check if a promotion is about to expire and return an alert if needed.""" + expires_at = pricing.get("expires_at") + if not expires_at: + return None + try: + expiry = datetime.fromisoformat(expires_at.replace("Z", "+00:00")) + now = datetime.now(expiry.tzinfo) if expiry.tzinfo else datetime.now() + days_left = (expiry - now).days + if days_left < 0: + return _alert( + provider=provider, + severity="notice", + code="promotion-expired", + message=( + f"Promotion '{pricing.get('promotion', 'unknown')}' for provider '{provider}' " + f"expired {abs(days_left)} days ago." 
+ ), + promotion=pricing.get("promotion"), + expires_at=expires_at, + days_overdue=abs(days_left), + ) + elif days_left <= 7: + return _alert( + provider=provider, + severity="notice", + code="promotion-expiring-soon", + message=( + f"Promotion '{pricing.get('promotion', 'unknown')}' for provider '{provider}' " + f"expires in {days_left} days." + ), + promotion=pricing.get("promotion"), + expires_at=expires_at, + days_left=days_left, + ) + except (ValueError, TypeError): + # If date parsing fails, ignore + pass + return None + + def _tracked_item( provider_name: str, provider: dict[str, Any], @@ -1098,6 +1139,13 @@ def build_provider_catalog_report(config: Config) -> dict[str, Any]: ) ) + # Promotion expiry check + if check_cfg.get("enabled") and item.get("pricing_available"): + pricing = item.get("pricing", {}) + promotion_alert = _check_promotion_expiry(pricing, provider_name) + if promotion_alert: + alerts.append(promotion_alert) + # Calculate cost truth statistics cost_truth_stats = { "tracked_with_pricing": 0, diff --git a/faigate/provider_catalog_refresh.py b/faigate/provider_catalog_refresh.py index 39275cb..2601bae 100644 --- a/faigate/provider_catalog_refresh.py +++ b/faigate/provider_catalog_refresh.py @@ -10,6 +10,7 @@ import httpx +from .provider_availability import build_provider_availability_overlay from .provider_catalog_store import ProviderCatalogStore from .provider_sources import list_provider_sources @@ -52,7 +53,10 @@ class RefreshResult: def _source_due_severity(item: dict[str, Any]) -> str: """Escalate overdue source drift when it has lingered well past refresh cadence.""" - refresh_interval_seconds = max(int(item.get("refresh_interval_seconds") or 21600), 1) + refresh_interval_seconds = max( + int(item.get("refresh_interval_seconds") or 21600), + 1, + ) seconds_since_success = item.get("seconds_since_success") last_success_at = float(item.get("last_success_at") or 0.0) @@ -92,18 +96,30 @@ def _source_refresh_suggestion(item: dict[str, Any]) 
-> str: f"--provider {provider_id} and verify the source URL, parser, or " "auth assumptions before trusting catalog data here." ) - return f"Refresh {provider_id} before relying on older model, pricing, or free-tier assumptions." + return ( + f"Refresh {provider_id} before relying on older model, pricing, " + "or free-tier assumptions." + ) def _catalog_change_suggestion(event: dict[str, Any]) -> str: change_type = str(event.get("change_type") or "") provider_id = str(event.get("provider_id") or "provider") if change_type == "model-removed": - return f"Review configured model ids and fallback mirrors for {provider_id}; one catalog entry disappeared." + return ( + f"Review configured model ids and fallback mirrors for {provider_id}; " + "one catalog entry disappeared." + ) if change_type == "field-changed": - return f"Recheck pricing, context, and routing weights for {provider_id}; a tracked field changed." + return ( + f"Recheck pricing, context, and routing weights for {provider_id}; " + "a tracked field changed." + ) if change_type == "model-added": - return f"Review whether the newly listed {provider_id} model belongs in route additions or scenarios." + return ( + f"Review whether the newly listed {provider_id} model belongs in " + "route additions or scenarios." + ) return f"Review recent provider catalog changes for {provider_id}." 
@@ -117,6 +133,7 @@ def build_catalog_alerts( for item in list(summary.get("items") or []): provider_id = str(item.get("provider_id") or "") status = str(item.get("status") or "") + local_availability = dict(item.get("local_availability") or {}) if status == "error": action = _catalog_alert_action( kind="source-refresh-error", @@ -163,6 +180,114 @@ def build_catalog_alerts( "source_kind": "source", } ) + if list(local_availability.get("key_model_mismatches") or []): + mismatches = list(local_availability.get("key_model_mismatches") or []) + mismatch = mismatches[0] + alerts.append( + { + "kind": "local-model-availability", + "severity": "warning", + "action": "fix-now", + "provider_id": provider_id, + "headline": ( + f"Configured route model not visible for local " + f"{provider_id} key" + ), + "detail": ( + f"{mismatch.get('route_name')} expects " + f"{mismatch.get('model_id')}, but the latest local " + f"models endpoint did not list it " + f"({mismatch.get('visible_model_count')} visible models)." + ), + "suggestion": ( + "Verify the configured model id and local key for " + f"{mismatch.get('route_name')} " + "before trusting this route as request-ready." + ), + "source_kind": "local-availability", + } + ) + if list(local_availability.get("configured_models_missing_globally") or []): + missing_model = str( + local_availability["configured_models_missing_globally"][0] + ) + alerts.append( + { + "kind": "catalog-route-mismatch", + "severity": "warning", + "action": "review-now", + "provider_id": provider_id, + "headline": ( + f"Configured {provider_id} model missing from mirrored " + "global catalog" + ), + "detail": ( + f"The configured model '{missing_model}' is not present " + "in the latest " + f"mirrored {provider_id} source snapshot." + ), + "suggestion": ( + f"Review whether {missing_model} is still the intended " + "model id or " + "whether the provider source mirror needs to be refreshed." 
+ ), + "source_kind": "local-availability", + } + ) + if list(local_availability.get("local_only_models") or []): + local_only = str(local_availability["local_only_models"][0]) + alerts.append( + { + "kind": "local-model-drift", + "severity": "notice", + "action": "inspect", + "provider_id": provider_id, + "headline": ( + f"Local {provider_id} key exposes models missing from " + "mirrored docs" + ), + "detail": ( + f"The local models endpoint exposed '{local_only}', " + "which is not in the " + "latest mirrored global source snapshot." + ), + "suggestion": ( + f"Inspect whether {provider_id} docs are lagging or " + "whether the local key " + "is on a newer provider track." + ), + "source_kind": "local-availability", + } + ) + if ( + int(local_availability.get("models_endpoint_routes") or 0) > 0 + and int(local_availability.get("free_models_visible_locally") or 0) == 0 + and list(local_availability.get("global_free_models") or []) + ): + free_model = str(local_availability["global_free_models"][0]) + alerts.append( + { + "kind": "free-model-unavailable", + "severity": "notice", + "action": "review-now", + "provider_id": provider_id, + "headline": ( + f"Free {provider_id} catalog entries are not visible " + "for this key" + ), + "detail": ( + f"The mirrored global catalog still lists '{free_model}' " + "as free, but the latest local models endpoint did not " + "expose any mirrored free model." + ), + "suggestion": ( + f"Treat free-tier assumptions for {provider_id} as " + "key-specific and verify " + "whether this route should stay in low-cost fallback chains." 
+ ), + "source_kind": "local-availability", + } + ) for event in list(summary.get("recent_events") or []): severity = str(event.get("severity") or "notice") change_type = str(event.get("change_type") or "") @@ -176,8 +301,12 @@ def build_catalog_alerts( change_type=change_type, ), "provider_id": str(event.get("provider_id") or ""), - "headline": (f"Catalog change detected for {event.get('provider_id')}: {event.get('change_type')}"), - "detail": str(event.get("message") or "").strip() or "A provider catalog change was detected.", + "headline": ( + f"Catalog change detected for {event.get('provider_id')}: " + f"{event.get('change_type')}" + ), + "detail": str(event.get("message") or "").strip() + or "A provider catalog change was detected.", "suggestion": _catalog_change_suggestion(event), "source_kind": str(event.get("source_kind") or ""), "change_type": change_type, @@ -246,6 +375,22 @@ def build_catalog_summary( latest_models = store.get_latest_models(provider_id, "models") latest_pricing = store.get_latest_models(provider_id, "pricing") latest_docs_index = store.get_latest_models(provider_id, "docs-index") + global_catalog_model_ids = { + str(item.get("model_id") or "").strip() + for item in latest_models + latest_pricing + if str(item.get("model_id") or "").strip() + } + global_free_model_ids = { + str(item.get("model_id") or "").strip() + for item in latest_pricing + if bool(item.get("is_free")) and str(item.get("model_id") or "").strip() + } + local_availability = build_provider_availability_overlay( + store, + provider_id=provider_id, + global_model_ids=global_catalog_model_ids, + global_free_model_ids=global_free_model_ids, + ) last_success_at = float(source.get("last_success_at") or 0) last_checked_at = float(source.get("last_checked_at") or 0) refresh_interval_seconds = int(source.get("refresh_interval_seconds") or 21600) @@ -275,13 +420,19 @@ def build_catalog_summary( "models_count": len(latest_models), "pricing_count": len(latest_pricing), 
"docs_index_count": len(latest_docs_index), - "sample_models": [str(item.get("model_id") or "") for item in (latest_pricing or latest_models)[:5]], + "sample_models": [ + str(item.get("model_id") or "") + for item in (latest_pricing or latest_models)[:5] + ], + "local_availability": local_availability, "billing_notes": str(source.get("billing_notes") or ""), "account_profile": store.get_account_profile(provider_id), } ) - selected_provider_id = provider_ids[0] if provider_ids and len(provider_ids) == 1 else None + selected_provider_id = ( + provider_ids[0] if provider_ids and len(provider_ids) == 1 else None + ) recent_events = store.get_recent_change_events( provider_id=selected_provider_id, limit=20, @@ -310,7 +461,10 @@ def build_catalog_summary( elif recent_events: priority_next = { "path": "Provider Catalog Review", - "why": ("recent provider catalog changes were detected and should be reviewed."), + "why": ( + "recent provider catalog changes were detected and should " + "be reviewed." 
+ ), } alerts = build_catalog_alerts( @@ -350,7 +504,10 @@ def render_catalog_summary_text( + f"due={int(summary.get('due_sources') or 0)} | " + f"recent changes={int(summary.get('recent_changes') or 0)}" ) - alert_summary = dict(summary.get("alert_summary") or build_catalog_alert_summary(list(summary.get("alerts") or []))) + alert_summary = dict( + summary.get("alert_summary") + or build_catalog_alert_summary(list(summary.get("alerts") or [])) + ) lines.append( " alert summary: " + f"status={alert_summary.get('status') or 'clear'} | " @@ -371,9 +528,14 @@ def render_catalog_summary_text( if item.get("billing_notes"): lines.append(f" billing: {item['billing_notes']}") if item.get("refresh_interval_seconds"): - lines.append(f" refresh interval: {int(item['refresh_interval_seconds'])}s") + lines.append( + f" refresh interval: {int(item['refresh_interval_seconds'])}s" + ) if item.get("seconds_since_success") is not None: - lines.append(f" age: {int(float(item['seconds_since_success']))}s since last success") + lines.append( + f" age: {int(float(item['seconds_since_success']))}s " + "since last success" + ) profile = dict(item.get("account_profile") or {}) if profile: profile_bits = [str(profile.get("billing_mode") or "")] @@ -383,7 +545,41 @@ def render_catalog_summary_text( profile_bits.append(f"window={profile['quota_window']}") if profile.get("quota_remaining") is not None: profile_bits.append(f"remaining={profile['quota_remaining']}") - lines.append(" local account: " + " | ".join(bit for bit in profile_bits if bit)) + lines.append( + " local account: " + + " | ".join(bit for bit in profile_bits if bit) + ) + local_availability = dict(item.get("local_availability") or {}) + if local_availability: + lines.append( + " local availability: " + + f"routes={int(local_availability.get('local_routes') or 0)} | " + + f"ready={int(local_availability.get('request_ready_routes') or 0)} | " + + "models-endpoint=" + + f"{int(local_availability.get('models_endpoint_routes') or 
0)} | " + + "visible-models=" + + f"{int(local_availability.get('visible_model_count') or 0)}" + ) + if local_availability.get("configured_models_missing_globally"): + lines.append( + " catalog mismatch: " + + ", ".join( + local_availability["configured_models_missing_globally"][:3] + ) + ) + if local_availability.get("key_model_mismatches"): + lines.append( + " key mismatch: " + + ", ".join( + f"{item['route_name']} -> {item['model_id']}" + for item in local_availability["key_model_mismatches"][:3] + ) + ) + if local_availability.get("local_only_models"): + lines.append( + " local-only models: " + + ", ".join(local_availability["local_only_models"][:3]) + ) if item.get("last_error"): lines.append(f" last error: {item['last_error']}") events = list(summary.get("recent_events") or []) @@ -428,7 +624,9 @@ def due_provider_ids( stored = dict(source_rows.get(provider_id) or {}) last_success_at = float(stored.get("last_success_at") or 0) refresh_interval_seconds = int( - stored.get("refresh_interval_seconds") or source.get("refresh_interval_seconds") or 21600 + stored.get("refresh_interval_seconds") + or source.get("refresh_interval_seconds") + or 21600 ) if not last_success_at or refresh_interval_seconds <= 0: due.append(provider_id) @@ -526,7 +724,9 @@ def parse_regex_model_refs( """Extract model ids from docs text using prefixes and regex patterns.""" found: set[str] = set() rows: list[dict[str, Any]] = [] - prefix_patterns = [re.escape(prefix) + r"[a-zA-Z0-9.\-:\/]+" for prefix in (model_prefixes or [])] + prefix_patterns = [ + re.escape(prefix) + r"[a-zA-Z0-9.\-:\/]+" for prefix in (model_prefixes or []) + ] for pattern in prefix_patterns + list(model_patterns or []): for match in re.findall(pattern, text): token = str(match).strip("`*.,)('\"") @@ -606,7 +806,9 @@ def _diff_model_sets( "field_name": "model_id", "old_value": "", "new_value": model_id, - "message": (f"{provider_id}: model '{model_id}' appeared in {source_kind}."), + "message": ( + f"{provider_id}: 
model '{model_id}' appeared in {source_kind}." + ), } ) for model_id in sorted(previous_by_id.keys() - current_by_id.keys()): @@ -621,7 +823,9 @@ def _diff_model_sets( "field_name": "model_id", "old_value": model_id, "new_value": "", - "message": (f"{provider_id}: model '{model_id}' disappeared from {source_kind}."), + "message": ( + f"{provider_id}: model '{model_id}' disappeared from {source_kind}." + ), } ) for model_id in sorted(current_by_id.keys() & previous_by_id.keys()): diff --git a/faigate/provider_catalog_store.py b/faigate/provider_catalog_store.py index 003e4e5..71b66c0 100644 --- a/faigate/provider_catalog_store.py +++ b/faigate/provider_catalog_store.py @@ -50,6 +50,7 @@ id INTEGER PRIMARY KEY AUTOINCREMENT, provider_id TEXT NOT NULL, route_name TEXT NOT NULL, + source_name TEXT DEFAULT 'route-state', checked_at REAL NOT NULL, model_id TEXT DEFAULT '', available_for_key INTEGER DEFAULT 0, @@ -106,8 +107,24 @@ def init(self) -> None: self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA synchronous=NORMAL") self._conn.executescript(_CREATE_SQL) + self._migrate_schema() self._conn.commit() + def _migrate_schema(self) -> None: + if not self._conn: + return + columns = { + row[1] + for row in self._conn.execute("PRAGMA table_info(provider_availability_snapshots)") + } + if "source_name" not in columns: + self._conn.execute( + """ + ALTER TABLE provider_availability_snapshots + ADD COLUMN source_name TEXT DEFAULT 'route-state' + """ + ) + def close(self) -> None: if self._conn: self._conn.close() @@ -267,6 +284,7 @@ def record_availability_snapshot( provider_id: str, route_name: str, *, + source_name: str = "route-state", model_id: str = "", available_for_key: bool = False, request_ready: bool = False, @@ -280,13 +298,14 @@ def record_availability_snapshot( self._conn.execute( """ INSERT INTO provider_availability_snapshots( - provider_id, route_name, checked_at, model_id, + provider_id, route_name, source_name, checked_at, model_id, 
available_for_key, request_ready, verified_via, last_issue_type, metadata_json - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( provider_id, route_name, + source_name, float(checked_at or time.time()), model_id, 1 if available_for_key else 0, @@ -298,6 +317,52 @@ def record_availability_snapshot( ) self._conn.commit() + def get_latest_availability( + self, + *, + provider_id: str | None = None, + source_name: str | None = None, + ) -> list[dict[str, Any]]: + if not self._conn: + return [] + + where_clauses: list[str] = [] + params: list[Any] = [] + if provider_id: + where_clauses.append("provider_id=?") + params.append(provider_id) + if source_name: + where_clauses.append("source_name=?") + params.append(source_name) + where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + cur = self._conn.execute( + f""" + SELECT snap.provider_id, snap.route_name, snap.source_name, snap.checked_at, + snap.model_id, snap.available_for_key, snap.request_ready, + snap.verified_via, snap.last_issue_type, snap.metadata_json + FROM provider_availability_snapshots AS snap + INNER JOIN ( + SELECT provider_id, route_name, source_name, MAX(checked_at) AS checked_at + FROM provider_availability_snapshots + {where_sql} + GROUP BY provider_id, route_name, source_name + ) AS latest + ON snap.provider_id = latest.provider_id + AND snap.route_name = latest.route_name + AND snap.source_name = latest.source_name + AND snap.checked_at = latest.checked_at + ORDER BY snap.provider_id, snap.route_name, snap.source_name + """, + params, + ) + cols = [item[0] for item in cur.description] + rows = [dict(zip(cols, row)) for row in cur.fetchall()] + for row in rows: + row["metadata"] = json.loads(str(row.pop("metadata_json") or "{}")) + row["available_for_key"] = bool(row.get("available_for_key")) + row["request_ready"] = bool(row.get("request_ready")) + return rows + def upsert_account_profile( self, provider_id: str, diff --git 
a/faigate/provider_sources.py b/faigate/provider_sources.py index 6766e04..3858084 100644 --- a/faigate/provider_sources.py +++ b/faigate/provider_sources.py @@ -6,6 +6,33 @@ from typing import Any _SOURCE_REGISTRY: dict[str, dict[str, Any]] = { + "anthropic": { + "provider_id": "anthropic", + "display_name": "Anthropic", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "Anthropic usage can combine direct API billing with operator-side " + "subscription or quota windows. Local route availability should be tracked " + "separately from the public model docs." + ), + "route_prefixes": ["anthropic", "claude"], + "provider_names": ["anthropic-claude", "anthropic-sonnet"], + "endpoints": [ + { + "kind": "models", + "url": "https://docs.anthropic.com/en/docs/about-claude/models", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bclaude-[a-z0-9.\-]+", + ], + } + ], + "availability": { + "supports_models_endpoint": False, + "models_paths": [], + "transport": "anthropic", + }, + }, "blackbox": { "provider_id": "blackbox", "display_name": "BLACKBOX", @@ -14,6 +41,8 @@ "BLACKBOX can expose both free and paid model variants. Local key availability " "must be checked separately from the global pricing catalog." ), + "route_prefixes": ["blackbox"], + "provider_names": ["blackbox-free"], "endpoints": [ { "kind": "docs-index", @@ -28,10 +57,63 @@ ], "availability": { "supports_models_endpoint": True, - "models_path": "/v1/models", + "models_paths": ["/v1/models", "/models"], "transport": "openai-compat", }, }, + "deepseek": { + "provider_id": "deepseek", + "display_name": "DeepSeek", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "DeepSeek route cost and quota behavior can differ between direct API billing " + "and operator-specific subscription or account limits." 
+ ), + "route_prefixes": ["deepseek"], + "provider_names": ["deepseek-chat", "deepseek-reasoner"], + "endpoints": [ + { + "kind": "models", + "url": "https://api-docs.deepseek.com/", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bdeepseek-[a-z0-9.\-]+", + ], + } + ], + "availability": { + "supports_models_endpoint": True, + "models_paths": ["/v1/models", "/models"], + "transport": "openai-compat", + }, + }, + "google": { + "provider_id": "google", + "display_name": "Google", + "refresh_interval_seconds": 43_200, + "billing_notes": ( + "Google model access can sit behind AI Studio or platform-specific quotas. " + "Local availability and operator limits should be overlaid separately." + ), + "route_prefixes": ["google", "gemini"], + "provider_names": ["gemini-flash", "gemini-flash-lite"], + "endpoints": [ + { + "kind": "models", + "url": "https://ai.google.dev/gemini-api/docs/models", + "parser_type": "regex-model-refs", + "model_patterns": [ + r"\bgemini-[a-z0-9.\-:]+", + r"\bgemma-[a-z0-9.\-:]+", + ], + } + ], + "availability": { + "supports_models_endpoint": False, + "models_paths": [], + "transport": "google", + }, + }, "kilo": { "provider_id": "kilo", "display_name": "Kilo", @@ -40,6 +122,8 @@ "Kilo mixes gateway wallet, free models, and BYOK-style execution paths. " "Local billing interpretation should be overlaid from account usage and route probes." ), + "route_prefixes": ["kilo"], + "provider_names": ["kilocode", "kilo-sonnet", "kilo-opus"], "endpoints": [ { "kind": "models", @@ -61,7 +145,7 @@ ], "availability": { "supports_models_endpoint": False, - "models_path": "", + "models_paths": [], "transport": "openai-compat", }, }, @@ -74,6 +158,8 @@ "limits outside the raw API pricing table. Local account state should be " "tracked separately." 
), + "route_prefixes": ["openai", "gpt", "o1", "o3", "o4"], + "provider_names": ["openai-gpt4o", "openai-images"], "endpoints": [ { "kind": "models", @@ -91,7 +177,7 @@ ], "availability": { "supports_models_endpoint": True, - "models_path": "/v1/models", + "models_paths": ["/models", "/v1/models"], "transport": "openai-compat", }, }, @@ -117,3 +203,29 @@ def list_provider_sources(provider_ids: list[str] | None = None) -> list[dict[st if item: items.append(item) return items + + +def resolve_provider_source_id( + provider_name: str, + provider: dict[str, Any] | None = None, +) -> str: + """Map one configured route to a provider source family.""" + normalized_name = str(provider_name or "").strip().lower() + lane = dict((provider or {}).get("lane") or {}) + family = str(lane.get("family") or "").strip().lower() + + for provider_id, source in _SOURCE_REGISTRY.items(): + if family and family == provider_id: + return provider_id + for explicit_name in list(source.get("provider_names") or []): + if normalized_name == str(explicit_name or "").strip().lower(): + return provider_id + for prefix in list(source.get("route_prefixes") or []): + token = str(prefix or "").strip().lower() + if token and ( + normalized_name == token + or normalized_name.startswith(f"{token}-") + or normalized_name.startswith(f"{token}_") + ): + return provider_id + return family or normalized_name.split("-", 1)[0] or normalized_name diff --git a/faigate/router.py b/faigate/router.py index 67869d9..52a8b23 100644 --- a/faigate/router.py +++ b/faigate/router.py @@ -6,7 +6,7 @@ import re import time from dataclasses import dataclass, field -from datetime import date +from datetime import date, datetime from typing import Any from .config import Config @@ -595,6 +595,7 @@ def _estimated_request_cost_usd(provider: dict[str, Any], ctx: _RoutingContext | prompt_rate = float(pricing.get("input", 0) or 0) output_rate = float(pricing.get("output", 0) or 0) cache_rate = float(pricing.get("cache_read", 
prompt_rate) or 0) + prompt_rate, output_rate, cache_rate = _apply_promotion_discount(pricing, prompt_rate, output_rate, cache_rate) prompt_tokens = max(1, int(ctx.total_tokens or 0)) output_tokens = int(ctx.requested_output_tokens or 0) if output_tokens <= 0: @@ -614,6 +615,27 @@ def _estimated_request_cost_usd(provider: dict[str, Any], ctx: _RoutingContext | return round(prompt_cost + output_cost, 6) +def _apply_promotion_discount( + pricing: dict[str, Any], prompt_rate: float, output_rate: float, cache_rate: float +) -> tuple[float, float, float]: + """Apply promotion discount to rates if promotion is active and not expired.""" + discount_percentage = pricing.get("discount_percentage") + expires_at = pricing.get("expires_at") + if discount_percentage is None or expires_at is None: + return prompt_rate, output_rate, cache_rate + try: + expiry = datetime.fromisoformat(expires_at.replace("Z", "+00:00")) + now = datetime.now(expiry.tzinfo) if expiry.tzinfo else datetime.now() + if now > expiry: + # Promotion expired + return prompt_rate, output_rate, cache_rate + discount_factor = 1.0 - (float(discount_percentage) / 100.0) + return prompt_rate * discount_factor, output_rate * discount_factor, cache_rate * discount_factor + except (ValueError, TypeError): + # If date parsing fails, ignore promotion + return prompt_rate, output_rate, cache_rate + + def _estimated_request_cost_usd_with_lane( provider_name: str, model_id: str | None, @@ -634,6 +656,7 @@ def _estimated_request_cost_usd_with_lane( prompt_rate = float(pricing.get("input", 0) or 0) output_rate = float(pricing.get("output", 0) or 0) cache_rate = float(pricing.get("cache_read", prompt_rate) or 0) + prompt_rate, output_rate, cache_rate = _apply_promotion_discount(pricing, prompt_rate, output_rate, cache_rate) prompt_tokens = max(1, int(ctx.total_tokens or 0)) output_tokens = int(ctx.requested_output_tokens or 0) if output_tokens <= 0: diff --git a/faigate/wizard.py b/faigate/wizard.py index 3cc699f..28f1b8b 
100644 --- a/faigate/wizard.py +++ b/faigate/wizard.py @@ -23,6 +23,7 @@ ) from .provider_catalog import build_provider_refresh_guidance, get_provider_catalog from .providers import ProviderBackend +from .config import dedupe_model_shortcut_aliases ProviderFactory = dict[str, Any] @@ -3059,6 +3060,7 @@ def merge_initial_config( _mapping_or_empty(suggested_shortcuts.get("shortcuts")), ) merged["model_shortcuts"] = existing_shortcuts + merged, _ = dedupe_model_shortcut_aliases(merged) existing_profiles = _mapping_or_empty(merged.get("client_profiles")) suggested_profiles = _mapping_or_empty(suggestion.get("client_profiles")) @@ -3160,7 +3162,7 @@ def build_initial_config( "on_startup": True, "timeout_seconds": 10.0, "interval_seconds": 21600, - "providers": ["blackbox", "kilo", "openai"], + "providers": ["anthropic", "blackbox", "deepseek", "google", "kilo", "openai"], }, "providers": providers, "fallback_chain": fallback_chain, diff --git a/pyproject.toml b/pyproject.toml index da68fe9..e300b25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,14 @@ dev = [ "build>=1.2", "pytest>=8.0", "pytest-asyncio>=0.24", + "pytest-cov>=5.0", + "pytest-benchmark>=4.0.0", "httpx", # for TestClient "ruff>=0.8", "twine>=6.1", + "pre-commit>=3.0", + "bandit[toml]>=1.8.0", + "jinja2>=3.1.0", ] [project.scripts] diff --git a/scripts/faigate-doctor b/scripts/faigate-doctor index 156b264..a1fca00 100755 --- a/scripts/faigate-doctor +++ b/scripts/faigate-doctor @@ -90,7 +90,10 @@ from pathlib import Path import yaml from faigate.onboarding import collect_provider_env_requirements from faigate.provider_catalog import build_provider_catalog_report, build_provider_refresh_guidance -from faigate.provider_availability import record_availability_from_health +from faigate.provider_availability import ( + record_availability_from_health, + refresh_local_model_availability, +) from faigate.provider_catalog_refresh import ProviderCatalogRefresher, build_catalog_summary from 
faigate.provider_catalog_refresh import build_catalog_alert_summary from faigate.provider_catalog_store import ProviderCatalogStore @@ -177,7 +180,18 @@ for item in catalog.get("items", []): health_raw = os.environ.get("FAIGATE_HEALTH_PAYLOAD", "").strip() if store is not None and health_raw: health_payload = json.loads(health_raw) - record_availability_from_health(store, health_payload=health_payload) + record_availability_from_health( + store, + config_path=os.environ.get("FAIGATE_CONFIG_FILE", ""), + health_payload=health_payload, + ) + if os.environ.get("FAIGATE_DOCTOR_REFRESH_CATALOG") == "1": + refresh_local_model_availability( + store, + config_path=os.environ.get("FAIGATE_CONFIG_FILE", ""), + provider_ids=list(config.provider_source_refresh.get("providers") or []), + timeout_seconds=float(config.provider_source_refresh.get("timeout_seconds") or 10.0), + ) providers = (health_payload.get("providers") or {}) ready = 0 total = 0 diff --git a/scripts/faigate-provider-probe b/scripts/faigate-provider-probe index e90ea56..5026376 100755 --- a/scripts/faigate-provider-probe +++ b/scripts/faigate-provider-probe @@ -77,7 +77,10 @@ import json import os from faigate.config import load_config -from faigate.provider_availability import record_availability_from_health +from faigate.provider_availability import ( + record_availability_from_health, + refresh_local_model_availability, +) from faigate.provider_catalog_refresh import ( ProviderCatalogRefresher, build_catalog_alert_summary, @@ -108,7 +111,18 @@ if store is not None and os.environ.get("FAIGATE_PROVIDER_PROBE_REFRESH_CATALOG" timeout_seconds=float(os.environ.get("FAIGATE_PROVIDER_PROBE_TIMEOUT") or "2.0"), ) if store is not None: - record_availability_from_health(store, health_payload=health_payload) + record_availability_from_health( + store, + config_path=os.environ["FAIGATE_PROVIDER_PROBE_CONFIG"], + health_payload=health_payload, + ) + if os.environ.get("FAIGATE_PROVIDER_PROBE_REFRESH_CATALOG") == "1": + 
refresh_local_model_availability( + store, + config_path=os.environ["FAIGATE_PROVIDER_PROBE_CONFIG"], + provider_ids=list(config.provider_source_refresh.get("providers") or []), + timeout_seconds=float(os.environ.get("FAIGATE_PROVIDER_PROBE_TIMEOUT") or "2.0"), + ) catalog_summary = build_catalog_summary( store, provider_ids=list(config.provider_source_refresh.get("providers") or []), diff --git a/scripts/generate-api-docs.py b/scripts/generate-api-docs.py new file mode 100644 index 0000000..acfa21f --- /dev/null +++ b/scripts/generate-api-docs.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +"""Generate API.md from OpenAPI specification. + +This script extracts the OpenAPI spec from the FastAPI application and +generates a Markdown documentation file. + +Usage: + python scripts/generate-api-docs.py +""" + +import json +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +# Import after path setup +try: + from faigate.main import app +except ImportError as e: + print(f"Error importing faigate.main: {e}") + sys.exit(1) + + +def generate_markdown_from_openapi(openapi_spec: dict) -> str: + """Convert OpenAPI spec to Markdown documentation.""" + lines = [] + + # Title + lines.append(f"# {openapi_spec.get('info', {}).get('title', 'API Reference')}") + lines.append("") + + # Description + description = openapi_spec.get("info", {}).get("description", "") + if description: + lines.append(description) + lines.append("") + + # Servers + servers = openapi_spec.get("servers", []) + if servers: + lines.append("## Servers") + lines.append("") + for server in servers: + lines.append(f"- `{server.get('url', '')}`") + if server.get("description"): + lines.append(f" - {server['description']}") + lines.append("") + + # Paths + paths = openapi_spec.get("paths", {}) + if paths: + lines.append("## Endpoints") + lines.append("") + + for path, methods in sorted(paths.items()): + 
lines.append(f"### `{path}`") + lines.append("") + + for method, details in methods.items(): + lines.append(f"#### `{method.upper()}`") + lines.append("") + + # Summary and description + summary = details.get("summary", "") + description = details.get("description", "") + if summary: + lines.append(f"**{summary}**") + lines.append("") + if description: + lines.append(description) + lines.append("") + + # Parameters + parameters = details.get("parameters", []) + if parameters: + lines.append("**Parameters:**") + lines.append("") + for param in parameters: + param_name = param.get("name", "") + param_in = param.get("in", "") + param_desc = param.get("description", "") + param_required = param.get("required", False) + required_str = "required" if param_required else "optional" + lines.append(f"- `{param_name}` ({param_in}, {required_str})") + if param_desc: + lines.append(f" - {param_desc}") + lines.append("") + + # Request body + request_body = details.get("requestBody", {}) + if request_body: + lines.append("**Request Body:**") + lines.append("") + content = request_body.get("content", {}) + for content_type, media_type in content.items(): + lines.append(f"- `{content_type}`") + schema = media_type.get("schema", {}) + if schema: + # Simplified schema representation + lines.append(f" - Schema: {json.dumps(schema, indent=2)}") + lines.append("") + + # Responses + responses = details.get("responses", {}) + if responses: + lines.append("**Responses:**") + lines.append("") + for status_code, response in responses.items(): + lines.append(f"- `{status_code}`") + desc = response.get("description", "") + if desc: + lines.append(f" - {desc}") + lines.append("") + + # Security + security = details.get("security", []) + if security: + lines.append("**Security:**") + lines.append("") + for sec in security: + for scheme, scopes in sec.items(): + lines.append(f"- `{scheme}`: {', '.join(scopes)}") + lines.append("") + + lines.append("---") + lines.append("") + + return 
"\n".join(lines) + + +def main(): + """Main entry point.""" + # Get OpenAPI spec + openapi_spec = app.openapi() + + # Generate Markdown + markdown = generate_markdown_from_openapi(openapi_spec) + + # Write to docs/API.md + output_path = project_root / "docs" / "API.md" + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(markdown, encoding="utf-8") + + print(f"Generated API documentation at {output_path}") + print(f"Total paths documented: {len(openapi_spec.get('paths', {}))}") + + +if __name__ == "__main__": + main() diff --git a/scripts/run-validation.py b/scripts/run-validation.py new file mode 100644 index 0000000..1d3170f --- /dev/null +++ b/scripts/run-validation.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +"""Run Claude Desktop validation with auto-started server.""" + +import asyncio +import os +import signal +import subprocess +import sys +import time +from pathlib import Path + +# Add project root to path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + + +async def start_server() -> subprocess.Popen: + """Start faigate server in background.""" + env = os.environ.copy() + # Use default config (already modified) + cmd = [sys.executable, "-m", "faigate"] + print(f"Starting server: {' '.join(cmd)}") + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + env=env, + ) + + # Wait for server to be ready (check health endpoint) + max_wait = 30 + for i in range(max_wait): + if proc.poll() is not None: + # Server died + output = proc.stdout.read() if proc.stdout else "" + print(f"Server exited early: {output}") + raise RuntimeError("Server failed to start") + + # Try to connect + try: + import httpx + + async with httpx.AsyncClient(timeout=1.0) as client: + resp = await client.get("http://127.0.0.1:8090/health") + if resp.status_code == 200: + print("Server is ready") + return proc + except: + pass + + await asyncio.sleep(1) + if i % 5 == 0: + 
print(f"Waiting for server... ({i + 1}s)") + + raise RuntimeError("Server did not become ready in time") + + +async def stop_server(proc: subprocess.Popen) -> None: + """Stop faigate server gracefully.""" + if proc.poll() is None: + print("Stopping server...") + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print("Server did not terminate, killing...") + proc.kill() + proc.wait() + + +async def main() -> int: + """Main validation runner.""" + server = None + try: + # Start server + server = await start_server() + + # Run validation script + validation_script = project_root / "scripts" / "validate-claude-desktop.py" + if not validation_script.exists(): + print(f"Validation script not found: {validation_script}") + return 1 + + print("\n" + "=" * 70) + print("Running Claude Desktop validation...") + print("=" * 70) + + result = subprocess.run( + [sys.executable, str(validation_script)], + capture_output=True, + text=True, + ) + + print(result.stdout) + if result.stderr: + print("STDERR:", result.stderr) + + return result.returncode + + except Exception as e: + print(f"Validation failed: {e}") + return 1 + finally: + if server: + await stop_server(server) + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/scripts/validate-claude-desktop.py b/scripts/validate-claude-desktop.py new file mode 100644 index 0000000..4f2ce0b --- /dev/null +++ b/scripts/validate-claude-desktop.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +"""Validate Claude Desktop compatibility with fusionAIze Gate. + +This script tests the Anthropic bridge endpoints to ensure they meet +Claude Desktop's requirements for local gateway integration. 
+ +Usage: + python scripts/validate-claude-desktop.py +""" + +import asyncio +import json +import sys +from typing import Any + +import httpx + +# Test server (assumes faigate is running on default port) +BASE_URL = "http://127.0.0.1:8091" +ANTHROPIC_BASE_URL = f"{BASE_URL}/v1" + + +async def test_health() -> bool: + """Test basic gateway health.""" + async with httpx.AsyncClient(timeout=10.0) as client: + try: + resp = await client.get(f"{BASE_URL}/health") + if resp.status_code == 200: + print("✓ Gateway health endpoint OK") + return True + else: + print(f"✗ Gateway health endpoint returned {resp.status_code}") + return False + except Exception as e: + print(f"✗ Cannot reach gateway: {e}") + return False + + +async def test_messages_non_streaming() -> bool: + """Test POST /v1/messages non-streaming.""" + async with httpx.AsyncClient(timeout=10.0) as client: + headers = { + "anthropic-version": "2023-06-01", + "anthropic-beta": "max-tokens-2024-07-15", + "content-type": "application/json", + } + payload = { + "model": "claude-3-5-sonnet-20241022", + "system": "You are a helpful assistant.", + "messages": [{"role": "user", "content": "Hello, please respond with 'Gateway test successful'."}], + "max_tokens": 100, + "stream": False, + } + + try: + resp = await client.post( + f"{ANTHROPIC_BASE_URL}/messages", + headers=headers, + json=payload, + ) + + if resp.status_code == 200: + data = resp.json() + print(f"✓ Non-streaming messages OK (model: {data.get('model')})") + # Check response structure + required_keys = {"id", "model", "content", "stop_reason", "usage"} + if all(key in data for key in required_keys): + print(" Response structure valid") + return True + else: + print(f" Missing keys: {required_keys - set(data.keys())}") + return False + elif resp.status_code == 401: + # Bridge is active but authentication failed + print("⚠ Non-streaming messages: Bridge active but authentication failed (401)") + print(" This is expected with dummy API keys") + # Check if 
response indicates bridge is enabled (not "Anthropic bridge is disabled") + if "Anthropic bridge is disabled" not in resp.text: + print(" ✓ Bridge endpoint is enabled") + return True + else: + print(" ✗ Bridge endpoint reports disabled") + return False + elif resp.status_code == 404: + print(f"✗ Non-streaming messages failed: 404 (Bridge likely disabled)") + print(f" Response: {resp.text[:200]}") + return False + else: + print(f"✗ Non-streaming messages failed: {resp.status_code}") + print(f" Response: {resp.text[:200]}") + return False + except Exception as e: + print(f"✗ Non-streaming messages error: {e}") + return False + + +async def test_messages_streaming() -> bool: + """Test POST /v1/messages streaming (SSE).""" + async with httpx.AsyncClient(timeout=30.0) as client: + headers = { + "anthropic-version": "2023-06-01", + "accept": "text/event-stream", + "content-type": "application/json", + } + payload = { + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "Stream a short response."}], + "max_tokens": 50, + "stream": True, + } + + try: + async with client.stream( + "POST", + f"{ANTHROPIC_BASE_URL}/messages", + headers=headers, + json=payload, + ) as response: + if response.status_code == 200: + event_count = 0 + async for line in response.aiter_lines(): + if line.startswith("data:"): + event_count += 1 + data = line[5:].strip() + if data == "[DONE]": + print(f"✓ Streaming messages OK ({event_count} events)") + return True + if event_count > 0: + print(f"✓ Streaming messages OK ({event_count} events)") + return True + else: + print("✗ Streaming messages: no events received") + return False + elif response.status_code == 401: + # Bridge is active but authentication failed + print("⚠ Streaming messages: Bridge active but authentication failed (401)") + print(" This is expected with dummy API keys") + return True + elif response.status_code == 404: + print(f"✗ Streaming messages failed: 404 (Bridge likely disabled)") + return False 
+ else: + print(f"✗ Streaming messages failed: {response.status_code}") + return False + except Exception as e: + print(f"✗ Streaming messages error: {e}") + return False + + +async def test_count_tokens() -> bool: + """Test POST /v1/messages/count_tokens.""" + async with httpx.AsyncClient(timeout=10.0) as client: + headers = { + "anthropic-version": "2023-06-01", + "content-type": "application/json", + } + payload = { + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "Count these tokens please."}], + } + + try: + resp = await client.post( + f"{ANTHROPIC_BASE_URL}/messages/count_tokens", + headers=headers, + json=payload, + ) + + if resp.status_code == 200: + data = resp.json() + if "input_tokens" in data: + print(f"✓ Count tokens OK ({data['input_tokens']} tokens)") + # Check for X-faigate headers + if "X-faigate-Token-Count-Exact" in resp.headers: + print(f" Token count method: {resp.headers.get('X-faigate-Token-Count-Method', 'unknown')}") + return True + else: + print(f"✗ Count tokens missing 'input_tokens': {data}") + return False + elif resp.status_code == 401: + # Bridge is active but authentication failed + print("⚠ Count tokens: Bridge active but authentication failed (401)") + print(" This is expected with dummy API keys") + return True + elif resp.status_code == 404: + print(f"✗ Count tokens failed: 404 (Bridge likely disabled)") + print(f" Response: {resp.text[:200]}") + return False + else: + print(f"✗ Count tokens failed: {resp.status_code}") + print(f" Response: {resp.text[:200]}") + return False + except Exception as e: + print(f"✗ Count tokens error: {e}") + return False + + +async def test_model_aliases() -> bool: + """Test that Claude Desktop model aliases work correctly.""" + async with httpx.AsyncClient(timeout=10.0) as client: + headers = {"content-type": "application/json"} + + # Common Claude Desktop model IDs + test_models = [ + "claude-3-5-sonnet-20241022", + "claude-3-opus-20240229", + 
"claude-3-haiku-20240307", + "claude-3-5-sonnet", # Short alias + "claude-3-opus", + "claude-3-haiku", + ] + + success = True + for model in test_models: + payload = { + "model": model, + "messages": [{"role": "user", "content": "Test"}], + "max_tokens": 10, + } + + try: + resp = await client.post( + f"{ANTHROPIC_BASE_URL}/messages", + headers=headers, + json=payload, + ) + if resp.status_code == 200: + print(f" ✓ Model alias '{model}' accepted") + elif resp.status_code == 401: + print(f" ⚠ Model alias '{model}' accepted (auth failed)") + elif resp.status_code == 404: + print(f" ✗ Model alias '{model}' failed: 404 (Bridge likely disabled)") + success = False + else: + print(f" ✗ Model alias '{model}' failed: {resp.status_code}") + success = False + except Exception as e: + print(f" ✗ Model alias '{model}' error: {e}") + success = False + + if success: + print("✓ Model aliases test passed") + else: + print("✗ Model aliases test failed") + + return success + + +async def test_desktop_headers() -> bool: + """Test that Claude Desktop specific headers are handled.""" + async with httpx.AsyncClient(timeout=10.0) as client: + # Headers that Claude Desktop might send + headers = { + "anthropic-version": "2023-06-01", + "anthropic-beta": "max-tokens-2024-07-15", + "anthropic-client": "claude-desktop", + "x-api-key": "test-key-ignored", # Should be ignored if not needed + "content-type": "application/json", + } + + payload = { + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "Test headers"}], + "max_tokens": 10, + } + + try: + resp = await client.post( + f"{ANTHROPIC_BASE_URL}/messages", + headers=headers, + json=payload, + ) + + if resp.status_code == 200: + # Check that gateway adds its own headers + gate_headers = {k: v for k, v in resp.headers.items() if k.lower().startswith("x-faigate")} + if gate_headers: + print(f"✓ Desktop headers handled (added {len(gate_headers)} gateway headers)") + return True + else: + print("✓ Desktop headers 
handled (no gateway headers added)") + return True + elif resp.status_code == 401: + # Bridge is active but authentication failed + print("⚠ Desktop headers: Bridge active but authentication failed (401)") + print(" This is expected with dummy API keys") + return True + elif resp.status_code == 404: + print(f"✗ Desktop headers test failed: 404 (Bridge likely disabled)") + return False + else: + print(f"✗ Desktop headers test failed: {resp.status_code}") + return False + except Exception as e: + print(f"✗ Desktop headers error: {e}") + return False + + +async def main() -> int: + """Run all validation tests.""" + print("=" * 70) + print("Claude Desktop Compatibility Validation") + print("=" * 70) + print(f"Testing gateway at: {BASE_URL}") + print(f"Anthropic base URL: {ANTHROPIC_BASE_URL}") + print() + + # Check if gateway is running + if not await test_health(): + print("\n❌ Gateway not reachable. Please start faigate first:") + print(" python -m faigate") + return 1 + + tests = [ + ("Non-streaming messages", test_messages_non_streaming), + ("Streaming messages", test_messages_streaming), + ("Count tokens", test_count_tokens), + ("Model aliases", test_model_aliases), + ("Desktop headers", test_desktop_headers), + ] + + passed = 0 + total = len(tests) + + for name, test_func in tests: + print(f"\n{name}:") + try: + if await test_func(): + passed += 1 + else: + print(f" ❌ {name} failed") + except Exception as e: + print(f" ❌ {name} exception: {e}") + + print("\n" + "=" * 70) + print(f"Results: {passed}/{total} tests passed") + + if passed == total: + print("✅ All tests passed! Claude Desktop compatibility looks good.") + print("\nNext steps:") + print("1. Configure Claude Desktop with ANTHROPIC_BASE_URL=" + ANTHROPIC_BASE_URL) + print("2. Test real desktop workflows") + return 0 + else: + print("❌ Some tests failed. 
Review logs above.") + print("\nCommon issues:") + print("- Ensure anthropic_bridge.enabled: true in config.yaml") + print("- Check gateway logs for bridge-related errors") + print("- Verify model aliases are configured in anthropic_bridge.model_aliases") + return 1 + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/tests/benchmarks/test_performance.py b/tests/benchmarks/test_performance.py new file mode 100644 index 0000000..4a33aae --- /dev/null +++ b/tests/benchmarks/test_performance.py @@ -0,0 +1,136 @@ +"""Performance benchmarks for fusionAIze Gate critical paths. + +These benchmarks measure the performance of key operations to detect regressions. +Run with: pytest tests/benchmarks/test_performance.py --benchmark-only +""" + +import time +import pytest +from pathlib import Path +import sys +import types + +# Set up mock environment before imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +# Mock httpx before importing our modules +_httpx = types.ModuleType("httpx") +_httpx.Timeout = type("Timeout", (), {"__init__": lambda *a, **kw: None}) +_httpx.Limits = type("Limits", (), {"__init__": lambda *a, **kw: None}) +_httpx.AsyncClient = type( + "AsyncClient", + (), + { + "__init__": lambda *a, **kw: None, + "aclose": lambda self: None, + }, +) +sys.modules["httpx"] = _httpx + +# Import faigate modules after mocking +from faigate import config +from faigate.providers import ProviderCatalog +from faigate.routing import Router + + +@pytest.fixture +def sample_catalog(): + """Create a sample provider catalog for benchmarking.""" + catalog = ProviderCatalog() + # Add mock providers + catalog._providers = { + "openai": { + "id": "openai", + "name": "OpenAI", + "enabled": True, + "capabilities": ["chat", "completions"], + "cost_per_token": 0.000001, + }, + "anthropic": { + "id": "anthropic", + "name": "Anthropic", + "enabled": True, + "capabilities": ["chat", "claude"], + "cost_per_token": 0.0000015, + }, + "local": { + 
"id": "local", + "name": "Local", + "enabled": True, + "capabilities": ["chat"], + "cost_per_token": 0.0, + }, + } + return catalog + + +@pytest.fixture +def sample_router(sample_catalog): + """Create a router with sample catalog.""" + return Router(catalog=sample_catalog) + + +def test_router_initialization(benchmark): + """Benchmark router initialization time.""" + + def init_router(): + catalog = ProviderCatalog() + catalog._providers = {"test": {"id": "test", "enabled": True}} + return Router(catalog=catalog) + + result = benchmark(init_router) + assert result is not None + + +def test_provider_selection(benchmark, sample_router): + """Benchmark provider selection algorithm.""" + + def select_provider(): + return sample_router.select_provider(model="gpt-4", capabilities=["chat"], max_cost=0.01) + + result = benchmark(select_provider) + assert result in ("openai", "anthropic", "local", None) + + +def test_config_loading(benchmark): + """Benchmark configuration loading from YAML.""" + # Create a minimal config YAML content + config_content = """ +providers: + - id: test + name: Test Provider + enabled: true + api_key: "test-key" + base_url: "https://api.test.com" +""" + config_path = Path("/tmp/test_config.yaml") + config_path.write_text(config_content) + + def load_config(): + return config.load_config(str(config_path)) + + result = benchmark(load_config) + assert result is not None + config_path.unlink(missing_ok=True) + + +def test_cost_calculation(benchmark, sample_router): + """Benchmark cost calculation for requests.""" + + def calculate_cost(): + return sample_router.estimate_cost(provider_id="openai", input_tokens=100, output_tokens=50) + + result = benchmark(calculate_cost) + assert isinstance(result, (int, float)) + + +@pytest.mark.skip("Requires actual HTTP endpoints") +def test_request_routing_end_to_end(benchmark): + """End-to-end request routing benchmark (requires mocked HTTP).""" + # This would be more complex and require async mocking + pass + + 
+if __name__ == "__main__": + # Allow running directly for profiling + pytest.main([__file__, "--benchmark-only"]) diff --git a/tests/test_config.py b/tests/test_config.py index 7ac9a6f..9cbb04a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -385,7 +385,7 @@ def test_provider_source_refresh_defaults_are_exposed(): "on_startup": True, "timeout_seconds": 10.0, "interval_seconds": 21600, - "providers": ["blackbox", "kilo", "openai"], + "providers": ["anthropic", "blackbox", "deepseek", "google", "kilo", "openai"], } diff --git a/tests/test_provider_availability.py b/tests/test_provider_availability.py new file mode 100644 index 0000000..0b0584c --- /dev/null +++ b/tests/test_provider_availability.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +from pathlib import Path + +from faigate.provider_availability import ( + build_provider_availability_overlay, + record_availability_from_config, + refresh_local_model_availability, +) +from faigate.provider_catalog_store import ProviderCatalogStore + + +class FakeJsonFetcher: + def __init__(self, payloads: dict[str, dict]): + self._payloads = payloads + + def fetch_json( + self, + url: str, + *, + headers: dict[str, str], + timeout_seconds: float, + ) -> dict: + return dict(self._payloads[url]) + + +def _write_config(tmp_path: Path) -> Path: + path = tmp_path / "config.yaml" + path.write_text( + """ +server: + host: "127.0.0.1" + port: 8090 +providers: + blackbox-free: + backend: openai-compat + base_url: "https://api.blackbox.ai" + api_key: "secret" + model: "x-ai/grok-code-fast-1:free" + deepseek-chat: + backend: openai-compat + base_url: "https://api.deepseek.com/v1" + api_key: "secret" + model: "deepseek-chat" +fallback_chain: [] +metrics: + enabled: false +""".strip(), + encoding="utf-8", + ) + return path + + +def test_local_models_endpoint_overlay_detects_key_specific_mismatch(tmp_path: Path): + config_path = _write_config(tmp_path) + db_path = tmp_path / "faigate.db" + store = 
ProviderCatalogStore(str(db_path)) + store.init() + store.replace_model_snapshot( + "blackbox", + "pricing", + [ + { + "model_id": "x-ai/grok-code-fast-1:free", + "model_name": "Grok Code Fast 1 Free", + "input_cost": 0.0, + "output_cost": 0.0, + "context_length": 256000, + "is_free": True, + "raw_source_hash": "hash-blackbox", + } + ], + ) + store.replace_model_snapshot( + "deepseek", + "models", + [ + { + "model_id": "deepseek-chat", + "model_name": "DeepSeek Chat", + "input_cost": None, + "output_cost": None, + "context_length": None, + "is_free": False, + "raw_source_hash": "hash-deepseek", + } + ], + ) + + record_availability_from_config( + store, + config_path=str(config_path), + health_payload={ + "providers": { + "blackbox-free": { + "request_readiness": { + "ready": False, + "status": "degraded", + "reason": "last request failed", + } + }, + "deepseek-chat": { + "request_readiness": { + "ready": True, + "status": "ready", + "reason": "healthy", + } + }, + } + }, + ) + refresh_local_model_availability( + store, + config_path=str(config_path), + fetcher=FakeJsonFetcher( + { + "https://api.blackbox.ai/v1/models": { + "data": [{"id": "x-ai/grok-code-fast-1"}] + }, + "https://api.deepseek.com/v1/models": { + "data": [{"id": "deepseek-chat"}, {"id": "deepseek-reasoner"}] + }, + } + ), + ) + + blackbox_overlay = build_provider_availability_overlay( + store, + provider_id="blackbox", + global_model_ids={"x-ai/grok-code-fast-1:free"}, + global_free_model_ids={"x-ai/grok-code-fast-1:free"}, + ) + deepseek_overlay = build_provider_availability_overlay( + store, + provider_id="deepseek", + global_model_ids={"deepseek-chat", "deepseek-reasoner"}, + global_free_model_ids=set(), + ) + + assert blackbox_overlay["status"] == "intervention-needed" + assert blackbox_overlay["key_model_mismatches"][0]["route_name"] == "blackbox-free" + assert blackbox_overlay["local_only_models"] == ["x-ai/grok-code-fast-1"] + assert blackbox_overlay["free_models_missing_locally"] == 
["x-ai/grok-code-fast-1:free"] + assert deepseek_overlay["status"] == "clear" + assert deepseek_overlay["visible_models"] == ["deepseek-chat", "deepseek-reasoner"] diff --git a/tests/test_provider_catalog_refresh.py b/tests/test_provider_catalog_refresh.py index 18f822f..989e47e 100644 --- a/tests/test_provider_catalog_refresh.py +++ b/tests/test_provider_catalog_refresh.py @@ -196,3 +196,40 @@ def test_due_provider_ids_returns_sources_without_recent_success(tmp_path): assert "blackbox" in due assert "kilo" in due + + +def test_build_catalog_alerts_include_local_availability_mismatches(): + summary = { + "items": [ + { + "provider_id": "blackbox", + "status": "current", + "last_error": "", + "seconds_since_success": 10, + "local_availability": { + "key_model_mismatches": [ + { + "route_name": "blackbox-free", + "model_id": "x-ai/grok-code-fast-1:free", + "visible_model_count": 1, + } + ], + "configured_models_missing_globally": ["x-ai/grok-code-fast-1:free"], + "local_only_models": ["x-ai/grok-code-fast-1"], + "models_endpoint_routes": 1, + "free_models_visible_locally": 0, + "global_free_models": ["x-ai/grok-code-fast-1:free"], + }, + } + ], + "recent_events": [], + } + + alerts = build_catalog_alerts(summary) + alert_summary = build_catalog_alert_summary(alerts) + + kinds = [alert["kind"] for alert in alerts] + assert "local-model-availability" in kinds + assert "catalog-route-mismatch" in kinds + assert "free-model-unavailable" in kinds + assert alert_summary["status"] == "intervention-needed" diff --git a/tests/test_provider_catalog_store.py b/tests/test_provider_catalog_store.py index 91efd84..0b1e8d2 100644 --- a/tests/test_provider_catalog_store.py +++ b/tests/test_provider_catalog_store.py @@ -86,3 +86,31 @@ def test_provider_catalog_store_persists_snapshots_and_events(tmp_path): events = store.get_recent_change_events(provider_id="blackbox") assert events[0]["change_type"] == "model-added" + + +def 
test_provider_catalog_store_returns_latest_availability_by_source(tmp_path): + db_path = tmp_path / "faigate.db" + store = ProviderCatalogStore(str(db_path)) + store.init() + store.record_availability_snapshot( + "blackbox", + "blackbox-free", + source_name="route-state", + model_id="x-ai/grok-code-fast-1:free", + request_ready=False, + checked_at=1.0, + ) + store.record_availability_snapshot( + "blackbox", + "blackbox-free", + source_name="models-endpoint", + model_id="x-ai/grok-code-fast-1:free", + available_for_key=False, + metadata={"visible_models": ["x-ai/grok-code-fast-1"]}, + checked_at=2.0, + ) + + rows = store.get_latest_availability(provider_id="blackbox") + + assert len(rows) == 2 + assert {row["source_name"] for row in rows} == {"route-state", "models-endpoint"}