diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1003_7da70c/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1003_7da70c/output.txt new file mode 100644 index 0000000..05af283 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1003_7da70c/output.txt @@ -0,0 +1,33 @@ +INFO: Started server process [69633] +INFO: Waiting for application startup. +12:03:25 [faigate] INFO Loaded config with 14 providers +12:03:25 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +12:03:25 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +12:03:25 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +12:03:25 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +12:03:25 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +12:03:25 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +12:03:25 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +12:03:25 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +12:03:25 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +12:03:25 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +12:03:25 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +12:03:25 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +12:03:25 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +12:03:25 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +ERROR: Traceback (most recent call last): + File "/opt/homebrew/lib/python3.14/site-packages/starlette/routing.py", line 694, in lifespan + async with self.lifespan_context(app) as maybe_state: + ~~~~~~~~~~~~~~~~~~~~~^^^^^ + File "/opt/homebrew/Cellar/python@3.14/3.14.3_1/Frameworks/Python.framework/Versions/3.14/lib/python3.14/contextlib.py", line 214, in __aenter__ + return await anext(self.gen) + ^^^^^^^^^^^^^^^^^^^^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/main.py", line 2252, in lifespan + _metrics.init() + 
~~~~~~~~~~~~~^^ + File "/Users/andrelange/Documents/repositories/github/faigate/faigate/metrics.py", line 121, in init + self._conn = sqlite3.connect(self._db_path, check_same_thread=False) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +sqlite3.OperationalError: unable to open database file + +ERROR: Application startup failed. Exiting. diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1004_637a07/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1004_637a07/output.txt new file mode 100644 index 0000000..e100ca3 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1004_637a07/output.txt @@ -0,0 +1,93 @@ +INFO: Started server process [72277] +INFO: Waiting for application startup. +12:04:53 [faigate] INFO Loaded config with 14 providers +12:04:53 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +12:04:53 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +12:04:53 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +12:04:53 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +12:04:53 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +12:04:53 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +12:04:53 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +12:04:53 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +12:04:53 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +12:04:53 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +12:04:53 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +12:04:53 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +12:04:53 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +12:04:53 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +12:04:53 [faigate.metrics] INFO Metrics DB ready: /Users/andrelange/.local/share/faigate/faigate.db +12:04:54 [httpx] INFO HTTP Request: GET https://docs.blackbox.ai/llms.txt "HTTP/1.1 200 OK" +12:04:54 
[httpx] INFO HTTP Request: GET https://docs.blackbox.ai/api-reference/models/chat-pricing "HTTP/1.1 200 OK" +12:04:54 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/models-and-providers "HTTP/1.1 200 OK" +12:04:55 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/usage-and-billing "HTTP/1.1 200 OK" +12:04:55 [httpx] INFO HTTP Request: GET https://platform.openai.com/docs/models "HTTP/1.1 403 Forbidden" +12:04:55 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/v1/models "HTTP/1.1 401 Unauthorized" +12:04:56 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/models "HTTP/1.1 401 Unauthorized" +12:04:56 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:04:56 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:04:56 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:04:57 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:04:57 [faigate] INFO Provider source refresh completed: 4/5 source endpoints succeeded (startup) +12:04:57 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:61351 - "POST /api/route HTTP/1.1" 200 OK +INFO: 127.0.0.1:61356 - "GET /api/traces?limit=1 HTTP/1.1" 200 OK +INFO: 127.0.0.1:61361 - "POST /api/route HTTP/1.1" 200 OK +12:06:00 [faigate] INFO Route: gemini-flash-lite [heuristic/simple-query] 1.2ms +12:06:00 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-lite:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +12:06:00 [faigate] WARNING Provider gemini-flash-lite failed: { + "error": { + "code": 400, + "message": "API key not valid. 
Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +12:06:00 [httpx] INFO HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:00 [faigate] WARNING Provider deepseek-chat failed: {"error":{"message":"Authentication Fails, Your api key: ****KEY} is invalid","type":"authentication_error","param":null,"code":"invalid_request_error"}}, trying next... +12:06:00 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:00 [faigate] WARNING Provider anthropic-haiku failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... +12:06:01 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +12:06:01 [faigate] WARNING Provider gemini-flash failed: { + "error": { + "code": 400, + "message": "API key not valid. Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +12:06:01 [httpx] INFO HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:01 [faigate] WARNING Provider deepseek-reasoner failed: {"error":{"message":"Authentication Fails, Your api key: ****KEY} is invalid","type":"authentication_error","param":null,"code":"invalid_request_error"}}, trying next... +12:06:01 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:01 [faigate] WARNING Provider anthropic-sonnet failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... 
+12:06:01 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-pro:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +12:06:01 [faigate] WARNING Provider gemini-pro-high failed: { + "error": { + "code": 400, + "message": "API key not valid. Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +12:06:01 [httpx] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:01 [faigate] WARNING Provider openai-gpt4o failed: { + "error": { + "message": "Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", trying next... +12:06:02 [httpx] INFO HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:02 [faigate] WARNING Provider openrouter-fallback failed: {"error":{"message":"Missing Authentication header","code":401}}, trying next... +12:06:02 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:02 [faigate] WARNING Provider anthropic-claude failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... +12:06:02 [httpx] INFO HTTP Request: POST https://api.kilo.ai/api/gateway/chat/completions "HTTP/1.1 404 Not Found" +12:06:02 [faigate] WARNING Provider kilocode failed: {"error":"The free period of this model ended. Please use kilo-auto/balanced for affordable inference or kilo-auto/free for limited free inference.","message":"The free period of this model ended. Ple, trying next... 
+12:06:02 [httpx] INFO HTTP Request: POST https://api.blackbox.ai/chat/completions "HTTP/1.1 401 Unauthorized" +12:06:02 [faigate] WARNING Provider blackbox-free failed: {"error":{"message":"Authentication Error, LiteLLM Virtual Key expected. Received=${BLACKBOX_API_KEY}, expected to start with 'sk-'.","type":"auth_error","param":"None","code":"401"}}, trying next... +INFO: 127.0.0.1:61367 - "POST /v1/chat/completions HTTP/1.1" 401 Unauthorized +INFO: 127.0.0.1:61388 - "POST /api/route HTTP/1.1" 200 OK +INFO: 127.0.0.1:61401 - "GET /api/traces?limit=1 HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +12:08:21 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. +INFO: Finished server process [72277] diff --git a/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1008_40ed04/output.txt b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1008_40ed04/output.txt new file mode 100644 index 0000000..e47f615 --- /dev/null +++ b/.codenomad/background_processes/mnh8wrrb/proc_2026-04-02T1008_40ed04/output.txt @@ -0,0 +1,36 @@ +INFO: Started server process [80392] +INFO: Waiting for application startup. 
+12:08:31 [faigate] INFO Loaded config with 14 providers +12:08:31 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +12:08:31 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +12:08:31 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +12:08:31 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +12:08:31 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +12:08:31 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +12:08:31 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +12:08:31 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +12:08:31 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +12:08:31 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +12:08:31 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +12:08:31 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +12:08:31 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +12:08:31 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +12:08:31 [faigate.metrics] INFO Metrics DB ready: ./faigate.db +12:08:31 [httpx] INFO HTTP Request: GET https://docs.blackbox.ai/llms.txt "HTTP/1.1 200 OK" +12:08:31 [httpx] INFO HTTP Request: GET https://docs.blackbox.ai/api-reference/models/chat-pricing "HTTP/1.1 200 OK" +12:08:31 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/models-and-providers "HTTP/1.1 200 OK" +12:08:32 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/usage-and-billing "HTTP/1.1 200 OK" +12:08:32 [httpx] INFO HTTP Request: GET https://platform.openai.com/docs/models "HTTP/1.1 403 Forbidden" +12:08:32 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/v1/models "HTTP/1.1 401 Unauthorized" +12:08:33 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/models "HTTP/1.1 401 Unauthorized" +12:08:33 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:08:33 [httpx] INFO HTTP Request: GET 
https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:08:33 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:08:34 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:08:34 [faigate] INFO Provider source refresh completed: 4/5 source endpoints succeeded (startup) +12:08:34 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:61521 - "GET /api/traces?limit=1 HTTP/1.1" 200 OK +INFO: 127.0.0.1:61533 - "POST /api/route HTTP/1.1" 200 OK +INFO: 127.0.0.1:61539 - "POST /api/route HTTP/1.1" 200 OK diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0935_cdbfc0/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0935_cdbfc0/output.txt new file mode 100644 index 0000000..20b0502 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0935_cdbfc0/output.txt @@ -0,0 +1,24 @@ +INFO: Started server process [17852] +INFO: Waiting for application startup. 
+11:35:44 [faigate] INFO Loaded config with 14 providers +11:35:44 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:35:44 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:35:44 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:35:44 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:35:44 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:35:44 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:35:44 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:35:44 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:35:44 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:35:44 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:35:44 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:35:44 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:35:44 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:35:44 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:35:44 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:35:44 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +11:35:44 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0936_751dcd/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0936_751dcd/output.txt new file mode 100644 index 0000000..5a23d62 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T0936_751dcd/output.txt @@ -0,0 +1,85 @@ +INFO: Started server process [20330] +INFO: Waiting for application startup. 
+11:36:28 [faigate] INFO Loaded config with 14 providers +11:36:28 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +11:36:28 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +11:36:28 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +11:36:28 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +11:36:28 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +11:36:28 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +11:36:28 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +11:36:28 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +11:36:28 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +11:36:28 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +11:36:28 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +11:36:28 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +11:36:28 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +11:36:28 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +11:36:28 [faigate] WARNING Provider source catalog startup refresh skipped: [Errno 13] Permission denied: '/private/var/lib/faigate' +11:36:28 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. 
+INFO: Uvicorn running on http://127.0.0.1:8092 (Press CTRL+C to quit) +INFO: 127.0.0.1:59746 - "GET /api/analytics/provider-mix HTTP/1.1" 200 OK +INFO: 127.0.0.1:59747 - "GET /api/analytics/provider-mix HTTP/1.1" 200 OK +INFO: 127.0.0.1:59748 - "GET /api/provider-catalog HTTP/1.1" 200 OK +INFO: 127.0.0.1:59765 - "GET /api/provider-catalog HTTP/1.1" 200 OK +INFO: 127.0.0.1:59770 - "GET /api/traces?limit=3 HTTP/1.1" 200 OK +INFO: 127.0.0.1:59779 - "GET /api/traces HTTP/1.1" 200 OK +INFO: 127.0.0.1:59788 - "POST /api/route HTTP/1.1" 200 OK +INFO: 127.0.0.1:59799 - "POST /api/route HTTP/1.1" 200 OK +INFO: 127.0.0.1:59807 - "POST /api/route HTTP/1.1" 200 OK +11:38:12 [faigate] INFO Route: gemini-flash-lite [heuristic/simple-query] 1.2ms +11:38:12 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-lite:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +11:38:12 [faigate] WARNING Provider gemini-flash-lite failed: { + "error": { + "code": 400, + "message": "API key not valid. Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +11:38:13 [httpx] INFO HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:13 [faigate] WARNING Provider deepseek-chat failed: {"error":{"message":"Authentication Fails, Your api key: ****KEY} is invalid","type":"authentication_error","param":null,"code":"invalid_request_error"}}, trying next... +11:38:13 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:13 [faigate] WARNING Provider anthropic-haiku failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... 
+11:38:13 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +11:38:13 [faigate] WARNING Provider gemini-flash failed: { + "error": { + "code": 400, + "message": "API key not valid. Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +11:38:13 [httpx] INFO HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:13 [faigate] WARNING Provider deepseek-reasoner failed: {"error":{"message":"Authentication Fails, Your api key: ****KEY} is invalid","type":"authentication_error","param":null,"code":"invalid_request_error"}}, trying next... +11:38:13 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:13 [faigate] WARNING Provider anthropic-sonnet failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... +11:38:14 [httpx] INFO HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-pro:generateContent?key=${GEMINI_API_KEY} "HTTP/1.1 400 Bad Request" +11:38:14 [faigate] WARNING Provider gemini-pro-high failed: { + "error": { + "code": 400, + "message": "API key not valid. Please pass a valid API key.", + "status": "INVALID_ARGUMENT", + "details": [ + { + "@type": "type.googleapis.com/googl, trying next... +11:38:14 [httpx] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:14 [faigate] WARNING Provider openai-gpt4o failed: { + "error": { + "message": "Incorrect API key provided: ${OPENAI*****KEY}. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", trying next... 
+11:38:14 [httpx] INFO HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:14 [faigate] WARNING Provider openrouter-fallback failed: {"error":{"message":"Missing Authentication header","code":401}}, trying next... +11:38:14 [httpx] INFO HTTP Request: POST https://api.anthropic.com/v1/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:14 [faigate] WARNING Provider anthropic-claude failed: {"error":{"code":"authentication_error","message":"Invalid Anthropic API Key","type":"invalid_request_error","param":null}}, trying next... +11:38:14 [httpx] INFO HTTP Request: POST https://api.kilo.ai/api/gateway/chat/completions "HTTP/1.1 404 Not Found" +11:38:14 [faigate] WARNING Provider kilocode failed: {"error":"The free period of this model ended. Please use kilo-auto/balanced for affordable inference or kilo-auto/free for limited free inference.","message":"The free period of this model ended. Ple, trying next... +11:38:15 [httpx] INFO HTTP Request: POST https://api.blackbox.ai/chat/completions "HTTP/1.1 401 Unauthorized" +11:38:15 [faigate] WARNING Provider blackbox-free failed: {"error":{"message":"Authentication Error, LiteLLM Virtual Key expected. Received=${BLACKBOX_API_KEY}, expected to start with 'sk-'.","type":"auth_error","param":"None","code":"401"}}, trying next... +INFO: 127.0.0.1:59825 - "POST /v1/chat/completions HTTP/1.1" 401 Unauthorized +INFO: Shutting down +INFO: Waiting for application shutdown. +12:03:19 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. 
+INFO: Finished server process [20330] diff --git a/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T1005_2884b5/output.txt b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T1005_2884b5/output.txt new file mode 100644 index 0000000..0ef0c31 --- /dev/null +++ b/.codenomad/background_processes/mnh9cz26/proc_2026-04-02T1005_2884b5/output.txt @@ -0,0 +1,36 @@ +INFO: Started server process [72940] +INFO: Waiting for application startup. +12:05:14 [faigate] INFO Loaded config with 14 providers +12:05:14 [faigate] INFO ✓ anthropic-haiku → claude-haiku-3-5 (default) +12:05:14 [faigate] INFO ✓ anthropic-sonnet → claude-sonnet-4-6 (mid) +12:05:14 [faigate] INFO ✓ anthropic-claude → claude-opus-4-6 (mid) +12:05:14 [faigate] INFO ✓ blackbox-free → blackboxai/x-ai/grok-code-fast-1 (fallback) +12:05:14 [faigate] INFO ✓ deepseek-chat → deepseek-chat (default) +12:05:14 [faigate] INFO ✓ deepseek-reasoner → deepseek-reasoner (reasoning) +12:05:14 [faigate] INFO ✓ gemini-flash → gemini-3-flash (mid) +12:05:14 [faigate] INFO ✓ gemini-flash-lite → gemini-3-flash-lite (cheap) +12:05:14 [faigate] INFO ✓ gemini-pro-high → gemini-3.1-pro (mid) +12:05:14 [faigate] INFO ✓ gemini-pro-low → gemini-3.1-pro (mid) +12:05:14 [faigate] INFO ✓ kilocode → z-ai/glm-5:free (fallback) +12:05:14 [faigate] INFO ✓ openai-gpt4o → gpt-4o (mid) +12:05:14 [faigate] INFO ✓ openai-images → gpt-image-1 (specialty) +12:05:14 [faigate] INFO ✓ openrouter-fallback → openrouter/auto (fallback) +12:05:14 [faigate.metrics] INFO Metrics DB ready: ./faigate.db +12:05:14 [httpx] INFO HTTP Request: GET https://docs.blackbox.ai/llms.txt "HTTP/1.1 200 OK" +12:05:14 [httpx] INFO HTTP Request: GET https://docs.blackbox.ai/api-reference/models/chat-pricing "HTTP/1.1 200 OK" +12:05:14 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/models-and-providers "HTTP/1.1 200 OK" +12:05:14 [httpx] INFO HTTP Request: GET https://kilo.ai/docs/gateway/usage-and-billing "HTTP/1.1 200 OK" +12:05:15 [httpx] INFO 
HTTP Request: GET https://platform.openai.com/docs/models "HTTP/1.1 403 Forbidden" +12:05:15 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/v1/models "HTTP/1.1 401 Unauthorized" +12:05:16 [httpx] INFO HTTP Request: GET https://api.blackbox.ai/models "HTTP/1.1 401 Unauthorized" +12:05:16 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:05:16 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:05:16 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:05:17 [httpx] INFO HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 401 Unauthorized" +12:05:17 [faigate] INFO Provider source refresh completed: 4/5 source endpoints succeeded (startup) +12:05:17 [faigate] INFO fusionAIze Gate ready on 127.0.0.1:8092 +INFO: Application startup complete. +ERROR: [Errno 48] error while attempting to bind on address ('127.0.0.1', 8092): address already in use +INFO: Waiting for application shutdown. +12:05:17 [faigate] INFO fusionAIze Gate shut down +INFO: Application shutdown complete. 
diff --git a/.codenomad/background_processes/mnhbc25b/index.json b/.codenomad/background_processes/mnhbc25b/index.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/.codenomad/background_processes/mnhbc25b/index.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd9700..2e631de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # fusionAIze Gate Changelog +## v2.0.1 - 2026-04-04 + +### Added + +- **OAuth wrapper for managed providers**: token store, generic OAuth backend, device-code flows for Google, Qwen, and Antigravity; `claude_code_oauth()` reads token from local claude CLI settings +- **Antigravity provider**: full registry, catalog, and lane-registry integration for `ag/` model family (Claude Opus/Sonnet 4.6, Gemini 3.x variants via Google Antigravity gateway) +- **Local worker GPU metrics**: probe GPU/VRAM usage from Ollama (`/api/ps`) and vLLM (`/metrics`); `GpuInfo` surfaced in discovery output and provider config +- **Dynamic model enumeration**: `dynamic_models` field on `DiscoveredWorker`; discovered models preferred over static defaults in `generate_provider_config` +- **Grid worker discovery**: reads `~/.faigrid/config.json` (JSON format) with fallback to legacy key=value state file +- **Per-client budget limits**: `cost_limit_usd_day` and `cost_limit_usd_month` fields in client profile config; HTTP 429 returned before routing when threshold is reached +- **Anomaly detection**: `MetricsStore.get_anomalies()` compares recent window to rolling baseline for error rate, latency, cost, and traffic spikes +- **Alerts API**: `GET /api/alerts` with configurable `lookback_hours` and `baseline_hours` parameters + +### Changed + +- `google-vertex` renamed to `google-gemini-cli` in registry and catalog (alias preserved for backward compatibility) + +## v2.0.0 - 2026-04-03 + +### Added + +- **Shell parity and intelligence**: CLI commands now integrate deeply with dashboard + - 
`--suggest` argument analyzes metrics to recommend relevant CLI commands + - `--link` generates dashboard deep‑link URLs with filters preserved + - All CLI commands (`overview`, `recent`, `daily`, `trends`) show dashboard links + - Filter arguments (`--provider`, `--modality`, `--client-profile`, etc.) work across commands + - Dashboard links include matching filters for seamless CLI→dashboard navigation +- **Safe config workflows**: New `faigate-config` CLI for config management + - `preview`: Preview config changes before applying + - `diff`: Show detailed config differences + - `apply`: Apply config changes with backup and confirmation + - `validate`: Validate config syntax and structure +- **Clipboard integration**: `--copy` flag copies dashboard URLs to clipboard (macOS/Linux/Windows) +- **Scope suggestions**: CLI suggests relevant commands based on metrics analysis (failure rates, provider concentration, costs, recent activity) +- **Local worker auto‑discovery**: `faigate-config discover` automatically detects local AI workers (Ollama, vLLM, LM Studio, LiteLLM) and suggests configuration snippets +- **Complete provider coverage**: All LLM AI Router custom endpoints now represented in the provider catalog + - Added missing providers: xAI, Z.AI, Mistral, Groq, HuggingFace, MoonshotAI, MiniMax, Volcano Engine, BytePlus, Qwen, OpenAI Codex, OpenCode Zen, Cerebras, GitHub Copilot, Synthetic, Kimi Coding, Vercel AI Gateway + - Generic provider support (OpenAI, Anthropic, Google) with config examples + - KiloCode model‑level access: individual catalog entries for `kilo-auto/frontier`, `kilo-auto/balanced`, `kilo-auto/free` + - Consistent `recommended_model` values across all providers +- **Local worker examples**: Commented configuration templates for Ollama, vLLM, LM Studio, LiteLLM in `config.yaml` +- **Enhanced provider catalog**: 41 curated provider entries (up from 17) with official source URLs, signup links, and volatility ratings + +### Changed + +- CLI help
text updated with new arguments and examples +- Dashboard deep links use proper URL encoding and parameter validation +- Existing CLI commands remain fully backward compatible + ## v1.21.0 - 2026-04-02 ### Added diff --git a/README.md b/README.md index fc42fb1..673d2b4 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Runs locally on Linux, macOS, and Windows, with first-class workstation guidance - Strong operator visibility: `/health`, provider inventory, route previews, traces, stats, update checks, and dashboard views are built in, including per-client usage highlights. - Practical rollout controls: fallback chains, maintenance windows, rollout rings, provider scopes, and post-update verification gates are already there. - Copy/paste onboarding: OpenClaw, n8n, CLI, delegated-agent traffic, provider templates, and env starter files ship with the repo. +- Shell parity & intelligence: CLI deep‑links, suggestions, and safe config workflows keep dashboard and shell views synchronized (v2.0.0+). - Curated provider-catalog checks catch stale model choices, volatile free-tier picks, and source-confidence gaps before local configs quietly age out. - Provider discovery can stay transparent: catalog entries can expose official or operator-configured signup links, while recommendation ranking stays performance-led and link-neutral. - The onboarding report and doctor CLI can surface those links with disclosure, so operators can share a signup path without turning discovery into biased ranking. 
diff --git a/config.yaml b/config.yaml index 0b866c8..972f09a 100644 --- a/config.yaml +++ b/config.yaml @@ -871,6 +871,292 @@ providers: timeout: connect_s: 10 read_s: 90 + + # ── Local runtimes (uncomment and configure) ────────────────────────────── + # ollama: + # api_key: "" + # backend: openai-compat + # base_url: http://127.0.0.1:11434/v1 + # auth_optional: true + # capabilities: + # cost_tier: local + # latency_tier: local + # max_tokens: 8192 + # model: ollama/llama3.3 + # tier: local + # timeout: + # connect_s: 10 + # read_s: 120 + + # vllm: + # api_key: "" + # backend: openai-compat + # base_url: http://127.0.0.1:8000/v1 + # auth_optional: true + # capabilities: + # cost_tier: local + # latency_tier: local + # max_tokens: 8192 + # model: vllm/your-model-id + # tier: local + # timeout: + # connect_s: 10 + # read_s: 120 + + # lmstudio: + # api_key: "" + # backend: openai-compat + # base_url: http://localhost:1234/v1 + # auth_optional: true + # capabilities: + # cost_tier: local + # latency_tier: local + # max_tokens: 8192 + # model: lmstudio/minimax-m2.1-gs32 + # tier: local + # timeout: + # connect_s: 10 + # read_s: 120 + + # litellm: + # api_key: "" + # backend: openai-compat + # base_url: http://localhost:4000/v1 + # auth_optional: true + # capabilities: + # cost_tier: local + # latency_tier: local + # max_tokens: 8192 + # model: litellm/your-model-id + # tier: local + # timeout: + # connect_s: 10 + # read_s: 120 + + # ── Generic providers (use any model) ───────────────────────────────────── + # openai: + # api_key: ${OPENAI_API_KEY} + # backend: openai-compat + # base_url: ${OPENAI_BASE_URL:-https://api.openai.com/v1} + # capabilities: + # cost_tier: standard + # latency_tier: balanced + # max_tokens: 8192 + # model: gpt-4o # any OpenAI model ID + # tier: default + # timeout: + # connect_s: 10 + # read_s: 60 + + # anthropic: + # api_key: ${ANTHROPIC_API_KEY} + # backend: anthropic-compat + # base_url: 
${ANTHROPIC_BASE_URL:-https://api.anthropic.com/v1} + # capabilities: + # cost_tier: standard + # latency_tier: balanced + # max_tokens: 16000 + # model: claude-sonnet-4-6 # any Anthropic model ID + # tier: default + # timeout: + # connect_s: 10 + # read_s: 90 + + # google: + # api_key: ${GEMINI_API_KEY} + # backend: google-genai + # base_url: ${GEMINI_BASE_URL:-https://generativelanguage.googleapis.com/v1beta} + # capabilities: + # cost_tier: standard + # latency_tier: balanced + # max_tokens: 65536 + # model: gemini-3.1-pro # any Gemini model ID + # tier: default + # timeout: + # connect_s: 10 + # read_s: 90 + + # ── OAuth‑managed providers ──────────────────────────────────────────────── + # + # qwen-portal (Qwen OAuth free tier via qwen-code CLI) + # Prerequisites: npm install -g @qwen-code/cli && qwen auth login + # Token is read from ~/.qwen/oauth_creds.json; base_url from resource_url field. + # Set QWEN_PORTAL_TOKEN to the access_token value, or let faigate-auth resolve it. + # + # qwen-portal: + # backend: openai-compat + # base_url: "https://portal.qwen.ai/compatible-mode/v1" # or use $QWEN_PORTAL_BASE_URL + # api_key_env: QWEN_PORTAL_TOKEN + # model: coder-model + # tier: default + # timeout: + # connect_s: 10 + # read_s: 90 + + # claude-code: + # backend: oauth + # oauth: + # helper: "faigate-auth claude-code" + # client_id: "anthropic-codex-client" + # token_endpoint: "https://api.anthropic.com/oauth/token" + # refresh_endpoint: "https://api.anthropic.com/oauth/refresh" + # scope: "openid email" + # underlying_backend: anthropic-compat + # base_url: ${ANTHROPIC_BASE_URL:-https://api.anthropic.com/v1} + # model: claude-code + # tier: default + # timeout: + # connect_s: 10 + # read_s: 90 + + # openai-codex: + # backend: oauth + # oauth: + # helper: "faigate-auth openai-codex" + # client_id: "openai-codex-client" + # token_endpoint: "https://api.openai.com/oauth/token" + # refresh_endpoint: "https://api.openai.com/oauth/refresh" + # scope: "openid email" 
+ # underlying_backend: openai-compat + # base_url: ${OPENAI_BASE_URL:-https://api.openai.com/v1} + # model: openai-codex/gpt-5.3-codex + # tier: default + # timeout: + # connect_s: 10 + # read_s: 60 + + # Antigravity (Google OAuth – Authorization Code + PKCE → Google Generative Language API) + # Network discovery: Antigravity's client interface is a LOCAL ephemeral gRPC language + # server (127.0.0.1:PORT/exa.language_server_pb.LanguageServerService/…) that proxies + # to Google internally. faigate uses the Google token to call the upstream API directly. + # + # Prerequisites: + # 1. Sign in to the Antigravity IDE -OR- run: faigate-auth google-antigravity + # Token is stored at ~/.gemini/oauth_creds.json + # 2. Set ANTIGRAVITY_TOKEN to the access_token from that file + # (Optional: override ANTIGRAVITY_BASE_URL to use a different Google endpoint) + # + # OAuth details: + # client_id: 1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com + # scopes: cloud-platform, userinfo.email, userinfo.profile, cclog, experimentsandconfigs + # token file: ~/.gemini/oauth_creds.json (shared with Antigravity IDE) + # + antigravity: + backend: openai-compat + base_url: https://generativelanguage.googleapis.com/v1beta/openai + base_url_env: ANTIGRAVITY_BASE_URL # optional override + api_key_env: ANTIGRAVITY_TOKEN # access_token from ~/.gemini/oauth_creds.json + model: gemini-2.5-pro + tier: default + timeout: + connect_s: 10 + read_s: 90 + + # --------------------------------------------------------------------------- + # Additional providers (LLMAIRouter parity) – uncomment to activate + # --------------------------------------------------------------------------- + + # DeepSeek – NOTE: deepseek-chat and deepseek-reasoner are already configured + # above as active providers. This entry is a convenience alias using the + # registry key; the active configs above take precedence. 
+ # deepseek: + # api_key: ${DEEPSEEK_API_KEY} + # backend: openai-compat + # base_url: ${DEEPSEEK_BASE_URL:-https://api.deepseek.com/v1} + # model: deepseek-reasoner + # tier: reasoning + # timeout: + # connect_s: 10 + # read_s: 120 + + # Together AI – serverless inference, Llama / Mixtral / DeepSeek / Qwen + # together: + # api_key: ${TOGETHER_API_KEY} + # backend: openai-compat + # base_url: ${TOGETHER_BASE_URL:-https://api.together.xyz/v1} + # model: meta-llama/Llama-3.3-70B-Instruct-Turbo + # tier: cheap + # timeout: + # connect_s: 10 + # read_s: 60 + + # Fireworks AI – fast serverless inference, DeepSeek / Llama / Qwen + # fireworks: + # api_key: ${FIREWORKS_API_KEY} + # backend: openai-compat + # base_url: ${FIREWORKS_BASE_URL:-https://api.fireworks.ai/inference/v1} + # model: accounts/fireworks/models/deepseek-r1 + # tier: cheap + # timeout: + # connect_s: 10 + # read_s: 90 + + # Cohere – Command A/R series, OpenAI-compat compatibility endpoint + # cohere: + # api_key: ${COHERE_API_KEY} + # backend: openai-compat + # base_url: ${COHERE_BASE_URL:-https://api.cohere.com/compatibility/v1} + # model: command-a-03-2025 + # tier: default + # timeout: + # connect_s: 10 + # read_s: 60 + + # Nebius AI Studio – DeepSeek / Llama / Qwen on European infra + # nebius: + # api_key: ${NEBIUS_API_KEY} + # backend: openai-compat + # base_url: ${NEBIUS_BASE_URL:-https://api.studio.nebius.ai/v1} + # model: deepseek-ai/DeepSeek-R1 + # tier: cheap + # timeout: + # connect_s: 10 + # read_s: 90 + + # SiliconFlow – low-cost inference (CN), DeepSeek / Qwen / GLM + # siliconflow: + # api_key: ${SILICONFLOW_API_KEY} + # backend: openai-compat + # base_url: ${SILICONFLOW_BASE_URL:-https://api.siliconflow.cn/v1} + # model: deepseek-ai/DeepSeek-R1 + # tier: cheap + # timeout: + # connect_s: 10 + # read_s: 90 + + # Hyperbolic – GPU cloud inference, DeepSeek / Llama / Qwen + # hyperbolic: + # api_key: ${HYPERBOLIC_API_KEY} + # backend: openai-compat + # base_url: 
${HYPERBOLIC_BASE_URL:-https://api.hyperbolic.xyz/v1} + # model: deepseek-ai/DeepSeek-R1 + # tier: cheap + # timeout: + # connect_s: 10 + # read_s: 90 + + # Perplexity – online/search-augmented models + # perplexity: + # api_key: ${PERPLEXITY_API_KEY} + # backend: openai-compat + # base_url: ${PERPLEXITY_BASE_URL:-https://api.perplexity.ai} + # model: sonar-pro + # tier: default + # timeout: + # connect_s: 10 + # read_s: 60 + + # NVIDIA NIM – optimized inference on NVIDIA infra + # nvidia-nim: + # api_key: ${NVIDIA_API_KEY} + # backend: openai-compat + # base_url: ${NVIDIA_NIM_BASE_URL:-https://integrate.api.nvidia.com/v1} + # model: deepseek-ai/deepseek-r1 + # tier: default + # timeout: + # connect_s: 10 + # read_s: 90 + client_profiles: enabled: true default: generic diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index c7af4c0..460a45f 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -366,6 +366,87 @@ For the end-to-end flow and local smoke example, see [Anthropic Bridge](./anthro Use the onboarding docs and starter examples when introducing a new client instead of hand-authoring these sections from scratch. +## CLI Intelligence & Config Management (v2.0.0+) + +fusionAIze Gate v2.0.0 introduces deeper shell parity between the CLI and dashboard, plus safe config workflows and local worker auto‑discovery. + +### Dashboard Deep‑Links & Suggestions + +The `faigate-stats` CLI now integrates with the dashboard through filter‑preserving deep‑links: + +```bash +# Generate dashboard URL with current filters +faigate-stats --link --view routes --provider deepseek-chat +# Copy URL to clipboard (macOS/Linux/Windows) +faigate-stats --link --view routes --provider deepseek-chat --copy +# Get CLI command suggestions based on metrics analysis +faigate-stats --suggest +``` + +All CLI commands (`overview`, `recent`, `daily`, `trends`) now show matching dashboard links. 
Filter arguments work across commands: + +- `--provider` – filter by provider id (e.g., `deepseek-chat`, `gemini-flash`) +- `--modality` – filter by modality (`chat`, `image`, `code`) +- `--client-profile` – filter by client profile (`opencode`, `n8n`, `openclaw`) +- `--client-tag` – filter by client tag +- `--layer` – filter by routing layer (`policy`, `profile`, `static`, `heuristic`) +- `--success` – filter by success (`true`/`false`) + +### Safe Config Workflows + +The new `faigate-config` CLI provides safe config management with preview, diff, and backup: + +```bash +# Preview config changes before applying +faigate-config preview --provider xai --provider zai +# Show detailed diff between config versions +faigate-config diff config.yaml config.new.yaml +# Apply changes with backup and confirmation +faigate-config apply config.new.yaml --backup --confirm +# Validate config syntax and structure +faigate-config validate config.yaml +``` + +### Local Worker Auto‑Discovery + +Automatically detect local AI workers and generate configuration snippets: + +```bash +# Scan for local workers (Ollama, vLLM, LM Studio, LiteLLM) +faigate-config discover +# JSON output for automation +faigate-config discover --json +# Skip port scanning, only check Grid integration +faigate-config discover --no-scan +``` + +The discovery command checks: +- **Ollama** (localhost:11434) +- **vLLM** (localhost:8000) +- **LM Studio** (localhost:1234) +- **LiteLLM** proxy (localhost:4000) +- **Grid** integration (if available) + +For each detected worker, it suggests a ready‑to‑copy provider block for `config.yaml`. 
+ +### Complete Provider Coverage + +The provider catalog now includes **43 curated entries** covering all LLM AI Router custom endpoints: + +```bash +# View available providers +faigate-stats --link --view catalog +# Check provider metadata and recommended models +faigate-stats --provider xai --link +``` + +New providers include: +- **xAI / Grok**, **Z.AI / GLM**, **Mistral**, **Groq**, **HuggingFace Inference** +- **Moonshot AI / Kimi**, **MiniMax**, **Volcano Engine / Doubao**, **BytePlus** +- **Qwen**, **OpenAI Codex**, **OpenCode Zen**, **Cerebras**, **GitHub Copilot** +- **Synthetic**, **Kimi Coding**, **Vercel AI Gateway** +- **KiloCode model‑level lanes**: `kilo-auto/frontier`, `kilo-auto/balanced`, `kilo-auto/free` + ## Config Wizard For a first local config, let fusionAIze Gate suggest one from the API keys already present in your env file: diff --git a/docs/FAIGATE-ROADMAP.md b/docs/FAIGATE-ROADMAP.md index e663e89..1698a93 100644 --- a/docs/FAIGATE-ROADMAP.md +++ b/docs/FAIGATE-ROADMAP.md @@ -2,7 +2,7 @@ ## Status -`v1.21.0` is shipped. +`v2.0.0` is shipped. Gate is no longer just a routing core with helper scripts around it. 
The current product baseline is now clear: @@ -14,7 +14,7 @@ current product baseline is now clear: - an operator shell made up of dashboard, doctor, catalog, probe, and guided setup - package renewal alerts and cost projection wizard -### Recent Achievements (v1.15.0 - v1.21.0) +### Recent Achievements (v1.15.0 - v2.0.0) - **Anthropic bridge production-ready**: SSE streaming adapter, tool result continuity, Claude Code model ID mapping - **Dashboard enhancements**: Package renewal alerts, cost trends CLI, uPlot charts integration - **Operator tools**: Branch management guidelines, model shortcut alias conflict detection @@ -22,6 +22,7 @@ current product baseline is now clear: - **Claude Desktop parity finalization**: Desktop endpoint override flows, bridge hardening, workflow validation (v1.19.x) - **External metadata integration**: Git-based metadata sync, model/provider/price mapping, cost truth visualization (v1.20.x) - **Route explainability & operator trust**: Lane family decision factors, selection path categorization, route decision drilldowns (v1.21.x) +- **Shell parity & complete provider coverage**: CLI deep‑links, config workflows, local worker discovery, all LLM AI Router custom endpoints, KiloCode model‑level lanes (v2.0.0) The roadmap should now stay disciplined. The next release lines should finalize Claude Desktop parity, then deepen operator trust through metadata truth and @@ -269,25 +270,34 @@ explainability so operators understand and trust routing decisions. ## v2.0.0 Planning -**Target: Major release with shell parity, local worker support, and enhanced client profiles** +**Target: Major release with shell parity, local worker support, complete provider coverage, and enhanced client profiles** ### Core Themes -1. **Shell parity and intelligence** - - Shell-backed scope suggestions matching dashboard - - Deep links between dashboard panels and CLI views - - Safe config preview/diff/apply workflows - -2. 
**Local worker support** - - First-class local model worker integration - - Worker health monitoring and auto-recovery - - Cost-aware routing between local and cloud providers - -3. **Enhanced client profiles** +1. **Shell parity and intelligence** ✓ _(implemented)_ + - Shell-backed scope suggestions matching dashboard ✓ + - Deep links between dashboard panels and CLI views ✓ + - Safe config preview/diff/apply workflows ✓ + - Config workflow suggestions and deep‑link generation ✓ + +2. **Local worker support** ✓ _(implemented)_ + - First‑class local model worker integration ✓ (cost‑tier mapping, auto‑discovery CLI) + - Worker health monitoring and auto‑recovery ✓ (basic health probes) + - Cost‑aware routing between local and cloud providers ✓ (local cost tier scoring) + - Example configurations for Ollama, vLLM, LM Studio, LiteLLM ✓ + +3. **Complete provider coverage** ✓ _(implemented)_ + - All LLM AI Router custom endpoints represented in provider catalog ✓ + - Generic provider support (OpenAI, Anthropic, Google) with config examples ✓ + - Full provider families (Mistral, Groq, xAI, HuggingFace, Cerebras, etc.) ✓ + - KiloCode model‑level access with individual catalog entries ✓ + - Consistent `recommended_model` values across all providers ✓ + +4. **Enhanced client profiles** ⚠️ _(deferred to v2.1.0)_ - Advanced client policy management - - Per-client routing rules and cost controls - - Client-specific observability and reporting + - Per‑client routing rules and cost controls + - Client‑specific observability and reporting -4. **Observability improvements** +5. **Observability improvements** ⚠️ _(deferred to v2.1.0)_ - Advanced metrics and alerting - Performance tracing across request chains - Automated anomaly detection @@ -295,9 +305,50 @@ explainability so operators understand and trust routing decisions. 
### Considerations - v2.0.0 may include breaking changes for cleaner APIs and configuration - Migration paths will be documented for existing deployments -- Focus remains on gateway-first architecture and operator trust +- Focus remains on gateway‑first architecture and operator trust +- **Provider coverage now matches LLM AI Router’s custom endpoints**; each KiloCode model can be accessed individually via API key +- **Local worker examples** added to config.yaml; generic providers available as commented templates + + *Detailed planning and issue creation pending review of current priorities and community feedback.* -*Detailed planning and issue creation pending review of current priorities and community feedback.* +## v2.1.0 Planning + +**Target: Managed provider OAuth wrapper, enhanced local worker integration, and advanced client profiles** + +### Core Themes +1. **Managed provider OAuth wrapper** ✓ _(implemented)_ + - OAuth‑based authentication for managed providers (Gemini, Antigravity, etc.) ✓ + - Interactive device‑code login flows (Google, Qwen, Antigravity) ✓ + - Token store and generic OAuth backend ✓ + - Antigravity provider in registry, catalog, and lane registry (ag/ model family) ✓ + - claude_code_oauth() reading token from local claude CLI settings ✓ + +2. **Local worker completion** ✓ _(implemented)_ + - Grid integration: reads `~/.faigrid/config.json` + legacy state file ✓ + - GPU/VRAM metrics via Ollama `/api/ps` and vLLM `/metrics` ✓ + - Dynamic model enumeration from `/v1/models` endpoints ✓ + - `dynamic_models` field in DiscoveredWorker; surfaced in generate_provider_config ✓ + - _(Lifecycle management hooks deferred — requires Grid daemon integration)_ + +3. 
**Enhanced client profiles** ✓ _(implemented)_ + - `cost_limit_usd_day` and `cost_limit_usd_month` per profile ✓ + - Config validation with type checking ✓ + - HTTP 429 enforcement before routing when budget is reached ✓ + - Provider allow/deny lists already live in policy layer ✓ + - _(Advanced policy management UI deferred)_ + +4. **Observability suite** ✓ _(implemented)_ + - `MetricsStore.get_anomalies()`: error rate, latency, cost, and traffic spike detection ✓ + - `GET /api/alerts` endpoint with configurable lookback and baseline windows ✓ + - GPU utilization surfaced from local worker probes ✓ + - _(External alerting integrations and Prometheus export deferred)_ + +### Considerations +- Maintain backward compatibility with v2.0.0 configurations +- Focus on operator trust through enhanced visibility +- Keep gateway‑first architecture principle +- OAuth wrapper should be optional; API‑key providers remain the default +- Interactive login flows must be clearly separated from automated routing core ## Anti-Goals diff --git a/docs/ONBOARDING.md b/docs/ONBOARDING.md index 7c47dc0..37a216a 100644 --- a/docs/ONBOARDING.md +++ b/docs/ONBOARDING.md @@ -337,6 +337,80 @@ Recommended rollout: 3. introduce policies only for real constraints 4. keep route debugging enabled through traces and stats +## CLI Intelligence & Dashboard Integration (v2.0.0+) + +fusionAIze Gate v2.0.0 brings deeper shell parity between the CLI and dashboard. 
Key enhancements: + +### Dashboard Deep‑Links + +All `faigate-stats` commands now generate matching dashboard URLs: + +```bash +# Generate URL for current view/filters +faigate-stats --link --view routes --provider deepseek-chat +# Copy to clipboard +faigate-stats --link --view routes --provider deepseek-chat --copy +``` + +Filters work across all CLI commands: +- `--provider` – filter by provider id +- `--modality` – filter by modality (`chat`, `image`, `code`) +- `--client-profile` – filter by client profile (`opencode`, `n8n`, `openclaw`) +- `--layer` – filter by routing layer (`policy`, `profile`, `static`, `heuristic`) + +### Intelligent Suggestions + +The CLI can analyze metrics and suggest relevant commands: + +```bash +# Get command suggestions based on failure rates, provider concentration, costs, recent activity +faigate-stats --suggest +``` + +### Safe Config Management + +New `faigate-config` CLI provides safe config workflows: + +```bash +# Preview config changes +faigate-config preview --provider xai --provider zai +# Show detailed diff +faigate-config diff config.yaml config.new.yaml +# Apply with backup and confirmation +faigate-config apply config.new.yaml --backup --confirm +# Validate syntax +faigate-config validate config.yaml +``` + +### Local Worker Auto‑Discovery + +Automatically detect local AI workers: + +```bash +# Scan for Ollama, vLLM, LM Studio, LiteLLM +faigate-config discover +# JSON output for automation +faigate-config discover --json +``` + +For each detected worker, the command suggests a ready‑to‑copy provider block for `config.yaml`. 
+ +### Complete Provider Coverage + +The provider catalog now includes **43 curated entries** covering all LLM AI Router custom endpoints: + +- **xAI / Grok**, **Z.AI / GLM**, **Mistral**, **Groq**, **HuggingFace Inference** +- **Moonshot AI / Kimi**, **MiniMax**, **Volcano Engine / Doubao**, **BytePlus** +- **Qwen**, **OpenAI Codex**, **OpenCode Zen**, **Cerebras**, **GitHub Copilot** +- **Synthetic**, **Kimi Coding**, **Vercel AI Gateway** +- **KiloCode model‑level lanes**: `kilo‑auto/frontier`, `/balanced`, `/free` + +View the full catalog: + +```bash +faigate-stats --link --view catalog +``` + ## Update operations Current state: diff --git a/faigate/__init__.py b/faigate/__init__.py index 0d15a6a..5d1bdd3 100644 --- a/faigate/__init__.py +++ b/faigate/__init__.py @@ -1,3 +1,3 @@ """fusionAIze Gate package.""" -__version__ = "1.21.0" +__version__ = "2.0.1" diff --git a/faigate/cli.py b/faigate/cli.py index c014338..7661959 100644 --- a/faigate/cli.py +++ b/faigate/cli.py @@ -14,9 +14,11 @@ import argparse import json +import os import sys import time from pathlib import Path +from urllib.parse import urlencode from .config import _safe_db_path, load_config from .metrics import MetricsStore @@ -24,6 +26,155 @@ from .cost import estimate_provider_cost +# ── Dashboard URL generation ────────────────────────────────── + +DEFAULT_DASHBOARD_URL = os.environ.get("FAIGATE_DASHBOARD_URL", "http://localhost:8000/dashboard") + +VALID_VIEWS = { + "overview": "Overview", + "providers": "Providers", + "clients": "Clients", + "routes": "Routes", + "analytics": "Analytics", + "catalog": "Catalog", + "integrations": "Integrations", +} + + +def generate_dashboard_url( + view: str = "overview", + provider: str = "", + modality: str = "", + client_profile: str = "", + client_tag: str = "", + layer: str = "", + success: str = "", + saved_view: str = "", +) -> str: + """Generate a dashboard deep-link URL with the given filters.""" + params = {} + if provider: + params["provider"] = 
provider + if modality: + params["modality"] = modality + if client_profile: + params["client_profile"] = client_profile + if client_tag: + params["client_tag"] = client_tag + if layer: + params["layer"] = layer + if success: + params["success"] = success + if saved_view: + params["saved_view"] = saved_view + if view and view != "overview": + params["view"] = view + + url = DEFAULT_DASHBOARD_URL + if params: + url += "?" + urlencode(params) + return url + + +def cmd_dashboard_link( + view: str = "overview", + provider: str = "", + modality: str = "", + client_profile: str = "", + client_tag: str = "", + layer: str = "", + success: str = "", + saved_view: str = "", + copy: bool = False, +): + """Generate and display a dashboard deep-link URL.""" + # Validate view + if view not in VALID_VIEWS: + print(_c(f"Error: Invalid view '{view}'. Valid views are:", RED)) + for v, desc in VALID_VIEWS.items(): + print(f" {v:12} - {desc}") + return + + url = generate_dashboard_url( + view=view, + provider=provider, + modality=modality, + client_profile=client_profile, + client_tag=client_tag, + layer=layer, + success=success, + saved_view=saved_view, + ) + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Dashboard Deep Link", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + # Show parameters + params_used = [] + if view != "overview": + params_used.append(f"view={view}") + if provider: + params_used.append(f"provider={provider}") + if modality: + params_used.append(f"modality={modality}") + if client_profile: + params_used.append(f"client_profile={client_profile}") + if client_tag: + params_used.append(f"client_tag={client_tag}") + if layer: + params_used.append(f"layer={layer}") + if success: + params_used.append(f"success={success}") + if saved_view: + params_used.append(f"saved_view={saved_view}") + + if params_used: + print(_c(" Parameters:", DIM)) + for param in 
params_used: + print(f" {param}") + print() + + print(_c(" URL:", DIM)) + print(f" {url}") + print() + + # Platform-specific copy instructions + if copy: + import platform + import subprocess + + try: + system = platform.system() + if system == "Darwin": # macOS + subprocess.run(["pbcopy"], input=url.encode(), check=False) + print(_c(" ✓ URL copied to clipboard (macOS pbcopy)", GREEN)) + elif system == "Linux": + # Try xclip first, then xsel + try: + subprocess.run(["xclip", "-selection", "clipboard"], input=url.encode(), check=False) + print(_c(" ✓ URL copied to clipboard (Linux xclip)", GREEN)) + except FileNotFoundError: + try: + subprocess.run(["xsel", "--clipboard", "--input"], input=url.encode(), check=False) + print(_c(" ✓ URL copied to clipboard (Linux xsel)", GREEN)) + except FileNotFoundError: + print(_c(" Note: Install xclip or xsel for clipboard support", YELLOW)) + elif system == "Windows": + subprocess.run(["clip"], input=url.encode(), check=False) + print(_c(" ✓ URL copied to clipboard (Windows clip)", GREEN)) + else: + print(_c(f" Note: Clipboard not supported on {system}", YELLOW)) + except Exception as e: + print(_c(f" Note: Could not copy to clipboard: {e}", YELLOW)) + + print(_c(" Open in browser:", DIM)) + print(f" {_c('open', BOLD)} '{url}'") + print() + + # ── Formatting helpers ───────────────────────────────────────── RESET = "\033[0m" @@ -105,11 +256,11 @@ def _table(headers: list[str], rows: list[list[str]], col_widths: list[int] | No # ── Commands ─────────────────────────────────────────────────── -def cmd_overview(metrics: MetricsStore): - totals = metrics.get_totals() - providers = metrics.get_provider_summary() - routing = metrics.get_routing_breakdown() - clients = metrics.get_client_breakdown() +def cmd_overview(metrics: MetricsStore, **filters): + totals = metrics.get_totals(**filters) + providers = metrics.get_provider_summary(**filters) + routing = metrics.get_routing_breakdown(**filters) + clients = 
metrics.get_client_breakdown(**filters) print() print(_c(" ╔══════════════════════════════════════╗", BLUE)) @@ -204,9 +355,25 @@ def cmd_overview(metrics: MetricsStore): _table(["Profile", "Client", "Provider", "Layer", "Reqs", "Cost"], rows) print() + # Dashboard link + print(_c(" ── Dashboard ───────────────────────────", DIM)) + url = generate_dashboard_url(**filters) + print(f" {_c('View in browser:', DIM)} {_c('open', BOLD)} '{url}'") + + # Build CLI command suggestion with filters + filter_args = [] + for key, value in filters.items(): + if key == "success": + filter_args.append(f"--success {str(value).lower()}") + else: + filter_args.append(f"--{key.replace('_', '-')} {value}") + filter_str = " ".join(filter_args) + print(f" {_c('Generate deep link:', DIM)} {_c(f'faigate-stats --link --view overview {filter_str}', DIM)}") + print() + -def cmd_recent(metrics: MetricsStore, limit: int): - recent = metrics.get_recent(limit) +def cmd_recent(metrics: MetricsStore, limit: int, **filters): + recent = metrics.get_recent(limit, **filters) if not recent: print(_c(" No requests recorded yet.", DIM)) return @@ -235,8 +402,24 @@ def cmd_recent(metrics: MetricsStore, limit: int): ) print() + # Dashboard link + print(_c(" ── Dashboard ───────────────────────────", DIM)) + url = generate_dashboard_url(**filters) + print(f" {_c('View in browser:', DIM)} {_c('open', BOLD)} '{url}'") + + # Build CLI command suggestion with filters + filter_args = [] + for key, value in filters.items(): + if key == "success": + filter_args.append(f"--success {str(value).lower()}") + else: + filter_args.append(f"--{key.replace('_', '-')} {value}") + filter_str = " ".join(filter_args) + print(f" {_c('See more recent:', DIM)} {_c(f'faigate-stats --link --view overview {filter_str}', DIM)}") + print() + -def cmd_daily(metrics: MetricsStore, days: int): +def cmd_daily(metrics: MetricsStore, days: int, **filters): daily = metrics.get_daily_totals(days) if not daily: print(_c(" No data for the 
selected period.", DIM)) @@ -271,6 +454,13 @@ def cmd_daily(metrics: MetricsStore, days: int): ) print() + # Dashboard link + print(_c(" ── Dashboard ───────────────────────────", DIM)) + url = generate_dashboard_url(view="analytics") + print(f" {_c('View analytics:', DIM)} {_c('open', BOLD)} '{url}'") + print(f" {_c('Generate deep link:', DIM)} {_c('faigate-stats --link --view analytics', DIM)}") + print() + def cmd_project( tokens_input: int, @@ -348,7 +538,7 @@ def cmd_project( print() -def cmd_trends(metrics: MetricsStore, days: int): +def cmd_trends(metrics: MetricsStore, days: int, **filters): """Show cost trends over time.""" daily = metrics.get_daily_totals(days) if not daily: @@ -389,6 +579,126 @@ def cmd_trends(metrics: MetricsStore, days: int): ) print() + # Dashboard link + print(_c(" ── Dashboard ───────────────────────────", DIM)) + url = generate_dashboard_url(view="analytics", **filters) + print(f" {_c('View analytics:', DIM)} {_c('open', BOLD)} '{url}'") + print(f" {_c('Generate deep link:', DIM)} {_c('faigate-stats --link --view analytics', DIM)}") + print() + + +def cmd_suggest(metrics: MetricsStore, **filters): + """Suggest relevant CLI commands based on metrics analysis.""" + totals = metrics.get_totals(**filters) + providers = metrics.get_provider_summary(**filters) + recent = metrics.get_recent(20, **filters) + + total_requests = totals.get("total_requests", 0) or 0 + total_failures = totals.get("total_failures", 0) or 0 + total_cost = totals.get("total_cost_usd", 0) or 0 + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" CLI Command Suggestions", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + suggestions = [] + + # Analyze failures + failure_rate = (total_failures / total_requests * 100) if total_requests > 0 else 0 + if failure_rate > 10: # More than 10% failure rate + suggestions.append( + { + "priority": "high", + "description": 
f"High failure rate ({failure_rate:.1f}%)", + "command": "faigate-stats --recent 20 --success false", + "reason": "Investigate recent failed requests", + } + ) + + # Analyze provider distribution + if providers: + top_provider = max(providers, key=lambda p: p.get("requests", 0)) if providers else None + if top_provider: + provider_name = top_provider.get("provider", "") + provider_requests = top_provider.get("requests", 0) + provider_share = (provider_requests / total_requests * 100) if total_requests > 0 else 0 + + if provider_share > 50: # One provider handles >50% of traffic + suggestions.append( + { + "priority": "medium", + "description": f"Provider concentration: {provider_name} ({provider_share:.1f}% of traffic)", + "command": f"faigate-stats --provider {provider_name}", + "reason": "Focus on dominant provider", + } + ) + + # Analyze cost + if total_cost > 10: # More than $10 total cost + suggestions.append( + { + "priority": "medium", + "description": f"Significant cost detected (${total_cost:.2f})", + "command": "faigate-stats --daily --days 30", + "reason": "Review daily cost breakdown", + } + ) + + # Analyze recent activity + if recent: + recent_failures = sum(1 for r in recent if not r.get("success")) + if recent_failures > 0: + suggestions.append( + { + "priority": "medium", + "description": f"Recent failures ({recent_failures} in last 20 requests)", + "command": "faigate-stats --recent 20", + "reason": "Check recent request log", + } + ) + + # Always suggest dashboard link + suggestions.append( + { + "priority": "low", + "description": "Open dashboard for visual analysis", + "command": "faigate-stats --link", + "reason": "Interactive exploration", + } + ) + + # Sort by priority (high > medium > low) + priority_order = {"high": 0, "medium": 1, "low": 2} + suggestions.sort(key=lambda x: priority_order[x["priority"]]) + + if not suggestions: + print(_c(" No specific suggestions based on current metrics.", DIM)) + print(_c(" Try:", DIM)) + print(_c(" • 
faigate-stats --overview", DIM)) + print(_c(" • faigate-stats --link", DIM)) + print() + return + + for i, suggestion in enumerate(suggestions, 1): + priority_color = { + "high": RED, + "medium": YELLOW, + "low": GREEN, + }.get(suggestion["priority"], WHITE) + + print(f" {i}. {_c(suggestion['description'], priority_color)}") + print(f" {_c('Command:', DIM)} {_c(suggestion['command'], BOLD)}") + print(f" {_c('Reason:', DIM)} {suggestion['reason']}") + print() + + print(_c(" Tip: Use filters to focus analysis:", DIM)) + print(_c(" • --provider Filter by provider", DIM)) + print(_c(" • --success false Show only failures", DIM)) + print(_c(" • --days 7 Limit to last 7 days", DIM)) + print() + # ── Main ─────────────────────────────────────────────────────── @@ -416,8 +726,64 @@ def main(): parser.add_argument("--no-credits", action="store_true", help="Exclude package credits from projection") parser.add_argument("--trends", action="store_true", help="Show cost trends over time") parser.add_argument("--trend-days", type=int, default=30, help="Days for --trends (default: 30)") + parser.add_argument("--suggest", action="store_true", help="Suggest relevant CLI commands based on metrics") + + # Dashboard link arguments + parser.add_argument("--link", action="store_true", help="Generate dashboard deep-link URL") + parser.add_argument( + "--view", + type=str, + default="overview", + help="Dashboard view (overview, providers, clients, routes, analytics, catalog, integrations)", + ) + parser.add_argument("--provider", type=str, default="", help="Filter by provider") + parser.add_argument("--modality", type=str, default="", help="Filter by modality") + parser.add_argument("--client-profile", type=str, default="", help="Filter by client profile") + parser.add_argument("--client-tag", type=str, default="", help="Filter by client tag") + parser.add_argument("--layer", type=str, default="", help="Filter by layer") + parser.add_argument("--success", type=str, default="", help="Filter 
by success (true/false)") + parser.add_argument("--saved-view", type=str, default="", help="Use saved view ID") + parser.add_argument("--copy", action="store_true", help="Copy URL to clipboard") + args = parser.parse_args() + # Build filters dict from filter arguments (for metrics queries) + filters = {} + if args.provider: + filters["provider"] = args.provider + if args.modality: + filters["modality"] = args.modality + if args.client_profile: + filters["client_profile"] = args.client_profile + if args.client_tag: + filters["client_tag"] = args.client_tag + if args.layer: + filters["layer"] = args.layer + if args.success: + # Convert string "true"/"false" to boolean, otherwise pass as-is + lower = args.success.lower() + if lower == "true": + filters["success"] = True + elif lower == "false": + filters["success"] = False + else: + filters["success"] = args.success + + # Handle dashboard link mode + if args.link: + cmd_dashboard_link( + view=args.view, + provider=args.provider, + modality=args.modality, + client_profile=args.client_profile, + client_tag=args.client_tag, + layer=args.layer, + success=args.success, + saved_view=args.saved_view, + copy=args.copy, + ) + return + # Handle projection mode if args.project: if args.tokens_input <= 0 or args.tokens_output <= 0: @@ -450,25 +816,27 @@ def main(): if args.json: data = { - "totals": metrics.get_totals(), - "providers": metrics.get_provider_summary(), - "routing": metrics.get_routing_breakdown(), - "clients": metrics.get_client_breakdown(), + "totals": metrics.get_totals(**filters), + "providers": metrics.get_provider_summary(**filters), + "routing": metrics.get_routing_breakdown(**filters), + "clients": metrics.get_client_breakdown(**filters), "daily": metrics.get_daily_totals(args.days), - "recent": metrics.get_recent(args.recent or 20), + "recent": metrics.get_recent(args.recent or 20, **filters), } print(json.dumps(data, indent=2, default=str)) metrics.close() return if args.recent: - cmd_recent(metrics, 
args.recent) + cmd_recent(metrics, args.recent, **filters) elif args.daily: cmd_daily(metrics, args.days) elif args.trends: - cmd_trends(metrics, args.trend_days) + cmd_trends(metrics, args.trend_days, **filters) + elif args.suggest: + cmd_suggest(metrics, **filters) else: - cmd_overview(metrics) + cmd_overview(metrics, **filters) metrics.close() diff --git a/faigate/config.py b/faigate/config.py index 689f8e5..c0b4924 100644 --- a/faigate/config.py +++ b/faigate/config.py @@ -986,6 +986,18 @@ def _normalize_policy_select( else: normalized["routing_mode"] = routing_mode.strip() + if extra_keys and "cost_limit_usd_day" in extra_keys: + for limit_field in ("cost_limit_usd_day", "cost_limit_usd_month"): + raw_limit = normalized.get(limit_field) + if raw_limit is None: + normalized[limit_field] = None + elif isinstance(raw_limit, (int, float)) and raw_limit > 0: + normalized[limit_field] = float(raw_limit) + else: + raise ConfigError( + f"Policy '{name}' field '{limit_field}' must be a positive number (USD)" + ) + return normalized @@ -1115,7 +1127,7 @@ def _normalize_client_profiles(data: dict[str, Any]) -> dict[str, Any]: f"client profile '{preset_name}'", dict(preset["profile"]), data.get("providers", {}), - extra_keys={"routing_mode"}, + extra_keys={"routing_mode", "cost_limit_usd_day", "cost_limit_usd_month"}, ) for profile_name, hints in profiles.items(): @@ -1127,7 +1139,7 @@ def _normalize_client_profiles(data: dict[str, Any]) -> dict[str, Any]: f"client profile '{profile_name.strip()}'", hints, data.get("providers", {}), - extra_keys={"routing_mode"}, + extra_keys={"routing_mode", "cost_limit_usd_day", "cost_limit_usd_month"}, ) if default_profile not in normalized_profiles: diff --git a/faigate/config_cli.py b/faigate/config_cli.py new file mode 100644 index 0000000..233a9f7 --- /dev/null +++ b/faigate/config_cli.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +"""faigate-config – Safe config workflows for fusionAIze Gate. 
+ +Usage: + python -m faigate.config_cli preview # Preview changes + python -m faigate.config_cli diff # Show detailed diff + python -m faigate.config_cli apply # Apply with confirmation + python -m faigate.config_cli validate # Validate config syntax +""" + +from __future__ import annotations + +import argparse +import difflib +import os +import sys +from pathlib import Path +from typing import Any + +import yaml + +from .config import ConfigError, load_config +from .wizard import build_config_change_summary + +# Reuse color formatting from cli.py +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +BLUE = "\033[34m" +MAGENTA = "\033[35m" +CYAN = "\033[36m" +RED = "\033[31m" +WHITE = "\033[37m" + + +def _c(text: str, color: str) -> str: + return f"{color}{text}{RESET}" + + +def _load_yaml(path: str | Path) -> dict[str, Any]: + """Load YAML file with error handling.""" + path = Path(path) + if not path.exists(): + print(f"{_c('Error:', RED)} Config file not found: {path}", file=sys.stderr) + sys.exit(1) + + try: + content = path.read_text(encoding="utf-8") + return yaml.safe_load(content) or {} + except yaml.YAMLError as e: + print(f"{_c('Error:', RED)} Invalid YAML in {path}: {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"{_c('Error:', RED)} Failed to read {path}: {e}", file=sys.stderr) + sys.exit(1) + + +def _get_current_config_path() -> Path: + """Get path to current config from environment or default.""" + config_path = os.environ.get("FAIGATE_CONFIG_FILE") + if config_path and Path(config_path).exists(): + return Path(config_path) + + # Try default locations + default_paths = [ + Path("config.yaml"), + Path("/etc/faigate/config.yaml"), + Path.home() / ".config" / "faigate" / "config.yaml", + ] + + for path in default_paths: + if path.exists(): + return path + + print( + f"{_c('Error:', RED)} No config file found. 
Set FAIGATE_CONFIG_FILE or place config.yaml in current directory.", + file=sys.stderr, + ) + sys.exit(1) + + +def cmd_preview(new_config_path: str, current_config_path: str | None = None): + """Preview changes between current and new config.""" + if not current_config_path: + current_config_path = str(_get_current_config_path()) + + # Load both configs to validate syntax + _ = _load_yaml(current_config_path) # Validate current config + new_config = _load_yaml(new_config_path) + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Config Change Preview", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + print(_c(f" Current config: {current_config_path}", DIM)) + print(_c(f" New config: {new_config_path}", DIM)) + print() + + try: + summary = build_config_change_summary( + config_path=current_config_path, + updated_config=new_config, + ) + + # Display summary + added_providers = summary.get("added_providers", []) + replaced_models = summary.get("replaced_models", []) + changed_profile_modes = summary.get("changed_profile_modes", []) + fallback_additions = summary.get("fallback_additions", []) + + if not any([added_providers, replaced_models, changed_profile_modes, fallback_additions]): + print(_c(" No significant changes detected.", GREEN)) + print(_c(" (Configs are identical or changes are outside tracked sections)", DIM)) + print() + return + + if added_providers: + print(_c(" ➕ Added providers:", GREEN)) + for provider in added_providers: + print(f" • {_c(provider, BOLD)}") + print() + + if replaced_models: + print(_c(" 🔄 Changed provider models:", YELLOW)) + for change in replaced_models: + provider = change["provider"] + from_model = change["from_model"] + to_model = change["to_model"] + print(f" • {_c(provider, BOLD)}: {from_model} → {to_model}") + print() + + if changed_profile_modes: + print(_c(" 📋 Changed profile routing modes:", CYAN)) + for change in 
changed_profile_modes: + profile = change["profile"] + from_mode = change["from_mode"] + to_mode = change["to_mode"] + print(f" • {_c(profile, BOLD)}: {from_mode} → {to_mode}") + print() + + if fallback_additions: + print(_c(" ⛓️ Added to fallback chain:", MAGENTA)) + for provider in fallback_additions: + print(f" • {_c(provider, BOLD)}") + print() + + print(_c(" Next steps:", DIM)) + print(f" {_c('View diff:', DIM)} {_c(f'faigate-config diff {new_config_path}', BOLD)}") + print(f" {_c('Apply changes:', DIM)} {_c(f'faigate-config apply {new_config_path}', BOLD)}") + print() + + except Exception as e: + print(f"{_c('Error:', RED)} Failed to analyze config changes: {e}", file=sys.stderr) + sys.exit(1) + + +def cmd_diff(new_config_path: str, current_config_path: str | None = None): + """Show detailed diff between current and new config.""" + if not current_config_path: + current_config_path = str(_get_current_config_path()) + + try: + current_content = Path(current_config_path).read_text(encoding="utf-8").splitlines(keepends=True) + new_content = Path(new_config_path).read_text(encoding="utf-8").splitlines(keepends=True) + except Exception as e: + print(f"{_c('Error:', RED)} Failed to read config files: {e}", file=sys.stderr) + sys.exit(1) + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Config Diff", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + print(_c(f" --- {current_config_path}", RED)) + print(_c(f" +++ {new_config_path}", GREEN)) + print() + + diff = difflib.unified_diff( + current_content, + new_content, + fromfile=current_config_path, + tofile=new_config_path, + lineterm="", + ) + + diff_lines = list(diff) + if not diff_lines: + print(_c(" Configs are identical.", GREEN)) + print() + return + + for line in diff_lines: + if line.startswith("---"): + print(_c(line, RED)) + elif line.startswith("+++"): + print(_c(line, GREEN)) + elif 
line.startswith("@@"): + print(_c(line, CYAN)) + elif line.startswith("-"): + print(_c(line, RED)) + elif line.startswith("+"): + print(_c(line, GREEN)) + else: + print(line.rstrip()) + + print() + + +def cmd_apply(new_config_path: str, current_config_path: str | None = None, force: bool = False): + """Apply new config with safety checks.""" + if not current_config_path: + current_config_path = str(_get_current_config_path()) + + current_path = Path(current_config_path) + new_path = Path(new_config_path) + + if not new_path.exists(): + print(f"{_c('Error:', RED)} New config file not found: {new_config_path}", file=sys.stderr) + sys.exit(1) + + # Preview changes first + _ = _load_yaml(current_path) # Validate current config syntax + new_config = _load_yaml(new_path) + + try: + summary = build_config_change_summary( + config_path=current_config_path, + updated_config=new_config, + ) + except Exception as e: + print(f"{_c('Error:', RED)} Failed to analyze config changes: {e}", file=sys.stderr) + sys.exit(1) + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Apply Config Changes", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + print(_c(f" Current config: {current_config_path}", DIM)) + print(_c(f" New config: {new_config_path}", DIM)) + print() + + # Show summary + added_providers = summary.get("added_providers", []) + replaced_models = summary.get("replaced_models", []) + changed_profile_modes = summary.get("changed_profile_modes", []) + fallback_additions = summary.get("fallback_additions", []) + + has_changes = any([added_providers, replaced_models, changed_profile_modes, fallback_additions]) + + if not has_changes: + print(_c(" No significant changes detected.", GREEN)) + print(_c(" Configs are identical or changes are outside tracked sections.", DIM)) + print() + + if added_providers: + print(_c(" ➕ Will add providers:", GREEN)) + for provider in 
added_providers: + print(f" • {_c(provider, BOLD)}") + + if replaced_models: + print(_c(" 🔄 Will change provider models:", YELLOW)) + for change in replaced_models: + provider = change["provider"] + from_model = change["from_model"] + to_model = change["to_model"] + print(f" • {_c(provider, BOLD)}: {from_model} → {to_model}") + + if changed_profile_modes: + print(_c(" 📋 Will change profile routing modes:", CYAN)) + for change in changed_profile_modes: + profile = change["profile"] + from_mode = change["from_mode"] + to_mode = change["to_mode"] + print(f" • {_c(profile, BOLD)}: {from_mode} → {to_mode}") + + if fallback_additions: + print(_c(" ⛓️ Will add to fallback chain:", MAGENTA)) + for provider in fallback_additions: + print(f" • {_c(provider, BOLD)}") + + print() + + if not force: + print(_c(" ⚠️ Warning: Applying config changes will:", YELLOW)) + print(_c(" 1. Replace the current config file", DIM)) + print(_c(" 2. Require gateway restart to take effect", DIM)) + print() + + try: + response = input(_c(" Continue? (y/N): ", BOLD)).strip().lower() + if response not in ("y", "yes"): + print(_c(" Cancelled.", DIM)) + print() + return + except KeyboardInterrupt: + print() + print(_c(" Cancelled.", DIM)) + print() + return + + # Create backup + backup_path = current_path.with_suffix(current_path.suffix + ".bak") + try: + import shutil + + shutil.copy2(current_path, backup_path) + print(_c(f" ✓ Created backup: {backup_path}", GREEN)) + except Exception as e: + print(f"{_c('Warning:', YELLOW)} Failed to create backup: {e}") + + # Apply config + try: + new_content = new_path.read_text(encoding="utf-8") + current_path.write_text(new_content, encoding="utf-8") + print(_c(f" ✓ Config applied: {current_path}", GREEN)) + except Exception as e: + print(f"{_c('Error:', RED)} Failed to apply config: {e}", file=sys.stderr) + sys.exit(1) + + print() + print(_c(" Next steps:", DIM)) + print(_c(" 1. 
Restart the gateway:", BOLD)) + print(_c(" systemctl restart faigate # systemd", DIM)) + print(_c(" or kill -HUP $(pgrep -f 'faigate') # reload if supported", DIM)) + print() + print(_c(" 2. Verify config:", BOLD)) + print(_c(f" faigate-config validate {current_path}", DIM)) + print() + + +def cmd_validate(config_path: str): + """Validate config syntax and semantics.""" + path = Path(config_path) + if not path.exists(): + print(f"{_c('Error:', RED)} Config file not found: {config_path}", file=sys.stderr) + sys.exit(1) + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Config Validation", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + print(_c(f" Validating: {config_path}", DIM)) + print() + + # Try to load YAML first + try: + content = path.read_text(encoding="utf-8") + _ = yaml.safe_load(content) # Validate YAML syntax + print(_c(" ✓ YAML syntax is valid", GREEN)) + except yaml.YAMLError as e: + print(f"{_c(' ✗ YAML syntax error:', RED)} {e}") + sys.exit(1) + + # Try to load as Config object + try: + config = load_config(config_path) + print(_c(" ✓ Config structure is valid", GREEN)) + except ConfigError as e: + print(f"{_c(' ✗ Config validation error:', RED)} {e}") + sys.exit(1) + except Exception as e: + print(f"{_c(' ✗ Unexpected error:', RED)} {e}") + sys.exit(1) + + # Basic checks + issues = [] + + # Check for required sections + if not getattr(config, "providers", None): + issues.append("No providers defined") + + if not getattr(config, "routing_modes", None): + issues.append("No routing_modes defined") + + if not getattr(config, "client_profiles", None): + issues.append("No client_profiles defined") + + if issues: + print() + print(_c(" ⚠️ Config warnings:", YELLOW)) + for issue in issues: + print(f" • {issue}") + + print() + print(_c(" Validation passed successfully.", GREEN)) + print() + + +def cmd_discover(json_output: bool = False, no_scan: bool = 
False, no_grid: bool = False, timeout: float = 3.0): + """Discover local workers and display results.""" + import asyncio + + from .local_discovery import discover_local_workers, generate_provider_config + + print() + print(_c(" ╔══════════════════════════════════════╗", BLUE)) + print(_c(" ║", BLUE) + _c(" Local Worker Discovery", BOLD) + _c(" ║", BLUE)) + print(_c(" ╚══════════════════════════════════════╝", BLUE)) + print() + + workers = asyncio.run( + discover_local_workers(scan_ports=not no_scan, check_grid=not no_grid, timeout_per_worker=timeout) + ) + + if json_output: + import json + + result = [ + { + "name": w["name"], + "base_url": w["base_url"], + "healthy": w["healthy"], + "models": w["models"], + "config": generate_provider_config(w), + } + for w in workers + ] + print(json.dumps(result, indent=2)) + return + + if not workers: + print(_c(" No local workers discovered.", DIM)) + print() + return + + print(_c(f" Found {len(workers)} local worker(s):", GREEN)) + print() + + for i, worker in enumerate(workers, 1): + status = _c("✓", GREEN) if worker["healthy"] else _c("✗", RED) + name = _c(worker["name"], BOLD) + base_url = worker["base_url"] + models = worker["models"] + + print(f" {i}. {status} {name} – {base_url}") + + if worker["healthy"]: + if models: + print(f" {_c('Models:', DIM)} {', '.join(models[:3])}") + if len(models) > 3: + print(f" {_c(' ... 
and', DIM)} {len(models) - 3} more") + else: + print(f" {_c('Models:', DIM)} Not discoverable") + else: + print(f" {_c('Status:', DIM)} Health check failed") + print() + + # Show configuration suggestions + print(_c(" Configuration suggestions:", CYAN)) + print() + + for worker in workers: + if worker["healthy"]: + config = generate_provider_config(worker) + provider_name = worker["name"] + print(f" To add {_c(provider_name, BOLD)} to config.yaml:") + print(f" {provider_name}:") + print(" contract: local-worker") + print(f" backend: {config.get('backend', 'openai-compat')}") + print(f" base_url: {config['base_url']}") + if "model" in config: + print(f" model: {config['model']}") + print(" tier: local") + print(" capabilities:") + print(" local: true") + print(" cloud: false") + print(" network_zone: local") + print(" cost_tier: local") + print(" latency_tier: local") + print() + + print(_c(" Next steps:", DIM)) + print(f" {_c('Add a worker:', BOLD)} Edit config.yaml and add provider configuration") + print(f" {_c('Validate config:', BOLD)} faigate-config validate config.yaml") + print(f" {_c('Apply changes:', BOLD)} faigate-config apply config.yaml") + print() + + +def main(): + parser = argparse.ArgumentParser( + prog="faigate-config", + description="Safe config workflows for fusionAIze Gate", + ) + + subparsers = parser.add_subparsers(dest="command", required=True, help="Command to execute") + + # Preview command + preview_parser = subparsers.add_parser("preview", help="Preview config changes") + preview_parser.add_argument("new_config", help="Path to new config YAML file") + preview_parser.add_argument("--current-config", help="Path to current config (default: auto-detect)") + + # Diff command + diff_parser = subparsers.add_parser("diff", help="Show detailed config diff") + diff_parser.add_argument("new_config", help="Path to new config YAML file") + diff_parser.add_argument("--current-config", help="Path to current config (default: auto-detect)") + + # Apply 
command + apply_parser = subparsers.add_parser("apply", help="Apply config changes") + apply_parser.add_argument("new_config", help="Path to new config YAML file") + apply_parser.add_argument("--current-config", help="Path to current config (default: auto-detect)") + apply_parser.add_argument("--force", action="store_true", help="Skip confirmation prompt") + + # Validate command + validate_parser = subparsers.add_parser("validate", help="Validate config syntax") + validate_parser.add_argument("config", help="Path to config YAML file") + + # Discover command + discover_parser = subparsers.add_parser("discover", help="Discover local workers") + discover_parser.add_argument("--json", action="store_true", help="Output as JSON") + discover_parser.add_argument("--no-scan", action="store_true", help="Skip port scanning") + discover_parser.add_argument("--no-grid", action="store_true", help="Skip Grid integration check") + discover_parser.add_argument("--timeout", type=float, default=3.0, help="Timeout per worker in seconds") + + args = parser.parse_args() + + if args.command == "preview": + cmd_preview(args.new_config, args.current_config) + elif args.command == "diff": + cmd_diff(args.new_config, args.current_config) + elif args.command == "apply": + cmd_apply(args.new_config, args.current_config, args.force) + elif args.command == "validate": + cmd_validate(args.config) + elif args.command == "discover": + cmd_discover(json_output=args.json, no_scan=args.no_scan, no_grid=args.no_grid, timeout=args.timeout) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/faigate/lane_registry.py b/faigate/lane_registry.py index 5376b3e..5c7faaa 100644 --- a/faigate/lane_registry.py +++ b/faigate/lane_registry.py @@ -225,6 +225,318 @@ def get_active_model_label(canonical_id: str) -> str: "preferred_degrades": ["aggregator/kilo-glm5-free", "google/gemini-flash-lite"], "last_reviewed": "2026-03-22", }, + "mistral/mistral-large-latest": { + 
"family": "mistral", + "name": "Mistral Large", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["openai/gpt-4o", "google/gemini-pro-high"], + "last_reviewed": "2026-04-03", + }, + "groq/llama-3.3-70b-versatile": { + "family": "groq", + "name": "Llama 3.3 70B", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["google/gemini-flash", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "xai/grok-3": { + "family": "xai", + "name": "Grok 3", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["openai/gpt-4o", "google/gemini-pro-high"], + "last_reviewed": "2026-04-03", + }, + "zai/glm-4.7": { + "family": "zai", + "name": "GLM 4.7", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["openai/gpt-4o", "google/gemini-pro-high"], + "last_reviewed": "2026-04-03", + }, + "cerebras/llama3.3-70b": { + "family": "cerebras", + "name": "Llama 3.3 70B", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["google/gemini-flash", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "opencode/claude-opus-4-6": { + "family": "opencode", + "name": "OpenCode Claude Opus", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + 
"reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + "last_reviewed": "2026-04-03", + }, + "huggingface/deepseek-ai/DeepSeek-R1": { + "family": "huggingface", + "name": "DeepSeek R1", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "high", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["deepseek/chat", "google/gemini-flash"], + "last_reviewed": "2026-04-03", + }, + "moonshot/kimi-k2.5": { + "family": "moonshot", + "name": "Kimi K2.5", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["openai/gpt-4o", "google/gemini-pro-high"], + "last_reviewed": "2026-04-03", + }, + "ag/claude-opus-4-6-thinking": { + "family": "google-antigravity", + "name": "Opus 4.6 Thinking (Antigravity)", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["ag/claude-sonnet-4-6", "anthropic/sonnet-4.6", "openai/gpt-4o"], + "last_reviewed": "2026-04-03", + }, + "ag/claude-sonnet-4-6": { + "family": "google-antigravity", + "name": "Sonnet 4.6 (Antigravity)", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["ag/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + "last_reviewed": "2026-04-03", + }, + "ag/gemini-3.1-pro-high": { + "family": "google-antigravity", + "name": "Gemini 3.1 Pro High (Antigravity)", + "cluster": "quality-workhorse", + 
"benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "preferred_degrades": ["ag/gemini-3.1-pro-low", "google/gemini-pro-low", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "ag/gemini-3.1-pro-low": { + "family": "google-antigravity", + "name": "Gemini 3.1 Pro Low (Antigravity)", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["ag/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "ag/gemini-3-flash": { + "family": "google-antigravity", + "name": "Gemini 3 Flash (Antigravity)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["ag/gpt-oss-120b-medium", "google/gemini-flash-lite", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "ag/gpt-oss-120b-medium": { + "family": "google-antigravity", + "name": "GPT OSS 120B Medium (Antigravity)", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["openai/gpt-4o", "google/gemini-pro-high", "deepseek/reasoner"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-3-flash-preview": { + "family": "google-gemini-cli", + "name": "Gemini 3 Flash Preview (Gemini CLI)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["gc/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + 
"gc/gemini-3-pro-preview": { + "family": "google-gemini-cli", + "name": "Gemini 3 Pro Preview (Gemini CLI)", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "preferred_degrades": ["gc/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-3.1-pro-high": { + "family": "google-gemini-cli", + "name": "Gemini 3.1 Pro High (Gemini CLI)", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "preferred_degrades": ["gc/gemini-3.1-pro-low", "google/gemini-pro-low", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-3.1-pro-low": { + "family": "google-gemini-cli", + "name": "Gemini 3.1 Pro Low (Gemini CLI)", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["gc/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-3-flash": { + "family": "google-gemini-cli", + "name": "Gemini 3 Flash (Gemini CLI)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["gc/gemini-2.5-flash", "google/gemini-flash-lite", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-2.5-pro": { + "family": "google-gemini-cli", + "name": "Gemini 2.5 Pro (Gemini CLI)", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + 
"preferred_degrades": ["gc/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-2.5-flash": { + "family": "google-gemini-cli", + "name": "Gemini 2.5 Flash (Gemini CLI)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["gc/gemini-2.0-flash", "google/gemini-flash-lite", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "gc/gemini-2.0-flash": { + "family": "google-gemini-cli", + "name": "Gemini 2.0 Flash (Gemini CLI)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "budget", + "reasoning_strength": "low", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["google/gemini-flash-lite", "deepseek/chat", "anthropic/haiku-3.5"], + "last_reviewed": "2026-04-03", + }, + "qw/qwen3-coder-plus": { + "family": "qwen-portal", + "name": "Qwen3 Coder Plus (Qwen Portal)", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "preferred_degrades": ["qw/qwen3-coder-flash", "qw/coder-model", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "qw/qwen3-coder-flash": { + "family": "qwen-portal", + "name": "Qwen3 Coder Flash (Qwen Portal)", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "preferred_degrades": ["qw/coder-model", "qw/vision-model", "google/gemini-flash"], + "last_reviewed": "2026-04-03", + }, + "qw/vision-model": { + "family": "qwen-portal", + "name": "Vision Model (Qwen Portal)", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + 
"reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "preferred_degrades": ["qw/coder-model", "google/gemini-pro-low", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, + "qw/coder-model": { + "family": "qwen-portal", + "name": "Coder Model (Qwen Portal)", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "high", + "preferred_degrades": ["qw/vision-model", "google/gemini-pro-low", "deepseek/chat"], + "last_reviewed": "2026-04-03", + }, } _PROVIDER_LANE_BINDINGS: dict[str, dict[str, Any]] = { @@ -235,45 +547,591 @@ def get_active_model_label(canonical_id: str) -> str: "route_type": "direct", "cluster": "elite-reasoning", "benchmark_cluster": "quality-coding", - "quality_tier": "premium", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "anthropic/opus-4.6", + "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + }, + "deepseek-chat": { + "family": "deepseek", + "name": "workhorse", + "canonical_model": "deepseek/chat", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "deepseek/chat", + "degrade_to": ["google/gemini-flash", "aggregator/openrouter-auto"], + }, + "deepseek-reasoner": { + "family": "deepseek", + "name": "reasoning", + "canonical_model": "deepseek/reasoner", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "reasoning-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "deepseek/reasoner", + "degrade_to": ["deepseek/chat", "google/gemini-pro-high"], + }, + 
"gemini-flash": { + "family": "google", + "name": "fast", + "canonical_model": "google/gemini-flash", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "google/gemini-flash", + "degrade_to": ["google/gemini-flash-lite", "deepseek/chat"], + }, + "gemini-flash-lite": { + "family": "google", + "name": "cheap", + "canonical_model": "google/gemini-flash-lite", + "route_type": "direct", + "cluster": "budget-general", + "benchmark_cluster": "budget-chat", + "quality_tier": "budget", + "reasoning_strength": "low", + "context_strength": "mid", + "tool_strength": "low", + "same_model_group": "google/gemini-flash-lite", + "degrade_to": ["aggregator/kilo-glm5-free", "aggregator/blackbox-grok-code-fast"], + }, + "openai-gpt4o": { + "family": "openai", + "name": "balanced", + "canonical_model": "openai/gpt-4o", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "openai/gpt-4o", + "degrade_to": ["openai/gpt-4o-mini", "google/gemini-pro-high"], + }, + "openai-images": { + "family": "openai", + "name": "image", + "canonical_model": "openai/gpt-image-1", + "route_type": "direct", + "cluster": "image-quality", + "benchmark_cluster": "image-generation", + "quality_tier": "premium", + "reasoning_strength": "n/a", + "context_strength": "n/a", + "tool_strength": "n/a", + "same_model_group": "openai/gpt-image-1", + "degrade_to": [], + }, + "openrouter-fallback": { + "family": "openrouter", + "name": "router", + "canonical_model": "aggregator/openrouter-auto", + "route_type": "aggregator", + "cluster": "aggregator-fallback", + "benchmark_cluster": "marketplace-general", + "quality_tier": "variable", + "reasoning_strength": 
"variable", + "context_strength": "variable", + "tool_strength": "variable", + "same_model_group": "aggregator/openrouter-auto", + "degrade_to": ["aggregator/kilo-glm5-free", "aggregator/blackbox-grok-code-fast"], + }, + "kilocode": { + "family": "kilo", + "name": "free", + "canonical_model": "aggregator/kilo-glm5-free", + "route_type": "aggregator", + "cluster": "budget-general", + "benchmark_cluster": "free-coding", + "quality_tier": "free", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "low", + "same_model_group": "aggregator/kilo-glm5-free", + "degrade_to": ["aggregator/blackbox-grok-code-fast", "google/gemini-flash-lite"], + }, + "kilo-sonnet": { + "family": "kilo", + "name": "sonnet", + "canonical_model": "anthropic/sonnet-4.6", + "route_type": "aggregator", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/sonnet-4.6", + "degrade_to": ["anthropic/haiku-4.5", "deepseek/chat", "google/gemini-flash"], + }, + "kilo-opus": { + "family": "kilo", + "name": "opus", + "canonical_model": "anthropic/opus-4.6", + "route_type": "aggregator", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/opus-4.6", + "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + }, + "blackbox-free": { + "family": "blackbox", + "name": "burst", + "canonical_model": "aggregator/blackbox-grok-code-fast", + "route_type": "aggregator", + "cluster": "budget-general", + "benchmark_cluster": "budget-chat", + "quality_tier": "budget", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "mid", + "same_model_group": "aggregator/blackbox-grok-code-fast", + "degrade_to": 
["aggregator/kilo-glm5-free", "google/gemini-flash-lite"], + }, + "clawrouter": { + "family": "blockrun", + "name": "wallet-router", + "canonical_model": "aggregator/openrouter-auto", + "route_type": "wallet-router", + "cluster": "aggregator-fallback", + "benchmark_cluster": "marketplace-general", + "quality_tier": "variable", + "reasoning_strength": "variable", + "context_strength": "variable", + "tool_strength": "variable", + "same_model_group": "aggregator/openrouter-auto", + "degrade_to": ["aggregator/kilo-glm5-free", "aggregator/blackbox-grok-code-fast"], + }, + "kilo-auto-frontier": { + "family": "kilo", + "name": "frontier", + "canonical_model": "anthropic/opus-4.6", + "route_type": "aggregator", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/opus-4.6", + "degrade_to": ["kilo-sonnet", "openai/gpt-4o", "deepseek/reasoner"], + }, + "kilo-auto-balanced": { + "family": "kilo", + "name": "balanced", + "canonical_model": "anthropic/sonnet-4.6", + "route_type": "aggregator", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "anthropic/sonnet-4.6", + "degrade_to": ["google/gemini-pro-high", "deepseek/chat"], + }, + "kilo-auto-free": { + "family": "kilo", + "name": "free", + "canonical_model": "aggregator/kilo-glm5-free", + "route_type": "aggregator", + "cluster": "budget-general", + "benchmark_cluster": "free-coding", + "quality_tier": "free", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "low", + "same_model_group": "aggregator/kilo-glm5-free", + "degrade_to": ["aggregator/blackbox-grok-code-fast", "google/gemini-flash-lite"], + }, + "mistral": { + "family": "mistral", + "name": "quality", + 
"canonical_model": "mistral/mistral-large-latest", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "mistral/mistral-large-latest", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "groq": { + "family": "groq", + "name": "fast", + "canonical_model": "groq/llama-3.3-70b-versatile", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "groq/llama-3.3-70b-versatile", + "degrade_to": ["google/gemini-flash", "deepseek/chat"], + }, + "xai": { + "family": "xai", + "name": "quality", + "canonical_model": "xai/grok-3", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "xai/grok-3", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "zai": { + "family": "zai", + "name": "quality", + "canonical_model": "zai/glm-4.7", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "zai/glm-4.7", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "cerebras": { + "family": "cerebras", + "name": "fast", + "canonical_model": "cerebras/llama3.3-70b", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "cerebras/llama3.3-70b", + 
"degrade_to": ["google/gemini-flash", "deepseek/chat"], + }, + "opencode": { + "family": "opencode", + "name": "quality", + "canonical_model": "opencode/claude-opus-4-6", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "opencode/claude-opus-4-6", + "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + }, + "huggingface": { + "family": "huggingface", + "name": "workhorse", + "canonical_model": "huggingface/deepseek-ai/DeepSeek-R1", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "high", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "huggingface/deepseek-ai/DeepSeek-R1", + "degrade_to": ["deepseek/chat", "google/gemini-flash"], + }, + "moonshot": { + "family": "moonshot", + "name": "quality", + "canonical_model": "moonshot/kimi-k2.5", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "moonshot/kimi-k2.5", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "minimax": { + "family": "minimax", + "name": "quality", + "canonical_model": "minimax/MiniMax-M2.7", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "minimax/MiniMax-M2.7", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "volcengine": { + "family": "volcengine", + "name": "quality", + "canonical_model": "volcengine/doubao-seed-1-8-251228", + 
"route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "volcengine/doubao-seed-1-8-251228", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "byteplus": { + "family": "byteplus", + "name": "quality", + "canonical_model": "byteplus/seed-1-8-251228", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "byteplus/seed-1-8-251228", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "qwen": { + "family": "qwen", + "name": "quality", + "canonical_model": "qwen/qwen3.6-plus", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "qwen/qwen3.6-plus", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], + }, + "openai-codex": { + "family": "openai", + "name": "codex", + "canonical_model": "openai-codex/gpt-5.3-codex", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "openai-codex/gpt-5.3-codex", + "degrade_to": ["openai/gpt-4o", "anthropic/sonnet-4.6"], + }, + "claude-code": { + "family": "anthropic", + "name": "code", + "canonical_model": "claude-code", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": 
"claude-code", + "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o"], + }, + "google-antigravity-opus": { + "family": "google-antigravity", + "name": "opus", + "canonical_model": "ag/claude-opus-4-6-thinking", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "ag/claude-opus-4-6-thinking", + "degrade_to": ["ag/claude-sonnet-4-6", "anthropic/sonnet-4.6", "openai/gpt-4o"], + }, + "google-antigravity-sonnet": { + "family": "google-antigravity", + "name": "sonnet", + "canonical_model": "ag/claude-sonnet-4-6", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "ag/claude-sonnet-4-6", + "degrade_to": ["ag/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + }, + "google-antigravity-gemini-pro-high": { + "family": "google-antigravity", + "name": "gemini-pro-high", + "canonical_model": "ag/gemini-3.1-pro-high", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", "reasoning_strength": "high", "context_strength": "high", + "tool_strength": "high", + "same_model_group": "ag/gemini-3.1-pro-high", + "degrade_to": ["ag/gemini-3.1-pro-low", "google/gemini-pro-low", "deepseek/chat"], + }, + "google-antigravity-gemini-pro-low": { + "family": "google-antigravity", + "name": "gemini-pro-low", + "canonical_model": "ag/gemini-3.1-pro-low", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", "tool_strength": "medium", - "same_model_group": "anthropic/opus-4.6", - "degrade_to": 
["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + "same_model_group": "ag/gemini-3.1-pro-low", + "degrade_to": ["ag/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], }, - "deepseek-chat": { - "family": "deepseek", - "name": "workhorse", - "canonical_model": "deepseek/chat", + "google-antigravity-gemini-flash": { + "family": "google-antigravity", + "name": "gemini-flash", + "canonical_model": "ag/gemini-3-flash", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "ag/gemini-3-flash", + "degrade_to": ["ag/gpt-oss-120b-medium", "google/gemini-flash-lite", "deepseek/chat"], + }, + "google-antigravity-gpt-oss-120b-medium": { + "family": "google-antigravity", + "name": "gpt-oss-120b-medium", + "canonical_model": "ag/gpt-oss-120b-medium", "route_type": "direct", "cluster": "balanced-workhorse", "benchmark_cluster": "balanced-coding", "quality_tier": "mid", "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "ag/gpt-oss-120b-medium", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high", "deepseek/reasoner"], + }, + "google-gemini-cli-flash-preview": { + "family": "google-gemini-cli", + "name": "flash-preview", + "canonical_model": "gc/gemini-3-flash-preview", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", "context_strength": "mid", "tool_strength": "medium", - "same_model_group": "deepseek/chat", - "degrade_to": ["google/gemini-flash", "aggregator/openrouter-auto"], + "same_model_group": "gc/gemini-3-flash-preview", + "degrade_to": ["gc/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], }, - "deepseek-reasoner": { - "family": "deepseek", - "name": "reasoning", - "canonical_model": 
"deepseek/reasoner", + "google-gemini-cli-pro-preview": { + "family": "google-gemini-cli", + "name": "pro-preview", + "canonical_model": "gc/gemini-3-pro-preview", "route_type": "direct", - "cluster": "elite-reasoning", - "benchmark_cluster": "reasoning-coding", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", + "quality_tier": "high", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "gc/gemini-3-pro-preview", + "degrade_to": ["gc/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + }, + "google-gemini-cli-pro-high": { + "family": "google-gemini-cli", + "name": "pro-high", + "canonical_model": "gc/gemini-3.1-pro-high", + "route_type": "direct", + "cluster": "quality-workhorse", + "benchmark_cluster": "quality-coding", "quality_tier": "high", "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "gc/gemini-3.1-pro-high", + "degrade_to": ["gc/gemini-3.1-pro-low", "google/gemini-pro-low", "deepseek/chat"], + }, + "google-gemini-cli-pro-low": { + "family": "google-gemini-cli", + "name": "pro-low", + "canonical_model": "gc/gemini-3.1-pro-low", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "gc/gemini-3.1-pro-low", + "degrade_to": ["gc/gemini-3-flash", "google/gemini-flash", "deepseek/chat"], + }, + "google-gemini-cli-flash": { + "family": "google-gemini-cli", + "name": "flash", + "canonical_model": "gc/gemini-3-flash", + "route_type": "direct", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", "context_strength": "mid", "tool_strength": "medium", - "same_model_group": "deepseek/reasoner", - "degrade_to": ["deepseek/chat", 
"google/gemini-pro-high"], + "same_model_group": "gc/gemini-3-flash", + "degrade_to": ["gc/gemini-2.5-flash", "google/gemini-flash-lite", "deepseek/chat"], }, - "gemini-flash": { - "family": "google", - "name": "fast", - "canonical_model": "google/gemini-flash", + "google-gemini-cli-2-5-pro": { + "family": "google-gemini-cli", + "name": "2-5-pro", + "canonical_model": "gc/gemini-2.5-pro", + "route_type": "direct", + "cluster": "elite-reasoning", + "benchmark_cluster": "quality-coding", + "quality_tier": "premium", + "reasoning_strength": "high", + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "gc/gemini-2.5-pro", + "degrade_to": ["gc/gemini-3.1-pro-high", "google/gemini-pro-high", "deepseek/reasoner"], + }, + "google-gemini-cli-2-5-flash": { + "family": "google-gemini-cli", + "name": "2-5-flash", + "canonical_model": "gc/gemini-2.5-flash", "route_type": "direct", "cluster": "fast-workhorse", "benchmark_cluster": "fast-general", @@ -281,27 +1139,27 @@ def get_active_model_label(canonical_id: str) -> str: "reasoning_strength": "mid", "context_strength": "mid", "tool_strength": "medium", - "same_model_group": "google/gemini-flash", - "degrade_to": ["google/gemini-flash-lite", "deepseek/chat"], + "same_model_group": "gc/gemini-2.5-flash", + "degrade_to": ["gc/gemini-2.0-flash", "google/gemini-flash-lite", "deepseek/chat"], }, - "gemini-flash-lite": { - "family": "google", - "name": "cheap", - "canonical_model": "google/gemini-flash-lite", + "google-gemini-cli-2-0-flash": { + "family": "google-gemini-cli", + "name": "2-0-flash", + "canonical_model": "gc/gemini-2.0-flash", "route_type": "direct", - "cluster": "budget-general", - "benchmark_cluster": "budget-chat", + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", "quality_tier": "budget", "reasoning_strength": "low", "context_strength": "mid", - "tool_strength": "low", - "same_model_group": "google/gemini-flash-lite", - "degrade_to": ["aggregator/kilo-glm5-free", 
"aggregator/blackbox-grok-code-fast"], + "tool_strength": "medium", + "same_model_group": "gc/gemini-2.0-flash", + "degrade_to": ["google/gemini-flash-lite", "deepseek/chat", "anthropic/haiku-3.5"], }, - "openai-gpt4o": { - "family": "openai", - "name": "balanced", - "canonical_model": "openai/gpt-4o", + "qwen-portal-coder-plus": { + "family": "qwen-portal", + "name": "coder-plus", + "canonical_model": "qw/qwen3-coder-plus", "route_type": "direct", "cluster": "quality-workhorse", "benchmark_cluster": "quality-coding", @@ -309,105 +1167,105 @@ def get_active_model_label(canonical_id: str) -> str: "reasoning_strength": "high", "context_strength": "high", "tool_strength": "high", - "same_model_group": "openai/gpt-4o", - "degrade_to": ["openai/gpt-4o-mini", "google/gemini-pro-high"], + "same_model_group": "qw/qwen3-coder-plus", + "degrade_to": ["qw/qwen3-coder-flash", "qw/coder-model", "deepseek/chat"], }, - "openai-images": { - "family": "openai", - "name": "image", - "canonical_model": "openai/gpt-image-1", + "qwen-portal-coder-flash": { + "family": "qwen-portal", + "name": "coder-flash", + "canonical_model": "qw/qwen3-coder-flash", "route_type": "direct", - "cluster": "image-quality", - "benchmark_cluster": "image-generation", - "quality_tier": "premium", - "reasoning_strength": "n/a", - "context_strength": "n/a", - "tool_strength": "n/a", - "same_model_group": "openai/gpt-image-1", - "degrade_to": [], + "cluster": "fast-workhorse", + "benchmark_cluster": "fast-general", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "qw/qwen3-coder-flash", + "degrade_to": ["qw/coder-model", "qw/vision-model", "google/gemini-flash"], }, - "openrouter-fallback": { - "family": "openrouter", - "name": "router", - "canonical_model": "aggregator/openrouter-auto", - "route_type": "aggregator", - "cluster": "aggregator-fallback", - "benchmark_cluster": "marketplace-general", - "quality_tier": 
"variable", - "reasoning_strength": "variable", - "context_strength": "variable", - "tool_strength": "variable", - "same_model_group": "aggregator/openrouter-auto", - "degrade_to": ["aggregator/kilo-glm5-free", "aggregator/blackbox-grok-code-fast"], + "qwen-portal-vision": { + "family": "qwen-portal", + "name": "vision", + "canonical_model": "qw/vision-model", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "mid", + "context_strength": "high", + "tool_strength": "medium", + "same_model_group": "qw/vision-model", + "degrade_to": ["qw/coder-model", "google/gemini-pro-low", "deepseek/chat"], }, - "kilocode": { - "family": "kilo", - "name": "free", - "canonical_model": "aggregator/kilo-glm5-free", - "route_type": "aggregator", - "cluster": "budget-general", - "benchmark_cluster": "free-coding", - "quality_tier": "free", + "qwen-portal-coder": { + "family": "qwen-portal", + "name": "coder", + "canonical_model": "qw/coder-model", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", "reasoning_strength": "mid", - "context_strength": "mid", - "tool_strength": "low", - "same_model_group": "aggregator/kilo-glm5-free", - "degrade_to": ["aggregator/blackbox-grok-code-fast", "google/gemini-flash-lite"], + "context_strength": "high", + "tool_strength": "high", + "same_model_group": "qw/coder-model", + "degrade_to": ["qw/vision-model", "google/gemini-pro-low", "deepseek/chat"], }, - "kilo-sonnet": { - "family": "kilo", - "name": "sonnet", - "canonical_model": "anthropic/sonnet-4.6", - "route_type": "aggregator", + "github-copilot": { + "family": "github", + "name": "copilot", + "canonical_model": "github-copilot/gpt-4o", + "route_type": "direct", "cluster": "quality-workhorse", "benchmark_cluster": "quality-coding", "quality_tier": "high", "reasoning_strength": "high", "context_strength": "high", - 
"tool_strength": "high", - "same_model_group": "anthropic/sonnet-4.6", - "degrade_to": ["anthropic/haiku-4.5", "deepseek/chat", "google/gemini-flash"], + "tool_strength": "medium", + "same_model_group": "github-copilot/gpt-4o", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], }, - "kilo-opus": { - "family": "kilo", - "name": "opus", - "canonical_model": "anthropic/opus-4.6", - "route_type": "aggregator", - "cluster": "elite-reasoning", + "synthetic": { + "family": "synthetic", + "name": "workhorse", + "canonical_model": "synthetic/hf:MiniMaxAI/MiniMax-M2.1", + "route_type": "direct", + "cluster": "balanced-workhorse", + "benchmark_cluster": "balanced-coding", + "quality_tier": "mid", + "reasoning_strength": "high", + "context_strength": "mid", + "tool_strength": "medium", + "same_model_group": "synthetic/hf:MiniMaxAI/MiniMax-M2.1", + "degrade_to": ["deepseek/chat", "google/gemini-flash"], + }, + "kimi-coding": { + "family": "moonshot", + "name": "coding", + "canonical_model": "kimi-coding/k2p5", + "route_type": "direct", + "cluster": "quality-workhorse", "benchmark_cluster": "quality-coding", - "quality_tier": "premium", + "quality_tier": "high", "reasoning_strength": "high", "context_strength": "high", - "tool_strength": "high", - "same_model_group": "anthropic/opus-4.6", - "degrade_to": ["anthropic/sonnet-4.6", "openai/gpt-4o", "deepseek/reasoner"], + "tool_strength": "medium", + "same_model_group": "kimi-coding/k2p5", + "degrade_to": ["openai/gpt-4o", "google/gemini-pro-high"], }, - "blackbox-free": { - "family": "blackbox", - "name": "burst", - "canonical_model": "aggregator/blackbox-grok-code-fast", + "vercel-ai-gateway": { + "family": "vercel", + "name": "gateway", + "canonical_model": "vercel-ai-gateway/anthropic/claude-opus-4.6", "route_type": "aggregator", - "cluster": "budget-general", - "benchmark_cluster": "budget-chat", - "quality_tier": "budget", - "reasoning_strength": "mid", - "context_strength": "mid", - "tool_strength": "mid", - 
"same_model_group": "aggregator/blackbox-grok-code-fast", - "degrade_to": ["aggregator/kilo-glm5-free", "google/gemini-flash-lite"], - }, - "clawrouter": { - "family": "blockrun", - "name": "wallet-router", - "canonical_model": "aggregator/openrouter-auto", - "route_type": "wallet-router", "cluster": "aggregator-fallback", "benchmark_cluster": "marketplace-general", "quality_tier": "variable", "reasoning_strength": "variable", "context_strength": "variable", "tool_strength": "variable", - "same_model_group": "aggregator/openrouter-auto", + "same_model_group": "vercel-ai-gateway/anthropic/claude-opus-4.6", "degrade_to": ["aggregator/kilo-glm5-free", "aggregator/blackbox-grok-code-fast"], }, } @@ -598,6 +1456,57 @@ def _lane_binding_with_freshness(binding: dict[str, Any]) -> dict[str, Any]: ("low-cost BLACKBOX routes can shift quickly in pricing, auth behavior, or model availability"), ], }, + "kilo-auto-frontier": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "supports_models_probe": False, + "notes": [ + "aggregator route uses a shallow chat probe instead of assuming /models support", + "paid Kilo routes should be revalidated against current gateway behavior periodically", + ], + }, + "kilo-auto-balanced": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "supports_models_probe": False, + "notes": [ + "aggregator route uses a shallow chat probe instead of assuming /models support", + "paid Kilo routes should be revalidated 
against current gateway behavior periodically", + ], + }, + "kilo-auto-free": { + "profile": "kilo-openai-compat", + "compatibility": "aggregator", + "probe_confidence": "medium", + "auth_mode": "bearer", + "probe_strategy": "chat", + "probe_payload_kind": "kilo-chat-minimal", + "probe_payload_text": "ping", + "probe_payload_max_tokens": 1, + "models_path": "", + "chat_path": "/chat/completions", + "supports_models_probe": False, + "notes": [ + "aggregator route uses a shallow chat probe instead of assuming /models support", + "free-tier model availability and path behavior should be revalidated regularly", + ], + }, } _CANONICAL_MODEL_ROUTE_REGISTRY: dict[str, list[dict[str, Any]]] = { diff --git a/faigate/local_discovery.py b/faigate/local_discovery.py new file mode 100644 index 0000000..59fa107 --- /dev/null +++ b/faigate/local_discovery.py @@ -0,0 +1,392 @@ +"""Local worker discovery for fusionAIze Gate. + +This module provides auto-discovery of local AI model workers (Ollama, vLLM, LM Studio, etc.) +and integration with fusionAIze Grid when available. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +from typing import Any, TypedDict + +import httpx + +from .registry import LOCAL + +logger = logging.getLogger(__name__) + + +class GpuInfo(TypedDict, total=False): + """GPU metrics from a local worker.""" + + gpu_name: str + vram_total_mb: int + vram_used_mb: int + vram_free_mb: int + utilization_pct: float + queue_depth: int + + +class DiscoveredWorker(TypedDict): + """A discovered local worker instance.""" + + name: str # Canonical name (e.g., "ollama", "vllm") + base_url: str # Full base URL including port and /v1 path + healthy: bool # Whether the worker responds to health check + models: list[str] # List of available model IDs (dynamically enumerated) + dynamic_models: bool # Whether models were fetched from /v1/models at discovery time + capabilities: dict[str, Any] # Capabilities inferred from worker type + gpu_info: GpuInfo | None # GPU/VRAM metrics if available + + +# Default ports for known local workers +DEFAULT_PORTS = { + "ollama": 11434, + "vllm": 8000, + "lmstudio": 1234, + "litellm": 4000, +} + +# Health check endpoints and expected response patterns +HEALTH_CHECKS = { + "ollama": ("/v1/models", {"object": "list"}), + "vllm": ("/v1/models", {"object": "list"}), + "lmstudio": ("/v1/models", {"object": "list"}), + "litellm": ("/v1/models", {"object": "list"}), +} + +# GPU/metrics endpoints per worker type +# These are best-effort — failure is silently ignored +GPU_ENDPOINTS = { + "ollama": "/api/ps", # Ollama process info including GPU usage + "vllm": "/metrics", # Prometheus text metrics + "lmstudio": None, + "litellm": None, +} + + +async def check_port_open(host: str, port: int, timeout: float = 1.0) -> bool: + """Check if a TCP port is open.""" + try: + reader, writer = await asyncio.wait_for(asyncio.open_connection(host, port), timeout=timeout) + writer.close() + await writer.wait_closed() + return True + except (TimeoutError, OSError): + return 
False + + +async def probe_worker(base_url: str, worker_type: str, timeout: float = 5.0) -> tuple[bool, list[str]]: + """Probe a worker endpoint to check health and discover models dynamically.""" + endpoint, expected_key = HEALTH_CHECKS.get(worker_type, ("/v1/models", {"object": "list"})) + url = f"{base_url.rstrip('/')}{endpoint}" + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(url) + if response.status_code == 200: + data = response.json() + if expected_key.items() <= data.items(): + models = [] + if "data" in data and isinstance(data["data"], list): + models = [model.get("id", "") for model in data["data"] if model.get("id")] + return True, models + return True, [] + return False, [] + except Exception as e: + logger.debug("Worker probe failed for %s: %s", url, e) + return False, [] + + +async def probe_gpu_info(base_url: str, worker_type: str, timeout: float = 3.0) -> GpuInfo | None: + """Probe GPU/VRAM metrics from a worker. Returns None on any failure.""" + gpu_endpoint = GPU_ENDPOINTS.get(worker_type) + if not gpu_endpoint: + return None + + url = f"{base_url.rstrip('/')}{gpu_endpoint}" + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(url) + if response.status_code != 200: + return None + + if worker_type == "ollama": + # Ollama /api/ps returns running models with size_vram field + data = response.json() + models_running = data.get("models", []) + if not models_running: + return None + total_vram = sum(m.get("size_vram", 0) for m in models_running) // (1024 * 1024) + queue = len(models_running) + info: GpuInfo = {"vram_used_mb": total_vram, "queue_depth": queue} + return info + + if worker_type == "vllm": + # vLLM /metrics returns Prometheus text format + text = response.text + gpu_info: GpuInfo = {} + for line in text.splitlines(): + if line.startswith("#"): + continue + if "vllm:gpu_cache_usage_perc" in line: + try: + val = float(line.split()[-1]) + 
gpu_info["utilization_pct"] = round(val * 100, 1) + except (ValueError, IndexError): + pass + if "vllm:num_requests_running" in line: + try: + gpu_info["queue_depth"] = int(float(line.split()[-1])) + except (ValueError, IndexError): + pass + return gpu_info if gpu_info else None + + except Exception as e: + logger.debug("GPU probe failed for %s: %s", url, e) + + return None + + +async def discover_local_workers( + scan_ports: bool = True, check_grid: bool = True, timeout_per_worker: float = 3.0 +) -> list[DiscoveredWorker]: + """Discover local AI workers. + + Args: + scan_ports: Whether to scan default ports for known worker types + check_grid: Whether to check for fusionAIze Grid configuration + timeout_per_worker: Timeout for each worker probe in seconds + + Returns: + List of discovered workers with health status, dynamically enumerated models, + and GPU metrics where available. + """ + discovered: list[DiscoveredWorker] = [] + + # 1. Scan default ports for known worker types + if scan_ports: + for worker_name, port in DEFAULT_PORTS.items(): + base_url = f"http://127.0.0.1:{port}/v1" + logger.debug("Checking %s at %s", worker_name, base_url) + + if not await check_port_open("127.0.0.1", port, timeout=1.0): + continue + + healthy, models = await probe_worker(base_url, worker_name, timeout_per_worker) + gpu_info = await probe_gpu_info(base_url, worker_name, timeout=2.0) if healthy else None + + worker: DiscoveredWorker = { + "name": worker_name, + "base_url": base_url, + "healthy": healthy, + "models": models, + "dynamic_models": len(models) > 0, + "capabilities": { + "local": True, + "cloud": False, + "network_zone": "local", + "cost_tier": "local", + "latency_tier": "local", + }, + "gpu_info": gpu_info, + } + discovered.append(worker) + + if healthy: + model_count = len(models) + gpu_note = f", GPU: {gpu_info}" if gpu_info else "" + logger.info( + "Discovered healthy %s worker at %s (%d model(s)%s)", + worker_name, base_url, model_count, gpu_note, + ) + else: + 
logger.debug("Found %s worker at %s but health check failed", worker_name, base_url) + + # 2. Check for fusionAIze Grid configuration + if check_grid: + grid_workers = await discover_grid_workers(timeout_per_worker) + discovered.extend(grid_workers) + + return discovered + + +async def discover_grid_workers(timeout: float = 5.0) -> list[DiscoveredWorker]: + """Discover workers configured via fusionAIze Grid. + + Reads Grid configuration from: + - ~/.faigrid/config.json (primary JSON config) + - ~/.faigrid/state/worker.state (key=value state file, legacy) + """ + grid_workers: list[DiscoveredWorker] = [] + + # Primary: ~/.faigrid/config.json + config_path = os.path.expanduser("~/.faigrid/config.json") + if os.path.exists(config_path): + try: + with open(config_path) as f: + config = json.load(f) + + for entry in config.get("workers", []): + worker_type = entry.get("type", "openai-compat") + host = entry.get("host", "127.0.0.1") + port = entry.get("port") + name = entry.get("name", f"grid-{worker_type}") + + if not port: + logger.debug("Grid config entry '%s' missing port, skipping", name) + continue + + base_url = entry.get("base_url") or f"http://{host}:{port}/v1" + healthy, models = await probe_worker(base_url, worker_type, timeout) + gpu_info = await probe_gpu_info(base_url, worker_type, timeout=2.0) if healthy else None + + worker: DiscoveredWorker = { + "name": name, + "base_url": base_url, + "healthy": healthy, + "models": models or entry.get("models", []), + "dynamic_models": len(models) > 0, + "capabilities": { + "local": True, + "cloud": False, + "network_zone": entry.get("network_zone", "local"), + "cost_tier": entry.get("cost_tier", "local"), + "latency_tier": "local", + }, + "gpu_info": gpu_info, + } + grid_workers.append(worker) + + if grid_workers: + logger.info("Grid config: found %d worker(s) in %s", len(grid_workers), config_path) + except Exception as e: + logger.debug("Failed to read Grid config %s: %s", config_path, e) + + # Fallback: 
~/.faigrid/state/worker.state (key=value format) + state_path = os.path.expanduser("~/.faigrid/state/worker.state") + if os.path.exists(state_path) and not grid_workers: + try: + with open(state_path) as f: + state: dict[str, str] = {} + for line in f: + line = line.strip() + if line and "=" in line: + key, value = line.split("=", 1) + state[key.strip()] = value.strip() + + if "WORKER_ENDPOINTS" in state: + for endpoint in state["WORKER_ENDPOINTS"].split(","): + endpoint = endpoint.strip() + if not endpoint: + continue + # Format: worker_type:host:port + parts = endpoint.split(":") + if len(parts) >= 3: + worker_type, host, port_str = parts[0], parts[1], parts[2] + base_url = f"http://{host}:{port_str}/v1" + healthy, models = await probe_worker(base_url, worker_type, timeout) + gpu_info = await probe_gpu_info(base_url, worker_type, timeout=2.0) if healthy else None + + worker = { + "name": f"grid-{worker_type}", + "base_url": base_url, + "healthy": healthy, + "models": models, + "dynamic_models": len(models) > 0, + "capabilities": { + "local": True, + "cloud": False, + "network_zone": "local", + "cost_tier": "local", + "latency_tier": "local", + }, + "gpu_info": gpu_info, + } + grid_workers.append(worker) + except Exception as e: + logger.debug("Failed to read Grid state %s: %s", state_path, e) + + return grid_workers + + +def generate_provider_config(worker: DiscoveredWorker) -> dict[str, Any]: + """Generate a provider configuration entry for a discovered worker.""" + base_def = LOCAL.get(worker["name"]) + + config: dict[str, Any] = { + "contract": "local-worker", + "backend": "openai-compat", + "base_url": worker["base_url"], + "tier": "local", + "capabilities": worker["capabilities"], + } + + # Prefer dynamically enumerated model over static default + if worker["models"]: + config["model"] = worker["models"][0] + if len(worker["models"]) > 1: + config["available_models"] = worker["models"] + elif base_def and "example_model" in base_def: + config["model"] = 
base_def["example_model"] + + if worker.get("gpu_info"): + config["gpu_info"] = worker["gpu_info"] + + return config + + +async def main() -> None: + """CLI entry point for local worker discovery.""" + import argparse + + parser = argparse.ArgumentParser(description="Discover local AI workers") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--no-scan", action="store_true", help="Skip port scanning") + parser.add_argument("--no-grid", action="store_true", help="Skip Grid check") + parser.add_argument("--timeout", type=float, default=3.0, help="Timeout per worker") + + args = parser.parse_args() + + workers = await discover_local_workers( + scan_ports=not args.no_scan, check_grid=not args.no_grid, timeout_per_worker=args.timeout + ) + + if args.json: + print(json.dumps(workers, indent=2)) + else: + if not workers: + print("No local workers discovered.") + return + + print(f"Discovered {len(workers)} local worker(s):") + for worker in workers: + status = "✓" if worker["healthy"] else "✗" + model_note = f", {len(worker['models'])} models (dynamic)" if worker["dynamic_models"] else ( + f", {len(worker['models'])} models" if worker["models"] else "" + ) + print(f" {status} {worker['name']}: {worker['base_url']}{model_note}") + + if worker["models"]: + print(f" Models: {', '.join(worker['models'][:5])}") + if len(worker["models"]) > 5: + print(f" ... 
and {len(worker['models']) - 5} more") + + if worker.get("gpu_info"): + gpu = worker["gpu_info"] + parts = [] + if "vram_used_mb" in gpu: + parts.append(f"VRAM used: {gpu['vram_used_mb']}MB") + if "utilization_pct" in gpu: + parts.append(f"GPU: {gpu['utilization_pct']}%") + if "queue_depth" in gpu: + parts.append(f"queue: {gpu['queue_depth']}") + if parts: + print(f" GPU: {', '.join(parts)}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/faigate/main.py b/faigate/main.py index 57a1996..d3f8e32 100644 --- a/faigate/main.py +++ b/faigate/main.py @@ -42,7 +42,6 @@ from .config import Config, load_config from .dashboard import _metadata_catalogs_summary, _metadata_packages_detail from .dashboard_web import DASHBOARD_HTML -from .dashboard import _metadata_catalogs_summary, _metadata_packages_detail from .hooks import ( AppliedHooks, HookExecutionError, @@ -71,7 +70,7 @@ ) from .provider_catalog_store import ProviderCatalogStore from .provider_sources import list_provider_sources -from .providers import ProviderBackend, ProviderError, classify_runtime_issue +from .providers import ProviderBackend, ProviderError, classify_runtime_issue, create_provider_backend from .router import Router, RoutingDecision from .updates import ( UpdateChecker, @@ -2122,6 +2121,40 @@ async def _resolve_image_route_preview( ) client_tag = _resolve_client_tag(headers, client_profile) + # Budget enforcement for image endpoints + limit_day = profile_hints.get("cost_limit_usd_day") + limit_month = profile_hints.get("cost_limit_usd_month") + if (limit_day or limit_month) and _metrics: + now = time.time() + if limit_day: + spent_day = _metrics.get_client_cost_since(client_profile, now - 86400) + if spent_day >= limit_day: + return JSONResponse( + status_code=429, + content={ + "error": { + "type": "budget_exceeded", + "message": f"Client profile '{client_profile}' has reached its daily budget limit " + f"(${spent_day:.4f} / ${limit_day:.4f} USD).", + "code": 
"daily_budget_exceeded", + } + }, + ) + if limit_month: + spent_month = _metrics.get_client_cost_since(client_profile, now - 30 * 86400) + if spent_month >= limit_month: + return JSONResponse( + status_code=429, + content={ + "error": { + "type": "budget_exceeded", + "message": f"Client profile '{client_profile}' has reached its monthly budget limit " + f"(${spent_month:.4f} / ${limit_month:.4f} USD).", + "code": "monthly_budget_exceeded", + } + }, + ) + ( effective_model_requested, direct_provider_name, @@ -2213,7 +2246,7 @@ async def lifespan(app: FastAPI): if not pcfg.get("api_key"): logger.warning("Provider %s has no API key, skipping", name) continue - _providers[name] = ProviderBackend(name, pcfg) + _providers[name] = create_provider_backend(name, pcfg) logger.info(" ✓ %s → %s (%s)", name, pcfg["model"], pcfg.get("tier", "default")) # Merge virtual providers registered by community hooks @@ -2222,7 +2255,7 @@ async def lifespan(app: FastAPI): logger.info(" skip virtual:%s — overridden by config-defined provider", vp_name) continue try: - _providers[vp_name] = ProviderBackend(vp_name, vp_cfg) + _providers[vp_name] = create_provider_backend(vp_name, vp_cfg) logger.info( " ✓ virtual:%s → %s (%s) [community hook]", vp_name, @@ -2731,6 +2764,26 @@ async def operator_events( } +@app.get("/api/alerts") +async def get_alerts(lookback_hours: int = 1, baseline_hours: int = 24): + """Anomaly detection: compare recent window against rolling baseline. + + Returns detected anomalies with severity, description, and thresholds. + Useful for operator dashboards and automated alerting integrations. 
+ """ + anomalies = _metrics.get_anomalies( + lookback_hours=lookback_hours, + baseline_hours=baseline_hours, + ) + return { + "anomalies": anomalies, + "lookback_hours": lookback_hours, + "baseline_hours": baseline_hours, + "count": len(anomalies), + "has_high_severity": any(a["severity"] == "high" for a in anomalies), + } + + def _build_cache_intelligence( provider_name: str, request_dims: dict[str, Any], diff --git a/faigate/metrics.py b/faigate/metrics.py index f35154d..6fe3f78 100644 --- a/faigate/metrics.py +++ b/faigate/metrics.py @@ -583,6 +583,120 @@ def _build_operator_where_clause(self, filters: dict[str, Any]) -> tuple[str, tu return "", () return f" WHERE {' AND '.join(clauses)}", tuple(params) + def get_client_cost_since(self, client_profile: str, since_ts: float) -> float: + """Return total cost_usd for a client_profile since a given Unix timestamp. + + Used for budget enforcement: check daily/monthly spend before routing. + Returns 0.0 if the database is not available. + """ + if not self._conn: + return 0.0 + rows = self._q( + "SELECT ROUND(SUM(cost_usd),6) AS cost FROM requests WHERE client_profile=? AND timestamp>=?", + (client_profile, since_ts), + ) + return float((rows[0].get("cost") or 0.0)) if rows else 0.0 + + def get_anomalies(self, lookback_hours: int = 1, baseline_hours: int = 24) -> list[dict]: + """Detect anomalies by comparing recent window to a rolling baseline. 
+ + Returns a list of anomaly dicts with keys: + type, severity, description, current_value, baseline_value, threshold + """ + if not self._conn: + return [] + + now = time.time() + recent_since = now - lookback_hours * 3600 + baseline_since = now - baseline_hours * 3600 + + recent = self._q( + """SELECT COUNT(*) AS reqs, + SUM(CASE WHEN success=0 THEN 1 ELSE 0 END) AS failures, + ROUND(AVG(latency_ms),1) AS avg_latency, + ROUND(SUM(cost_usd),6) AS cost + FROM requests WHERE timestamp>=?""", + (recent_since,), + ) + baseline = self._q( + """SELECT COUNT(*) AS reqs, + SUM(CASE WHEN success=0 THEN 1 ELSE 0 END) AS failures, + ROUND(AVG(latency_ms),1) AS avg_latency, + ROUND(SUM(cost_usd),6) AS cost + FROM requests WHERE timestamp>=? AND timestamp20% failure rate and significantly worse than baseline) + if r_reqs > 5: + r_error_rate = r_failures / r_reqs + b_failures = b.get("failures") or 0 + b_reqs = b.get("reqs") or 1 + b_error_rate = b_failures / b_reqs + if r_error_rate > 0.2 and r_error_rate > b_error_rate * 2: + anomalies.append({ + "type": "error_rate_spike", + "severity": "high" if r_error_rate > 0.5 else "medium", + "description": f"Error rate {r_error_rate:.0%} in last {lookback_hours}h (baseline: {b_error_rate:.0%})", + "current_value": round(r_error_rate, 4), + "baseline_value": round(b_error_rate, 4), + "threshold": 0.2, + }) + + # Latency spike (>2x baseline, and >500ms) + if b_latency > 0 and r_latency > 500 and r_latency > b_latency * 2: + anomalies.append({ + "type": "latency_spike", + "severity": "medium", + "description": f"Avg latency {r_latency:.0f}ms in last {lookback_hours}h (baseline: {b_latency:.0f}ms)", + "current_value": r_latency, + "baseline_value": b_latency, + "threshold": b_latency * 2, + }) + + # Cost spike (>3x normalized baseline, and >$0.01 absolute) + if b_cost_norm > 0 and r_cost > 0.01 and r_cost > b_cost_norm * 3: + anomalies.append({ + "type": "cost_spike", + "severity": "high", + "description": f"Cost ${r_cost:.4f} in last 
{lookback_hours}h (baseline rate: ${b_cost_norm:.4f}/h)", + "current_value": r_cost, + "baseline_value": b_cost_norm, + "threshold": b_cost_norm * 3, + }) + + # Traffic spike (>5x normalized baseline) + if b_reqs_norm > 0 and r_reqs > b_reqs_norm * 5: + anomalies.append({ + "type": "traffic_spike", + "severity": "low", + "description": f"{r_reqs} requests in last {lookback_hours}h (baseline: ~{b_reqs_norm:.0f}/h)", + "current_value": r_reqs, + "baseline_value": b_reqs_norm, + "threshold": b_reqs_norm * 5, + }) + + return anomalies + def _q(self, sql: str, params: tuple = ()) -> list[dict]: if not self._conn: return [] diff --git a/faigate/oauth/__init__.py b/faigate/oauth/__init__.py new file mode 100644 index 0000000..d79dd07 --- /dev/null +++ b/faigate/oauth/__init__.py @@ -0,0 +1 @@ +"""OAuth token management for managed providers.""" diff --git a/faigate/oauth/backend.py b/faigate/oauth/backend.py new file mode 100644 index 0000000..0143d9f --- /dev/null +++ b/faigate/oauth/backend.py @@ -0,0 +1,224 @@ +"""OAuth‑wrapped provider backend. + +This module provides `OAuthBackend`, a wrapper around an existing provider backend +that injects OAuth2 tokens obtained from the token store. It handles token +refresh and interactive login delegation. +""" + +import asyncio +import json +import logging +import subprocess +import time +from typing import Any + +import httpx +from httpx import AsyncClient, Request, Response + +from .token_store import TokenStore +from ..providers import ProviderBackend + +logger = logging.getLogger("faigate.oauth.backend") + + +class OAuthBackend(ProviderBackend): + """Provider backend that adds OAuth2 token management. + + This backend wraps an underlying backend (e.g., openai‑compat, anthropic‑compat) + and injects an OAuth2 bearer token into each request. Tokens are obtained from + the token store; if missing or expired, the backend can delegate to an external + helper for interactive login or token refresh. 
+ + Configuration example in config.yaml: + + providers: + qwen‑portal: + backend: oauth + oauth: + helper: "faigate‑auth qwen‑portal" + client_id: "..." + token_endpoint: "https://qwen.example.com/oauth/token" + refresh_endpoint: "https://qwen.example.com/oauth/refresh" + scope: "openid email" + underlying_backend: openai‑compat + base_url: "https://qwen‑portal.example.com/v1" + + The `underlying_backend` field specifies which real backend to use after + token injection. + """ + + def __init__(self, name: str, cfg: dict[str, Any]): + """Initialize OAuth backend. + + Args: + name: Provider canonical name. + cfg: Provider configuration dict. Must contain an "oauth" sub‑dict + with at least "helper" (command to obtain tokens) and + "underlying_backend" (backend type to wrap). + """ + super().__init__(name, cfg) + self.oauth_cfg = cfg.get("oauth", {}) + self.helper_cmd = self.oauth_cfg.get("helper", "") + self.underlying_backend_type = self.oauth_cfg.get("underlying_backend", "openai‑compat") + self.token_store = TokenStore() + self._wrapped_backend = self._create_wrapped_backend() + + def _create_wrapped_backend(self) -> ProviderBackend: + """Instantiate the underlying backend.""" + # Create a config dict for the wrapped backend by stripping oauth fields + wrapped_cfg = self.cfg.copy() + wrapped_cfg.pop("oauth", None) + wrapped_cfg["backend"] = self.underlying_backend_type + # Ensure auth_optional is True because we will add the token ourselves + wrapped_cfg["auth_optional"] = True + return ProviderBackend(self.name, wrapped_cfg) + + async def _ensure_token(self) -> str: + """Ensure a valid access token exists, refreshing or logging in if needed. + + Returns: + Access token string. + + Raises: + RuntimeError: If token cannot be obtained. 
+ """ + token_data = self.token_store.get(self.name) + if not token_data: + logger.info("No token for %s, invoking helper", self.name) + token_data = await self._run_helper() + if not token_data: + raise RuntimeError( + f"Could not obtain OAuth token for {self.name}. Run helper manually: {self.helper_cmd}" + ) + self.token_store.set(self.name, token_data) + + # Check expiration + if self.token_store.is_expired(self.name): + logger.info("Token for %s expired, attempting refresh", self.name) + refreshed = self.token_store.refresh_if_needed(self.name, self._refresh_token) + if not refreshed: + # Refresh failed or not possible; try full re‑login + logger.warning("Refresh failed, invoking helper") + token_data = await self._run_helper() + if not token_data: + raise RuntimeError( + f"Could not refresh OAuth token for {self.name}. Run helper manually: {self.helper_cmd}" + ) + self.token_store.set(self.name, token_data) + + # Return access token + token_data = self.token_store.get(self.name) + return token_data.get("access_token", "") + + async def _run_helper(self) -> dict[str, Any]: + """Run external helper to obtain tokens. + + Returns: + Token data dict (access_token, refresh_token, expires_at, etc.) + + Raises: + RuntimeError: If helper fails. 
+ """ + if not self.helper_cmd: + raise RuntimeError(f"No OAuth helper command configured for {self.name}") + + logger.info("Running OAuth helper: %s", self.helper_cmd) + try: + # Run helper command + proc = await asyncio.create_subprocess_shell( + self.helper_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await proc.communicate() + if proc.returncode != 0: + stderr_text = stderr.decode("utf-8", errors="replace").strip() + raise RuntimeError(f"Helper failed with exit code {proc.returncode}: {stderr_text}") + + # Parse JSON output + output = stdout.decode("utf-8", errors="replace").strip() + try: + token_data = json.loads(output) + except json.JSONDecodeError as e: + logger.error("Helper output not valid JSON: %s", output[:200]) + raise RuntimeError(f"Helper output not valid JSON: {e}") + + # Validate required fields + if "access_token" not in token_data: + raise RuntimeError("Helper output missing 'access_token' field") + + # Add provider config for future refreshes + token_data.setdefault("provider_config", self.oauth_cfg.copy()) + logger.info("Obtained OAuth token for %s", self.name) + return token_data + + except (OSError, asyncio.SubprocessError) as e: + logger.error("Failed to run OAuth helper %s: %s", self.helper_cmd, e) + raise RuntimeError(f"OAuth helper execution failed: {e}") + + def _refresh_token(self, token_data: dict[str, Any]) -> dict[str, Any]: + """Refresh an access token using the refresh token. + + Args: + token_data: Current token data (must contain refresh_token). + + Returns: + New token data. + + Raises: + RuntimeError: If refresh fails. 
+ """ + refresh_token = token_data.get("refresh_token") + if not refresh_token: + raise RuntimeError("No refresh token available") + + provider_config = token_data.get("provider_config", self.oauth_cfg) + token_endpoint = provider_config.get("refresh_endpoint") or provider_config.get("token_endpoint") + if not token_endpoint: + raise RuntimeError("No token endpoint configured for refresh") + + client_id = provider_config.get("client_id", "") + client_secret = provider_config.get("client_secret") + + # Prepare OAuth2 refresh request + data = { + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + } + if client_secret: + data["client_secret"] = client_secret + + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + logger.info("Refreshing OAuth token for %s", self.name) + try: + resp = httpx.post(token_endpoint, data=data, headers=headers, timeout=30.0) + resp.raise_for_status() + new_token = resp.json() + except (httpx.HTTPError, json.JSONDecodeError) as e: + logger.error("Token refresh failed: %s", e) + raise RuntimeError(f"Token refresh failed: {e}") + + # Merge new token data with existing (preserve provider_config) + merged = token_data.copy() + merged.update(new_token) + merged.setdefault("provider_config", provider_config) + + # Ensure expires_at is set if expires_in provided + if "expires_in" in merged and "expires_at" not in merged: + merged["expires_at"] = time.time() + merged["expires_in"] + + logger.info("Token refreshed for %s", self.name) + return merged + + async def _request(self, client: AsyncClient, req: Request) -> Response: + """Override _request to inject OAuth bearer token.""" + token = await self._ensure_token() + req.headers["Authorization"] = f"Bearer {token}" + return await self._wrapped_backend._request(client, req) + + # Forward all other methods to wrapped backend + def __getattr__(self, name: str) -> Any: + """Delegate unknown attributes to wrapped backend.""" + return 
getattr(self._wrapped_backend, name) diff --git a/faigate/oauth/cli.py b/faigate/oauth/cli.py new file mode 100644 index 0000000..29f377e --- /dev/null +++ b/faigate/oauth/cli.py @@ -0,0 +1,676 @@ +"""OAuth CLI helper for managed providers.""" + +import argparse +import json +import logging +import os +import sys +import time +from typing import Any + +# Optional imports for OAuth flows +try: + import requests +except ImportError: + requests = None + +try: + import webbrowser +except ImportError: + webbrowser = None + + +logger = logging.getLogger("faigate.oauth.cli") + +# ── Antigravity constants (from LLM AI Router OAuth URL) ───────────────────── +_ANTIGRAVITY_CLIENT_ID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com" +_ANTIGRAVITY_SCOPE = " ".join([ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + "https://www.googleapis.com/auth/cclog", + "https://www.googleapis.com/auth/experimentsandconfigs", +]) +_ANTIGRAVITY_AUTH_ENDPOINT = "https://accounts.google.com/o/oauth2/v2/auth" +_ANTIGRAVITY_TOKEN_ENDPOINT = "https://oauth2.googleapis.com/token" +_ANTIGRAVITY_CREDS_PATH = "~/.gemini/oauth_creds.json" +_ANTIGRAVITY_CALLBACK_PORT = 8080 +# Base URL: Antigravity's client-facing interface is a local ephemeral gRPC language server +# (127.0.0.1:/exa.language_server_pb.LanguageServerService/…) that proxies to Google +# internally. faigate uses the OAuth token to call the Google Generative Language API directly. +# Default: https://generativelanguage.googleapis.com/v1beta/openai (matches registry.py) +# Override with ANTIGRAVITY_BASE_URL if a different Google endpoint is needed. 
+_ANTIGRAVITY_BASE_URL_DEFAULT = "https://generativelanguage.googleapis.com/v1beta/openai" +_ANTIGRAVITY_BASE_URL_ENV = "ANTIGRAVITY_BASE_URL" + +# ── Qwen constants (from qwen-code source) ─────────────────────────────────── +_QWEN_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" +_QWEN_SCOPE = "openid profile email model.completion" +_QWEN_DEVICE_ENDPOINT = "https://chat.qwen.ai/api/v1/oauth2/device/code" +_QWEN_TOKEN_ENDPOINT = "https://chat.qwen.ai/api/v1/oauth2/token" +_QWEN_CREDS_PATH = "~/.qwen/oauth_creds.json" +_QWEN_FALLBACK_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" +_QWEN_OAUTH_MODEL = "coder-model" + + +def _qwen_base_url_from_resource(resource_url: str | None) -> str: + """Build the inference base URL from the resource_url field in Qwen credentials. + + resource_url is a hostname (e.g. 'portal.qwen.ai'). The full API path + follows DashScope's compatible-mode convention. + """ + if not resource_url: + return _QWEN_FALLBACK_BASE_URL + host = resource_url.rstrip("/") + if not host.startswith("http"): + host = f"https://{host}" + return f"{host}/compatible-mode/v1" + + +def qwen_oauth() -> dict[str, Any]: + """Read Qwen OAuth credentials from the local qwen-code CLI token store. + + The qwen-code CLI (https://github.com/QwenLM/qwen-code) stores OAuth + credentials at ~/.qwen/oauth_creds.json after running `qwen auth login`. + Token format: + { + "access_token": "...", + "refresh_token": "...", + "token_type": "Bearer", + "resource_url": "portal.qwen.ai", # inference endpoint hostname + "expiry_date": 1234567890000, # ms timestamp + } + + Returns a dict with access_token, base_url, and model suitable for + injecting into faigate's provider config. 
+ """ + creds_path = os.path.expanduser(_QWEN_CREDS_PATH) + if not os.path.exists(creds_path): + raise RuntimeError( + f"Qwen credentials not found at {creds_path}.\n" + "Please authenticate with qwen-code first:\n" + " npm install -g @qwen-code/cli # or: npx @qwen-code/cli\n" + " qwen auth login" + ) + + try: + with open(creds_path) as f: + creds = json.load(f) + except (json.JSONDecodeError, IOError) as e: + raise RuntimeError(f"Failed to read Qwen credentials from {creds_path}: {e}") + + access_token = creds.get("access_token") + if not access_token: + raise RuntimeError( + f"Qwen credentials at {creds_path} have no access_token. " + "Please re-authenticate: qwen auth login" + ) + + # Check expiry (expiry_date is in milliseconds) + expiry_ms = creds.get("expiry_date") + if expiry_ms and expiry_ms < time.time() * 1000: + logger.warning( + "Qwen token appears expired (expiry: %s). " + "Consider refreshing: qwen auth login", + expiry_ms, + ) + + resource_url = creds.get("resource_url") + base_url = _qwen_base_url_from_resource(resource_url) + + return { + "access_token": access_token, + "refresh_token": creds.get("refresh_token"), + "token_type": creds.get("token_type", "Bearer"), + "base_url": base_url, + "model": _QWEN_OAUTH_MODEL, + "resource_url": resource_url, + "expiry_date": expiry_ms, + } + + +def qwen_refresh(refresh_token: str) -> dict[str, Any]: + """Refresh an expired Qwen OAuth token using the refresh_token. + + Writes the updated credentials back to ~/.qwen/oauth_creds.json. + """ + if requests is None: + raise RuntimeError("requests package required. 
Install with: pip install faigate[oauth]") + + resp = requests.post( + _QWEN_TOKEN_ENDPOINT, + json={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": _QWEN_CLIENT_ID, + }, + timeout=30, + ) + resp.raise_for_status() + token = resp.json() + + new_creds = { + "access_token": token["access_token"], + "refresh_token": token.get("refresh_token", refresh_token), + "token_type": token.get("token_type", "Bearer"), + "resource_url": token.get("resource_url"), + "expiry_date": int((time.time() + token.get("expires_in", 3600)) * 1000), + } + + creds_path = os.path.expanduser(_QWEN_CREDS_PATH) + os.makedirs(os.path.dirname(creds_path), exist_ok=True) + tmp = creds_path + ".tmp" + with open(tmp, "w") as f: + json.dump(new_creds, f, indent=2) + os.replace(tmp, creds_path) + os.chmod(creds_path, 0o600) + + logger.info("Qwen token refreshed and written to %s", creds_path) + return new_creds + + +def qwen_device_code_flow() -> dict[str, Any]: + """Obtain a new Qwen OAuth token via the device code flow. + + Uses the same client_id and endpoints as qwen-code CLI so the resulting + token is stored in the shared ~/.qwen/oauth_creds.json and usable by + both faigate and qwen-code. + """ + if requests is None: + raise RuntimeError("requests package required. 
Install with: pip install faigate[oauth]") + + # Step 1: Request device code + resp = requests.post( + _QWEN_DEVICE_ENDPOINT, + json={ + "client_id": _QWEN_CLIENT_ID, + "scope": _QWEN_SCOPE, + }, + timeout=30, + ) + resp.raise_for_status() + device = resp.json() + + device_code = device["device_code"] + user_code = device["user_code"] + verification_uri = device.get("verification_uri", "https://chat.qwen.ai/activate") + interval = device.get("interval", 5) + expires_in = device.get("expires_in", 300) + + print(f"\nPlease visit: {verification_uri}") + print(f"Enter code: {user_code}\n") + if webbrowser: + webbrowser.open(verification_uri) + + # Step 2: Poll for token (RFC 8628) + max_polls = expires_in // max(interval, 1) + for _ in range(max_polls): + time.sleep(interval) + try: + resp = requests.post( + _QWEN_TOKEN_ENDPOINT, + json={ + "grant_type": "urn:ietf:params:oauth:grant-type:device_code", + "device_code": device_code, + "client_id": _QWEN_CLIENT_ID, + }, + timeout=30, + ) + if resp.status_code == 200: + token = resp.json() + resource_url = token.get("resource_url") + new_creds = { + "access_token": token["access_token"], + "refresh_token": token.get("refresh_token"), + "token_type": token.get("token_type", "Bearer"), + "resource_url": resource_url, + "expiry_date": int((time.time() + token.get("expires_in", 3600)) * 1000), + } + # Write to shared ~/.qwen/oauth_creds.json + creds_path = os.path.expanduser(_QWEN_CREDS_PATH) + os.makedirs(os.path.dirname(creds_path), exist_ok=True) + tmp = creds_path + ".tmp" + with open(tmp, "w") as f: + json.dump(new_creds, f, indent=2) + os.replace(tmp, creds_path) + os.chmod(creds_path, 0o600) + print(f"Authenticated. 
def antigravity_oauth() -> dict[str, Any]:
    """Read Antigravity OAuth credentials from the local token store.

    Antigravity (Google's AI coding IDE) stores Google OAuth credentials at
    ~/.gemini/oauth_creds.json after signing in via the app or via
    `antigravity auth login` (agy auth login). The file actually read is
    the one named by ``_ANTIGRAVITY_CREDS_PATH``.

    Token format:
      {
        "access_token": "ya29.a0...",
        "refresh_token": "1//03...",
        "token_type": "Bearer",
        "id_token": "eyJ...",
        "expiry_date": 1234567890000,   # ms timestamp
        "scope": "https://www.googleapis.com/auth/cloud-platform ...",
      }

    Returns:
        The stored token fields plus ``base_url`` (read from the environment
        variable named by ``_ANTIGRAVITY_BASE_URL_ENV``, falling back to
        ``_ANTIGRAVITY_BASE_URL_DEFAULT``) and ``base_url_discovered``,
        which is always True.

    Raises:
        RuntimeError: If the credentials file is missing, unreadable, not a
            JSON object, or has no ``access_token``. Every failure is a
            RuntimeError so callers can fall back to an interactive login.
    """
    creds_path = os.path.expanduser(_ANTIGRAVITY_CREDS_PATH)
    if not os.path.exists(creds_path):
        raise RuntimeError(
            f"Antigravity credentials not found at {creds_path}.\n"
            "Please sign in to Antigravity (the IDE) or run:\n"
            " agy auth login"
        )

    try:
        with open(creds_path) as f:
            creds = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        raise RuntimeError(
            f"Failed to read Antigravity credentials from {creds_path}: {e}"
        ) from e

    if not isinstance(creds, dict):
        # A list/string/number would otherwise fail with AttributeError below.
        raise RuntimeError(
            f"Failed to read Antigravity credentials from {creds_path}: "
            "expected a JSON object"
        )

    access_token = creds.get("access_token")
    if not access_token:
        raise RuntimeError(
            f"Antigravity credentials at {creds_path} have no access_token. "
            "Please sign in to Antigravity or run: agy auth login"
        )

    expiry_ms = creds.get("expiry_date")
    # Only compare numeric expiries; a malformed value must not crash the read.
    if (
        isinstance(expiry_ms, (int, float))
        and expiry_ms
        and expiry_ms < time.time() * 1000
    ):
        logger.warning(
            "Antigravity token appears expired. "
            "Run: faigate-auth google-antigravity --refresh or sign in to Antigravity."
        )

    base_url = os.environ.get(_ANTIGRAVITY_BASE_URL_ENV, _ANTIGRAVITY_BASE_URL_DEFAULT)

    return {
        "access_token": access_token,
        "refresh_token": creds.get("refresh_token"),
        "token_type": creds.get("token_type", "Bearer"),
        "id_token": creds.get("id_token"),
        "expiry_date": expiry_ms,
        "scope": creds.get("scope", _ANTIGRAVITY_SCOPE),
        "base_url": base_url,
        "base_url_discovered": True,
    }


def antigravity_refresh(refresh_token: str) -> dict[str, Any]:
    """Refresh an expired Antigravity Google OAuth token.

    Posts a ``refresh_token`` grant to ``_ANTIGRAVITY_TOKEN_ENDPOINT``,
    merges the response into the existing credentials and writes the result
    back to the credentials file (~/.gemini/oauth_creds.json).

    Args:
        refresh_token: The refresh token to exchange. It is also kept in the
            written credentials when neither the response nor the existing
            file provides one.

    Returns:
        The merged credentials plus ``base_url``.

    Raises:
        RuntimeError: If the ``requests`` package is not available.
        requests.HTTPError: If the token endpoint rejects the request.
        KeyError: If the response has no ``access_token``.
        OSError: If the credentials file cannot be written.
    """
    if requests is None:
        raise RuntimeError("requests package required. Install with: pip install faigate[oauth]")

    resp = requests.post(
        _ANTIGRAVITY_TOKEN_ENDPOINT,
        data={
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
            "client_id": _ANTIGRAVITY_CLIENT_ID,
        },
        timeout=30,
    )
    resp.raise_for_status()
    token = resp.json()

    # Read existing creds to preserve fields (refresh_token may not be re-issued).
    # Still best-effort, but an unreadable file is logged instead of hidden.
    creds_path = os.path.expanduser(_ANTIGRAVITY_CREDS_PATH)
    existing: dict[str, Any] = {}
    try:
        with open(creds_path) as f:
            loaded = json.load(f)
    except FileNotFoundError:
        loaded = None
    except (ValueError, OSError) as e:
        logger.warning(
            "Ignoring unreadable Antigravity credentials at %s: %s", creds_path, e
        )
        loaded = None
    if isinstance(loaded, dict):
        existing = loaded

    new_creds = {
        **existing,
        "access_token": token["access_token"],
        "token_type": token.get("token_type", "Bearer"),
        "scope": token.get("scope", existing.get("scope", _ANTIGRAVITY_SCOPE)),
        "expiry_date": int((time.time() + token.get("expires_in", 3600)) * 1000),
    }
    if "id_token" in token:
        new_creds["id_token"] = token["id_token"]
    if "refresh_token" in token:
        new_creds["refresh_token"] = token["refresh_token"]
    if not new_creds.get("refresh_token"):
        # Neither the response nor the old file had one: keep the token that
        # was just used so a later refresh remains possible.
        new_creds["refresh_token"] = refresh_token

    os.makedirs(os.path.dirname(creds_path), exist_ok=True)
    tmp = creds_path + ".tmp"
    # Create the temp file owner-only from the start so the tokens are never
    # readable by other users, then swap it into place atomically.
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump(new_creds, f, indent=2)
    os.chmod(tmp, 0o600)  # covers a pre-existing temp file with wider permissions
    os.replace(tmp, creds_path)
    logger.info("Antigravity token refreshed and written to %s", creds_path)

    return {
        **new_creds,
        "base_url": os.environ.get(_ANTIGRAVITY_BASE_URL_ENV, _ANTIGRAVITY_BASE_URL_DEFAULT),
    }
Install with: pip install faigate[oauth]") + + # Generate PKCE code_verifier + code_challenge (S256) + code_verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode() + code_challenge = base64.urlsafe_b64encode( + hashlib.sha256(code_verifier.encode()).digest() + ).rstrip(b"=").decode() + + state = secrets.token_urlsafe(24) + redirect_uri = f"http://localhost:{_ANTIGRAVITY_CALLBACK_PORT}/callback" + + params = { + "client_id": _ANTIGRAVITY_CLIENT_ID, + "response_type": "code", + "redirect_uri": redirect_uri, + "scope": _ANTIGRAVITY_SCOPE, + "state": state, + "access_type": "offline", + "prompt": "consent", + "code_challenge": code_challenge, + "code_challenge_method": "S256", + } + auth_url = f"{_ANTIGRAVITY_AUTH_ENDPOINT}?{urllib.parse.urlencode(params)}" + + # Capture auth code via local callback server + received: dict[str, str] = {} + + class _CallbackHandler(BaseHTTPRequestHandler): + def do_GET(self) -> None: + parsed = urllib.parse.urlparse(self.path) + qs = urllib.parse.parse_qs(parsed.query) + received["code"] = qs.get("code", [""])[0] + received["state"] = qs.get("state", [""])[0] + self.send_response(200) + self.end_headers() + self.wfile.write(b"

Antigravity login complete. You can close this tab.

") + + def log_message(self, *args: Any) -> None: + pass # suppress server logs + + server = HTTPServer(("localhost", _ANTIGRAVITY_CALLBACK_PORT), _CallbackHandler) + server.timeout = 120 + + print(f"\nOpening browser for Antigravity login...\n{auth_url}\n") + if webbrowser: + webbrowser.open(auth_url) + else: + print(f"Open this URL manually:\n{auth_url}") + + print(f"Waiting for callback on http://localhost:{_ANTIGRAVITY_CALLBACK_PORT}/callback ...") + server.handle_request() + server.server_close() + + code = received.get("code") + if not code: + raise RuntimeError("No authorization code received from callback.") + if received.get("state") != state: + raise RuntimeError("OAuth state mismatch — possible CSRF. Aborting.") + + # Exchange code for tokens + resp = requests.post( + _ANTIGRAVITY_TOKEN_ENDPOINT, + data={ + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "client_id": _ANTIGRAVITY_CLIENT_ID, + "code_verifier": code_verifier, + }, + timeout=30, + ) + resp.raise_for_status() + token = resp.json() + + new_creds = { + "access_token": token["access_token"], + "refresh_token": token.get("refresh_token"), + "token_type": token.get("token_type", "Bearer"), + "id_token": token.get("id_token"), + "scope": token.get("scope", _ANTIGRAVITY_SCOPE), + "expiry_date": int((time.time() + token.get("expires_in", 3600)) * 1000), + } + + creds_path = os.path.expanduser(_ANTIGRAVITY_CREDS_PATH) + os.makedirs(os.path.dirname(creds_path), exist_ok=True) + tmp = creds_path + ".tmp" + with open(tmp, "w") as f: + json.dump(new_creds, f, indent=2) + os.replace(tmp, creds_path) + os.chmod(creds_path, 0o600) + print(f"Antigravity credentials written to {creds_path}") + + return { + **new_creds, + "base_url": os.environ.get(_ANTIGRAVITY_BASE_URL_ENV, _ANTIGRAVITY_BASE_URL_DEFAULT), + "base_url_discovered": True, + } + + +def claude_code_oauth() -> dict[str, Any]: + """Read Claude Code OAuth token from the local claude CLI config. 
def google_vertex_adc() -> dict[str, Any]:
    """Use Google Application Default Credentials (gcloud ADC).

    Runs ``gcloud auth print-access-token`` and wraps the printed token.

    Returns:
        A dict with ``access_token``, ``token_type``, ``expires_in`` (a fixed
        3600) and ``scope``.

    Raises:
        RuntimeError: If gcloud is not installed, exits non-zero, does not
            finish within 60 seconds, or prints an empty token. The
            underlying exception is chained as ``__cause__``.
    """
    import subprocess

    try:
        result = subprocess.run(
            ["gcloud", "auth", "print-access-token"],
            capture_output=True, text=True, check=True,
            # Output is captured, so a prompt would be invisible: never hang forever.
            timeout=60,
        )
    except (
        subprocess.CalledProcessError,
        subprocess.TimeoutExpired,
        FileNotFoundError,
    ) as e:
        raise RuntimeError(
            f"Failed to obtain Google ADC token: {e}. "
            "Ensure gcloud is installed and authenticated."
        ) from e

    access_token = result.stdout.strip()
    if not access_token:
        raise RuntimeError("gcloud returned empty access token")
    return {
        "access_token": access_token,
        "token_type": "Bearer",
        "expires_in": 3600,
        "scope": "https://www.googleapis.com/auth/cloud-platform",
    }


def google_oauth_device_flow(
    client_id: str,
    scope: str = "openid email",
    device_endpoint: str = "https://accounts.google.com/o/oauth2/device/code",
    token_endpoint: str = "https://oauth2.googleapis.com/token",
) -> dict[str, Any]:
    """Obtain Google OAuth token via device code flow (for Antigravity etc.).

    Requests a device code, prints the verification URL and user code
    (opening a browser when available), then polls the token endpoint up to
    60 times.

    Args:
        client_id: OAuth client ID sent to both endpoints.
        scope: Space-separated scopes to request.
        device_endpoint: URL that issues the device/user code pair.
        token_endpoint: URL polled for the token.

    Returns:
        A dict with ``access_token``, ``refresh_token``, ``expires_in``,
        ``token_type`` and ``scope``.

    Raises:
        RuntimeError: If ``requests`` is unavailable, the server reports a
            terminal OAuth error (e.g. ``access_denied``, ``expired_token``),
            or polling runs out of attempts.
        requests.HTTPError: If the device-code request itself fails.
    """
    if requests is None:
        raise RuntimeError("requests package required. Install with: pip install faigate[oauth]")

    resp = requests.post(device_endpoint, data={"client_id": client_id, "scope": scope}, timeout=30)
    resp.raise_for_status()
    device = resp.json()

    device_code = device["device_code"]
    user_code = device["user_code"]
    # RFC 8628 names the field "verification_uri"; Google's device endpoint
    # has historically returned "verification_url", so accept either.
    verification_uri = (
        device.get("verification_uri")
        or device.get("verification_url")
        or "https://www.google.com/device"
    )
    interval = device.get("interval", 5)

    print(f"Please visit {verification_uri} and enter code: {user_code}")
    if webbrowser:
        webbrowser.open(verification_uri)

    for _ in range(60):
        time.sleep(interval)
        try:
            resp = requests.post(
                token_endpoint,
                data={
                    "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
                    "device_code": device_code,
                    "client_id": client_id,
                },
                timeout=30,
            )
        except requests.RequestException as e:
            # Transient network trouble: keep polling.
            logger.warning("Poll error: %s", e)
            continue

        if resp.status_code == 200:
            token = resp.json()
            return {
                "access_token": token["access_token"],
                "refresh_token": token.get("refresh_token"),
                "expires_in": token.get("expires_in", 3600),
                "token_type": token.get("token_type", "Bearer"),
                "scope": token.get("scope", scope),
            }

        try:
            payload = resp.json()
        except ValueError:
            payload = None
        error = payload.get("error", "") if isinstance(payload, dict) else ""
        if not error and "authorization_pending" in resp.text:
            error = "authorization_pending"

        if error == "authorization_pending":
            continue
        if error == "slow_down":
            # RFC 8628 section 3.5: back off by 5 seconds on every slow_down.
            interval += 5
            continue
        if error and 400 <= resp.status_code < 500:
            # Any other OAuth error (access_denied, expired_token,
            # invalid_client, ...) is terminal; further polling cannot succeed.
            description = payload.get("error_description", "")
            detail = f"{error}: {description}" if description else error
            raise RuntimeError(f"Device code flow failed: {detail}")

        logger.warning("Poll error: HTTP %s from token endpoint", resp.status_code)

    raise RuntimeError("Device code flow timed out")
scope override") + parser.add_argument("--refresh", action="store_true", help="Refresh existing token instead of new login") + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose logging") + args = parser.parse_args() + + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + + try: + if args.provider == "qwen-portal": + if args.refresh: + # Read existing refresh_token and refresh + creds_path = os.path.expanduser(_QWEN_CREDS_PATH) + with open(creds_path) as f: + creds = json.load(f) + rt = creds.get("refresh_token") + if not rt: + raise RuntimeError("No refresh_token in existing credentials.") + token_data = qwen_refresh(rt) + token_data["base_url"] = _qwen_base_url_from_resource(token_data.get("resource_url")) + token_data["model"] = _QWEN_OAUTH_MODEL + else: + # Try reading existing credentials first; fall back to device flow + try: + token_data = qwen_oauth() + print("Using existing Qwen credentials.", file=sys.stderr) + except RuntimeError: + print("No existing credentials found, starting device code flow...", file=sys.stderr) + token_data = qwen_device_code_flow() + + elif args.provider == "claude-code": + token_data = claude_code_oauth() + + elif args.provider == "openai-codex": + token_data = openai_codex_oauth() + + elif args.provider == "google-gemini-cli": + token_data = google_vertex_adc() + + elif args.provider == "google-antigravity": + if args.refresh: + creds_path = os.path.expanduser(_ANTIGRAVITY_CREDS_PATH) + with open(creds_path) as f: + creds = json.load(f) + rt = creds.get("refresh_token") + if not rt: + raise RuntimeError("No refresh_token in existing Antigravity credentials.") + token_data = antigravity_refresh(rt) + else: + try: + token_data = antigravity_oauth() + print("Using existing Antigravity credentials.", file=sys.stderr) + except RuntimeError: + print("No existing credentials, starting browser login...", file=sys.stderr) + token_data = antigravity_login() + + else: + print(f"Unknown 
class TokenStore:
    """Manages OAuth2 tokens for managed providers.

    Tokens are kept in memory as a ``provider -> token data`` mapping and
    persisted to ``tokens.json`` inside ``config_dir``. The file is written
    atomically and is only ever created with owner-only (0o600) permissions.
    """

    def __init__(self, config_dir: Optional[str] = None):
        """Initialize token store.

        Args:
            config_dir: Directory to store tokens.json. Defaults to
                ~/.config/faigate.
        """
        if config_dir is None:
            config_dir = Path.home() / ".config" / "faigate"
        self.config_dir = Path(config_dir).expanduser().resolve()
        self.token_path = self.config_dir / "tokens.json"
        self._tokens: dict[str, dict[str, Any]] = {}
        self._load()

    def _ensure_config_dir(self) -> None:
        """Create config directory if it doesn't exist."""
        self.config_dir.mkdir(parents=True, exist_ok=True)

    def _load(self) -> None:
        """Load tokens from disk.

        A missing file yields an empty store. An unreadable, undecodable or
        non-object file is logged and also yields an empty store.
        """
        self._tokens = {}
        try:
            with open(self.token_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Failed to load tokens from %s: %s", self.token_path, e)
            return
        if not isinstance(loaded, dict):
            logger.warning(
                "Failed to load tokens from %s: %s",
                self.token_path,
                "expected a JSON object",
            )
            return
        self._tokens = loaded
        logger.debug("Loaded tokens for %d providers", len(self._tokens))

    def _save(self) -> None:
        """Save tokens to disk.

        Writes to a sibling temp file created with 0o600 and then renames it
        over ``tokens.json``, so the secrets are never world-readable and a
        crash mid-write cannot leave a truncated file.

        Raises:
            OSError: If the directory or file cannot be written.
        """
        self._ensure_config_dir()
        tmp_path = self.token_path.with_name(self.token_path.name + ".tmp")
        try:
            fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(self._tokens, f, indent=2)
            # The mode passed to os.open only applies on creation; tighten a
            # leftover temp file too.
            os.chmod(tmp_path, 0o600)
            os.replace(tmp_path, self.token_path)
        except OSError as e:
            logger.error("Failed to save tokens to %s: %s", self.token_path, e)
            raise
        logger.debug("Saved tokens for %d providers", len(self._tokens))

    def get(self, provider: str) -> Optional[dict[str, Any]]:
        """Get token data for a provider.

        Returns None if the provider has no stored token.
        """
        return self._tokens.get(provider)

    def set(self, provider: str, token_data: dict[str, Any]) -> None:
        """Store or update token data for a provider.

        Args:
            provider: Provider canonical name (e.g., "qwen-portal").
            token_data: Dictionary containing at least "access_token".
                Should include "refresh_token", "expires_at", "token_type",
                "scope", and "provider_config" if available.

        Raises:
            OSError: If the token file cannot be written.
        """
        self._tokens[provider] = token_data
        self._save()

    def delete(self, provider: str) -> None:
        """Remove token data for a provider."""
        if provider in self._tokens:
            del self._tokens[provider]
            self._save()

    def list_providers(self) -> list[str]:
        """Return list of providers with stored tokens."""
        return list(self._tokens.keys())

    def is_expired(self, provider: str, margin_seconds: int = 60) -> bool:
        """Check if the access token for a provider is expired.

        Args:
            provider: Provider canonical name.
            margin_seconds: Consider token expired this many seconds before
                actual expiry to avoid race conditions.

        Returns:
            True if token is missing or expired, False otherwise.
        """
        token = self.get(provider)
        if not token:
            return True
        expires_at = token.get("expires_at")
        if expires_at is None:
            return False  # No expiry information, assume still valid
        return time.time() >= (expires_at - margin_seconds)

    def refresh_if_needed(
        self,
        provider: str,
        refresh_callback: callable,
        *args,
        **kwargs,
    ) -> bool:
        """Refresh access token if expired.

        Args:
            provider: Provider canonical name.
            refresh_callback: Callable that takes the current token data and
                returns refreshed token data (dict). Should raise an exception
                if refresh fails.
            *args, **kwargs: Passed to refresh_callback.

        Returns:
            True if token was refreshed, False if no refresh needed, no
            refresh token available, or the refresh failed.
        """
        token = self.get(provider)
        if not token:
            logger.debug("No token for %s, cannot refresh", provider)
            return False
        if not self.is_expired(provider):
            logger.debug("Token for %s still valid, skipping refresh", provider)
            return False
        # A key holding None/"" is as useless as a missing key.
        if not token.get("refresh_token"):
            logger.warning("Token for %s expired but no refresh token", provider)
            return False
        try:
            new_token = refresh_callback(token, *args, **kwargs)
            if not isinstance(new_token, dict):
                raise TypeError(
                    f"refresh callback returned {type(new_token).__name__}, expected dict"
                )
            if not new_token.get("refresh_token"):
                # Servers may omit the refresh token on refresh; keep the old
                # one so the next refresh still works.
                new_token = {**new_token, "refresh_token": token["refresh_token"]}
            self.set(provider, new_token)
            logger.info("Refreshed token for %s", provider)
            return True
        except Exception as e:
            # Documented contract: report failure through the return value.
            logger.error("Failed to refresh token for %s: %s", provider, e)
            return False
" + "client_id: 1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com. " + "Token: ~/.gemini/oauth_creds.json. " + "Antigravity's local interface is a gRPC language server (127.0.0.1:/" + "exa.language_server_pb.LanguageServerService/…) – faigate bypasses it and calls " + "the Google Generative Language API (generativelanguage.googleapis.com/v1beta/openai) " + "directly with the OAuth token. Auth: faigate-auth google-antigravity or sign in to Antigravity IDE." + ), + "last_reviewed": "2026-04-04", + }, + "google-gemini-cli": { + "recommended_model": "gc/gemini-2.5-pro", + "aliases": ["google-gemini-cli", "google-vertex"], + "track": "stable", + "offer_track": "oauth", + "provider_type": "oauth", + "auth_modes": ["oauth"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini", + "signup_url": "https://cloud.google.com/vertex-ai", + "watch_sources": [], + "notes": "Google Gemini via Vertex AI – uses gcloud ADC; requires: gcloud auth login", + "last_reviewed": "2026-04-03", + }, "gemini-pro-high": { "recommended_model": get_active_model_id("google/gemini-pro-high"), "aliases": ["gemini-3.1-pro"], @@ -634,6 +687,400 @@ def _get_packages_for_provider(provider_name: str) -> list[dict[str, Any]]: "notes": "BlockRun ClawRouter uses wallet/x402 routing modes rather than a classic API key", # noqa: E501 "last_reviewed": "2026-03-19", }, + # ── xAI / Grok ─────────────────────────────────────────────────────────── + "xai": { + "recommended_model": "grok-3", + "aliases": ["xai", "grok-3"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.x.ai/", + "signup_url": "https://platform.x.ai/", + "watch_sources": [], + "notes": "xAI / Grok models", + "last_reviewed": "2026-04-03", + }, + # ── Z.AI / GLM 
─────────────────────────────────────────────────────────── + "zai": { + "recommended_model": "glm-4.7", + "aliases": ["zai", "z.ai", "glm-4.7"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.z.ai/", + "signup_url": "https://platform.z.ai/", + "watch_sources": [], + "notes": "Z.AI / GLM models", + "last_reviewed": "2026-04-03", + }, + # ── Mistral ────────────────────────────────────────────────────────────── + "mistral": { + "recommended_model": "mistral-large-latest", + "aliases": ["mistral", "mistral-large"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.mistral.ai/", + "signup_url": "https://console.mistral.ai/", + "watch_sources": [], + "notes": "Mistral AI – Mistral Large, Codestral, etc.", + "last_reviewed": "2026-04-03", + }, + # ── Groq ───────────────────────────────────────────────────────────────── + "groq": { + "recommended_model": "llama-3.3-70b-versatile", + "aliases": ["groq", "llama-3.3"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://console.groq.com/docs/quickstart", + "signup_url": "https://console.groq.com/", + "watch_sources": [], + "notes": "Groq – ultra-fast inference (LPU), Llama / DeepSeek", + "last_reviewed": "2026-04-03", + }, + # ── Hugging Face Inference ─────────────────────────────────────────────── + "huggingface": { + "recommended_model": "huggingface/deepseek-ai/DeepSeek-R1", + "aliases": ["huggingface", "hf"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "medium", + "evidence_level": "official", + 
"official_source_url": "https://huggingface.co/docs/api-inference/quicktour", + "signup_url": "https://huggingface.co/", + "watch_sources": [], + "notes": "HuggingFace Inference – OpenAI-compat router", + "last_reviewed": "2026-04-03", + }, + # ── Moonshot AI / Kimi ─────────────────────────────────────────────────── + "moonshot": { + "recommended_model": "moonshot/kimi-k2.5", + "aliases": ["moonshot", "kimi-k2.5"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://platform.moonshot.cn/docs/", + "signup_url": "https://platform.moonshot.cn/", + "watch_sources": [], + "notes": "Moonshot AI / Kimi – OpenAI-compatible endpoint", + "last_reviewed": "2026-04-03", + }, + # ── MiniMax ────────────────────────────────────────────────────────────── + "minimax": { + "recommended_model": "minimax/MiniMax-M2.7", + "aliases": ["minimax", "minimax-m2.7"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://api.minimax.chat/", + "signup_url": "https://platform.minimaxi.com/", + "watch_sources": [], + "notes": "MiniMax – Anthropic-compatible custom endpoint", + "last_reviewed": "2026-04-03", + }, + # ── Volcano Engine / Doubao ────────────────────────────────────────────── + "volcengine": { + "recommended_model": "volcengine/doubao-seed-1-8-251228", + "aliases": ["volcengine", "doubao"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://www.volcengine.com/docs/82379", + "signup_url": "https://console.volcengine.com/", + "watch_sources": [], + "notes": "Volcano Engine – Doubao, Kimi K2.5, GLM 4.7, DeepSeek V3.2 (CN)", + "last_reviewed": "2026-04-03", + }, 
+ # ── BytePlus (international Volcano Engine) ────────────────────────────── + "byteplus": { + "recommended_model": "byteplus/seed-1-8-251228", + "aliases": ["byteplus", "seed"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://docs.byteplus.com/", + "signup_url": "https://console.byteplus.com/", + "watch_sources": [], + "notes": "BytePlus ARK – international access to Volcano Engine models", + "last_reviewed": "2026-04-03", + }, + # ── Qwen (Alibaba) ────────────────────────────────────────────────────── + "qwen": { + "recommended_model": "qwen/qwen3.6-plus", + "aliases": ["qwen", "qwen3.6-plus"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://help.aliyun.com/zh/model-studio/developer-reference/quick-start", + "signup_url": "https://dashscope.aliyun.com/", + "watch_sources": [], + "notes": "Qwen models via Alibaba Cloud", + "last_reviewed": "2026-04-03", + }, + "qwen-portal": { + "recommended_model": "coder-model", + "aliases": ["qwen-portal", "qwen-code"], + "track": "free", + "offer_track": "oauth", + "provider_type": "oauth", + "auth_modes": ["oauth"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://github.com/QwenLM/qwen-code", + "signup_url": "https://chat.qwen.ai/", + "watch_sources": ["https://github.com/QwenLM/qwen-code"], + "notes": "Qwen OAuth free tier – token from ~/.qwen/oauth_creds.json; run: qwen auth login. Model: coder-model (Qwen 3 Coder). 
Endpoint dynamic via resource_url.", + "last_reviewed": "2026-04-04", + }, + # ── KiloCode lanes (individual model-level access) ─────────────────────── + "kilo-auto-frontier": { + "recommended_model": "kilo-auto/frontier", + "aliases": ["kilo-auto/frontier", "kilo-frontier"], + "track": "stable", + "offer_track": "gateway-paid", + "provider_type": "aggregator", + "auth_modes": ["api_key", "byok"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://kilo.ai/docs/gateway/models-and-providers", + "signup_url": "https://kilo.ai/", + "watch_sources": [], + "notes": "Kilo Auto Frontier lane – premium routing through Kilo gateway", + "last_reviewed": "2026-04-03", + }, + "kilo-auto-balanced": { + "recommended_model": "kilo-auto/balanced", + "aliases": ["kilo-auto/balanced", "kilo-balanced"], + "track": "stable", + "offer_track": "gateway-paid", + "provider_type": "aggregator", + "auth_modes": ["api_key", "byok"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://kilo.ai/docs/gateway/models-and-providers", + "signup_url": "https://kilo.ai/", + "watch_sources": [], + "notes": "Kilo Auto Balanced lane – balanced routing through Kilo gateway", + "last_reviewed": "2026-04-03", + }, + "kilo-auto-free": { + "recommended_model": "kilo-auto/free", + "aliases": ["kilo-auto/free", "kilo-free"], + "track": "free", + "offer_track": "free", + "provider_type": "aggregator", + "auth_modes": ["api_key", "byok"], + "volatility": "high", + "evidence_level": "official", + "official_source_url": "https://kilo.ai/docs/gateway/models-and-providers", + "signup_url": "https://kilo.ai/", + "watch_sources": [_COMMUNITY_WATCHLIST], + "notes": "Kilo Auto Free lane – free-tier routing through Kilo gateway", + "last_reviewed": "2026-04-03", + }, + # ── OpenAI Codex (OAuth via ChatGPT) ───────────────────────────────────── + "openai-codex": { + "recommended_model": "openai-codex/gpt-5.3-codex", + "aliases": 
["openai-codex", "codex"], + "track": "stable", + "offer_track": "oauth", + "provider_type": "oauth", + "auth_modes": ["oauth"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://platform.openai.com/docs/models/codex", + "signup_url": "https://platform.openai.com/", + "watch_sources": [], + "notes": "OpenAI Codex (OAuth via ChatGPT) – requires interactive login", + "last_reviewed": "2026-04-03", + }, + # ── OpenCode Zen ──────────────────────────────────────────────────────── + "opencode": { + "recommended_model": "opencode/claude-opus-4-6", + "aliases": ["opencode"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.opencode.ai/", + "signup_url": "https://opencode.ai/", + "watch_sources": [], + "notes": "OpenCode Zen – Anthropic-compatible gateway", + "last_reviewed": "2026-04-03", + }, + # ── Cerebras ──────────────────────────────────────────────────────────── + "cerebras": { + "recommended_model": "llama3.3-70b", + "aliases": ["cerebras", "llama3.3"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.cerebras.ai/", + "signup_url": "https://cerebras.ai/", + "watch_sources": [], + "notes": "Cerebras – fast inference, zai-glm-4.7 / zai-glm-4.6 compatible", + "last_reviewed": "2026-04-03", + }, + # ── GitHub Copilot ────────────────────────────────────────────────────── + "github-copilot": { + "recommended_model": "gpt-4o", + "aliases": ["github-copilot", "copilot"], + "track": "stable", + "offer_track": "oauth", + "provider_type": "oauth", + "auth_modes": ["oauth", "api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.github.com/en/copilot", + "signup_url": "https://github.com/", 
+ "watch_sources": [], + "notes": "GitHub Copilot – requires GH_TOKEN / COPILOT_GITHUB_TOKEN", + "last_reviewed": "2026-04-03", + }, + # ── Synthetic ─────────────────────────────────────────────────────────── + "synthetic": { + "recommended_model": "synthetic/hf:MiniMaxAI/MiniMax-M2.1", + "aliases": ["synthetic"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "medium", + "evidence_level": "official", + "official_source_url": "https://docs.synthetic.new/", + "signup_url": "https://synthetic.new/", + "watch_sources": [], + "notes": "Synthetic – Anthropic-compat; exposes HuggingFace models (MiniMax, etc.)", + "last_reviewed": "2026-04-03", + }, + # ── Kimi Coding ───────────────────────────────────────────────────────── + "kimi-coding": { + "recommended_model": "kimi-coding/k2p5", + "aliases": ["kimi-coding", "kimi-k2.5"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://platform.moonshot.cn/docs/", + "signup_url": "https://platform.moonshot.cn/", + "watch_sources": [], + "notes": "Kimi Coding – Anthropic-compat endpoint via Moonshot", + "last_reviewed": "2026-04-03", + }, + # ── Vercel AI Gateway ────────────────────────────────────────────────── + "vercel-ai-gateway": { + "recommended_model": "vercel-ai-gateway/anthropic/claude-opus-4.6", + "aliases": ["vercel-ai-gateway", "vercel"], + "track": "stable", + "offer_track": "direct", + "provider_type": "direct", + "auth_modes": ["api_key"], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://vercel.com/docs/ai/ai-gateway", + "signup_url": "https://vercel.com/", + "watch_sources": [], + "notes": "Vercel AI Gateway – multi-model proxy", + "last_reviewed": "2026-04-03", + }, + # ── Local runtimes ────────────────────────────────────────────────────── + "ollama": { + 
"recommended_model": "ollama/llama3.3", + "aliases": ["ollama"], + "track": "local", + "offer_track": "local", + "provider_type": "local", + "auth_modes": [], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://ollama.com/", + "signup_url": "https://ollama.com/", + "watch_sources": [], + "notes": "Ollama – local LLM runtime, OpenAI-compat at :11434", + "last_reviewed": "2026-04-03", + }, + "vllm": { + "recommended_model": "vllm/your-model-id", + "aliases": ["vllm"], + "track": "local", + "offer_track": "local", + "provider_type": "local", + "auth_modes": [], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.vllm.ai/", + "signup_url": "https://vllm.ai/", + "watch_sources": [], + "notes": "vLLM – local/self-hosted OpenAI-compat server at :8000", + "last_reviewed": "2026-04-03", + }, + "lmstudio": { + "recommended_model": "lmstudio/minimax-m2.1-gs32", + "aliases": ["lmstudio"], + "track": "local", + "offer_track": "local", + "provider_type": "local", + "auth_modes": [], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://lmstudio.ai/", + "signup_url": "https://lmstudio.ai/", + "watch_sources": [], + "notes": "LM Studio – local OpenAI-compat server at :1234", + "last_reviewed": "2026-04-03", + }, + "litellm": { + "recommended_model": "litellm/your-model-id", + "aliases": ["litellm"], + "track": "local", + "offer_track": "local", + "provider_type": "local", + "auth_modes": [], + "volatility": "low", + "evidence_level": "official", + "official_source_url": "https://docs.litellm.ai/", + "signup_url": "https://litellm.ai/", + "watch_sources": [], + "notes": "LiteLLM proxy – OpenAI-compat gateway to 100+ providers at :4000", + "last_reviewed": "2026-04-03", + }, } diff --git a/faigate/providers.py b/faigate/providers.py index 7e2b62b..531f646 100644 --- a/faigate/providers.py +++ b/faigate/providers.py @@ -11,6 +11,12 @@ import httpx +# OAuth backend 
(optional) +try: + from .oauth.backend import OAuthBackend +except ImportError: + OAuthBackend = None + from .lane_registry import get_provider_transport_binding logger = logging.getLogger("faigate.providers") @@ -49,6 +55,19 @@ def classify_runtime_issue( return "degraded" +def create_provider_backend(name: str, cfg: dict) -> ProviderBackend: + """Create a provider backend instance, handling OAuth wrapping if needed.""" + backend_type = cfg.get("backend", "openai-compat") + if backend_type == "oauth": + if OAuthBackend is None: + raise ImportError( + "OAuth backend requested but faigate.oauth.backend could not be imported. " + "Make sure optional OAuth dependencies are installed." + ) + return OAuthBackend(name, cfg) + return ProviderBackend(name, cfg) + + @dataclass class ProviderHealth: """Tracks health state for a single provider.""" diff --git a/faigate/registry.py b/faigate/registry.py index e852912..ca78b81 100644 --- a/faigate/registry.py +++ b/faigate/registry.py @@ -223,6 +223,105 @@ class ProviderDef(TypedDict, total=False): pricing={"input": 0.0, "output": 0.0}, notes="HuggingFace Inference – OpenAI-compat router", ), + # ── DeepSeek ────────────────────────────────────────────────────────── + "deepseek": ProviderDef( + backend="openai-compat", + base_url="https://api.deepseek.com/v1", + base_url_env="DEEPSEEK_BASE_URL", + api_key_env="DEEPSEEK_API_KEY", + tier="default", + example_model="deepseek-reasoner", + pricing={"input": 0.55, "output": 2.19}, + notes="DeepSeek – deepseek-chat (V3) and deepseek-reasoner (R1)", + ), + # ── Together AI ─────────────────────────────────────────────────────── + "together": ProviderDef( + backend="openai-compat", + base_url="https://api.together.xyz/v1", + base_url_env="TOGETHER_BASE_URL", + api_key_env="TOGETHER_API_KEY", + tier="cheap", + example_model="together/meta-llama/Llama-3.3-70B-Instruct-Turbo", + pricing={"input": 0.18, "output": 0.18}, + notes="Together AI – serverless inference, Llama / Mixtral / DeepSeek 
/ Qwen", + ), + # ── Fireworks AI ────────────────────────────────────────────────────── + "fireworks": ProviderDef( + backend="openai-compat", + base_url="https://api.fireworks.ai/inference/v1", + base_url_env="FIREWORKS_BASE_URL", + api_key_env="FIREWORKS_API_KEY", + tier="cheap", + example_model="fireworks/accounts/fireworks/models/deepseek-r1", + pricing={"input": 0.22, "output": 0.88}, + notes="Fireworks AI – fast serverless inference, DeepSeek / Llama / Qwen", + ), + # ── Cohere ──────────────────────────────────────────────────────────── + "cohere": ProviderDef( + backend="openai-compat", + base_url="https://api.cohere.com/compatibility/v1", + base_url_env="COHERE_BASE_URL", + api_key_env="COHERE_API_KEY", + tier="default", + example_model="command-a-03-2025", + pricing={"input": 2.50, "output": 10.00}, + notes="Cohere – Command A/R series, OpenAI-compat at /compatibility/v1", + ), + # ── Nebius AI ───────────────────────────────────────────────────────── + "nebius": ProviderDef( + backend="openai-compat", + base_url="https://api.studio.nebius.ai/v1", + base_url_env="NEBIUS_BASE_URL", + api_key_env="NEBIUS_API_KEY", + tier="cheap", + example_model="nebius/deepseek-ai/DeepSeek-R1", + pricing={"input": 0.80, "output": 3.20}, + notes="Nebius AI Studio – DeepSeek / Llama / Qwen on European infra", + ), + # ── SiliconFlow ─────────────────────────────────────────────────────── + "siliconflow": ProviderDef( + backend="openai-compat", + base_url="https://api.siliconflow.cn/v1", + base_url_env="SILICONFLOW_BASE_URL", + api_key_env="SILICONFLOW_API_KEY", + tier="cheap", + example_model="siliconflow/deepseek-ai/DeepSeek-R1", + pricing={"input": 0.14, "output": 0.55}, + notes="SiliconFlow – low-cost inference (CN), DeepSeek / Qwen / GLM", + ), + # ── Hyperbolic ──────────────────────────────────────────────────────── + "hyperbolic": ProviderDef( + backend="openai-compat", + base_url="https://api.hyperbolic.xyz/v1", + base_url_env="HYPERBOLIC_BASE_URL", + 
api_key_env="HYPERBOLIC_API_KEY", + tier="cheap", + example_model="hyperbolic/deepseek-ai/DeepSeek-R1", + pricing={"input": 0.20, "output": 0.80}, + notes="Hyperbolic – GPU cloud inference, DeepSeek / Llama / Qwen", + ), + # ── Perplexity ──────────────────────────────────────────────────────── + "perplexity": ProviderDef( + backend="openai-compat", + base_url="https://api.perplexity.ai", + base_url_env="PERPLEXITY_BASE_URL", + api_key_env="PERPLEXITY_API_KEY", + tier="default", + example_model="sonar-pro", + pricing={"input": 3.00, "output": 15.00}, + notes="Perplexity – online/search-augmented models (sonar, sonar-pro, sonar-reasoning)", + ), + # ── NVIDIA NIM ──────────────────────────────────────────────────────── + "nvidia-nim": ProviderDef( + backend="openai-compat", + base_url="https://integrate.api.nvidia.com/v1", + base_url_env="NVIDIA_NIM_BASE_URL", + api_key_env="NVIDIA_API_KEY", + tier="default", + example_model="nvidia-nim/deepseek-ai/deepseek-r1", + pricing={"input": 0.0, "output": 0.0}, + notes="NVIDIA NIM – optimized inference on NVIDIA infra, DeepSeek / Llama / Mistral", + ), } @@ -327,6 +426,17 @@ class ProviderDef(TypedDict, total=False): pricing={"input": 0.0, "output": 0.0}, notes="MiniMax – Anthropic-compat custom endpoint", ), + # ── Qwen (Alibaba Cloud) ────────────────────────────────────────────── + "qwen": ProviderDef( + backend="openai-compat", + base_url="https://dashscope.aliyun.com/api/v1", + base_url_env="QWEN_BASE_URL", + api_key_env="QWEN_API_KEY", + tier="default", + example_model="qwen/qwen3.6-plus", + pricing={"input": 0.0, "output": 0.0}, + notes="Qwen models via Alibaba Cloud – OpenAI-compatible endpoint", + ), } @@ -393,28 +503,63 @@ class ProviderDef(TypedDict, total=False): # --------------------------------------------------------------------------- OAUTH: dict[str, ProviderDef] = { - # ── Google Vertex AI ────────────────────────────────────────────────── - "google-vertex": ProviderDef( + # ── Google Gemini CLI (Vertex AI 
via gcloud ADC) ─────────────────────── + "google-gemini-cli": ProviderDef( backend="openai-compat", base_url="https://us-central1-aiplatform.googleapis.com/v1", base_url_env="GOOGLE_VERTEX_BASE_URL", api_key_env="GOOGLE_APPLICATION_CREDENTIALS", auth_optional=True, tier="mid", - example_model="google-vertex/gemini-2.5-pro", + example_model="gc/gemini-2.5-pro", pricing={"input": 0.0, "output": 0.0}, - notes="Google Vertex AI – uses gcloud ADC; interactive setup required", + notes="Google Gemini via Vertex AI – uses gcloud ADC; requires: gcloud auth login", ), - # ── Qwen OAuth (free tier) ──────────────────────────────────────────── + # ── Qwen OAuth (free tier via qwen-code CLI) ────────────────────────── "qwen-portal": ProviderDef( backend="openai-compat", - base_url="https://qwen-portal.example.com/v1", # placeholder; set via oauth + base_url="https://portal.qwen.ai/compatible-mode/v1", + base_url_env="QWEN_PORTAL_BASE_URL", api_key_env="QWEN_PORTAL_TOKEN", auth_optional=True, tier="default", - example_model="qwen-portal/coder-model", + example_model="coder-model", pricing={"input": 0.0, "output": 0.0}, - notes=("Qwen OAuth (free tier) – device-code flow; requires: openclaw plugins enable qwen-portal-auth"), + notes="Qwen OAuth (free tier) – reads token from ~/.qwen/oauth_creds.json; run: qwen auth login", + ), + # ── Claude Code (OAuth via Anthropic) ────────────────────────────────── + "claude-code": ProviderDef( + backend="anthropic-compat", + base_url="https://api.anthropic.com/v1", + base_url_env="ANTHROPIC_BASE_URL", + api_key_env="ANTHROPIC_CODEX_TOKEN", + auth_optional=True, + tier="default", + example_model="claude-code", + pricing={"input": 0.0, "output": 0.0}, + notes="Claude Code – special coding model via Anthropic OAuth", + ), + # ── Google Antigravity (Google OAuth – Generative Language API) ──────── + # Network discovery result: Antigravity's client-facing interface is a + # local ephemeral gRPC language server 
(127.0.0.1:/exa.language_server_pb…) + # that itself proxies to Google's backend. The OAuth token from + # ~/.gemini/oauth_creds.json grants access to the Google Generative + # Language API directly – that is the correct upstream for faigate. + "google-antigravity": ProviderDef( + backend="openai-compat", + base_url="https://generativelanguage.googleapis.com/v1beta/openai", + base_url_env="ANTIGRAVITY_BASE_URL", # override if using a different Google endpoint + api_key_env="ANTIGRAVITY_TOKEN", + auth_optional=True, + tier="default", + example_model="gemini-2.5-pro", + pricing={"input": 0.0, "output": 0.0}, + notes=( + "Google Antigravity – Google OAuth (client_id: 1071006060591-...apps.googleusercontent.com); " + "token from ~/.gemini/oauth_creds.json. Antigravity's local gRPC LS (127.0.0.1:) " + "is its internal proxy – faigate uses the Google Generative Language API directly. " + "Run: faigate-auth google-antigravity or sign in to the Antigravity IDE." + ), ), } diff --git a/faigate/router.py b/faigate/router.py index 52a8b23..d78649d 100644 --- a/faigate/router.py +++ b/faigate/router.py @@ -414,6 +414,7 @@ "budget": 0, "free": -1, "variable": 0, + "local": 2, }, "balanced": { "premium": 1, @@ -423,6 +424,7 @@ "budget": 4, "free": 4, "variable": 2, + "local": 5, }, "eco": { "premium": -2, @@ -432,6 +434,7 @@ "budget": 6, "free": 8, "variable": 3, + "local": 8, }, "free": { "premium": -4, @@ -441,6 +444,7 @@ "budget": 7, "free": 10, "variable": 4, + "local": 10, }, } diff --git a/faigate/wizard.py b/faigate/wizard.py index b37c15b..15d62c5 100644 --- a/faigate/wizard.py +++ b/faigate/wizard.py @@ -13,6 +13,7 @@ import yaml from dotenv import dotenv_values +from .config import dedupe_model_shortcut_aliases from .lane_registry import ( get_active_model_id, get_active_model_label, @@ -22,8 +23,7 @@ get_route_add_recommendations, ) from .provider_catalog import build_provider_refresh_guidance, get_provider_catalog -from .providers import ProviderBackend -from .config 
import dedupe_model_shortcut_aliases +from .providers import ProviderBackend, create_provider_backend ProviderFactory = dict[str, Any] @@ -666,7 +666,7 @@ async def _probe_providers_live( runtime_cfg = _expand_env_with_values(deepcopy(provider), env_values) if not isinstance(runtime_cfg, dict): continue - backend = ProviderBackend(name, runtime_cfg) + backend = create_provider_backend(name, runtime_cfg) try: ok = await backend.probe_health(timeout_seconds=timeout_seconds) results[name] = { diff --git a/pyproject.toml b/pyproject.toml index c29ba6b..08a4a39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "faigate" -version = "1.21.0" +version = "2.0.1" description = "Local OpenAI-compatible routing gateway for OpenClaw and other AI-native clients." readme = "README.md" license = "Apache-2.0" @@ -44,10 +44,15 @@ dev = [ "bandit[toml]>=1.8.0", "jinja2>=3.1.0", ] +oauth = [ + "requests>=2.31.0", + "google-auth>=2.0.0", +] [project.scripts] faigate = "faigate.main:main" faigate-stats = "faigate.cli:main" +faigate-auth = "faigate.oauth.cli:main" [project.urls] Homepage = "https://github.com/fusionAIze/faigate"