diff --git a/lat.md/cli.md b/lat.md/cli.md index be0e157..f80be96 100644 --- a/lat.md/cli.md +++ b/lat.md/cli.md @@ -321,6 +321,7 @@ Provider is auto-detected from the resolved key prefix: - `sk-...` — OpenAI (uses `text-embedding-3-small`, 1536 dims) - `vck_...` — Vercel AI Gateway (uses `openai/text-embedding-3-small`, 1536 dims) +- `di_...` — DeepInfra (uses `Qwen/Qwen3-Embedding-0.6B`, 1024 dims; `di_` prefix is stripped before the key is sent) - `sk-ant-...` — Anthropic (not supported, errors with guidance) - `REPLAY_LAT_LLM_KEY::` — test-only replay server for offline testing diff --git a/lat.md/tests/search.md b/lat.md/tests/search.md index 843e755..99483f8 100644 --- a/lat.md/tests/search.md +++ b/lat.md/tests/search.md @@ -8,7 +8,7 @@ Tests in `tests/search.test.ts`. ## Provider Detection -Unit tests (always run). Verify `detectProvider` correctly identifies OpenAI (`sk-`), Vercel (`vck_`), rejects Anthropic (`sk-ant-`) with a helpful message, and rejects unknown prefixes. +Unit tests (always run). Verify `detectProvider` correctly identifies OpenAI (`sk-`), Vercel (`vck_`), DeepInfra (`di_`), rejects Anthropic (`sk-ant-`) with a helpful message, and rejects unknown prefixes. For DeepInfra, also verify that the `di_` prefix is stripped from the key before it appears in the `Authorization` header. ## RAG Replay Tests diff --git a/src/search/provider.ts b/src/search/provider.ts index 16ee2b6..2e0c8b6 100644 --- a/src/search/provider.ts +++ b/src/search/provider.ts @@ -28,6 +28,17 @@ const vercel: EmbeddingProvider = { }), }; +const deepinfra: EmbeddingProvider = { + name: 'deepinfra', + apiBase: 'https://api.deepinfra.com/v1/openai', + model: 'Qwen/Qwen3-Embedding-0.6B', + dimensions: 1024, + headers: (key) => ({ + Authorization: `Bearer ${key.startsWith('di_') ? key.slice('di_'.length) : key}`, + 'Content-Type': 'application/json', + }), +}; + export function detectProvider(key: string): EmbeddingProvider { if (key.startsWith('REPLAY_LAT_LLM_KEY::')) { const replayUrl = key.slice('REPLAY_LAT_LLM_KEY::'.length); @@ -46,7 +57,8 @@ export function detectProvider(key: string): EmbeddingProvider { } if (key.startsWith('vck_')) return vercel; if (key.startsWith('sk-')) return openai; + if (key.startsWith('di_')) return deepinfra; throw new Error( - `Unrecognized LAT_LLM_KEY prefix. Supported: OpenAI (sk-...), Vercel AI Gateway (vck_...).`, + `Unrecognized LAT_LLM_KEY prefix. Supported: OpenAI (sk-...), Vercel AI Gateway (vck_...), DeepInfra (di_...).`, ); } diff --git a/tests/search.test.ts b/tests/search.test.ts index bc284dd..7947b3b 100644 --- a/tests/search.test.ts +++ b/tests/search.test.ts @@ -27,6 +27,16 @@ describe('detectProvider', () => { expect(p.name).toBe('vercel'); }); + it('detects DeepInfra key', () => { + const p = detectProvider('di_abc123'); + expect(p.name).toBe('deepinfra'); + }); + + it('strips di_ prefix from DeepInfra key in Authorization header', () => { + const p = detectProvider('di_abc123'); + expect(p.headers('di_abc123').Authorization).toBe('Bearer abc123'); + }); + it('rejects Anthropic key with helpful message', () => { expect(() => detectProvider('sk-ant-abc123')).toThrow(/Anthropic/); });