diff --git a/src/providers/embedding/openai.ts b/src/providers/embedding/openai.ts index 99205c6..308479f 100644 --- a/src/providers/embedding/openai.ts +++ b/src/providers/embedding/openai.ts @@ -4,19 +4,48 @@ import { getEnvVar } from "../../config.js"; const DEFAULT_BASE_URL = "https://api.openai.com"; const DEFAULT_MODEL = "text-embedding-3-small"; +/** + * Known OpenAI embedding model dimensions. Extend as new models ship. + * Override in any case via OPENAI_EMBEDDING_DIMENSIONS for custom or + * self-hosted OpenAI-compatible endpoints returning non-standard sizes. + */ +const MODEL_DIMENSIONS: Record = { + "text-embedding-3-small": 1536, + "text-embedding-3-large": 3072, + "text-embedding-ada-002": 1536, +}; + +const DEFAULT_DIMENSIONS = MODEL_DIMENSIONS[DEFAULT_MODEL] ?? 1536; + +function resolveDimensions(model: string, override: string | undefined): number { + if (override !== undefined && override.trim().length > 0) { + const parsed = parseInt(override, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error( + `OPENAI_EMBEDDING_DIMENSIONS must be a positive integer, got: ${override}`, + ); + } + return parsed; + } + return MODEL_DIMENSIONS[model] ?? DEFAULT_DIMENSIONS; +} + /** * OpenAI-compatible embedding provider. * * Required env vars: - * OPENAI_API_KEY — API key + * OPENAI_API_KEY — API key * * Optional: - * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com) - * OPENAI_EMBEDDING_MODEL — model name (default: text-embedding-3-small) + * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com) + * OPENAI_EMBEDDING_MODEL — model name (default: text-embedding-3-small) + * OPENAI_EMBEDDING_DIMENSIONS — override reported dimensions (required for + * custom / self-hosted models not in the + * MODEL_DIMENSIONS table above) */ export class OpenAIEmbeddingProvider implements EmbeddingProvider { readonly name = "openai"; - readonly dimensions = 1536; + readonly dimensions: number; private apiKey: string; private baseUrl: string; private model: string; @@ -28,6 +57,10 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider { getEnvVar("OPENAI_BASE_URL") || DEFAULT_BASE_URL; this.model = getEnvVar("OPENAI_EMBEDDING_MODEL") || DEFAULT_MODEL; + this.dimensions = resolveDimensions( + this.model, + getEnvVar("OPENAI_EMBEDDING_DIMENSIONS"), + ); } async embed(text: string): Promise { diff --git a/test/embedding-provider.test.ts b/test/embedding-provider.test.ts index 2cf3271..48092eb 100644 --- a/test/embedding-provider.test.ts +++ b/test/embedding-provider.test.ts @@ -55,6 +55,7 @@ describe("OpenAIEmbeddingProvider", () => { process.env = { ...originalEnv }; delete process.env["OPENAI_BASE_URL"]; delete process.env["OPENAI_EMBEDDING_MODEL"]; + delete process.env["OPENAI_EMBEDDING_DIMENSIONS"]; }); afterEach(() => { @@ -103,4 +104,48 @@ describe("OpenAIEmbeddingProvider", () => { fetchSpy.mockRestore(); }); + + it("derives dimensions from model in the known-models table", () => { + process.env["OPENAI_EMBEDDING_MODEL"] = "text-embedding-3-large"; + const large = new OpenAIEmbeddingProvider("test-key"); + expect(large.dimensions).toBe(3072); + + process.env["OPENAI_EMBEDDING_MODEL"] = "text-embedding-ada-002"; + const ada = new OpenAIEmbeddingProvider("test-key"); + expect(ada.dimensions).toBe(1536); + + process.env["OPENAI_EMBEDDING_MODEL"] = "text-embedding-3-small"; + const small = new OpenAIEmbeddingProvider("test-key"); + expect(small.dimensions).toBe(1536); + }); + + it("OPENAI_EMBEDDING_DIMENSIONS overrides the model-derived dimensions", () => { + process.env["OPENAI_EMBEDDING_MODEL"] = "text-embedding-3-large"; + process.env["OPENAI_EMBEDDING_DIMENSIONS"] = "768"; + const provider = new OpenAIEmbeddingProvider("test-key"); + expect(provider.dimensions).toBe(768); + }); + + it("falls back to 1536 for unknown custom models", () => { + process.env["OPENAI_EMBEDDING_MODEL"] = "mystery-self-hosted-model"; + const provider = new OpenAIEmbeddingProvider("test-key"); + expect(provider.dimensions).toBe(1536); + }); + + it("rejects invalid OPENAI_EMBEDDING_DIMENSIONS values", () => { + process.env["OPENAI_EMBEDDING_DIMENSIONS"] = "not-a-number"; + expect(() => new OpenAIEmbeddingProvider("test-key")).toThrow( + /OPENAI_EMBEDDING_DIMENSIONS must be a positive integer/, + ); + + process.env["OPENAI_EMBEDDING_DIMENSIONS"] = "-5"; + expect(() => new OpenAIEmbeddingProvider("test-key")).toThrow( + /OPENAI_EMBEDDING_DIMENSIONS must be a positive integer/, + ); + + process.env["OPENAI_EMBEDDING_DIMENSIONS"] = "0"; + expect(() => new OpenAIEmbeddingProvider("test-key")).toThrow( + /OPENAI_EMBEDDING_DIMENSIONS must be a positive integer/, + ); + }); });