Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,47 @@ Semantic search (`lat search`) requires an OpenAI (`sk-...`) or Vercel AI Gatewa
3. `LAT_LLM_KEY_HELPER` env var — shell command that prints the key (10s timeout)
4. Config file — saved by `lat init`. Run `lat config` to see its location.

### Other providers

You can point `lat search` at any embedding server that exposes an OpenAI-compatible `/v1/embeddings` endpoint, whether hosted or local.

Example for [OpenRouter](https://openrouter.ai) configured via environment variables:

```bash
export LAT_LLM_BASE=https://openrouter.ai/api/v1
export LAT_LLM_MODEL=qwen/qwen3-embedding-8b
export LAT_LLM_DIMENSIONS=4096 # must match model output dimensions
export LAT_LLM_KEY=sk-or-v1-xyz
```

Or via the config file (run `lat config` to see its location):

```json
{
"llm_base": "https://openrouter.ai/api/v1",
"llm_model": "qwen/qwen3-embedding-8b",
"llm_dimensions": 4096
}
```
Environment variables take precedence over config file values.

### Local models

For example, to use `llama-server` from [llama.cpp](https://github.com/ggerganov/llama.cpp), start a local server with an embedding model:

```bash
llama-server -hf Qwen/Qwen3-Embedding-0.6B-GGUF --embedding --gpu-layers 99
```

Then point `lat` at the local server:

```bash
export LAT_LLM_BASE=http://localhost:8080/v1
export LAT_LLM_DIMENSIONS=1024
```

When `LAT_LLM_BASE` is set, the API key is optional. If your server doesn't require authentication, you can omit `LAT_LLM_KEY` entirely.

## Development

Requires Node.js 22+ and pnpm.
Expand Down
8 changes: 5 additions & 3 deletions src/cli/hook.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { plainStyler, type CmdContext } from '../context.js';
import { expandPrompt } from './expand.js';
import { runSearch } from './search.js';
import { getSection, formatSectionOutput } from './section.js';
import { getLlmKey } from '../config.js';
import { getLlmKey, readConfig } from '../config.js';
import { detectProvider } from '../search/provider.js';
import { checkMd, checkCodeRefs, checkIndex, checkSections } from './check.js';
import { SOURCE_EXTENSIONS } from '../source-parser.js';

Expand Down Expand Up @@ -62,15 +63,16 @@ async function searchAndExpand(
ctx: CmdContext,
userPrompt: string,
): Promise<string | null> {
const config = readConfig();
let key: string | undefined;
try {
key = getLlmKey();
} catch {
return null;
}
if (!key) return null;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove this check? Can it not simply stay as is?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to allow connecting to local llama-server without authentication.

const provider = detectProvider(key, config);

const result = await runSearch(ctx.latDir, userPrompt, key, 5);
const result = await runSearch(ctx.latDir, userPrompt, provider, key, 5);
if (result.matches.length === 0) return null;

const parts: string[] = [
Expand Down
5 changes: 5 additions & 0 deletions src/cli/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,11 @@ async function setupLlmKey(
styleText('yellow', ' Unrecognized key prefix.') +
' Expected sk-... (OpenAI) or vck_... (Vercel AI Gateway).',
);
console.log(
' For a custom endpoint (e.g. local llama-server), set ' +
styleText('cyan', 'LAT_LLM_BASE') +
' instead.',
);
console.log(' Saving anyway — you can update it later.');
}

Expand Down
47 changes: 30 additions & 17 deletions src/cli/search.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { CmdContext, CmdResult, Styler } from '../context.js';
import { openDb, ensureSchema, closeDb } from '../search/db.js';
import { detectProvider } from '../search/provider.js';
import type { EmbeddingProvider } from '../search/provider.js';
import { indexSections, type IndexStats } from '../search/index.js';
import { searchSections } from '../search/search.js';
import {
Expand All @@ -24,14 +24,11 @@ export type IndexProgress = {

async function withDb<T>(
latDir: string,
key: string,
provider: EmbeddingProvider,
key: string | undefined,
progress: IndexProgress | undefined,
fn: (
db: Awaited<ReturnType<typeof openDb>>,
provider: ReturnType<typeof detectProvider>,
) => Promise<T>,
fn: (db: Awaited<ReturnType<typeof openDb>>) => Promise<T>,
): Promise<T> {
const provider = detectProvider(key);
const db = openDb(latDir);

try {
Expand All @@ -44,7 +41,7 @@ async function withDb<T>(
const stats = await indexSections(latDir, db, provider, key);
progress?.afterIndex?.(stats, isEmpty);

return await fn(db, provider);
return await fn(db);
} finally {
await closeDb(db);
}
Expand All @@ -57,11 +54,12 @@ async function withDb<T>(
export async function runSearch(
latDir: string,
query: string,
key: string,
provider: EmbeddingProvider,
key: string | undefined,
limit: number,
progress?: IndexProgress,
): Promise<SearchResult> {
return withDb(latDir, key, progress, async (db, provider) => {
return withDb(latDir, provider, key, progress, async (db) => {
const results = await searchSections(db, query, provider, key, limit);
if (results.length === 0) {
return { query, matches: [] };
Expand All @@ -85,10 +83,11 @@ export async function runSearch(
*/
export async function runIndex(
latDir: string,
key: string,
provider: EmbeddingProvider,
key: string | undefined,
progress?: IndexProgress,
): Promise<void> {
await withDb(latDir, key, progress, async () => {});
await withDb(latDir, provider, key, progress, async () => {});
}

export function cliProgress(reindex: boolean, s: Styler): IndexProgress {
Expand Down Expand Up @@ -123,18 +122,25 @@ export async function searchCommand(
opts: { limit: number; reindex?: boolean },
progress?: IndexProgress,
): Promise<CmdResult> {
const { getLlmKey, getConfigPath } = await import('../config.js');
const { getLlmKey, readConfig, getConfigPath } = await import('../config.js');
const { detectProvider } = await import('../search/provider.js');

const config = readConfig();
let key: string | undefined;
try {
key = getLlmKey();
} catch (err) {
return { output: (err as Error).message, isError: true };
}
if (!key) {

let provider: Awaited<ReturnType<typeof detectProvider>>;
try {
provider = detectProvider(key, config);
} catch (err) {
const s = ctx.styler;
return {
output:
s.red('No API key configured.') +
s.red((err as Error).message) +
' Provide a key via LAT_LLM_KEY, LAT_LLM_KEY_FILE, LAT_LLM_KEY_HELPER, or run ' +
s.cyan('lat init') +
(ctx.mode === 'cli'
Expand All @@ -146,11 +152,18 @@ export async function searchCommand(
}

if (!query) {
await runIndex(ctx.latDir, key, progress);
await runIndex(ctx.latDir, provider, key, progress);
return { output: '' };
}

const result = await runSearch(ctx.latDir, query, key, opts.limit, progress);
const result = await runSearch(
ctx.latDir,
query,
provider,
key,
opts.limit,
progress,
);

if (result.matches.length === 0) {
return { output: 'No results found.' };
Expand Down
3 changes: 3 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export function getConfigPath(): string {

export type LatConfig = {
llm_key?: string;
llm_base?: string;
llm_model?: string;
llm_dimensions?: number;
};

export function readConfig(): LatConfig {
Expand Down
17 changes: 16 additions & 1 deletion src/search/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,29 @@ export async function ensureSchema(
db: Client,
dimensions: number,
): Promise<void> {
const embeddingType = `F32_BLOB(${dimensions})`;

// Detect dimension change from the column type in the existing schema
const schema = await db.execute(
`SELECT sql FROM sqlite_master WHERE type='table' AND name='sections'`,
);
if (
schema.rows.length > 0 &&
!String(schema.rows[0].sql).includes(embeddingType)
) {
await db.execute('DROP INDEX IF EXISTS sections_vec_idx');
await db.execute('DROP TABLE IF EXISTS sections');
process.stderr.write(`Embedding type changed, re-indexing...\n`);
}

await db.execute(
`CREATE TABLE IF NOT EXISTS sections (
id TEXT PRIMARY KEY,
file TEXT NOT NULL,
heading TEXT NOT NULL,
content TEXT NOT NULL,
content_hash TEXT NOT NULL,
embedding F32_BLOB(${dimensions}),
embedding ${embeddingType},
updated_at INTEGER NOT NULL
)`,
);
Expand Down
2 changes: 1 addition & 1 deletion src/search/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const MAX_BATCH = 2048;
export async function embed(
texts: string[],
provider: EmbeddingProvider,
key: string,
key?: string,
): Promise<number[][]> {
const results: number[][] = [];

Expand Down
2 changes: 1 addition & 1 deletion src/search/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export async function indexSections(
latDir: string,
db: Client,
provider: EmbeddingProvider,
key: string,
key?: string,
): Promise<IndexStats> {
const projectRoot = dirname(latDir);
const allSections = await loadAllSections(latDir);
Expand Down
41 changes: 37 additions & 4 deletions src/search/provider.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import type { LatConfig } from '../config.js';

export type EmbeddingProvider = {
name: string;
apiBase: string;
model: string;
dimensions: number;
headers: (key: string) => Record<string, string>;
headers: (key?: string) => Record<string, string>;
};

const openai: EmbeddingProvider = {
Expand All @@ -28,8 +30,32 @@ const vercel: EmbeddingProvider = {
}),
};

export function detectProvider(key: string): EmbeddingProvider {
if (key.startsWith('REPLAY_LAT_LLM_KEY::')) {
function customProvider(config: LatConfig): EmbeddingProvider | null {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe check more carefully. Number() can produce NaN for invalid strings. Suggested code below.

/**
 * Builds an embedding provider for a user-configured endpoint.
 *
 * Each setting is read from its environment variable first and falls back to
 * the config value: `LAT_LLM_BASE` / `llm_base`, `LAT_LLM_MODEL` / `llm_model`,
 * `LAT_LLM_DIMENSIONS` / `llm_dimensions`.
 *
 * @param config - Config values used when the matching environment variable is unset.
 * @returns The custom provider, or `null` when no base URL is configured.
 * @throws Error when `LAT_LLM_DIMENSIONS` is set but is not a positive integer.
 */
function customProvider(config: LatConfig): EmbeddingProvider | null {
  const base = process.env.LAT_LLM_BASE ?? config.llm_base;
  if (!base) return null;

  const envDimensions = process.env.LAT_LLM_DIMENSIONS;
  let dimensions = config.llm_dimensions ?? 1536;
  if (envDimensions != null) {
    const trimmed = envDimensions.trim();
    // Require the whole value to be decimal digits: parseInt() alone would
    // silently accept "1024abc" as 1024 and "12.5" as 12.
    const parsed = /^\d+$/.test(trimmed) ? Number(trimmed) : NaN;
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
      throw new Error(`Invalid LAT_LLM_DIMENSIONS: "${envDimensions}"`);
    }
    dimensions = parsed;
  }

  return {
    name: 'custom',
    apiBase: base,
    model: process.env.LAT_LLM_MODEL ?? config.llm_model ?? 'default',
    dimensions,
    headers: (k) => {
      const h: Record<string, string> = {
        'Content-Type': 'application/json',
      };
      // Only send credentials when a key was actually provided.
      if (k) h['Authorization'] = `Bearer ${k}`;
      return h;
    },
  };
}

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, probably want to extract it as variable instead of IIFE

const base = process.env.LAT_LLM_BASE ?? config.llm_base;
if (!base) return null;
return {
name: 'custom',
apiBase: base,
model: process.env.LAT_LLM_MODEL ?? config.llm_model ?? 'default',
dimensions:
process.env.LAT_LLM_DIMENSIONS != null
? Number(process.env.LAT_LLM_DIMENSIONS)
: (config.llm_dimensions ?? 1536),
headers: (k) => {
const h: Record<string, string> = {
'Content-Type': 'application/json',
};
if (k) h['Authorization'] = `Bearer ${k}`;
return h;
},
};
}

export function detectProvider(
key: string | undefined,
config: LatConfig = {},
): EmbeddingProvider {
if (key?.startsWith('REPLAY_LAT_LLM_KEY::')) {
const replayUrl = key.slice('REPLAY_LAT_LLM_KEY::'.length);
return {
name: 'replay',
Expand All @@ -39,6 +65,13 @@ export function detectProvider(key: string): EmbeddingProvider {
headers: () => ({ 'Content-Type': 'application/json' }),
};
}

const custom = customProvider(config);
if (custom) return custom;

if (!key) {
throw new Error('No API key configured.');
}
if (key.startsWith('sk-ant-')) {
throw new Error(
"Anthropic doesn't offer an embedding model. Set LAT_LLM_KEY to an OpenAI (sk-...) or Vercel AI Gateway (vck_...) key.",
Expand All @@ -47,6 +80,6 @@ export function detectProvider(key: string): EmbeddingProvider {
if (key.startsWith('vck_')) return vercel;
if (key.startsWith('sk-')) return openai;
throw new Error(
`Unrecognized LAT_LLM_KEY prefix. Supported: OpenAI (sk-...), Vercel AI Gateway (vck_...).`,
`Unrecognized LAT_LLM_KEY prefix. Supported: OpenAI (sk-...), Vercel AI Gateway (vck_...), or set LAT_LLM_BASE for a custom endpoint.`,
);
}
2 changes: 1 addition & 1 deletion src/search/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export async function searchSections(
db: Client,
query: string,
provider: EmbeddingProvider,
key: string,
key: string | undefined,
limit = 5,
): Promise<SearchResult[]> {
const [queryVec] = await embed([query], provider, key);
Expand Down
33 changes: 33 additions & 0 deletions tests/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,39 @@ describe('detectProvider', () => {
it('rejects unknown key', () => {
expect(() => detectProvider('xyz_abc123')).toThrow(/Unrecognized/);
});

it('uses custom provider when LAT_LLM_BASE is set', () => {
process.env.LAT_LLM_BASE = 'http://localhost:8080/v1';
process.env.LAT_LLM_MODEL = 'qwen3-embedding';
process.env.LAT_LLM_DIMENSIONS = '1024';
try {
const p = detectProvider('sk-abc123');
expect(p).toMatchObject({
name: 'custom',
apiBase: 'http://localhost:8080/v1',
model: 'qwen3-embedding',
dimensions: 1024,
});
} finally {
delete process.env.LAT_LLM_BASE;
delete process.env.LAT_LLM_MODEL;
delete process.env.LAT_LLM_DIMENSIONS;
}
});

it('custom provider omits Authorization when key is undefined', () => {
process.env.LAT_LLM_BASE = 'http://localhost:8080/v1';
process.env.LAT_LLM_DIMENSIONS = '1024';
try {
const p = detectProvider(undefined);
expect(p.headers()).not.toHaveProperty('Authorization');
expect(p.headers('sk-real')).toHaveProperty('Authorization');
} finally {
delete process.env.LAT_LLM_BASE;
delete process.env.LAT_LLM_DIMENSIONS;
}
});

});

// --- RAG functional tests ---
Expand Down