-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtypes.ts
More file actions
210 lines (193 loc) · 6.56 KB
/
types.ts
File metadata and controls
210 lines (193 loc) · 6.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/**
* Core domain types for recallr.
*
* Every connector (email, slack, discord, ...) normalizes its data into
* a `Message`. Everything downstream — storage, indexing, search, RAG,
* MCP — speaks only this shape.
*/
/** Identifier of the system a message was ingested from. */
export type Source =
  // Mail-style sources.
  | "imap" | "gmail" | "mbox"
  // Chat-style sources.
  | "slack" | "discord" | "matrix" | "telegram" | "whatsapp" | "imessage" | "teams"
  // Fallback literal — presumably for anything not listed above; confirm with connectors.
  | "other";
/** A sender or recipient of a message. */
export interface Participant {
  /** Identity that stays stable inside one source, e.g. an email address or a Slack user id. */
  id: string;
  /** Display name as it was when the message was sent; omitted when unknown. */
  name?: string;
  /** Email address, for sources where one applies. */
  email?: string;
}
/**
 * Descriptive metadata about a file attached to a message.
 *
 * Every field is optional, and the shape carries no file content.
 */
export interface Attachment {
  /** File name of the attachment, when known. */
  filename?: string;
  /** NOTE(review): looks like a MIME type (e.g. `text/plain`) — confirm. */
  contentType?: string;
  /** NOTE(review): presumably the size in bytes — confirm the unit. */
  size?: number;
}
/**
 * The one shape every source's messages are normalized into.
 *
 * Connectors own `id`: it has to be unique across the whole database, and
 * it should be minted deterministically (typically `${source}:${sourceId}`)
 * so that indexing the same message again is idempotent.
 */
export interface Message {
  id: string;
  source: Source;
  /** The id the source itself uses, e.g. an IMAP UID or a Slack `ts`. */
  sourceId: string;
  /**
   * Where the message lives inside its source: the mailbox/folder for email
   * (`INBOX`, `Sent`, ...), the channel id for Slack/Discord.
   */
  channel?: string;
  /** Stable conversation/thread id, set when the source has threading. */
  threadId?: string;
  /** Email subject line; elsewhere, the first message text or a pinned title. */
  subject?: string;
  /** Body as plain text — ingestion normalizes HTML away. */
  body: string;
  from: Participant;
  to: Array<Participant>;
  cc?: Array<Participant>;
  bcc?: Array<Participant>;
  /** Send time, in Unix epoch milliseconds. */
  timestamp: number;
  attachments?: Array<Attachment>;
  /**
   * Free-form string key/value pairs recording where the message came from
   * (mbox path, IMAP folder, Slack workspace, ...). Answers cite it, e.g.
   * "from your Fastmail INBOX".
   */
  provenance?: Record<string, string>;
}
/** A conversation: its messages together with thread-level metadata. */
export interface Thread {
  id: string;
  source: Source;
  channel?: string;
  subject?: string;
  participants: Array<Participant>;
  messages: Array<Message>;
  /** Timestamp of the newest message in the thread. */
  lastTimestamp: number;
}
/* --------------------------------- search -------------------------------- */
/** Optional knobs and filters for a search; every field may be omitted. */
export interface SearchOptions {
  /** How many results to return once reranking is done. Default 10. */
  limit?: number;
  /** How many FTS candidates to pull before the embedding rerank. Default 200. */
  candidates?: number;
  /** Only consider messages from this one source. */
  source?: Source;
  /**
   * Only consider messages from / to / about this participant. The value is
   * matched against participant id, name, or email.
   */
  participant?: string;
  /** Lower time bound in epoch ms, inclusive. */
  after?: number;
  /** Upper time bound in epoch ms, inclusive. */
  before?: number;
}
/** One search result: the matched message plus its scores. */
export interface SearchHit {
  message: Message;
  /** Fused final score; a higher value means a better match. */
  score: number;
  /** BM25 (sparse) score from FTS5. Set only when the message matched lexically. */
  bm25?: number;
  /** Cosine (dense) score. Set only when the hit was reranked by embedding. */
  cosine?: number;
}
/* ------------------------------- contracts ------------------------------- */
/**
 * A producer of messages for one source.
 *
 * Connectors are pure producers: they never touch the store themselves, and
 * the indexer is what wires the two together.
 *
 * The indexer passes `since` to sync incrementally. Connectors should treat
 * it as a best-effort filter — skipping ahead is OK, and so is returning
 * extras.
 */
export interface Connector {
  readonly name: string;
  readonly source: Source;
  fetch(opts?: {
    // NOTE(review): unit is not stated here; presumably epoch ms like `Message.timestamp` — confirm.
    since?: number;
    // NOTE(review): presumably lets the caller cancel an in-flight fetch — confirm with implementations.
    signal?: AbortSignal;
  }): AsyncIterable<Message>;
}
/** Turns texts into embedding vectors. */
export interface Embedder {
  /** Dimension of the vectors that `embed` produces. */
  readonly dimension: number;
  /** Stored next to each vector so that a change of model can be detected. */
  readonly modelId: string;
  /**
   * Embed a batch of texts.
   *
   * NOTE(review): presumably resolves to one vector per input, in input
   * order — confirm with implementations.
   */
  embed(texts: string[]): Promise<Float32Array[]>;
}
/** A single turn in a chat exchange with an LLM. */
export interface ChatMessage {
  /** Who the turn is attributed to. */
  role: "system" | "user" | "assistant";
  /** Text of the turn. */
  content: string;
}
/** Per-call settings for a chat request; every field may be omitted. */
export interface ChatOptions {
  /** NOTE(review): presumably overrides the client's default model — confirm. */
  model?: string;
  /** Sampling temperature handed to the model. */
  temperature?: number;
  /** NOTE(review): presumably caps the length of the generated answer — confirm. */
  maxTokens?: number;
  /** NOTE(review): presumably lets the caller cancel the request — confirm. */
  signal?: AbortSignal;
}
/** Client for a chat-capable language model. */
export interface LlmClient {
  /** Run a chat completion and resolve with the reply as a string. */
  chat(messages: ChatMessage[], opts?: ChatOptions): Promise<string>;
  /**
   * Streaming variant of a chat completion; optional to implement.
   *
   * Partial content deltas are yielded in order, so joining every chunk
   * rebuilds the complete answer. An implementation with no native
   * streaming may yield one chunk that holds the whole response.
   */
  chatStream?(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string>;
}
/**
 * A thread summarized for listing: a message count, a snippet and the
 * latest message id rather than the messages themselves.
 */
export interface ThreadSummary {
  id: string;
  source: Source;
  channel?: string;
  subject?: string;
  /** How many messages the thread contains. */
  messageCount: number;
  /** Distinct participants, capped at a reasonable maximum for UI rendering. */
  participants: Array<Participant>;
  /** Epoch-ms timestamp of the newest message. */
  lastTimestamp: number;
  /** Excerpt taken from the body of the newest message. */
  snippet: string;
  /**
   * Id of the newest message in the thread. The web UI passes it to
   * `/api/thread/{messageId}` to load the full thread view; by design that
   * endpoint is keyed by a message id, not a thread id.
   */
  latestMessageId: string;
}
/** Filters and paging for listing threads; every field may be omitted. */
export interface ListThreadsOptions {
  /** Upper limit on the number of threads returned. Default 30. */
  limit?: number;
  /** Only list threads from this one source. */
  source?: Source;
  /**
   * Pagination cursor: only threads whose `lastTimestamp` is strictly
   * before this value are returned.
   */
  before?: number;
}
/** Persistence and retrieval contract for messages, embeddings and threads. */
export interface Store {
  /** Insert messages, replacing any that share an id. Idempotent. */
  upsertMessages(messages: Array<Message>): Promise<void>;

  /** Save embeddings; each row is keyed by the id of the message it belongs to. */
  upsertEmbeddings(
    rows: Array<{ id: string; modelId: string; vector: Float32Array }>,
  ): Promise<void>;

  /** Ids that already have an embedding stored for `modelId`. */
  embeddedIds(modelId: string): Promise<Set<string>>;

  /**
   * Hybrid search: BM25 candidates come from FTS and can optionally be
   * reranked by embedding.
   *
   * NOTE(review): a `null` `queryVector` presumably skips the rerank —
   * confirm with implementations.
   */
  search(query: string, queryVector: Float32Array | null, opts?: SearchOptions): Promise<SearchHit[]>;

  /** Fetch one message by its id. */
  getMessage(id: string): Promise<Message | null>;

  /** The whole thread that contains `messageId`, ordered by time. */
  getThread(messageId: string): Promise<Thread | null>;

  /** Recent threads, newest first; backs the thread browser in the web UI. */
  listThreads(opts?: ListThreadsOptions): Promise<ThreadSummary[]>;

  /** Aggregate counts, used for diagnostics and `recallr status`. */
  stats(): Promise<{
    messages: number;
    embeddings: number;
    sources: Record<string, number>;
  }>;

  // NOTE(review): the only synchronous member; presumably releases the underlying store — confirm.
  close(): void;
}