diff --git a/ROADMAP.md b/ROADMAP.md index 95c5e20..ce0cad6 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -18,6 +18,7 @@ These are implemented and in the repo now: - `proxy_list_sessions` - `proxy_get_session` - `proxy_query_session` + - `proxy_search_session_bodies` - `proxy_get_session_exchange` - `proxy_export_har` - `proxy_delete_session` diff --git a/src/session-store.ts b/src/session-store.ts index f3722ed..5cbd03a 100644 --- a/src/session-store.ts +++ b/src/session-store.ts @@ -65,6 +65,7 @@ export interface SessionIndexEntry { ja3: string | null; ja4: string | null; ja3s?: string | null; + responseContentType?: string | null; recordOffset: number; recordLineBytes: number; } @@ -97,6 +98,41 @@ export interface SessionQueryResult { items: SessionIndexEntry[]; } +export interface SessionBodySearchQuery { + text: string; + hostnameContains?: string; + urlContains?: string; + method?: string; + statusCode?: number; + contentTypeContains?: string; + searchIn?: "response" | "request" | "both"; + caseSensitive?: boolean; + limit?: number; + maxScan?: number; + contextChars?: number; +} + +export interface SessionBodySearchMatch { + exchangeId: string; + seq: number; + url: string; + method: string; + statusCode: number | null; + contentType: string | null; + matchedIn: "request" | "response"; + source: "full" | "preview"; + snippets: Array<{ position: number; context: string }>; +} + +export interface SessionBodySearchResult { + query: string; + scanned: number; + skippedBinary: number; + skippedNoBody: number; + totalMatches: number; + matches: SessionBodySearchMatch[]; +} + export interface HarImportOptions { harFile: string; sessionName?: string; @@ -189,6 +225,59 @@ function decompressBody(body: Buffer, contentEncoding: string | undefined): Buff return body; } +const BINARY_MIME_PREFIXES = [ + "image/", "audio/", "video/", "font/", + "application/octet-stream", "application/zip", + "application/gzip", "application/pdf", + "application/wasm", +]; + +function isKnownBinaryMime(contentType: string | null | undefined): boolean { + if (!contentType) return false; + const ct = contentType.toLowerCase(); + return BINARY_MIME_PREFIXES.some(prefix => ct.startsWith(prefix)); +} + +function isBinaryContent(buf: Buffer): boolean { + const checkLen = Math.min(buf.length, 512); + for (let i = 0; i < checkLen; i++) { + if (buf[i] === 0) return true; + } + return false; +} + +function extractSnippets( + text: string, + searchText: string, + caseSensitive: boolean, + contextChars: number, + maxSnippets: number, +): Array<{ position: number; context: string }> { + const haystack = caseSensitive ? text : text.toLowerCase(); + const needle = caseSensitive ? searchText : searchText.toLowerCase(); + const snippets: Array<{ position: number; context: string }> = []; + let startPos = 0; + + while (snippets.length < maxSnippets) { + const idx = haystack.indexOf(needle, startPos); + if (idx === -1) break; + + const ctxStart = Math.max(0, idx - contextChars); + const ctxEnd = Math.min(text.length, idx + needle.length + contextChars); + + const before = text.slice(ctxStart, idx); + const match = text.slice(idx, idx + needle.length); + const after = text.slice(idx + needle.length, ctxEnd); + + const context = `${ctxStart > 0 ? "..." : ""}${before}[${match}]${after}${ctxEnd < text.length ? "..." : ""}`; + + snippets.push({ position: idx, context }); + startPos = idx + needle.length; + } + + return snippets; +} + function headersToHar(headers: Record | undefined): Array<{ name: string; value: string }> { if (!headers) return []; return Object.entries(headers).map(([name, value]) => ({ name, value })); @@ -598,6 +687,134 @@ export class SessionStore { }; } + async searchSessionBodies( + sessionId: string, + query: SessionBodySearchQuery, + ): Promise { + const entries = await this.readSessionIndex(sessionId); + + const searchText = query.text; + const searchIn = query.searchIn ?? "both"; + const caseSensitive = query.caseSensitive ?? false; + const limit = Math.max(1, Math.min(100, query.limit ?? 10)); + const maxScan = Math.max(1, Math.min(5000, query.maxScan ?? 200)); + const contextChars = Math.max(20, Math.min(500, query.contextChars ?? 120)); + + const method = query.method?.toUpperCase(); + const hostnameContains = query.hostnameContains?.toLowerCase(); + const urlContains = query.urlContains?.toLowerCase(); + const contentTypeContains = query.contentTypeContains?.toLowerCase(); + + const candidates = entries.filter(e => { + if (method && e.method !== method) return false; + if (hostnameContains && !e.hostname.toLowerCase().includes(hostnameContains)) return false; + if (urlContains && !e.url.toLowerCase().includes(urlContains)) return false; + if (query.statusCode !== undefined && e.statusCode !== query.statusCode) return false; + if (contentTypeContains && e.responseContentType != null) { + if (!e.responseContentType.includes(contentTypeContains)) return false; + } + if (isKnownBinaryMime(e.responseContentType)) return false; + return true; + }); + + candidates.sort((a, b) => a.timestamp - b.timestamp); + + const recordsPath = path.join(this.rootDir, sessionId, RECORDS_FILENAME); + const fh = await fs.open(recordsPath, "r"); + + const result: SessionBodySearchResult = { + query: searchText, + scanned: 0, + skippedBinary: 0, + skippedNoBody: 0, + totalMatches: 0, + matches: [], + }; + + try { + for (const entry of candidates) { + if (result.scanned >= maxScan) break; + if (result.matches.length >= limit) break; + + const buf = Buffer.alloc(entry.recordLineBytes); + const readResult = await fh.read(buf, 0, entry.recordLineBytes, entry.recordOffset); + const line = buf.subarray(0, readResult.bytesRead).toString("utf8").trimEnd(); + let record: PersistedExchangeRecord; + try { + record = JSON.parse(line) as PersistedExchangeRecord; + } catch { + continue; + } + + result.scanned++; + let exchangeMatched = false; + let exchangeHadBinary = false; + let exchangeHadNoBody = false; + + if (searchIn === "response" || searchIn === "both") { + const bodyResult = this.extractSearchableBody(record, "response"); + if (bodyResult === null) { + exchangeHadNoBody = true; + } else if (bodyResult === "binary") { + exchangeHadBinary = true; + } else { + const snippets = extractSnippets(bodyResult.text, searchText, caseSensitive, contextChars, 3); + if (snippets.length > 0) { + exchangeMatched = true; + result.totalMatches++; + result.matches.push({ + exchangeId: entry.exchangeId, + seq: entry.seq, + url: capString(entry.url, 200), + method: entry.method, + statusCode: entry.statusCode, + contentType: entry.responseContentType ?? record.exchange.response?.headers?.["content-type"] ?? null, + matchedIn: "response", + source: bodyResult.source, + snippets, + }); + } + } + } + + if (!exchangeMatched && (searchIn === "request" || searchIn === "both")) { + const bodyResult = this.extractSearchableBody(record, "request"); + if (bodyResult === null) { + if (!exchangeHadNoBody) exchangeHadNoBody = true; + } else if (bodyResult === "binary") { + if (!exchangeHadBinary) exchangeHadBinary = true; + } else { + const snippets = extractSnippets(bodyResult.text, searchText, caseSensitive, contextChars, 3); + if (snippets.length > 0) { + result.totalMatches++; + result.matches.push({ + exchangeId: entry.exchangeId, + seq: entry.seq, + url: capString(entry.url, 200), + method: entry.method, + statusCode: entry.statusCode, + contentType: record.exchange.request?.headers?.["content-type"] ?? null, + matchedIn: "request", + source: bodyResult.source, + snippets, + }); + exchangeMatched = true; + } + } + } + + if (!exchangeMatched) { + if (exchangeHadBinary) result.skippedBinary++; + else if (exchangeHadNoBody) result.skippedNoBody++; + } + } + } finally { + await fh.close(); + } + + return result; + } + async getSessionExchange( sessionId: string, opts: { seq?: number; exchangeId?: string; includeBody?: boolean }, @@ -928,6 +1145,41 @@ export class SessionStore { return { highErrorEndpoints, slowestExchanges, hostErrorRates }; } + private extractSearchableBody( + record: PersistedExchangeRecord, + side: "request" | "response", + ): { text: string; source: "full" | "preview" } | "binary" | null { + const exchange = record.exchange; + + if (side === "response") { + if (record.responseBodyBase64) { + const raw = fromBase64Buffer(record.responseBodyBase64); + if (!raw || raw.length === 0) return null; + const encoding = exchange.response?.headers?.["content-encoding"]; + const decompressed = decompressBody(raw, encoding); + if (isBinaryContent(decompressed)) return "binary"; + return { text: decompressed.toString("utf-8"), source: "full" }; + } + if (exchange.response?.bodyPreview) { + return { text: exchange.response.bodyPreview, source: "preview" }; + } + return null; + } + + if (record.requestBodyBase64) { + const raw = fromBase64Buffer(record.requestBodyBase64); + if (!raw || raw.length === 0) return null; + const encoding = exchange.request?.headers?.["content-encoding"]; + const decompressed = decompressBody(raw, encoding); + if (isBinaryContent(decompressed)) return "binary"; + return { text: decompressed.toString("utf-8"), source: "full" }; + } + if (exchange.request?.bodyPreview) { + return { text: exchange.request.bodyPreview, source: "preview" }; + } + return null; + } + private toIndexEntry( record: PersistedExchangeRecord, location: { recordOffset: number; recordLineBytes: number }, @@ -952,6 +1204,7 @@ export class SessionStore { ja3: exchange.tls?.client?.ja3Fingerprint ?? null, ja4: exchange.tls?.client?.ja4Fingerprint ?? null, ja3s: exchange.tls?.server?.ja3sFingerprint ?? null, + responseContentType: exchange.response?.headers?.["content-type"]?.split(";")[0]?.trim().toLowerCase() ?? null, recordOffset: location.recordOffset, recordLineBytes: location.recordLineBytes, }; diff --git a/src/state.ts b/src/state.ts index a6f9b73..5fccad3 100644 --- a/src/state.ts +++ b/src/state.ts @@ -27,6 +27,8 @@ import { type SessionQuery, type SessionQueryResult, type SessionIndexEntry, + type SessionBodySearchQuery, + type SessionBodySearchResult, type HarImportOptions, type HarImportSummary, } from "./session-store.js"; @@ -670,6 +672,10 @@ export class ProxyManager { return await this.sessionStore.querySession(sessionId, query); } + async searchSessionBodies(sessionId: string, query: SessionBodySearchQuery): Promise { + return await this.sessionStore.searchSessionBodies(sessionId, query); + } + async getSessionExchange( sessionId: string, opts: { seq?: number; exchangeId?: string; includeBody?: boolean }, diff --git a/src/tools/sessions.ts b/src/tools/sessions.ts index cd5b84e..7bb664a 100644 --- a/src/tools/sessions.ts +++ b/src/tools/sessions.ts @@ -150,7 +150,7 @@ export function registerSessionTools(server: McpServer): void { server.tool( "proxy_query_session", - "Query indexed session exchanges with filters and pagination.", + "Query indexed session exchanges by metadata (URL, hostname, method, status) with filters and pagination. Does NOT search body content — use proxy_search_session_bodies for that.", { session_id: z.string().describe("Session ID"), limit: z.number().optional().default(50), @@ -187,6 +187,60 @@ export function registerSessionTools(server: McpServer): void { }, ); + server.tool( + "proxy_search_session_bodies", + "Search inside HTTP request/response bodies stored in a persistent session. " + + "Decompresses and searches actual body content — useful for finding specific text, " + + "prices, API responses, error messages, etc. in recorded traffic. Returns context " + + "snippets around each match (like grep -C).", + { + session_id: z.string().describe("Session ID"), + text: z.string().min(1).describe("Text to search for inside request/response bodies"), + hostname_contains: z.string().optional().describe("Pre-filter: hostname substring"), + url_contains: z.string().optional().describe("Pre-filter: URL substring"), + method: z.string().optional().describe("Pre-filter: HTTP method"), + status_code: z.number().optional().describe("Pre-filter: HTTP status code"), + content_type_contains: z.string().optional() + .describe("Pre-filter: response content-type substring (e.g. 'html', 'json')"), + search_in: z.enum(["response", "request", "both"]).optional().default("both") + .describe("Which bodies to search (default: both)"), + case_sensitive: z.boolean().optional().default(false) + .describe("Case-sensitive search (default: false)"), + limit: z.number().optional().default(10) + .describe("Max matching exchanges to return (default: 10, max: 100)"), + max_scan: z.number().optional().default(200) + .describe("Max bodies to decompress and search (default: 200, max: 5000)"), + context_chars: z.number().optional().default(120) + .describe("Characters of context around each match (default: 120)"), + }, + async ({ + session_id, text, hostname_contains, url_contains, method, + status_code, content_type_contains, search_in, case_sensitive, + limit, max_scan, context_chars, + }) => { + try { + const result = await proxyManager.searchSessionBodies(session_id, { + text, + hostnameContains: hostname_contains, + urlContains: url_contains, + method, + statusCode: status_code, + contentTypeContains: content_type_contains, + searchIn: search_in, + caseSensitive: case_sensitive, + limit, + maxScan: max_scan, + contextChars: context_chars, + }); + return { + content: [{ type: "text", text: truncateResult({ status: "success", ...result }) }], + }; + } catch (e) { + return { content: [{ type: "text", text: JSON.stringify({ status: "error", error: toError(e) }) }] }; + } + }, + ); + server.tool( "proxy_get_session_handshakes", "Summarize TLS handshake/fingerprint availability (JA3/JA4/JA3S) for session exchanges.", diff --git a/test/integration/mcp-server.test.ts b/test/integration/mcp-server.test.ts index 1696e02..2189500 100644 --- a/test/integration/mcp-server.test.ts +++ b/test/integration/mcp-server.test.ts @@ -121,7 +121,8 @@ describe("MCP Server Integration", () => { assert.ok(names.includes("proxy_set_fingerprint_spoof")); assert.ok(names.includes("proxy_list_fingerprint_presets")); assert.ok(names.includes("proxy_check_fingerprint_runtime")); - assert.equal(names.length, 76); + assert.ok(names.includes("proxy_search_session_bodies")); + assert.equal(names.length, 77); }); it("start/status/stop lifecycle via MCP", async (t) => { diff --git a/test/unit/session-store.test.ts b/test/unit/session-store.test.ts index 4698f31..c07befc 100644 --- a/test/unit/session-store.test.ts +++ b/test/unit/session-store.test.ts @@ -165,3 +165,699 @@ describe("SessionStore", () => { assert.equal(exchange.record?.responseBodyText, "{\"ok\":true}"); }); }); + +// ── searchSessionBodies tests ────────────────────────────────────────────── + +function sampleExchangeWithBody( + id: string, + opts: { + requestBody?: string; + responseBody?: string; + responseContentType?: string; + requestContentType?: string; + statusCode?: number; + method?: string; + hostname?: string; + url?: string; + } = {}, +): { exchange: CapturedExchange; requestBody?: Buffer; responseBody?: Buffer } { + const responseBody = opts.responseBody ? Buffer.from(opts.responseBody) : undefined; + const requestBody = opts.requestBody ? Buffer.from(opts.requestBody) : undefined; + const hostname = opts.hostname ?? "example.com"; + const urlPath = opts.url ?? "/page"; + const exchange: CapturedExchange = { + id, + timestamp: Date.now(), + request: { + method: opts.method ?? "GET", + url: `https://${hostname}${urlPath}`, + hostname, + path: urlPath, + headers: { + "user-agent": "unit-test", + ...(opts.requestContentType ? { "content-type": opts.requestContentType } : {}), + }, + bodyPreview: opts.requestBody?.slice(0, 4096) ?? "", + bodySize: requestBody?.length ?? 0, + }, + response: { + statusCode: opts.statusCode ?? 200, + statusMessage: "OK", + headers: { + "content-type": opts.responseContentType ?? "text/html; charset=utf-8", + }, + bodyPreview: opts.responseBody?.slice(0, 4096) ?? "", + bodySize: responseBody?.length ?? 0, + }, + duration: 50, + }; + return { exchange, requestBody, responseBody }; +} + +describe("searchSessionBodies", () => { + // Core functionality + + it("finds text in response body", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + responseBody: '
299,-
', + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "299,-" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].matchedIn, "response"); + assert.equal(result.matches[0].source, "full"); + assert.ok(result.matches[0].snippets[0].context.includes("[299,-]")); + assert.ok(result.matches[0].snippets[0].context.includes("price")); + }); + + it("finds text in request body", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + method: "POST", + requestBody: '{"username":"admin","password":"secret"}', + requestContentType: "application/json", + responseBody: '{"ok":true}', + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "admin", + searchIn: "request", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].matchedIn, "request"); + }); + + it("searches both sides with response priority", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + method: "POST", + requestBody: "token123-in-request", + responseBody: "token123-in-response", + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "token123", + searchIn: "both", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].matchedIn, "response"); + }); + + it("caps snippets at 3 per exchange", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const body = "error occurred. error again. error three. error four. error five."; + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + responseBody: body, + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "error" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].snippets.length, 3); + }); + + it("returns multiple matching exchanges", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + for (let i = 1; i <= 3; i++) { + const { exchange, requestBody, responseBody } = sampleExchangeWithBody(`e${i}`, { + responseBody: `Page ${i} contains the keyword findme here`, + url: `/page${i}`, + }); + store.recordExchange(exchange, { requestBody, responseBody }); + } + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "findme", limit: 10 }); + assert.equal(result.totalMatches, 3); + assert.equal(result.matches.length, 3); + }); + + // Case sensitivity + + it("searches case-insensitively by default", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "The ProductName is great", + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "productname" }); + assert.equal(result.totalMatches, 1); + assert.ok(result.matches[0].snippets[0].context.includes("ProductName")); + }); + + it("respects case_sensitive flag", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "The ProductName is great", + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const noMatch = await store.searchSessionBodies(session.id, { + text: "productname", + caseSensitive: true, + }); + assert.equal(noMatch.totalMatches, 0); + + const match = await store.searchSessionBodies(session.id, { + text: "ProductName", + caseSensitive: true, + }); + assert.equal(match.totalMatches, 1); + }); + + // Preview fallback + + it("falls back to bodyPreview in preview-profile sessions", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "preview" }); + + const { exchange } = sampleExchangeWithBody("e1", { + responseBody: "This preview has a secret-token inside", + }); + // No full bodies passed — preview profile + store.recordExchange(exchange); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "secret-token" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].source, "preview"); + }); + + it("prefers full body over preview when available", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, requestBody, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "Full body content with unique-marker-xyz", + }); + store.recordExchange(exchange, { requestBody, responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "unique-marker-xyz" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].source, "full"); + }); + + // Filtering and skipping + + it("skips binary bodies", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + // PNG-like binary body with null bytes + const binaryBody = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x00, 0x00, 0x00, 0x0d]); + const exchange: CapturedExchange = { + id: "e1", + timestamp: Date.now(), + request: { + method: "GET", url: "https://example.com/data", hostname: "example.com", + path: "/data", headers: {}, bodyPreview: "", bodySize: 0, + }, + response: { + statusCode: 200, statusMessage: "OK", + headers: { "content-type": "application/octet-stream" }, + bodyPreview: "", bodySize: binaryBody.length, + }, + duration: 10, + }; + // Force content-type to something non-binary for index so it's not pre-filtered, + // but the actual body is binary + exchange.response!.headers["content-type"] = "text/plain"; + store.recordExchange(exchange, { responseBody: binaryBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "anything" }); + assert.equal(result.skippedBinary, 1); + assert.equal(result.totalMatches, 0); + }); + + it("pre-filters by content_type_contains", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const html = sampleExchangeWithBody("e1", { + responseBody: "findme in html", responseContentType: "text/html", + }); + const json = sampleExchangeWithBody("e2", { + responseBody: "findme in json", responseContentType: "application/json", + }); + store.recordExchange(html.exchange, { responseBody: html.responseBody }); + store.recordExchange(json.exchange, { responseBody: json.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "findme", + contentTypeContains: "json", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e2"); + }); + + it("skips known binary MIME types without reading records", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const img = sampleExchangeWithBody("e1", { + responseBody: "not really an image findme", + responseContentType: "image/jpeg", + }); + const html = sampleExchangeWithBody("e2", { + responseBody: "findme in html", + responseContentType: "text/html", + }); + store.recordExchange(img.exchange, { responseBody: img.responseBody }); + store.recordExchange(html.exchange, { responseBody: html.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "findme" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e2"); + // image/jpeg was filtered at index level, so scanned should be 1 + assert.equal(result.scanned, 1); + }); + + it("pre-filters by hostname", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const a = sampleExchangeWithBody("e1", { + responseBody: "findme here", hostname: "api.example.com", + }); + const b = sampleExchangeWithBody("e2", { + responseBody: "findme too", hostname: "cdn.other.com", + }); + store.recordExchange(a.exchange, { responseBody: a.responseBody }); + store.recordExchange(b.exchange, { responseBody: b.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "findme", + hostnameContains: "api.example", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e1"); + }); + + it("pre-filters by URL", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const a = sampleExchangeWithBody("e1", { + responseBody: "findme", url: "/api/v2/data", + }); + const b = sampleExchangeWithBody("e2", { + responseBody: "findme", url: "/static/style.css", + }); + store.recordExchange(a.exchange, { responseBody: a.responseBody }); + store.recordExchange(b.exchange, { responseBody: b.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "findme", + urlContains: "/api/v2", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e1"); + }); + + it("pre-filters by method", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const get = sampleExchangeWithBody("e1", { + responseBody: "findme", method: "GET", + }); + const post = sampleExchangeWithBody("e2", { + responseBody: "findme", method: "POST", + }); + store.recordExchange(get.exchange, { responseBody: get.responseBody }); + store.recordExchange(post.exchange, { responseBody: post.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "findme", + method: "POST", + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e2"); + }); + + it("pre-filters by status code", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const ok = sampleExchangeWithBody("e1", { + responseBody: "findme", statusCode: 200, + }); + const err = sampleExchangeWithBody("e2", { + responseBody: "findme", statusCode: 500, + }); + store.recordExchange(ok.exchange, { responseBody: ok.responseBody }); + store.recordExchange(err.exchange, { responseBody: err.responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "findme", + statusCode: 200, + }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].exchangeId, "e1"); + }); + + // Limits + + it("respects max_scan limit", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + for (let i = 1; i <= 10; i++) { + const { exchange, responseBody } = sampleExchangeWithBody(`e${i}`, { + responseBody: `Body ${i} with data`, + url: `/p${i}`, + }); + store.recordExchange(exchange, { responseBody }); + } + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "data", + maxScan: 3, + }); + assert.equal(result.scanned, 3); + }); + + it("respects limit on matching results", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + for (let i = 1; i <= 10; i++) { + const { exchange, responseBody } = sampleExchangeWithBody(`e${i}`, { + responseBody: `Body ${i} with data`, + url: `/p${i}`, + }); + store.recordExchange(exchange, { responseBody }); + } + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "data", + limit: 2, + }); + assert.equal(result.matches.length, 2); + assert.equal(result.totalMatches, 2); + }); + + it("both limits interact correctly", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + for (let i = 1; i <= 10; i++) { + const { exchange, responseBody } = sampleExchangeWithBody(`e${i}`, { + responseBody: i <= 5 ? `match data here` : `no keyword`, + url: `/p${i}`, + }); + store.recordExchange(exchange, { responseBody }); + } + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "match data", + maxScan: 4, + limit: 2, + }); + assert.ok(result.scanned <= 4); + assert.ok(result.matches.length <= 2); + }); + + // Edge cases + + it("handles empty session gracefully", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "anything" }); + assert.equal(result.totalMatches, 0); + assert.equal(result.scanned, 0); + assert.equal(result.skippedBinary, 0); + assert.equal(result.skippedNoBody, 0); + }); + + it("counts skippedNoBody for exchanges without body", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + // Exchange with empty bodies + const exchange: CapturedExchange = { + id: "e1", + timestamp: Date.now(), + request: { + method: "GET", url: "https://example.com/empty", hostname: "example.com", + path: "/empty", headers: {}, bodyPreview: "", bodySize: 0, + }, + response: { + statusCode: 204, statusMessage: "No Content", + headers: { "content-type": "text/plain" }, + bodyPreview: "", bodySize: 0, + }, + duration: 5, + }; + store.recordExchange(exchange); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "anything", + searchIn: "response", + }); + assert.equal(result.skippedNoBody, 1); + }); + + it("handles multi-line content", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "line1\nline2\nline3\nline4", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "line2" }); + assert.equal(result.totalMatches, 1); + assert.ok(result.matches[0].snippets[0].context.includes("line1")); + assert.ok(result.matches[0].snippets[0].context.includes("line3")); + }); + + it("handles very long bodies", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const padding = "x".repeat(500_000); + const body = `${padding}NEEDLE${padding}`; + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: body, + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "NEEDLE" }); + assert.equal(result.totalMatches, 1); + assert.equal(result.matches[0].snippets[0].position, 500_000); + }); + + it("returns no matches when text is absent", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "This body has no relevant content", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "xyz-not-here" }); + assert.equal(result.totalMatches, 0); + assert.equal(result.scanned, 1); + }); + + // responseContentType in index + + it("populates responseContentType in index entries", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "test", + responseContentType: "text/html; charset=utf-8", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const query = await store.querySession(session.id, { limit: 10, offset: 0 }); + assert.equal(query.items[0].responseContentType, "text/html"); + }); + + it("sets null responseContentType for exchange without response", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + // Aborted request — no response + const exchange: CapturedExchange = { + id: "e1", + timestamp: Date.now(), + request: { + method: "GET", url: "https://example.com/abort", hostname: "example.com", + path: "/abort", headers: {}, bodyPreview: "", bodySize: 0, + }, + duration: undefined, + }; + store.recordExchange(exchange); + await store.stopSession(); + + const query = await store.querySession(session.id, { limit: 10, offset: 0 }); + assert.equal(query.items[0].responseContentType, null); + }); + + // Snippet details + + it("respects contextChars parameter", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const before = "A".repeat(200); + const after = "B".repeat(200); + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: `${before}NEEDLE${after}`, + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { + text: "NEEDLE", + contextChars: 20, + }); + const snippet = result.matches[0].snippets[0].context; + // Before [MATCH]: "..." prefix + ~20 A's + // After [MATCH]: ~20 B's + "..." suffix + assert.ok(snippet.startsWith("...")); + assert.ok(snippet.endsWith("...")); + // Total should be manageable, not 200 chars of context + assert.ok(snippet.length < 80); + }); + + it("no ellipsis prefix when match is at start of body", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "NEEDLE followed by some text here", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "NEEDLE" }); + const snippet = result.matches[0].snippets[0].context; + assert.ok(!snippet.startsWith("...")); + assert.equal(result.matches[0].snippets[0].position, 0); + }); + + it("no ellipsis suffix when match is at end of body", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "Some text before NEEDLE", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + const result = await store.searchSessionBodies(session.id, { text: "NEEDLE" }); + const snippet = result.matches[0].snippets[0].context; + assert.ok(!snippet.endsWith("...")); + }); + + // Backward compatibility + + it("old index entries without responseContentType pass content_type_contains filter", async () => { + const dir = await makeTempDir(); + const store = new SessionStore(dir); + const session = await store.startSession({ captureProfile: "full" }); + + const { exchange, responseBody } = sampleExchangeWithBody("e1", { + responseBody: "findme here", + }); + store.recordExchange(exchange, { responseBody }); + await store.stopSession(); + + // Manually rewrite the index to strip responseContentType (simulate old format) + const indexPath = path.join(dir, session.id, "index.ndjson"); + const indexRaw = await fs.readFile(indexPath, "utf8"); + const lines = indexRaw.trim().split("\n").map(line => { + const entry = JSON.parse(line); + delete entry.responseContentType; + return JSON.stringify(entry); + }); + await fs.writeFile(indexPath, lines.join("\n") + "\n"); + + // With contentTypeContains filter, old entries (undefined) should pass through + const result = await store.searchSessionBodies(session.id, { + text: "findme", + contentTypeContains: "html", + }); + assert.equal(result.totalMatches, 1); + }); +});