From a73e40dba869550e9d7481021381e5e366a7631b Mon Sep 17 00:00:00 2001 From: flupkede Date: Fri, 1 May 2026 20:47:12 +0200 Subject: [PATCH 1/2] docs: add CHANGELOG.md for first stable release --- CHANGELOG.md | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.lock | 2 +- Cargo.toml | 2 +- 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..94165c4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,82 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +First stable release of codesearch — a Rust-based hybrid (vector + BM25 + AST) +code search MCP server, optimised for AI coding agents working across many +repositories. + +### Added + +- **Multi-repository serve mode** (`codesearch serve`): a long-running HTTP/SSE + process that holds many indexed repositories warm at the same time, with + per-project routing via `project=…`, group routing via `group=…`, and + cross-repository search using RRF fusion across project boundaries. +- **Stdio proxy with auto-reconnect**: `codesearch mcp` (stdio mode) detects a + running `serve` process and proxies tool calls to it. The proxy now performs + client-side retries with a forced reconnect when it sees a transport-level + failure (broken TCP keep-alive, stale session 404, server restart, laptop + suspend) so MCP clients like Claude Desktop self-heal transparently. After a + serve restart the first call returns a clear "reconnecting" message and the + next call succeeds. 
+- **MCP tool surface optimised for agents** to reduce grep-fallback behaviour: + - `search` (semantic / hybrid / lexical / pure-literal regex modes) + - `find` (definition / usages / imports / dependents) + - `explore` (file outline / similar chunks) + - `get_chunk` for cheap follow-up reads of a specific code chunk + - `status` (index / projects) +- **Tree-sitter AST-aware chunking** for 9 languages: Rust, Python, JavaScript, + TypeScript, C, C++, C#, Go, Java. +- **Persistent embedding cache** keyed on SHA-256 of chunk content, surviving + `--force` rebuilds and per-file re-indexes. +- **Git worktree support**: when `.git` is a worktree marker file (not a + directory), the project root is correctly resolved to the worktree itself. +- **Long UNC-path support** on Windows for repositories under `\\?\C:\…` paths. +- **Repository groups** for cross-repo search across user-defined sets of + projects (e.g. all `*.Aprimo*` repos). + +### Changed + +- **Search quality**: re-tuned RRF fusion of the vector / BM25 / exact-identifier + signals so common tool names and exact strings are no longer drowned out by + semantic neighbours, reducing the rate at which agents fall back to external + grep. +- **Idle eviction**: only refreshes a project's "last accessed" timestamp on a + direct query against that project, not on fan-out queries that touch the + index merely because they routed through the same group. +- **TUI CPU%**: now normalised by core count. + +### Fixed + +- **Security**: validate `CODESEARCH_CONFIG` environment variable against a path + traversal pattern (CodeQL finding). Config path is now rejected if it contains + `..` segments, preventing a directory traversal via env var. +- **Issue #30** ([LMDB resize crash on large repositories](https://github.com/flupkede/codesearch/issues/30)): + When the database grew beyond its initial allocation (`MDB_MAP_FULL`), the + resize failed with `"an environment is already opened with different options"`. 
+ The fix closes and reopens the LMDB environment around the resize, allowing + codesearch to index large repositories (tested: 4400+ files, 89 MB) without + crashing. +- File-change tracking and reaper visibility in `serve` mode. + +### Removed + +- Server-side transparent MCP session-reconnect middleware: replaced by the + client-side retry in the stdio proxy. The middleware could not reach + non-compliant remote MCP clients (their HTTP pool gives up at the TCP layer + before the request hits the server) and added a session-counter leak. + +### Known limitations + +- Remote MCP clients that do not handle 404 "Session not found" per the MCP + spec (e.g. OpenCode 1.14.x at the time of writing) need to be restarted after + a `codesearch serve` restart. +- `codesearch serve` keeps one writer per database (LMDB invariant). Concurrent + reindex from a second process is rejected. + +[Unreleased]: https://github.com/flupkede/codesearch/compare/master...develop diff --git a/Cargo.lock b/Cargo.lock index 3ee4cdc..cf7c5f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.72" +version = "1.0.73" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 8867500..b84a83c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.72" +version = "1.0.73" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" From 64ce385463c39614b7b157670aec68a3fea0694d Mon Sep 17 00:00:00 2001 From: flupkede Date: Fri, 1 May 2026 22:02:02 +0200 Subject: [PATCH 2/2] =?UTF-8?q?fix(serve):=20remove=20session=20keep=5Fali?= =?UTF-8?q?ve=20timeout=20=E2=80=94=20sessions=20live=20until=20TCP=20dies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/serve/mod.rs | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Cargo.lock 
b/Cargo.lock index cf7c5f1..52d95a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.73" +version = "1.0.74" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index b84a83c..28076e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.73" +version = "1.0.74" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" diff --git a/src/serve/mod.rs b/src/serve/mod.rs index d3dd6c4..e327c8e 100644 --- a/src/serve/mod.rs +++ b/src/serve/mod.rs @@ -1731,10 +1731,12 @@ pub async fn run_serve( .map_err(std::io::Error::other) }; - // Build session manager with extended keep_alive (default is 5 min which kills - // idle MCP sessions too aggressively). 30 minutes matches our repo idle eviction. + // Build session manager without keep_alive timeout. The default rmcp timeout + // (5 min) kills idle sessions too aggressively for a local long-running serve. + // We run as a single-user local process, so abandoned sessions cost nothing — let TCP + // liveness determine when a session is truly dead. let mut session_manager = LocalSessionManager::default(); - session_manager.session_config.keep_alive = Some(std::time::Duration::from_secs(30 * 60)); + session_manager.session_config.keep_alive = None; let session_manager = Arc::new(session_manager); let config = StreamableHttpServerConfig::default();