From ff56d03a28e53eff611061fdced38bb673229535 Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Fri, 1 May 2026 23:33:40 +0200
Subject: [PATCH 1/6] docs(changelog): remove empty Unreleased section, fix
 links

---
 CHANGELOG.md | 4 +---
 Cargo.lock   | 2 +-
 Cargo.toml   | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb51c30..25fe694 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,8 +5,6 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
-
 ## [1.0.74] - 2026-05-01
 
 ### Fixed
@@ -89,6 +87,6 @@ repositories.
 - `codesearch serve` keeps one writer per database (LMDB invariant). Concurrent
   reindex from a second process is rejected.
 
-[Unreleased]: https://github.com/flupkede/codesearch/compare/v1.0.74...develop
+[1.0.75]: https://github.com/flupkede/codesearch/compare/v1.0.74...v1.0.75
 [1.0.74]: https://github.com/flupkede/codesearch/compare/v1.0.72...v1.0.74
 [1.0.72]: https://github.com/flupkede/codesearch/releases/tag/v1.0.72
diff --git a/Cargo.lock b/Cargo.lock
index 3055fc7..09a9020 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.75"
+version = "1.0.76"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index 5c66c0e..085a4e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.75"
+version = "1.0.76"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"

From 87f6b55c76a2705cbc62d4a5ee6cfcb48e7156b2 Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Fri, 1 May 2026 23:59:04 +0200
Subject: [PATCH 2/6] chore: remove stale planning docs (.docs/) and old
 benchmark results (benchmarks/)

---
 .docs/MCP_HELP_SYSTEM.md                      | 321 ------------------
 AGENTS.md                                     | 198 ++++-------
 Cargo.lock                                    |   2 +-
 Cargo.toml                                    |   2 +-
 benchmarks/BGE-small-en-v1.5.md               |  62 ----
 benchmarks/FULL_BENCHMARK_SUMMARY.md          |  69 ----
 benchmarks/benchmark-20251124-232718.md       |  67 ----
 benchmarks/benchmark-20251124-234722.md       |  45 ---
 benchmarks/benchmark-20251125-103111.md       |  45 ---
 benchmarks/benchmark-20251125-103719.md       |  45 ---
 benchmarks/benchmark-20251125-104204.md       |  23 --
 benchmarks/demongrep_vs_osgrep.md             |  79 -----
 benchmarks/external_repo_bat.md               |  65 ----
 benchmarks/improvement-plan.md                |  22 --
 .../mcp-tool-description-improvements.md      |   7 -
 benchmarks/test_external_repo.sh              | 105 ------
 16 files changed, 61 insertions(+), 1096 deletions(-)
 delete mode 100644 .docs/MCP_HELP_SYSTEM.md
 delete mode 100644 benchmarks/BGE-small-en-v1.5.md
 delete mode 100644 benchmarks/FULL_BENCHMARK_SUMMARY.md
 delete mode 100644 benchmarks/benchmark-20251124-232718.md
 delete mode 100644 benchmarks/benchmark-20251124-234722.md
 delete mode 100644 benchmarks/benchmark-20251125-103111.md
 delete mode 100644 benchmarks/benchmark-20251125-103719.md
 delete mode 100644 benchmarks/benchmark-20251125-104204.md
 delete mode 100644 benchmarks/demongrep_vs_osgrep.md
 delete mode 100644 benchmarks/external_repo_bat.md
 delete mode 100644 benchmarks/improvement-plan.md
 delete mode 100644 benchmarks/mcp-tool-description-improvements.md
 delete mode 100644 benchmarks/test_external_repo.sh

diff --git a/.docs/MCP_HELP_SYSTEM.md b/.docs/MCP_HELP_SYSTEM.md
deleted file mode 100644
index 6e0868d..0000000
--- a/.docs/MCP_HELP_SYSTEM.md
+++ /dev/null
@@ -1,321 +0,0 @@
-# MCP Help System Implementation Summary
-
-## Questions Answered
-
-### 1. Can we add `--help` to the mcp command?
-
-**Yes, but it's already available!**
-
-Since `mcp` is a clap subcommand, users can run:
-
-```bash
-codesearch mcp --help
-```
-
-This displays:
-```text
-Start MCP server for Claude Code integration
-
-Usage: codesearch [OPTIONS] mcp [PATH]
-
-Arguments:
-  [PATH]  Path to project (defaults to current directory)
-
-Options:
-  -h, --help     Print help
-  -v, --verbose  Enable verbose output
-  -q, --quiet    Suppress informational output
-```
-
-### 2. Is there a specific tool an agent calls to get help from an MCP?
-
-**No standard "help" tool exists in the MCP protocol.**
-
-However, MCP servers have an `instructions` field in their server info that's automatically displayed when the AI assistant connects to the server.
-
-## Implementation Details
-
-### Before Enhancement
-
-The original MCP server had minimal instructions:
-
-```rust
-instructions: Some(
-    "codesearch is a semantic code search tool. Use semantic_search to find code \
-     by meaning, get_file_chunks to see all chunks in a file, and index_status \
-     to check if the index is ready."
-        .to_string(),
-),
-```
-
-### After Enhancement
-
-I've expanded the instructions to include comprehensive help:
-
-```rust
-instructions: Some(
-    format!(r#"
-codesearch - Semantic Code Search MCP Server
-
-OVERVIEW:
-codesearch provides fast, local semantic code search using natural language queries.
-Search your codebase by meaning, not just by keywords.
-
-AVAILABLE TOOLS:
-
-1. semantic_search(query, limit=10)
-   Search the codebase using natural language queries.
-   Query examples:
-     - "where do we handle user authentication?"
-     - "how is error logging implemented?"
-     - "functions that process payment data"
-     - "database connection management"
-   Returns: Array of matches with path, line numbers, code content, and relevance scores.
-
-2. get_file_chunks(path)
-   Get all indexed chunks from a specific file.
-   Useful for understanding the complete structure of a file.
-   Returns: All chunks from the file with full context.
-
-3. index_status()
-   Check if the index exists and get database statistics.
-   Use this before searching to verify the index is ready.
-   Returns: Index status, total chunks, files, model info, and dimensions.
-
-USAGE PATTERNS:
-
-Understanding a New Codebase:
-  1. Check index_status() to verify index is ready
-  2. Search for core concepts: semantic_search("main application entry point")
-  3. Explore patterns: semantic_search("error handling strategy")
-  4. Get detailed view: get_file_chunks("src/main.rs")
-
-Finding Implementation Patterns:
-  - semantic_search("how are API endpoints defined?")
-  - semantic_search("database model definitions")
-  - get_file_chunks("src/models/user.rs")
-
-Debugging and Analysis:
-  - semantic_search("error handling for database operations")
-  - semantic_search("user input validation")
-
-Implementing New Features:
-  - semantic_search("authentication handling code") - Find reference implementations
-  - semantic_search("configuration management") - Understand patterns
-  - get_file_chunks("src/config.rs") - See detailed implementation
-
-BEST PRACTICES:
-
-✓ Use natural language queries describing concepts, not exact terms
-✓ Check index_status() before searching
-✓ Use specific queries with context (e.g., "API layer error handling" vs "error handling")
-✓ Combine semantic_search() with get_file_chunks() for detailed analysis
-✓ Start with broader queries, then narrow down
-
-✗ Avoid short, vague queries like "auth" or "db" (use grep for exact matches)
-✗ Don't expect exact string matching (that's what grep is for)
-
-PERFORMANCE:
-- Search speed: ~75ms after initial model load
-- First search: ~2-3s (model loading time)
-- Indexing: 30-60s for initial, incremental updates are instant
-
-SETUP:
-If this MCP server doesn't find an index, the user needs to run:
-  codesearch index
-
-For detailed documentation, visit: https://github.com/yxanul/codesearch
-
-Current database: {db}
-Model: {model}
-Dimensions: {dims}
-"#,
-        db = self.db_path.display(),
-        model = self.model_type.short_name(),
-        dims = self.dimensions
-    )
-),
-```
-
-## How AI Assistants Access MCP Help
-
-### Automatic Display
-
-When Claude Code (or other MCP-compatible assistant) connects to the codesearch MCP server, it automatically:
-
-1. Calls the server's `info` endpoint
-2. Receives the `instructions` field
-3. Displays this to the user or uses it internally to understand available tools
-
-### No Explicit Help Call Needed
-
-Unlike CLI tools where you type `--help`, MCP help is:
-- Automatically provided on connection
-- Available through the assistant's UI
-- Can be queried by asking: "What tools does codesearch provide?"
-
-### Practical Usage
-
-In Claude Code, you might ask:
-
-```
-> What can I do with codesearch?
-> Show me help for the codesearch MCP server
-> How do I search code with codesearch?
-```
-
-Claude will use the `instructions` to answer.
-
-## Key Improvements Made
-
-### 1. Comprehensive Tool Documentation
-- Detailed descriptions of each tool
-- Parameter specifications
-- Return value explanations
-- Usage examples
-
-### 2. Usage Patterns
-- Real-world workflows for common tasks
-- Step-by-step examples
-- Different use cases (understanding, debugging, implementing)
-
-### 3. Best Practices
-- Do's and don'ts for effective queries
-- Performance considerations
-- Common pitfalls to avoid
-
-### 4. Dynamic Information
-- Current database path
-- Active model type
-- Vector dimensions
-
-### 5. Setup Instructions
-- Quick start guide
-- Troubleshooting hints
-- Link to full documentation
-
-## Comparison: CLI Help vs MCP Help
-
-| Aspect | CLI (`codesearch mcp --help`) | MCP Instructions |
-|--------|----------------------------|------------------|
-| **When shown** | When user explicitly requests | On server connection |
-| **Audience** | Humans setting up MCP | AI assistants |
-| **Content** | Command syntax & flags | Tool usage & examples |
-| **Updates** | Static | Can include runtime info |
-| **User control** | Explicit (`--help`) | Automatic |
-
-## Future Enhancements
-
-### Potential Additions
-
-1. **Interactive Help Tool**
-   ```rust
-   #[tool(description = "Get detailed help and usage examples for codesearch tools")]
-   async fn help(&self) -> Result<CallToolResult, McpError> {
-       // Return comprehensive help documentation
-   }
-   ```
-
-2. **Tool-Specific Help**
-   ```rust
-   #[tool(description = "Get help for a specific tool")]
-   async fn tool_help(&self, Parameters(req): Parameters<ToolHelpRequest>) -> Result<CallToolResult, McpError> {
-       // Return detailed help for requested tool
-   }
-   ```
-
-3. **Example Queries**
-   ```rust
-   #[tool(description = "Get example search queries for common scenarios")]
-   async fn example_queries(&self) -> Result<CallToolResult, McpError> {
-       // Return curated query examples
-   }
-   ```
-
-### Why Not Implemented Yet?
-
-- Current instructions cover most use cases
-- Keep MCP interface simple (3 core tools)
-- Can add if user feedback indicates need
-
-## Testing the Help System
-
-### 1. Build and Run
-
-```bash
-# Build the project
-cargo build --release
-
-# Start MCP server
-./target/release/codesearch mcp /path/to/project
-```
-
-### 2. Verify Help Content
-
-The help will be displayed when:
-- You connect Claude Code to the MCP server
-- You ask "What can codesearch do?"
-- You request MCP server info
-
-### 3. Check Dynamic Content
-
-The help includes runtime information:
-- Database path
-- Model type (e.g., `minilm-l6-q`, `jina-code`)
-- Vector dimensions (384, 768, 1024)
-
-## Integration with Documentation
-
-### File Hierarchy
-
-```
-codesearch/
-├── README.md                              # General overview
-├── AI_AGENT_CLI_INSTRUCTIONS.md          # CLI usage for AI agents
-├── MCP_INSTRUCTIONS.md                    # Setup guide for MCP
-├── MCP_HELP_SYSTEM.md                    # This file
-└── src/mcp/mod.rs                        # Enhanced help in code
-```
-
-### Documentation Flow
-
-1. **New User** → README.md → MCP_INSTRUCTIONS.md → Setup
-2. **AI Agent** → AI_AGENT_CLI_INSTRUCTIONS.md → Integration
-3. **Running MCP** → Automatic help on connection
-4. **Need More** → Full documentation at GitHub
-
-## Summary
-
-### ✅ What We Have
-
-1. **CLI Help**: `codesearch mcp --help` - Basic command help
-2. **MCP Instructions**: Comprehensive help displayed on connection
-3. **Dynamic Info**: Runtime data included in help
-4. **Usage Patterns**: Real-world workflows documented
-
-### 📝 What Works
-
-- AI assistants automatically receive help on connection
-- Users can query "What can codesearch do?"
-- Examples and best practices included
-- Performance and setup information provided
-
-### 🚀 What's Possible
-
-- Add interactive `help()` tool if needed
-- Add `tool_help()` for tool-specific documentation
-- Add `example_queries()` for curated query examples
-- Expand based on user feedback
-
-## Conclusion
-
-The MCP help system is now comprehensive and user-friendly. AI assistants like Claude Code automatically receive detailed instructions when connecting to the codesearch MCP server, including:
-
-- Available tools and their usage
-- Real-world usage patterns
-- Best practices and pitfalls
-- Performance characteristics
-- Setup instructions
-- Dynamic runtime information
-
-No explicit help call is needed - it's all automatic!
diff --git a/AGENTS.md b/AGENTS.md
index debc17c..9997559 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,163 +1,83 @@
-# AGENTS.md — features/fixes branch plan
+# AGENTS.md — features/cleanup
 
-## Branch: `features/fixes`
-**Base:** `develop`
-**Goal:** Fix idle eviction bug + improve search quality to reduce agent grep fallback
+## Goal
 
----
-
-## Fix 1: Idle eviction — `get_or_open_stores` touches ALL repos on fan-out
-
-### Problem
-
-`get_or_open_stores()` calls `touch_access()` unconditionally (lines 456, 470 in `src/serve/mod.rs`).
-When `get_chunk` is called without `project`/`group` (`allow_unscoped=true`), `resolve_repo_stores_multi`
-fans out to ALL repos via `get_or_open_stores()` — resetting the idle timer on every repo.
-
-Result: repos are never idle, reaper never evicts. The 30-minute timeout is effectively disabled
-whenever any agent uses `get_chunk` without explicit project scope.
-
-Same issue affects `status` tool with explicit project/group (goes through `get_or_open_stores`),
-though the unscoped `status` path uses `repo_statuses_lightweight()` which is safe.
-
-### Fix: Add `touch: bool` parameter to `get_or_open_stores`
-
-**File:** `src/serve/mod.rs`
+Remove stale planning documents and old benchmark results that have no value
+for contributors or users of the released codebase. These directories were
+useful during development but are now clutter.
 
-1. Change signature: `pub(crate) async fn get_or_open_stores(&self, alias: &str, touch: bool)`
-2. Only call `self.touch_access(alias)` when `touch == true`
-3. Update all call sites:
-   - `warmup_repo` (line 456) → `touch: false` (warmup should NOT reset idle timer)
-   - `get_or_open_stores` fast path (line 470) → keep `touch: true` (direct query access)
-   - `resolve_repo_stores_multi` fan-out (line 3113 in `src/mcp/mod.rs`) → `touch: false`
-   - `resolve_repo_stores_multi` single project (line 3130) → `touch: true`
-   - `resolve_repo_stores_multi` group members (line 3141) → `touch: false`
-   - Lazy FSW transition (line ~487) → `touch: true` (first real query)
-   - `spawn_fsw_for_warm` (line ~722) → `touch: false`
-   - `reindex_handler` (lines 1166, 1211) → `touch: true`
-   - Test call sites → `touch: true`
+## Scope
 
-4. After `get_chunk` candidate detection resolves to a single repo, explicitly call
-   `serve_state.touch_access(&resolved_alias)` for just that repo.
-
-5. After group fan-out search completes, touch only repos that contributed results
-   (or touch all group members — acceptable since the agent explicitly requested the group).
-
-### Validation
-- `cargo check && cargo clippy --all-targets -- -D warnings`
-- `cargo test --lib`
-- Manual: start serve with 3+ repos, call `get_chunk` without project, verify reaper log
-  shows idle ages increasing (not resetting) for untouched repos
+This branch is file deletions only — no source code, no tests — apart from the
+version bump in `Cargo.toml`/`Cargo.lock`. `cargo check` is not required (no Rust changes).
 
 ---
 
-## Fix 2: Search quality — reduce agent grep fallback
-
-### Problem
-
-Agents fall back to `grep` when `codesearch_search` returns poor or zero results.
-Root causes:
-
-1. **Top-N cutoff too aggressive** — retrieval pool is `limit * 3`, fusion drops relevant results
-2. **Exact identifier boost too weak** — `EXACT_MATCH_RRF_K = 5.0` doesn't sufficiently
-   prioritize exact code matches over semantic similarity
-3. **No auto-fallback** — when semantic search returns few results, no automatic literal retry
-4. **minilm-l6 weak on code** — embedding model is NL-trained, code identifiers get poor vectors.
-   Not fixable without model change, but compensated by stronger FTS fusion.
-
-### Fix 2a: Increase retrieval pool
-
-**File:** `src/mcp/mod.rs`
+## Tasks
 
-Change all `limit * 3` to `limit * 5` in the semantic search pipeline.
-This gives the RRF fusion more candidates to work with, reducing the chance
-that a relevant result falls outside the retrieval window.
+### 1. Delete `.docs/` directory (entire tree)
 
-Affected locations (all in `src/mcp/mod.rs`):
-- Line 3698: `store.search(&query_embedding, limit * 3)` → `limit * 5`
-- Line 3753: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5`
-- Line 3864: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5`
-- Line 3925: `store.search(&query_embedding, limit * 3)` → `limit * 5`
-- Line 3955: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5`
-- Line 4108: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5`
+Remove all files under `.docs/` including the `done/` subdirectory:
 
-Also in `src/search/mod.rs` (CLI search path) — same pattern.
-
-Leave `search_exact` at `limit * 2` (exact matches are already high-precision).
-Leave `search_phrase` at `limit * 3` (phrase search is already precise).
-
-### Fix 2b: Stronger exact identifier boost
-
-**File:** `src/rerank/mod.rs`
-
-Change `EXACT_MATCH_RRF_K` from `5.0` to `2.0`.
-
-Lower K = steeper rank curve = exact matches get proportionally higher RRF scores.
-At K=5, an exact match at rank 1 gets score `1/(5+1) = 0.167`.
-At K=2, an exact match at rank 1 gets score `1/(2+1) = 0.333` — 2x stronger signal.
-
-This ensures that when an agent searches for `"evict_idle_repos"`, the chunk containing
-that exact identifier dominates the fusion result even if the embedding similarity is low.
-
-### Fix 2c: Auto-fallback to literal search
+```
+.docs/MCP_HELP_SYSTEM.md
+.docs/opencode-reload-commands-pr.md
+.docs/plan-implementation.md
+.docs/plan-testing.md
+.docs/done/benchmarks-improvement-plan.md
+.docs/done/codesearch-improvement-plan.md
+.docs/done/LMDBResilience_GitAware_IndexCompact.md
+.docs/done/old-plan-review.md
+.docs/done/old-testplan.md
+.docs/done/plan-embedding-cache.md
+.docs/done/plan-review.md
+```
 
-**File:** `src/mcp/mod.rs`, in `semantic_search()` (line ~3620)
+### 2. Delete `benchmarks/` directory (entire tree)
 
-After the hybrid search completes and results are built:
+Remove all files under `benchmarks/`:
 
-```rust
-// If semantic/hybrid returned fewer than 3 results and query looks like code,
-// auto-fallback to literal search and merge results.
-if results.len() < 3 && has_identifiers {
-    // Try literal FTS search as fallback
-    let literal_results = fts_store.search(&request.query, limit, None)?;
-    // Deduplicate by chunk_id and append
-    for lr in literal_results {
-        if !results.iter().any(|r| r.id == lr.chunk_id) {
-            // Convert FtsResult to SearchResult and append
-        }
-    }
-}
+```
+benchmarks/benchmark-20251124-232718.md
+benchmarks/benchmark-20251124-234722.md
+benchmarks/benchmark-20251125-103111.md
+benchmarks/benchmark-20251125-103719.md
+benchmarks/benchmark-20251125-104204.md
+benchmarks/BGE-small-en-v1.5.md
+benchmarks/demongrep_vs_osgrep.md
+benchmarks/external_repo_bat.md
+benchmarks/FULL_BENCHMARK_SUMMARY.md
+benchmarks/improvement-plan.md
+benchmarks/mcp-tool-description-improvements.md
+benchmarks/test_external_repo.sh
 ```
 
-Implementation details:
-- Only trigger when `results.len() < 3` AND `has_identifiers` (code-like query)
-- Use `with_fts_store_read_for` to run the fallback FTS search
-- Deduplicate by `chunk_id` before merging
-- Cap total results at `limit`
-- Log when fallback triggers: `tracing::debug!("Auto-fallback: semantic returned {} results, trying literal", results.len())`
+### 3. Commit
 
-### Fix 2d: Increase `search_exact` retrieval for identifiers
+```bash
+git rm -r .docs benchmarks
+git commit -m "chore: remove stale planning docs and old benchmark results"
+git push origin features/cleanup
+```
 
-**File:** `src/mcp/mod.rs`
+### 4. Update CHANGELOG.md
 
-Change `search_exact(ident, limit * 2, ...)` to `search_exact(ident, limit * 3, ...)`
-in the identifier boost paths (lines 3762, 3876, 3968, 4120).
+Add a line under a new `## [Unreleased]` section — or add to the next release
+section if one already exists:
 
-More exact candidates = better chance the right chunk survives RRF fusion.
+```markdown
+### Removed
 
-### Validation
-- `cargo check && cargo clippy --all-targets -- -D warnings`
-- `cargo test --lib`
-- Manual test queries that previously required grep fallback:
-  - `codesearch search "evict_idle_repos"` — should find the function
-  - `codesearch search "touch_access"` — should find the method
-  - `codesearch search "Database cleared"` — should find the log message (already fixed by AND mode)
-  - `codesearch search "EXACT_MATCH_RRF_K"` — should find the constant
+- Stale planning documents (`.docs/`) and old benchmark results (`benchmarks/`)
+  removed from the repository. These were internal working documents with no
+  value for contributors.
+```
 
 ---
 
-## Execution order
-
-1. **Fix 1** — idle eviction (`touch` parameter)
-2. **Fix 2a** — retrieval pool `limit * 5`
-3. **Fix 2b** — `EXACT_MATCH_RRF_K` = 2.0
-4. **Fix 2c** — auto-fallback to literal
-5. **Fix 2d** — `search_exact` retrieval `limit * 3`
-6. Validate all together
-7. Commit
-
-## Commits
+## Done when
 
-One commit per fix, or group 2a-2d into a single "search quality" commit.
-Prefer: 2 commits total (Fix 1 + Fix 2).
+- [ ] `.docs/` directory no longer exists in the repository
+- [ ] `benchmarks/` directory no longer exists in the repository
+- [ ] Commit on `features/cleanup` pushed to origin
+- [ ] CHANGELOG.md updated
diff --git a/Cargo.lock b/Cargo.lock
index 3055fc7..09a9020 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.75"
+version = "1.0.76"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index 5c66c0e..085a4e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.75"
+version = "1.0.76"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"
diff --git a/benchmarks/BGE-small-en-v1.5.md b/benchmarks/BGE-small-en-v1.5.md
deleted file mode 100644
index 5a477e8..0000000
--- a/benchmarks/BGE-small-en-v1.5.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Benchmark: BAAI/bge-small-en-v1.5
-
-**Date**: 2024-11-24
-**Model**: `BAAI/bge-small-en-v1.5`
-**Variant**: `BGESmallENV15`
-**Dimensions**: 384
-**Quantized**: No
-
-## Indexing Performance
-
-| Metric | Value |
-|--------|-------|
-| Files indexed | 46 |
-| Chunks created | 592 |
-| Database size | 4.06 MB |
-| Avg per chunk | 7.03 KB |
-
-## Search Performance
-
-| Metric | Value |
-|--------|-------|
-| Database load | ~70µs |
-| Model load | ~150ms |
-| Query embed | ~4ms |
-| Search | ~600µs |
-| **Total latency** | **~155ms** |
-
-## Accuracy Tests
-
-| Query | Expected File | Top Result | Score | Correct |
-|-------|---------------|------------|-------|---------|
-| "SemanticChunker struct" | semantic.rs | `src/chunker/semantic.rs` SemanticChunker | **0.929** | ✅ |
-| "VectorStore insert chunks" | store.rs | `src/vectordb/store.rs` insert_chunks() | **0.912** | ✅ |
-| "tree-sitter grammar loading" | parser.rs | `src/chunker/parser.rs` | **0.903** | ✅ |
-| "extract function signature from AST" | extractor.rs | `src/chunker/extractor.rs` extract_signature() | **0.894** | ✅ |
-| "how do we detect binary files" | binary.rs | `src/file/binary.rs` | **0.909** | ✅ |
-| "where is the main entry point" | main.rs | `src/main.rs` main() | ✅ | ✅ |
-| "CLI argument parsing clap" | cli/mod.rs | `src/cli/mod.rs` Cli struct | ✅ | ✅ |
-| "FileWalker walk directory" | file walker | `examples/file_walker_demo.rs` | ✅ | ✅ |
-| "RustExtractor python typescript" | extractor.rs | `src/chunker/extractor.rs` | **0.894** | ✅ |
-
-### Edge Case (Non-existent content)
-
-| Query | Result | Score | Note |
-|-------|--------|-------|------|
-| "kubernetes deployment yaml" | PROJECT_STATUS.md | **0.825** | False positive, lower score |
-
-## Summary
-
-| Metric | Value |
-|--------|-------|
-| **Accuracy** | 9/9 (100%) |
-| **Avg score (correct)** | 0.90 |
-| **False positive score** | 0.825 |
-| **Suggested threshold** | 0.85 |
-
-## Notes
-
-- Excellent accuracy on code-related queries
-- Natural language questions work well
-- False positives have noticeably lower scores (~0.82 vs ~0.90)
-- Fast search latency after initial model load
diff --git a/benchmarks/FULL_BENCHMARK_SUMMARY.md b/benchmarks/FULL_BENCHMARK_SUMMARY.md
deleted file mode 100644
index 6296b87..0000000
--- a/benchmarks/FULL_BENCHMARK_SUMMARY.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# DemonGrep Embedding Model Benchmark - Full Summary
-
-**Date**: 2025-11-25
-**Test Chunks**: ~607 chunks from codesearch codebase
-**Test Queries**: 9 semantic search queries
-
-## Results Summary (Sorted by Accuracy)
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | Notes |
-|-------|------|----------|-----------|------------|------------|-------|
-| AllMiniLML6V2Q | 384 | **100%** | 0.554 | ~25s | 1.79ms | **BEST ACCURACY** - Quantized, fastest |
-| JinaEmbeddingsV2BaseCode | 768 | 89% | 0.714 | 73.6s | 10.5ms | Code-optimized, low false positives (0.34) |
-| MultilingualE5Small | 384 | 89% | 0.886 | 28.0s | 3.4ms | High scores but high false positive (0.90) |
-| BGESmallENV15 | 384 | 89% | 0.792 | ~30s | ~2ms | **DEFAULT** - Good balance |
-| BGESmallENV15Q | 384 | 89% | 0.792 | ~30s | ~2ms | Quantized version |
-| AllMiniLML6V2 | 384 | 78% | 0.556 | ~30s | ~3ms | Non-quantized |
-| AllMiniLML12V2Q | 384 | 78% | 0.570 | 25.8s | 1.8ms | Quantized L12 |
-| ParaphraseMLMiniLML12V2 | 384 | 78% | 0.598 | 30.6s | 2.8ms | Paraphrase-optimized |
-| NomicEmbedTextV1 | 768 | 78% | 0.624 | 72.7s | 11.7ms | |
-| NomicEmbedTextV15 | 768 | 78% | 0.666 | 68.4s | 11.7ms | |
-| NomicEmbedTextV15Q | 768 | 78% | 0.662 | 59.9s | 4.7ms | Quantized |
-| MxbaiEmbedLargeV1 | 1024 | 78% | 0.771 | 164.4s | 33.1ms | Large model |
-| BGEBaseENV15 | 768 | 67% | 0.792 | 64.8s | 8.3ms | |
-| AllMiniLML12V2 | 384 | 56% | 0.567 | ~25s | ~2ms | |
-| ModernBertEmbedLarge | 1024 | 56% | 0.699 | 203.1s | 30.6ms | Newest architecture, slow |
-
-**Skipped**: BGELargeENV15 (1024 dims) - Memory intensive
-
-## Key Findings
-
-### Top Performers for Code Search
-1. **AllMiniLML6V2Q** (100% accuracy) - Best overall, quantized = fast
-2. **JinaEmbeddingsV2BaseCode** (89%) - Code-specialized, excellent false positive resistance
-3. **BGESmallENV15** (89%) - Current default, good balance of speed and accuracy
-
-### Speed vs Quality Tradeoffs
-- **Fastest**: AllMiniLML6V2Q - 1.79ms query time, 25s indexing
-- **Slowest**: ModernBertEmbedLarge - 30.63ms query time, 203s indexing
-- **Best balance**: BGESmallENV15 - ~2ms query, ~30s indexing, 89% accuracy
-
-### Observations
-- Quantized models (Q suffix) are faster with minimal accuracy loss
-- Larger models (768/1024 dims) don't necessarily mean better code search accuracy
-- Code-specialized models (Jina) perform well on code search tasks
-- MultilingualE5Small has high scores but poor discrimination (0.90 false positive)
-
-## Recommendations
-
-| Use Case | Recommended Model |
-|----------|-------------------|
-| Best accuracy | AllMiniLML6V2Q |
-| Code-specific search | JinaEmbeddingsV2BaseCode |
-| Balanced (current default) | BGESmallENV15 |
-| Resource constrained | AllMiniLML6V2Q |
-| Need high semantic similarity | MultilingualE5Small (watch false positives) |
-
-## Test Queries Used
-
-1. "SemanticChunker struct" → src/chunker/semantic.rs
-2. "VectorStore insert chunks" → src/vectordb/store.rs
-3. "tree-sitter grammar loading" → src/chunker/parser.rs
-4. "extract function signature from AST" → src/chunker/extractor.rs
-5. "how do we detect binary files" → src/file/binary.rs
-6. "where is the main entry point" → src/main.rs
-7. "CLI argument parsing clap" → src/cli/mod.rs
-8. "FileWalker walk directory" → file_walker
-9. "RustExtractor python typescript" → src/chunker/extractor.rs
-
-False positive test: "kubernetes deployment yaml" (should score < 0.85)
diff --git a/benchmarks/benchmark-20251124-232718.md b/benchmarks/benchmark-20251124-232718.md
deleted file mode 100644
index db136d2..0000000
--- a/benchmarks/benchmark-20251124-232718.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# Embedding Model Benchmark Results
-
-**Date**: 2025-11-24 23:27
-**Chunks**: 604
-
-## Summary
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time |
-|-------|------|----------|-----------|------------|------------|
-| bge-small | 384 | 89% | 0.792 | 30.86s | 2.96ms |
-| bge-small-q | 384 | 89% | 0.792 | 37.49s | 7.75ms |
-| minilm-l6 | 384 | 78% | 0.556 | 15.83s | 1.01ms |
-| minilm-l6-q | 384 | 100% | 0.554 | 13.20s | 1.05ms |
-| minilm-l12 | 384 | 56% | 0.567 | 30.80s | 3.20ms |
-
-## Individual Results
-
-### BAAI/bge-small-en-v1.5
-
-- **Dimensions**: 384
-- **Quantized**: false
-- **Model Load**: 145.721786ms
-- **Index Time**: 30.855070524s
-- **Accuracy**: 89%
-- **Avg Score**: 0.792
-- **False Positive Score**: 0.790
-
-### BAAI/bge-small-en-v1.5 (quantized)
-
-- **Dimensions**: 384
-- **Quantized**: true
-- **Model Load**: 3.774111036s
-- **Index Time**: 37.486437202s
-- **Accuracy**: 89%
-- **Avg Score**: 0.792
-- **False Positive Score**: 0.790
-
-### sentence-transformers/all-MiniLM-L6-v2
-
-- **Dimensions**: 384
-- **Quantized**: false
-- **Model Load**: 3.513947864s
-- **Index Time**: 15.828345778s
-- **Accuracy**: 78%
-- **Avg Score**: 0.556
-- **False Positive Score**: 0.675
-
-### sentence-transformers/all-MiniLM-L6-v2 (quantized)
-
-- **Dimensions**: 384
-- **Quantized**: true
-- **Model Load**: 2.949769726s
-- **Index Time**: 13.204097025s
-- **Accuracy**: 100%
-- **Avg Score**: 0.554
-- **False Positive Score**: 0.687
-
-### sentence-transformers/all-MiniLM-L12-v2
-
-- **Dimensions**: 384
-- **Quantized**: false
-- **Model Load**: 5.757348931s
-- **Index Time**: 30.802592408s
-- **Accuracy**: 56%
-- **Avg Score**: 0.567
-- **False Positive Score**: 0.618
-
diff --git a/benchmarks/benchmark-20251124-234722.md b/benchmarks/benchmark-20251124-234722.md
deleted file mode 100644
index b419f8f..0000000
--- a/benchmarks/benchmark-20251124-234722.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Embedding Model Benchmark Results
-
-**Date**: 2025-11-24 23:47
-**Chunks**: 605
-
-## Summary
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time |
-|-------|------|----------|-----------|------------|------------|
-| minilm-l12-q | 384 | 78% | 0.570 | 25.76s | 1.79ms |
-| paraphrase-minilm | 384 | 78% | 0.598 | 30.63s | 2.82ms |
-| bge-base | 768 | 67% | 0.792 | 64.83s | 8.29ms |
-
-## Individual Results
-
-### sentence-transformers/all-MiniLM-L12-v2 (quantized)
-
-- **Dimensions**: 384
-- **Quantized**: true
-- **Model Load**: 154.377983ms
-- **Index Time**: 25.762250925s
-- **Accuracy**: 78%
-- **Avg Score**: 0.570
-- **False Positive Score**: 0.644
-
-### sentence-transformers/paraphrase-MiniLM-L6-v2
-
-- **Dimensions**: 384
-- **Quantized**: false
-- **Model Load**: 1.624537749s
-- **Index Time**: 30.630321622s
-- **Accuracy**: 78%
-- **Avg Score**: 0.598
-- **False Positive Score**: 0.472
-
-### BAAI/bge-base-en-v1.5
-
-- **Dimensions**: 768
-- **Quantized**: false
-- **Model Load**: 1.019215842s
-- **Index Time**: 64.827749525s
-- **Accuracy**: 67%
-- **Avg Score**: 0.792
-- **False Positive Score**: 0.729
-
diff --git a/benchmarks/benchmark-20251125-103111.md b/benchmarks/benchmark-20251125-103111.md
deleted file mode 100644
index 597bb80..0000000
--- a/benchmarks/benchmark-20251125-103111.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Embedding Model Benchmark Results
-
-**Date**: 2025-11-25 10:31
-**Chunks**: 607
-
-## Summary
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time |
-|-------|------|----------|-----------|------------|------------|
-| nomic-v1 | 768 | 78% | 0.624 | 72.70s | 11.73ms |
-| nomic-v1.5 | 768 | 78% | 0.666 | 68.42s | 11.69ms |
-| nomic-v1.5-q | 768 | 78% | 0.662 | 59.93s | 4.66ms |
-
-## Individual Results
-
-### nomic-ai/nomic-embed-text-v1
-
-- **Dimensions**: 768
-- **Quantized**: false
-- **Model Load**: 1.362328404s
-- **Index Time**: 72.700093099s
-- **Accuracy**: 78%
-- **Avg Score**: 0.624
-- **False Positive Score**: 0.570
-
-### nomic-ai/nomic-embed-text-v1.5
-
-- **Dimensions**: 768
-- **Quantized**: false
-- **Model Load**: 1.240813506s
-- **Index Time**: 68.41701559s
-- **Accuracy**: 78%
-- **Avg Score**: 0.666
-- **False Positive Score**: 0.599
-
-### nomic-ai/nomic-embed-text-v1.5 (quantized)
-
-- **Dimensions**: 768
-- **Quantized**: true
-- **Model Load**: 3.38684491s
-- **Index Time**: 59.927164593s
-- **Accuracy**: 78%
-- **Avg Score**: 0.662
-- **False Positive Score**: 0.596
-
diff --git a/benchmarks/benchmark-20251125-103719.md b/benchmarks/benchmark-20251125-103719.md
deleted file mode 100644
index 051a3df..0000000
--- a/benchmarks/benchmark-20251125-103719.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Embedding Model Benchmark Results
-
-**Date**: 2025-11-25 10:37
-**Chunks**: 608
-
-## Summary
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time |
-|-------|------|----------|-----------|------------|------------|
-| jina-code | 768 | 89% | 0.714 | 73.60s | 10.48ms |
-| e5-multilingual | 384 | 89% | 0.886 | 27.97s | 3.42ms |
-| mxbai-large | 1024 | 78% | 0.771 | 164.38s | 33.09ms |
-
-## Individual Results
-
-### jinaai/jina-embeddings-v2-base-code
-
-- **Dimensions**: 768
-- **Quantized**: false
-- **Model Load**: 9.746430251s
-- **Index Time**: 73.604087631s
-- **Accuracy**: 89%
-- **Avg Score**: 0.714
-- **False Positive Score**: 0.341
-
-### intfloat/multilingual-e5-small
-
-- **Dimensions**: 384
-- **Quantized**: false
-- **Model Load**: 7.576603176s
-- **Index Time**: 27.968048427s
-- **Accuracy**: 89%
-- **Avg Score**: 0.886
-- **False Positive Score**: 0.897
-
-### mixedbread-ai/mxbai-embed-large-v1
-
-- **Dimensions**: 1024
-- **Quantized**: false
-- **Model Load**: 16.951440748s
-- **Index Time**: 164.379326267s
-- **Accuracy**: 78%
-- **Avg Score**: 0.771
-- **False Positive Score**: 0.705
-
diff --git a/benchmarks/benchmark-20251125-104204.md b/benchmarks/benchmark-20251125-104204.md
deleted file mode 100644
index 6657e93..0000000
--- a/benchmarks/benchmark-20251125-104204.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Embedding Model Benchmark Results
-
-**Date**: 2025-11-25 10:42
-**Chunks**: 609
-
-## Summary
-
-| Model | Dims | Accuracy | Avg Score | Index Time | Query Time |
-|-------|------|----------|-----------|------------|------------|
-| modernbert-large | 1024 | 56% | 0.699 | 203.07s | 30.63ms |
-
-## Individual Results
-
-### lightonai/modernbert-embed-large
-
-- **Dimensions**: 1024
-- **Quantized**: false
-- **Model Load**: 32.599241892s
-- **Index Time**: 203.070309573s
-- **Accuracy**: 56%
-- **Avg Score**: 0.699
-- **False Positive Score**: 0.591
-
diff --git a/benchmarks/demongrep_vs_osgrep.md b/benchmarks/demongrep_vs_osgrep.md
deleted file mode 100644
index cae4c5a..0000000
--- a/benchmarks/demongrep_vs_osgrep.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# Benchmark: codesearch vs osgrep
-
-**Date**: 2025-11-25
-**Test Repository**: sharkdp/bat (cat clone with syntax highlighting)
-**Repository Size**: ~400 files, 2.6 MB
-
-## Tool Comparison
-
-| Feature | codesearch | osgrep |
-|---------|-----------|--------|
-| **Language** | Rust | TypeScript |
-| **Embedding Library** | fastembed (ONNX) | transformers.js |
-| **Vector Store** | arroy + LMDB | LanceDB |
-| **Default Model** | BGE-small-en-v1.5 (384d) | mxbai-embed-xsmall-v1 |
-| **Tested Model** | minilm-l6-q (384d) | mxbai-embed-xsmall-v1 |
-| **Chunking** | tree-sitter (native) | tree-sitter (WASM) |
-| **Reranking** | No (vector only) | Yes (RRF hybrid) |
-| **Parallelism** | Single-threaded embed | 8 worker threads |
-
-## Indexing Performance
-
-| Tool | Files | Chunks | Index Time | Speed |
-|------|-------|--------|------------|-------|
-| **codesearch** (minilm-l6-q) | 396 | 3,518 | **69s** | 51 chunks/sec |
-| **osgrep** | 426 | ? | **120s** | - |
-
-**codesearch is 1.7x faster** despite using single-threaded embedding vs osgrep's 8 workers.
-
-## Search Accuracy
-
-### Test Queries on bat repository
-
-| # | Query | Expected | codesearch | osgrep |
-|---|-------|----------|-----------|--------|
-| 1 | syntax highlighting theme | theme.rs | ✅ theme.rs | ❌ Makefile |
-| 2 | read file from stdin input | input.rs | ✅ input.rs | ❌ output.rs |
-| 3 | pager less integration | pager.rs | ✅ pager.rs | ❌ output.rs |
-| 4 | git diff decorations | diff.rs | ✅ decorations.rs | ❌ requirements.txt |
-| 5 | parse command line config | config.rs | ❌ 50-paru.toml | ❌ command.rs |
-| 6 | error handling Result | error.rs | ✅ error.rs | ❌ output.rs |
-
-### Results Summary
-
-| Tool | Accuracy | Correct | Total |
-|------|----------|---------|-------|
-| **codesearch** (minilm-l6-q) | **83%** | 5 | 6 |
-| **osgrep** | **0%** | 0 | 6 |
-
-## Analysis
-
-### Why codesearch outperforms osgrep:
-
-1. **Better embedding model**: minilm-l6-q (384 dims) appears to have better semantic understanding for code search than mxbai-embed-xsmall-v1
-
-2. **Focused on source code**: codesearch correctly prioritizes `src/` files over test files, while osgrep frequently returns files from `tests/syntax-tests/`
-
-3. **Native performance**: Rust + ONNX (fastembed) is faster than JavaScript + transformers.js even with 8x parallelism
-
-4. **Chunk quality**: codesearch's semantic chunking creates 3,518 meaningful chunks vs osgrep's approach
-
-### osgrep's potential advantages (not measured):
-
-- Hybrid search (RRF) combining vector + FTS
-- Reranking model for result refinement
-- Live file watching with incremental updates
-- Claude Code integration
-
-## Conclusion
-
-On this benchmark, **codesearch significantly outperforms osgrep** in both:
-- **Speed**: 1.7x faster indexing
-- **Accuracy**: 83% vs 0% on semantic code search queries
-
-The choice of embedding model appears to be the primary factor in accuracy differences. codesearch's minilm-l6-q model (which achieved 100% accuracy on its own codebase) proves to be excellent for code search tasks.
-
-## Recommendations
-
-1. **For codesearch**: Consider adding hybrid search (RRF) and reranking for potential accuracy improvements
-2. **For osgrep users**: The mxbai-embed-xsmall model may not be optimal for code search tasks
diff --git a/benchmarks/external_repo_bat.md b/benchmarks/external_repo_bat.md
deleted file mode 100644
index ffb65a7..0000000
--- a/benchmarks/external_repo_bat.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# External Repo Benchmark: bat (sharkdp/bat)
-
-**Date**: 2025-11-25
-**Repository**: https://github.com/sharkdp/bat
-**Size**: 396 files, 3518 chunks, 2.6 MB
-
-## Test Queries
-
-| # | Query | Expected File |
-|---|-------|---------------|
-| 1 | syntax highlighting theme | theme.rs |
-| 2 | read file from stdin input | input.rs |
-| 3 | pager less integration | pager.rs or less.rs |
-| 4 | git diff decorations | diff.rs |
-| 5 | parse command line arguments config | config.rs |
-| 6 | error handling Result type | error.rs |
-
-## Results
-
-### minilm-l6-q (384 dims, quantized)
-- **Index Time**: 69s
-- **Accuracy**: 5/6 (83%)
-
-| Query | Result | Correct |
-|-------|--------|---------|
-| 1 | theme.rs | ✅ |
-| 2 | input.rs | ✅ |
-| 3 | pager.rs | ✅ |
-| 4 | decorations.rs | ✅ |
-| 5 | 50-paru.toml | ❌ |
-| 6 | error.rs | ✅ |
-
-### jina-code (768 dims, code-optimized)
-- **Index Time**: 363s (~5x slower)
-- **Accuracy**: 5/6 (83%)
-
-| Query | Result | Correct |
-|-------|--------|---------|
-| 1 | theme.rs | ✅ |
-| 2 | input.rs | ✅ |
-| 3 | pager.rs | ✅ |
-| 4 | diff.rs | ✅ |
-| 5 | config.rs | ✅ |
-| 6 | numpy_test_multiarray.py | ❌ |
-
-## Analysis
-
-Both models achieved 83% accuracy but on different queries:
-- **minilm-l6-q** correctly found error.rs but missed config.rs
-- **jina-code** correctly found config.rs and diff.rs but missed error.rs
-
-### Performance Comparison
-
-| Model | Dims | Index Time | Query Time | Accuracy |
-|-------|------|------------|------------|----------|
-| minilm-l6-q | 384 | 69s | ~2ms | 83% |
-| jina-code | 768 | 363s | ~10ms | 83% |
-
-### Recommendation
-
-For code search tasks:
-- **minilm-l6-q** offers best speed/accuracy tradeoff (5x faster indexing)
-- **jina-code** may be better for specific code-related queries but much slower
-
-The default BGE-small model (89% on codesearch codebase) is also a good balanced choice.
diff --git a/benchmarks/improvement-plan.md b/benchmarks/improvement-plan.md
deleted file mode 100644
index 40e31c6..0000000
--- a/benchmarks/improvement-plan.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Codesearch Improvement Plan — Lessons from Benchmark
-
-**Date:** 2026-02-12  
-**Based on:** 20-query benchmark (BOIN.Aprimo C# + Codesearch Rust)  
-**Overall score:** Codesearch 0.61 vs Grep 0.52 — but with critical gaps
-
----
-
-## Executive Summary
-
-Codesearch wins 5/7 categories but has two glaring weaknesses: **exact name matching** (Cat A: 0.29 vs 0.99) and **structural patterns** (Cat B: 0.66 vs 1.00). Both are solvable without fundamental architecture changes. The root causes are:
-
-1. **FTS (Tantivy) is underutilized** — it indexes content but doesn't boost exact identifier matches
-2. **No language-aware filtering** — JavaScript noise pollutes C# results
-3. **RRF fusion treats all signals equally** — no special weight for exact matches
-4. **No project-level language metadata** — the index knows `files_by_language` at walk time but doesn't persist or use it at search time
-
-Below are 7 concrete improvements, ordered by impact, with code-level guidance for your codebase.
-
----
-
-See full plan in the rendered markdown file.
diff --git a/benchmarks/mcp-tool-description-improvements.md b/benchmarks/mcp-tool-description-improvements.md
deleted file mode 100644
index f7a1334..0000000
--- a/benchmarks/mcp-tool-description-improvements.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# MCP Tool Description Improvements for Codesearch
-
-**Problem:** Agents don't know which tool to use for which query type, leading to
-semantic_search being called for exact name lookups (where it scores 0.00) instead
-of find_references (which scores 0.90+).
-
-See full analysis in the outputs file.
diff --git a/benchmarks/test_external_repo.sh b/benchmarks/test_external_repo.sh
deleted file mode 100644
index bb61b51..0000000
--- a/benchmarks/test_external_repo.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-# Benchmark demongrep on external repo (bat) with top 5 models
-
-REPO_PATH="/tmp/bat"
-DEMONGREP="./target/release/demongrep"
-
-# Top 5 models from our benchmark
-MODELS=("minilm-l6-q" "jina-code" "e5-multilingual" "bge-small" "bge-small-q")
-
-# Test queries for bat codebase (cat clone with syntax highlighting)
-# Format: "query|expected_file_pattern"
-QUERIES=(
-    "syntax highlighting theme|theme"
-    "read file from stdin|input"
-    "pager less integration|less"
-    "git diff decorations|diff"
-    "parse command line arguments|config"
-    "error handling Result|error"
-    "Controller print output|controller"
-    "asset loading syntaxes|assets"
-)
-
-echo "=========================================="
-echo "DEMONGREP EXTERNAL REPO BENCHMARK"
-echo "Repository: bat (sharkdp/bat)"
-echo "=========================================="
-echo ""
-
-# Results file
-RESULTS_FILE="benchmarks/external_repo_results.md"
-echo "# External Repo Benchmark: bat" > $RESULTS_FILE
-echo "" >> $RESULTS_FILE
-echo "**Date**: $(date '+%Y-%m-%d %H:%M')" >> $RESULTS_FILE
-echo "**Repository**: sharkdp/bat" >> $RESULTS_FILE
-echo "" >> $RESULTS_FILE
-echo "## Results Summary" >> $RESULTS_FILE
-echo "" >> $RESULTS_FILE
-echo "| Model | Accuracy | Index Time | Avg Query Time |" >> $RESULTS_FILE
-echo "|-------|----------|------------|----------------|" >> $RESULTS_FILE
-
-for MODEL in "${MODELS[@]}"; do
-    echo ""
-    echo "=========================================="
-    echo "Testing model: $MODEL"
-    echo "=========================================="
-
-    # Clear any existing index
-    rm -rf "$REPO_PATH/.demongrep.db"
-
-    # Index with this model
-    echo "Indexing..."
-    INDEX_START=$(date +%s.%N)
-    $DEMONGREP --model $MODEL index --path $REPO_PATH 2>&1 | grep -E "(chunks|Embedding|Total:)"
-    INDEX_END=$(date +%s.%N)
-    INDEX_TIME=$(echo "$INDEX_END - $INDEX_START" | bc)
-
-    # Run test queries
-    CORRECT=0
-    TOTAL=${#QUERIES[@]}
-    QUERY_TIMES=()
-
-    echo ""
-    echo "Running ${TOTAL} test queries..."
-
-    for QUERY_PAIR in "${QUERIES[@]}"; do
-        QUERY=$(echo $QUERY_PAIR | cut -d'|' -f1)
-        EXPECTED=$(echo $QUERY_PAIR | cut -d'|' -f2)
-
-        QUERY_START=$(date +%s.%N)
-        RESULT=$($DEMONGREP search "$QUERY" --path $REPO_PATH --compact 2>&1 | grep -v "INFO" | head -1)
-        QUERY_END=$(date +%s.%N)
-        QUERY_TIME=$(echo "$QUERY_END - $QUERY_START" | bc)
-        QUERY_TIMES+=($QUERY_TIME)
-
-        if echo "$RESULT" | grep -qi "$EXPECTED"; then
-            echo "  ✅ \"$QUERY\" -> $RESULT"
-            ((CORRECT++))
-        else
-            echo "  ❌ \"$QUERY\" -> $RESULT (expected: *$EXPECTED*)"
-        fi
-    done
-
-    # Calculate average query time
-    TOTAL_QUERY_TIME=0
-    for T in "${QUERY_TIMES[@]}"; do
-        TOTAL_QUERY_TIME=$(echo "$TOTAL_QUERY_TIME + $T" | bc)
-    done
-    AVG_QUERY_TIME=$(echo "scale=3; $TOTAL_QUERY_TIME / $TOTAL" | bc)
-
-    ACCURACY=$(echo "scale=0; $CORRECT * 100 / $TOTAL" | bc)
-
-    echo ""
-    echo "Results for $MODEL:"
-    echo "  Accuracy: $ACCURACY% ($CORRECT/$TOTAL)"
-    echo "  Index time: ${INDEX_TIME}s"
-    echo "  Avg query time: ${AVG_QUERY_TIME}s"
-
-    # Add to results file
-    echo "| $MODEL | $ACCURACY% | ${INDEX_TIME}s | ${AVG_QUERY_TIME}s |" >> $RESULTS_FILE
-done
-
-echo ""
-echo "=========================================="
-echo "Benchmark complete! Results saved to $RESULTS_FILE"
-echo "=========================================="

From d29a820d87ec8b8affe023f3731bd8a2958af984 Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Sat, 2 May 2026 00:13:17 +0200
Subject: [PATCH 3/6] docs(changelog): add 1.0.77 release notes

---
 CHANGELOG.md | 9 +++++++++
 Cargo.lock   | 2 +-
 Cargo.toml   | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 25fe694..1fdf8ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.77] - 2026-05-01
+
+### Removed
+
+- Stale planning documents (`.docs/`) and old benchmark results (`benchmarks/`)
+  removed from the repository. These were internal working documents with no
+  value for contributors.
+
 ## [1.0.74] - 2026-05-01
 
 ### Fixed
@@ -87,6 +95,7 @@ repositories.
 - `codesearch serve` keeps one writer per database (LMDB invariant). Concurrent
   reindex from a second process is rejected.
 
+[1.0.77]: https://github.com/flupkede/codesearch/compare/v1.0.75...v1.0.77
 [1.0.75]: https://github.com/flupkede/codesearch/compare/v1.0.74...v1.0.75
 [1.0.74]: https://github.com/flupkede/codesearch/compare/v1.0.72...v1.0.74
 [1.0.72]: https://github.com/flupkede/codesearch/releases/tag/v1.0.72
diff --git a/Cargo.lock b/Cargo.lock
index 09a9020..3ff0d6d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.76"
+version = "1.0.78"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index 085a4e3..03f354f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.76"
+version = "1.0.78"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"

From 2777e0cb1973a07542c404a24640b6e05472905c Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Sat, 2 May 2026 13:09:49 +0200
Subject: [PATCH 4/6] fix(serve): start idle timer at warmup so unused warm
 repos get evicted

`evict_idle_repos` only iterates `self.last_access`. `warmup_repo`
deliberately did not write to `last_access` (its comment said "warmup is
not a real query, idle timer should only reset on real query"), but the
side effect was that warmed-but-never-queried repos never appeared in
`last_access` at all and therefore stayed warm forever — holding open
LMDB environments and embedder state.

Visible symptom in the TUI: aliases like KRKA.Aprimo, DPS, BAYR.Aprimo
showed status `warm` with `Last Tool Call = -` indefinitely, while
aliases that had been touched (e.g. aprimo_mcp, investing) correctly
transitioned to `closed` after the idle window.

Fix: have `warmup_repo` start the idle timer at warmup. A real query
still resets the timer via `touch_access`. A repo that is never queried
gets evicted after the idle timeout, which is the desired behaviour.
---
 Cargo.lock       | 2 +-
 Cargo.toml       | 2 +-
 src/serve/mod.rs | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3ff0d6d..0344a18 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.78"
+version = "1.0.79"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index 03f354f..fbc7432 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.78"
+version = "1.0.79"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"
diff --git a/src/serve/mod.rs b/src/serve/mod.rs
index e327c8e..5900e55 100644
--- a/src/serve/mod.rs
+++ b/src/serve/mod.rs
@@ -492,10 +492,14 @@ impl ServeState {
         });
 
         // Store as Warm — FSW will be started lazily on first query.
-        // Do NOT touch_access: warmup is background activity, not a real query.
-        // The idle timer should only reset when a user/agent actually queries this repo.
         self.repos
             .insert(alias.to_string(), RepoState::Warm { stores: stores_arc });
+
+        // Start the idle timer at warmup. A real query will reset it via
+        // touch_access; without this, repos that are warmed but never queried
+        // would never appear in `last_access` and therefore never be evicted
+        // by `evict_idle_repos`, holding LMDB envs and embedder state forever.
+        self.touch_access(alias);
         Ok(())
     }
 

From a8cbfaea51156e23b14cae6aaf391417855b1800 Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Sat, 2 May 2026 14:21:01 +0200
Subject: [PATCH 5/6] fix(serve): evict warm-but-never-queried repos; drop
 Ctrl-C TUI quit

- warmup_repo now calls touch_access so warmed-but-never-queried repos
  appear in last_access and get evicted by the idle reaper. Without this,
  background-warmed aliases stayed Warm forever.
- is_quit_key drops the Ctrl-C arm. crossterm's raw mode delivers Ctrl-C
  as a key event (ENABLE_PROCESSED_INPUT off), so treating it as quit
  tore down the whole serve from a stray terminal Ctrl-C. Use q only.
---
 Cargo.lock       |  2 +-
 Cargo.toml       |  2 +-
 src/serve/tui.rs | 15 ++++++++-------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0344a18..6d33562 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.79"
+version = "1.0.80"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index fbc7432..8eb2a47 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.79"
+version = "1.0.80"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"
diff --git a/src/serve/tui.rs b/src/serve/tui.rs
index 4a92f23..d3ae888 100644
--- a/src/serve/tui.rs
+++ b/src/serve/tui.rs
@@ -14,7 +14,7 @@ use ratatui::text::{Line, Span};
 use ratatui::widgets::{Block, Borders, Cell, Row, Table, TableState};
 use ratatui::Terminal;
 
-use crossterm::event::{self, Event, KeyCode, KeyEvent, KeyModifiers};
+use crossterm::event::{self, Event, KeyCode, KeyEvent};
 use crossterm::terminal::{self, EnterAlternateScreen, LeaveAlternateScreen};
 
 use tokio_util::sync::CancellationToken;
@@ -27,7 +27,7 @@ use super::ServeState;
 
 /// Run the fullscreen TUI.  Spawns as a tokio task from `run_serve`.
 ///
-/// Returns `Ok(())` when the user presses `q` / `Ctrl-C`, or when the
+/// Returns `Ok(())` when the user presses `q`, or when the
 /// `cancel_token` is cancelled externally (e.g. Ctrl-C from the main task).
 ///
 /// Terminal restoration is guaranteed on normal exit and on errors.
@@ -154,11 +154,12 @@ fn restore_terminal(terminal: &mut Terminal<CrosstermBackend<io::Stdout>>) -> io
 // ---------------------------------------------------------------------------
 
 fn is_quit_key(key: KeyEvent) -> bool {
-    match key.code {
-        KeyCode::Char('q') => true,
-        KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => true,
-        _ => false,
-    }
+    // Ctrl-C is intentionally NOT a quit key here. crossterm's raw mode delivers
+    // it as a key event (ENABLE_PROCESSED_INPUT off on Windows / ISIG off on Unix),
+    // so the OS-level ctrlc::set_handler in main.rs is bypassed while the TUI runs.
+    // Treating Ctrl-C as quit was a foot-gun: a stray Ctrl-C in the wrong terminal
+    // would tear down the whole serve process. Use `q` instead.
+    matches!(key.code, KeyCode::Char('q'))
 }
 
 fn handle_key(key: KeyEvent, table_state: &mut TableState, row_count: usize) {

From 908697eb147a65c147dd3d3a0af6e6dfa1ae4100 Mon Sep 17 00:00:00 2001
From: flupkede <flupkede@users.noreply.github.com>
Date: Sat, 2 May 2026 14:54:20 +0200
Subject: [PATCH 6/6] docs(unsafe): add SAFETY comments to LMDB env-open blocks

---
 Cargo.lock            |  2 +-
 Cargo.toml            |  2 +-
 src/embed/cache.rs    |  5 +++++
 src/vectordb/store.rs | 13 +++++++++++++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6d33562..00954d4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -634,7 +634,7 @@ dependencies = [
 
 [[package]]
 name = "codesearch"
-version = "1.0.80"
+version = "1.0.81"
 dependencies = [
  "anyhow",
  "arroy",
diff --git a/Cargo.toml b/Cargo.toml
index 8eb2a47..686e623 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "codesearch"
-version = "1.0.80"
+version = "1.0.81"
 edition = "2021"
 authors = ["codesearch contributors"]
 license = "Apache-2.0"
diff --git a/src/embed/cache.rs b/src/embed/cache.rs
index 5d63e78..b50f4f9 100644
--- a/src/embed/cache.rs
+++ b/src/embed/cache.rs
@@ -308,6 +308,11 @@ impl PersistentEmbeddingCache {
             )
         })?;
 
+        // SAFETY: heed's `EnvOpenOptions::open` is unsafe because the caller must
+        // ensure no other process maps this LMDB environment with incompatible options
+        // (different map_size or flags) at the same time. The cache directory is
+        // process-private under the user's codesearch state directory, and we open it
+        // exactly once per process via this constructor.
         let env = unsafe {
             EnvOpenOptions::new()
                 .map_size(512 * 1024 * 1024) // 512MB — plenty for cache
diff --git a/src/vectordb/store.rs b/src/vectordb/store.rs
index 74c86c9..7816730 100644
--- a/src/vectordb/store.rs
+++ b/src/vectordb/store.rs
@@ -283,6 +283,12 @@ impl VectorStore {
         // one repo has been resized.  Use the max of persisted, env-var, and
         // default to never shrink below what was previously allocated.
         let map_size_mb = resolve_map_size(db_path);
+        // SAFETY: heed's `EnvOpenOptions::open` is unsafe because the caller must
+        // ensure no other process maps this LMDB environment with incompatible options
+        // at the same time. codesearch enforces single-writer-per-DB at the application
+        // level (one `serve` process per machine, and the CLI rejects concurrent
+        // reindex). The map_size is reconciled across opens via `resolve_map_size`
+        // above, so we never reopen with a smaller map than was previously persisted.
         let env = unsafe {
             EnvOpenOptions::new()
                 .map_size(map_size_mb * 1024 * 1024)
@@ -361,6 +367,13 @@ impl VectorStore {
         // Open LMDB environment in read-only mode
         // Use same map-size resolution as new() for consistency
         let map_size_mb = resolve_map_size(db_path);
+        // SAFETY: heed's `EnvOpenOptions::open` is unsafe because of LMDB's mmap
+        // contract; see the SAFETY comment on the read-write `new()` above. This
+        // open is read-only (`EnvFlags::READ_ONLY`), so it cannot conflict with a
+        // concurrent writer's map_size, only with stale handles after a resize —
+        // which is acceptable because the writer's resize logic explicitly
+        // rebuilds the env (see `resize_map` below) before any reader is invited
+        // to reopen.
         let env = unsafe {
             EnvOpenOptions::new()
                 .map_size(map_size_mb * 1024 * 1024)