From ff56d03a28e53eff611061fdced38bb673229535 Mon Sep 17 00:00:00 2001 From: flupkede Date: Fri, 1 May 2026 23:33:40 +0200 Subject: [PATCH 1/6] docs(changelog): remove empty Unreleased section, fix links --- CHANGELOG.md | 4 +--- Cargo.lock | 2 +- Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb51c30..25fe694 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] - ## [1.0.74] - 2026-05-01 ### Fixed @@ -89,6 +87,6 @@ repositories. - `codesearch serve` keeps one writer per database (LMDB invariant). Concurrent reindex from a second process is rejected. -[Unreleased]: https://github.com/flupkede/codesearch/compare/v1.0.74...develop +[1.0.75]: https://github.com/flupkede/codesearch/compare/v1.0.74...v1.0.75 [1.0.74]: https://github.com/flupkede/codesearch/compare/v1.0.72...v1.0.74 [1.0.72]: https://github.com/flupkede/codesearch/releases/tag/v1.0.72 diff --git a/Cargo.lock b/Cargo.lock index 3055fc7..09a9020 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.75" +version = "1.0.76" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 5c66c0e..085a4e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.75" +version = "1.0.76" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" From 87f6b55c76a2705cbc62d4a5ee6cfcb48e7156b2 Mon Sep 17 00:00:00 2001 From: flupkede Date: Fri, 1 May 2026 23:59:04 +0200 Subject: [PATCH 2/6] chore: remove stale planning docs (.docs/) and old benchmark results (benchmarks/) --- .docs/MCP_HELP_SYSTEM.md | 321 ------------------ AGENTS.md | 198 
++++------- Cargo.lock | 2 +- Cargo.toml | 2 +- benchmarks/BGE-small-en-v1.5.md | 62 ---- benchmarks/FULL_BENCHMARK_SUMMARY.md | 69 ---- benchmarks/benchmark-20251124-232718.md | 67 ---- benchmarks/benchmark-20251124-234722.md | 45 --- benchmarks/benchmark-20251125-103111.md | 45 --- benchmarks/benchmark-20251125-103719.md | 45 --- benchmarks/benchmark-20251125-104204.md | 23 -- benchmarks/demongrep_vs_osgrep.md | 79 ----- benchmarks/external_repo_bat.md | 65 ---- benchmarks/improvement-plan.md | 22 -- .../mcp-tool-description-improvements.md | 7 - benchmarks/test_external_repo.sh | 105 ------ 16 files changed, 61 insertions(+), 1096 deletions(-) delete mode 100644 .docs/MCP_HELP_SYSTEM.md delete mode 100644 benchmarks/BGE-small-en-v1.5.md delete mode 100644 benchmarks/FULL_BENCHMARK_SUMMARY.md delete mode 100644 benchmarks/benchmark-20251124-232718.md delete mode 100644 benchmarks/benchmark-20251124-234722.md delete mode 100644 benchmarks/benchmark-20251125-103111.md delete mode 100644 benchmarks/benchmark-20251125-103719.md delete mode 100644 benchmarks/benchmark-20251125-104204.md delete mode 100644 benchmarks/demongrep_vs_osgrep.md delete mode 100644 benchmarks/external_repo_bat.md delete mode 100644 benchmarks/improvement-plan.md delete mode 100644 benchmarks/mcp-tool-description-improvements.md delete mode 100644 benchmarks/test_external_repo.sh diff --git a/.docs/MCP_HELP_SYSTEM.md b/.docs/MCP_HELP_SYSTEM.md deleted file mode 100644 index 6e0868d..0000000 --- a/.docs/MCP_HELP_SYSTEM.md +++ /dev/null @@ -1,321 +0,0 @@ -# MCP Help System Implementation Summary - -## Questions Answered - -### 1. Can we add `--help` to the mcp command? 
- -**Yes, but it's already available!** - -Since `mcp` is a clap subcommand, users can run: - -```bash -codesearch mcp --help -``` - -This displays: -```text -Start MCP server for Claude Code integration - -Usage: codesearch [OPTIONS] mcp [PATH] - -Arguments: - [PATH] Path to project (defaults to current directory) - -Options: - -h, --help Print help - -v, --verbose Enable verbose output - -q, --quiet Suppress informational output -``` - -### 2. Is there a specific tool an agent calls to get help from an MCP? - -**No standard "help" tool exists in the MCP protocol.** - -However, MCP servers have an `instructions` field in their server info that's automatically displayed when the AI assistant connects to the server. - -## Implementation Details - -### Before Enhancement - -The original MCP server had minimal instructions: - -```rust -instructions: Some( - "codesearch is a semantic code search tool. Use semantic_search to find code \ - by meaning, get_file_chunks to see all chunks in a file, and index_status \ - to check if the index is ready." - .to_string(), -), -``` - -### After Enhancement - -I've expanded the instructions to include comprehensive help: - -```rust -instructions: Some( - format!(r#" -codesearch - Semantic Code Search MCP Server - -OVERVIEW: -codesearch provides fast, local semantic code search using natural language queries. -Search your codebase by meaning, not just by keywords. - -AVAILABLE TOOLS: - -1. semantic_search(query, limit=10) - Search the codebase using natural language queries. - Query examples: - - "where do we handle user authentication?" - - "how is error logging implemented?" - - "functions that process payment data" - - "database connection management" - Returns: Array of matches with path, line numbers, code content, and relevance scores. - -2. get_file_chunks(path) - Get all indexed chunks from a specific file. - Useful for understanding the complete structure of a file. - Returns: All chunks from the file with full context. 
- -3. index_status() - Check if the index exists and get database statistics. - Use this before searching to verify the index is ready. - Returns: Index status, total chunks, files, model info, and dimensions. - -USAGE PATTERNS: - -Understanding a New Codebase: - 1. Check index_status() to verify index is ready - 2. Search for core concepts: semantic_search("main application entry point") - 3. Explore patterns: semantic_search("error handling strategy") - 4. Get detailed view: get_file_chunks("src/main.rs") - -Finding Implementation Patterns: - - semantic_search("how are API endpoints defined?") - - semantic_search("database model definitions") - - get_file_chunks("src/models/user.rs") - -Debugging and Analysis: - - semantic_search("error handling for database operations") - - semantic_search("user input validation") - -Implementing New Features: - - semantic_search("authentication handling code") - Find reference implementations - - semantic_search("configuration management") - Understand patterns - - get_file_chunks("src/config.rs") - See detailed implementation - -BEST PRACTICES: - -✓ Use natural language queries describing concepts, not exact terms -✓ Check index_status() before searching -✓ Use specific queries with context (e.g., "API layer error handling" vs "error handling") -✓ Combine semantic_search() with get_file_chunks() for detailed analysis -✓ Start with broader queries, then narrow down - -✗ Avoid short, vague queries like "auth" or "db" (use grep for exact matches) -✗ Don't expect exact string matching (that's what grep is for) - -PERFORMANCE: -- Search speed: ~75ms after initial model load -- First search: ~2-3s (model loading time) -- Indexing: 30-60s for initial, incremental updates are instant - -SETUP: -If this MCP server doesn't find an index, the user needs to run: - codesearch index - -For detailed documentation, visit: https://github.com/yxanul/codesearch - -Current database: {db} -Model: {model} -Dimensions: {dims} -"#, - db = 
self.db_path.display(), - model = self.model_type.short_name(), - dims = self.dimensions - ) -), -``` - -## How AI Assistants Access MCP Help - -### Automatic Display - -When Claude Code (or other MCP-compatible assistant) connects to the codesearch MCP server, it automatically: - -1. Calls the server's `info` endpoint -2. Receives the `instructions` field -3. Displays this to the user or uses it internally to understand available tools - -### No Explicit Help Call Needed - -Unlike CLI tools where you type `--help`, MCP help is: -- Automatically provided on connection -- Available through the assistant's UI -- Can be queried by asking: "What tools does codesearch provide?" - -### Practical Usage - -In Claude Code, you might ask: - -``` -> What can I do with codesearch? -> Show me help for the codesearch MCP server -> How do I search code with codesearch? -``` - -Claude will use the `instructions` to answer. - -## Key Improvements Made - -### 1. Comprehensive Tool Documentation -- Detailed descriptions of each tool -- Parameter specifications -- Return value explanations -- Usage examples - -### 2. Usage Patterns -- Real-world workflows for common tasks -- Step-by-step examples -- Different use cases (understanding, debugging, implementing) - -### 3. Best Practices -- Do's and don'ts for effective queries -- Performance considerations -- Common pitfalls to avoid - -### 4. Dynamic Information -- Current database path -- Active model type -- Vector dimensions - -### 5. 
Setup Instructions -- Quick start guide -- Troubleshooting hints -- Link to full documentation - -## Comparison: CLI Help vs MCP Help - -| Aspect | CLI (`codesearch mcp --help`) | MCP Instructions | -|--------|----------------------------|------------------| -| **When shown** | When user explicitly requests | On server connection | -| **Audience** | Humans setting up MCP | AI assistants | -| **Content** | Command syntax & flags | Tool usage & examples | -| **Updates** | Static | Can include runtime info | -| **User control** | Explicit (`--help`) | Automatic | - -## Future Enhancements - -### Potential Additions - -1. **Interactive Help Tool** - ```rust - #[tool(description = "Get detailed help and usage examples for codesearch tools")] - async fn help(&self) -> Result { - // Return comprehensive help documentation - } - ``` - -2. **Tool-Specific Help** - ```rust - #[tool(description = "Get help for a specific tool")] - async fn tool_help(&self, Parameters(req): Parameters) -> Result { - // Return detailed help for requested tool - } - ``` - -3. **Example Queries** - ```rust - #[tool(description = "Get example search queries for common scenarios")] - async fn example_queries(&self) -> Result { - // Return curated query examples - } - ``` - -### Why Not Implemented Yet? - -- Current instructions cover most use cases -- Keep MCP interface simple (3 core tools) -- Can add if user feedback indicates need - -## Testing the Help System - -### 1. Build and Run - -```bash -# Build the project -cargo build --release - -# Start MCP server -./target/release/codesearch mcp /path/to/project -``` - -### 2. Verify Help Content - -The help will be displayed when: -- You connect Claude Code to the MCP server -- You ask "What can codesearch do?" -- You request MCP server info - -### 3. 
Check Dynamic Content - -The help includes runtime information: -- Database path -- Model type (e.g., `minilm-l6-q`, `jina-code`) -- Vector dimensions (384, 768, 1024) - -## Integration with Documentation - -### File Hierarchy - -``` -codesearch/ -├── README.md # General overview -├── AI_AGENT_CLI_INSTRUCTIONS.md # CLI usage for AI agents -├── MCP_INSTRUCTIONS.md # Setup guide for MCP -├── MCP_HELP_SYSTEM.md # This file -└── src/mcp/mod.rs # Enhanced help in code -``` - -### Documentation Flow - -1. **New User** → README.md → MCP_INSTRUCTIONS.md → Setup -2. **AI Agent** → AI_AGENT_CLI_INSTRUCTIONS.md → Integration -3. **Running MCP** → Automatic help on connection -4. **Need More** → Full documentation at GitHub - -## Summary - -### ✅ What We Have - -1. **CLI Help**: `codesearch mcp --help` - Basic command help -2. **MCP Instructions**: Comprehensive help displayed on connection -3. **Dynamic Info**: Runtime data included in help -4. **Usage Patterns**: Real-world workflows documented - -### 📝 What Works - -- AI assistants automatically receive help on connection -- Users can query "What can codesearch do?" -- Examples and best practices included -- Performance and setup information provided - -### 🚀 What's Possible - -- Add interactive `help()` tool if needed -- Add `tool_help()` for tool-specific documentation -- Add `example_queries()` for curated query examples -- Expand based on user feedback - -## Conclusion - -The MCP help system is now comprehensive and user-friendly. AI assistants like Claude Code automatically receive detailed instructions when connecting to the codesearch MCP server, including: - -- Available tools and their usage -- Real-world usage patterns -- Best practices and pitfalls -- Performance characteristics -- Setup instructions -- Dynamic runtime information - -No explicit help call is needed - it's all automatic! 
diff --git a/AGENTS.md b/AGENTS.md index debc17c..9997559 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,163 +1,83 @@ -# AGENTS.md — features/fixes branch plan +# AGENTS.md — features/cleanup -## Branch: `features/fixes` -**Base:** `develop` -**Goal:** Fix idle eviction bug + improve search quality to reduce agent grep fallback +## Goal ---- - -## Fix 1: Idle eviction — `get_or_open_stores` touches ALL repos on fan-out - -### Problem - -`get_or_open_stores()` calls `touch_access()` unconditionally (lines 456, 470 in `src/serve/mod.rs`). -When `get_chunk` is called without `project`/`group` (`allow_unscoped=true`), `resolve_repo_stores_multi` -fans out to ALL repos via `get_or_open_stores()` — resetting the idle timer on every repo. - -Result: repos are never idle, reaper never evicts. The 30-minute timeout is effectively disabled -whenever any agent uses `get_chunk` without explicit project scope. - -Same issue affects `status` tool with explicit project/group (goes through `get_or_open_stores`), -though the unscoped `status` path uses `repo_statuses_lightweight()` which is safe. - -### Fix: Add `touch: bool` parameter to `get_or_open_stores` - -**File:** `src/serve/mod.rs` +Remove stale planning documents and old benchmark results that have no value +for contributors or users of the released codebase. These directories were +useful during development but are now clutter. -1. Change signature: `pub(crate) async fn get_or_open_stores(&self, alias: &str, touch: bool)` -2. Only call `self.touch_access(alias)` when `touch == true` -3. 
Update all call sites: - - `warmup_repo` (line 456) → `touch: false` (warmup should NOT reset idle timer) - - `get_or_open_stores` fast path (line 470) → keep `touch: true` (direct query access) - - `resolve_repo_stores_multi` fan-out (line 3113 in `src/mcp/mod.rs`) → `touch: false` - - `resolve_repo_stores_multi` single project (line 3130) → `touch: true` - - `resolve_repo_stores_multi` group members (line 3141) → `touch: false` - - Lazy FSW transition (line ~487) → `touch: true` (first real query) - - `spawn_fsw_for_warm` (line ~722) → `touch: false` - - `reindex_handler` (lines 1166, 1211) → `touch: true` - - Test call sites → `touch: true` +## Scope -4. After `get_chunk` candidate detection resolves to a single repo, explicitly call - `serve_state.touch_access(&resolved_alias)` for just that repo. - -5. After group fan-out search completes, touch only repos that contributed results - (or touch all group members — acceptable since the agent explicitly requested the group). - -### Validation -- `cargo check && cargo clippy --all-targets -- -D warnings` -- `cargo test --lib` -- Manual: start serve with 3+ repos, call `get_chunk` without project, verify reaper log - shows idle ages increasing (not resetting) for untouched repos +This branch touches **only** file deletions — no source code, no Cargo.toml, +no tests. `cargo check` is not required (no Rust changes). --- -## Fix 2: Search quality — reduce agent grep fallback - -### Problem - -Agents fall back to `grep` when `codesearch_search` returns poor or zero results. -Root causes: - -1. **Top-N cutoff too aggressive** — retrieval pool is `limit * 3`, fusion drops relevant results -2. **Exact identifier boost too weak** — `EXACT_MATCH_RRF_K = 5.0` doesn't sufficiently - prioritize exact code matches over semantic similarity -3. **No auto-fallback** — when semantic search returns few results, no automatic literal retry -4. 
**minilm-l6 weak on code** — embedding model is NL-trained, code identifiers get poor vectors. - Not fixable without model change, but compensated by stronger FTS fusion. - -### Fix 2a: Increase retrieval pool - -**File:** `src/mcp/mod.rs` +## Tasks -Change all `limit * 3` to `limit * 5` in the semantic search pipeline. -This gives the RRF fusion more candidates to work with, reducing the chance -that a relevant result falls outside the retrieval window. +### 1. Delete `.docs/` directory (entire tree) -Affected locations (all in `src/mcp/mod.rs`): -- Line 3698: `store.search(&query_embedding, limit * 3)` → `limit * 5` -- Line 3753: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5` -- Line 3864: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5` -- Line 3925: `store.search(&query_embedding, limit * 3)` → `limit * 5` -- Line 3955: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5` -- Line 4108: `fts_store.search(&request.query, limit * 3, ...)` → `limit * 5` +Remove all files under `.docs/` including the `done/` subdirectory: -Also in `src/search/mod.rs` (CLI search path) — same pattern. - -Leave `search_exact` at `limit * 2` (exact matches are already high-precision). -Leave `search_phrase` at `limit * 3` (phrase search is already precise). - -### Fix 2b: Stronger exact identifier boost - -**File:** `src/rerank/mod.rs` - -Change `EXACT_MATCH_RRF_K` from `5.0` to `2.0`. - -Lower K = steeper rank curve = exact matches get proportionally higher RRF scores. -At K=5, an exact match at rank 1 gets score `1/(5+1) = 0.167`. -At K=2, an exact match at rank 1 gets score `1/(2+1) = 0.333` — 2x stronger signal. - -This ensures that when an agent searches for `"evict_idle_repos"`, the chunk containing -that exact identifier dominates the fusion result even if the embedding similarity is low. 
- -### Fix 2c: Auto-fallback to literal search +``` +.docs/MCP_HELP_SYSTEM.md +.docs/opencode-reload-commands-pr.md +.docs/plan-implementation.md +.docs/plan-testing.md +.docs/done/benchmarks-improvement-plan.md +.docs/done/codesearch-improvement-plan.md +.docs/done/LMDBResilience_GitAware_IndexCompact.md +.docs/done/old-plan-review.md +.docs/done/old-testplan.md +.docs/done/plan-embedding-cache.md +.docs/done/plan-review.md +``` -**File:** `src/mcp/mod.rs`, in `semantic_search()` (line ~3620) +### 2. Delete `benchmarks/` directory (entire tree) -After the hybrid search completes and results are built: +Remove all files under `benchmarks/`: -```rust -// If semantic/hybrid returned fewer than 3 results and query looks like code, -// auto-fallback to literal search and merge results. -if results.len() < 3 && has_identifiers { - // Try literal FTS search as fallback - let literal_results = fts_store.search(&request.query, limit, None)?; - // Deduplicate by chunk_id and append - for lr in literal_results { - if !results.iter().any(|r| r.id == lr.chunk_id) { - // Convert FtsResult to SearchResult and append - } - } -} +``` +benchmarks/benchmark-20251124-232718.md +benchmarks/benchmark-20251124-234722.md +benchmarks/benchmark-20251125-103111.md +benchmarks/benchmark-20251125-103719.md +benchmarks/benchmark-20251125-104204.md +benchmarks/BGE-small-en-v1.5.md +benchmarks/demongrep_vs_osgrep.md +benchmarks/external_repo_bat.md +benchmarks/FULL_BENCHMARK_SUMMARY.md +benchmarks/improvement-plan.md +benchmarks/mcp-tool-description-improvements.md +benchmarks/test_external_repo.sh ``` -Implementation details: -- Only trigger when `results.len() < 3` AND `has_identifiers` (code-like query) -- Use `with_fts_store_read_for` to run the fallback FTS search -- Deduplicate by `chunk_id` before merging -- Cap total results at `limit` -- Log when fallback triggers: `tracing::debug!("Auto-fallback: semantic returned {} results, trying literal", results.len())` +### 3. 
Commit -### Fix 2d: Increase `search_exact` retrieval for identifiers +``` +git rm -r .docs benchmarks +git commit -m "chore: remove stale planning docs and old benchmark results" +git push origin features/cleanup +``` -**File:** `src/mcp/mod.rs` +### 4. Update CHANGELOG.md -Change `search_exact(ident, limit * 2, ...)` to `search_exact(ident, limit * 3, ...)` -in the identifier boost paths (lines 3762, 3876, 3968, 4120). +Add a line under a new `## [Unreleased]` section — or add to the next release +section if one already exists: -More exact candidates = better chance the right chunk survives RRF fusion. +```markdown +### Removed -### Validation -- `cargo check && cargo clippy --all-targets -- -D warnings` -- `cargo test --lib` -- Manual test queries that previously required grep fallback: - - `codesearch search "evict_idle_repos"` — should find the function - - `codesearch search "touch_access"` — should find the method - - `codesearch search "Database cleared"` — should find the log message (already fixed by AND mode) - - `codesearch search "EXACT_MATCH_RRF_K"` — should find the constant +- Stale planning documents (`.docs/`) and old benchmark results (`benchmarks/`) + removed from the repository. These were internal working documents with no + value for contributors. +``` --- -## Execution order - -1. **Fix 1** — idle eviction (`touch` parameter) -2. **Fix 2a** — retrieval pool `limit * 5` -3. **Fix 2b** — `EXACT_MATCH_RRF_K` = 2.0 -4. **Fix 2c** — auto-fallback to literal -5. **Fix 2d** — `search_exact` retrieval `limit * 3` -6. Validate all together -7. Commit - -## Commits +## Done when -One commit per fix, or group 2a-2d into a single "search quality" commit. -Prefer: 2 commits total (Fix 1 + Fix 2). 
+- [ ] `.docs/` directory no longer exists in the repository +- [ ] `benchmarks/` directory no longer exists in the repository +- [ ] Commit on `features/cleanup` pushed to origin +- [ ] CHANGELOG.md updated diff --git a/Cargo.lock b/Cargo.lock index 3055fc7..09a9020 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.75" +version = "1.0.76" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 5c66c0e..085a4e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.75" +version = "1.0.76" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" diff --git a/benchmarks/BGE-small-en-v1.5.md b/benchmarks/BGE-small-en-v1.5.md deleted file mode 100644 index 5a477e8..0000000 --- a/benchmarks/BGE-small-en-v1.5.md +++ /dev/null @@ -1,62 +0,0 @@ -# Benchmark: BAAI/bge-small-en-v1.5 - -**Date**: 2024-11-24 -**Model**: `BAAI/bge-small-en-v1.5` -**Variant**: `BGESmallENV15` -**Dimensions**: 384 -**Quantized**: No - -## Indexing Performance - -| Metric | Value | -|--------|-------| -| Files indexed | 46 | -| Chunks created | 592 | -| Database size | 4.06 MB | -| Avg per chunk | 7.03 KB | - -## Search Performance - -| Metric | Value | -|--------|-------| -| Database load | ~70µs | -| Model load | ~150ms | -| Query embed | ~4ms | -| Search | ~600µs | -| **Total latency** | **~155ms** | - -## Accuracy Tests - -| Query | Expected File | Top Result | Score | Correct | -|-------|---------------|------------|-------|---------| -| "SemanticChunker struct" | semantic.rs | `src/chunker/semantic.rs` SemanticChunker | **0.929** | ✅ | -| "VectorStore insert chunks" | store.rs | `src/vectordb/store.rs` insert_chunks() | **0.912** | ✅ | -| "tree-sitter grammar loading" | parser.rs | `src/chunker/parser.rs` | **0.903** | ✅ | -| "extract function signature from AST" | extractor.rs | `src/chunker/extractor.rs` extract_signature() 
| **0.894** | ✅ | -| "how do we detect binary files" | binary.rs | `src/file/binary.rs` | **0.909** | ✅ | -| "where is the main entry point" | main.rs | `src/main.rs` main() | ✅ | ✅ | -| "CLI argument parsing clap" | cli/mod.rs | `src/cli/mod.rs` Cli struct | ✅ | ✅ | -| "FileWalker walk directory" | file walker | `examples/file_walker_demo.rs` | ✅ | ✅ | -| "RustExtractor python typescript" | extractor.rs | `src/chunker/extractor.rs` | **0.894** | ✅ | - -### Edge Case (Non-existent content) - -| Query | Result | Score | Note | -|-------|--------|-------|------| -| "kubernetes deployment yaml" | PROJECT_STATUS.md | **0.825** | False positive, lower score | - -## Summary - -| Metric | Value | -|--------|-------| -| **Accuracy** | 9/9 (100%) | -| **Avg score (correct)** | 0.90 | -| **False positive score** | 0.825 | -| **Suggested threshold** | 0.85 | - -## Notes - -- Excellent accuracy on code-related queries -- Natural language questions work well -- False positives have noticeably lower scores (~0.82 vs ~0.90) -- Fast search latency after initial model load diff --git a/benchmarks/FULL_BENCHMARK_SUMMARY.md b/benchmarks/FULL_BENCHMARK_SUMMARY.md deleted file mode 100644 index 6296b87..0000000 --- a/benchmarks/FULL_BENCHMARK_SUMMARY.md +++ /dev/null @@ -1,69 +0,0 @@ -# DemonGrep Embedding Model Benchmark - Full Summary - -**Date**: 2025-11-25 -**Test Chunks**: ~607 chunks from codesearch codebase -**Test Queries**: 9 semantic search queries - -## Results Summary (Sorted by Accuracy) - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | Notes | -|-------|------|----------|-----------|------------|------------|-------| -| AllMiniLML6V2Q | 384 | **100%** | 0.554 | ~25s | 1.79ms | **BEST ACCURACY** - Quantized, fastest | -| JinaEmbeddingsV2BaseCode | 768 | 89% | 0.714 | 73.6s | 10.5ms | Code-optimized, low false positives (0.34) | -| MultilingualE5Small | 384 | 89% | 0.886 | 28.0s | 3.4ms | High scores but high false positive (0.90) | -| BGESmallENV15 | 384 
| 89% | 0.792 | ~30s | ~2ms | **DEFAULT** - Good balance | -| BGESmallENV15Q | 384 | 89% | 0.792 | ~30s | ~2ms | Quantized version | -| AllMiniLML6V2 | 384 | 78% | 0.556 | ~30s | ~3ms | Non-quantized | -| AllMiniLML12V2Q | 384 | 78% | 0.570 | 25.8s | 1.8ms | Quantized L12 | -| ParaphraseMLMiniLML12V2 | 384 | 78% | 0.598 | 30.6s | 2.8ms | Paraphrase-optimized | -| NomicEmbedTextV1 | 768 | 78% | 0.624 | 72.7s | 11.7ms | | -| NomicEmbedTextV15 | 768 | 78% | 0.666 | 68.4s | 11.7ms | | -| NomicEmbedTextV15Q | 768 | 78% | 0.662 | 59.9s | 4.7ms | Quantized | -| MxbaiEmbedLargeV1 | 1024 | 78% | 0.771 | 164.4s | 33.1ms | Large model | -| BGEBaseENV15 | 768 | 67% | 0.792 | 64.8s | 8.3ms | | -| AllMiniLML12V2 | 384 | 56% | 0.567 | ~25s | ~2ms | | -| ModernBertEmbedLarge | 1024 | 56% | 0.699 | 203.1s | 30.6ms | Newest architecture, slow | - -**Skipped**: BGELargeENV15 (1024 dims) - Memory intensive - -## Key Findings - -### Top Performers for Code Search -1. **AllMiniLML6V2Q** (100% accuracy) - Best overall, quantized = fast -2. **JinaEmbeddingsV2BaseCode** (89%) - Code-specialized, excellent false positive resistance -3. 
**BGESmallENV15** (89%) - Current default, good balance of speed and accuracy - -### Speed vs Quality Tradeoffs -- **Fastest**: AllMiniLML6V2Q - 1.79ms query time, 25s indexing -- **Slowest**: ModernBertEmbedLarge - 30.63ms query time, 203s indexing -- **Best balance**: BGESmallENV15 - ~2ms query, ~30s indexing, 89% accuracy - -### Observations -- Quantized models (Q suffix) are faster with minimal accuracy loss -- Larger models (768/1024 dims) don't necessarily mean better code search accuracy -- Code-specialized models (Jina) perform well on code search tasks -- MultilingualE5Small has high scores but poor discrimination (0.90 false positive) - -## Recommendations - -| Use Case | Recommended Model | -|----------|-------------------| -| Best accuracy | AllMiniLML6V2Q | -| Code-specific search | JinaEmbeddingsV2BaseCode | -| Balanced (current default) | BGESmallENV15 | -| Resource constrained | AllMiniLML6V2Q | -| Need high semantic similarity | MultilingualE5Small (watch false positives) | - -## Test Queries Used - -1. "SemanticChunker struct" → src/chunker/semantic.rs -2. "VectorStore insert chunks" → src/vectordb/store.rs -3. "tree-sitter grammar loading" → src/chunker/parser.rs -4. "extract function signature from AST" → src/chunker/extractor.rs -5. "how do we detect binary files" → src/file/binary.rs -6. "where is the main entry point" → src/main.rs -7. "CLI argument parsing clap" → src/cli/mod.rs -8. "FileWalker walk directory" → file_walker -9. 
"RustExtractor python typescript" → src/chunker/extractor.rs - -False positive test: "kubernetes deployment yaml" (should score < 0.85) diff --git a/benchmarks/benchmark-20251124-232718.md b/benchmarks/benchmark-20251124-232718.md deleted file mode 100644 index db136d2..0000000 --- a/benchmarks/benchmark-20251124-232718.md +++ /dev/null @@ -1,67 +0,0 @@ -# Embedding Model Benchmark Results - -**Date**: 2025-11-24 23:27 -**Chunks**: 604 - -## Summary - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | -|-------|------|----------|-----------|------------|------------| -| bge-small | 384 | 89% | 0.792 | 30.86s | 2.96ms | -| bge-small-q | 384 | 89% | 0.792 | 37.49s | 7.75ms | -| minilm-l6 | 384 | 78% | 0.556 | 15.83s | 1.01ms | -| minilm-l6-q | 384 | 100% | 0.554 | 13.20s | 1.05ms | -| minilm-l12 | 384 | 56% | 0.567 | 30.80s | 3.20ms | - -## Individual Results - -### BAAI/bge-small-en-v1.5 - -- **Dimensions**: 384 -- **Quantized**: false -- **Model Load**: 145.721786ms -- **Index Time**: 30.855070524s -- **Accuracy**: 89% -- **Avg Score**: 0.792 -- **False Positive Score**: 0.790 - -### BAAI/bge-small-en-v1.5 (quantized) - -- **Dimensions**: 384 -- **Quantized**: true -- **Model Load**: 3.774111036s -- **Index Time**: 37.486437202s -- **Accuracy**: 89% -- **Avg Score**: 0.792 -- **False Positive Score**: 0.790 - -### sentence-transformers/all-MiniLM-L6-v2 - -- **Dimensions**: 384 -- **Quantized**: false -- **Model Load**: 3.513947864s -- **Index Time**: 15.828345778s -- **Accuracy**: 78% -- **Avg Score**: 0.556 -- **False Positive Score**: 0.675 - -### sentence-transformers/all-MiniLM-L6-v2 (quantized) - -- **Dimensions**: 384 -- **Quantized**: true -- **Model Load**: 2.949769726s -- **Index Time**: 13.204097025s -- **Accuracy**: 100% -- **Avg Score**: 0.554 -- **False Positive Score**: 0.687 - -### sentence-transformers/all-MiniLM-L12-v2 - -- **Dimensions**: 384 -- **Quantized**: false -- **Model Load**: 5.757348931s -- **Index Time**: 30.802592408s 
-- **Accuracy**: 56% -- **Avg Score**: 0.567 -- **False Positive Score**: 0.618 - diff --git a/benchmarks/benchmark-20251124-234722.md b/benchmarks/benchmark-20251124-234722.md deleted file mode 100644 index b419f8f..0000000 --- a/benchmarks/benchmark-20251124-234722.md +++ /dev/null @@ -1,45 +0,0 @@ -# Embedding Model Benchmark Results - -**Date**: 2025-11-24 23:47 -**Chunks**: 605 - -## Summary - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | -|-------|------|----------|-----------|------------|------------| -| minilm-l12-q | 384 | 78% | 0.570 | 25.76s | 1.79ms | -| paraphrase-minilm | 384 | 78% | 0.598 | 30.63s | 2.82ms | -| bge-base | 768 | 67% | 0.792 | 64.83s | 8.29ms | - -## Individual Results - -### sentence-transformers/all-MiniLM-L12-v2 (quantized) - -- **Dimensions**: 384 -- **Quantized**: true -- **Model Load**: 154.377983ms -- **Index Time**: 25.762250925s -- **Accuracy**: 78% -- **Avg Score**: 0.570 -- **False Positive Score**: 0.644 - -### sentence-transformers/paraphrase-MiniLM-L6-v2 - -- **Dimensions**: 384 -- **Quantized**: false -- **Model Load**: 1.624537749s -- **Index Time**: 30.630321622s -- **Accuracy**: 78% -- **Avg Score**: 0.598 -- **False Positive Score**: 0.472 - -### BAAI/bge-base-en-v1.5 - -- **Dimensions**: 768 -- **Quantized**: false -- **Model Load**: 1.019215842s -- **Index Time**: 64.827749525s -- **Accuracy**: 67% -- **Avg Score**: 0.792 -- **False Positive Score**: 0.729 - diff --git a/benchmarks/benchmark-20251125-103111.md b/benchmarks/benchmark-20251125-103111.md deleted file mode 100644 index 597bb80..0000000 --- a/benchmarks/benchmark-20251125-103111.md +++ /dev/null @@ -1,45 +0,0 @@ -# Embedding Model Benchmark Results - -**Date**: 2025-11-25 10:31 -**Chunks**: 607 - -## Summary - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | -|-------|------|----------|-----------|------------|------------| -| nomic-v1 | 768 | 78% | 0.624 | 72.70s | 11.73ms | -| nomic-v1.5 | 768 | 78% | 0.666 | 
68.42s | 11.69ms | -| nomic-v1.5-q | 768 | 78% | 0.662 | 59.93s | 4.66ms | - -## Individual Results - -### nomic-ai/nomic-embed-text-v1 - -- **Dimensions**: 768 -- **Quantized**: false -- **Model Load**: 1.362328404s -- **Index Time**: 72.700093099s -- **Accuracy**: 78% -- **Avg Score**: 0.624 -- **False Positive Score**: 0.570 - -### nomic-ai/nomic-embed-text-v1.5 - -- **Dimensions**: 768 -- **Quantized**: false -- **Model Load**: 1.240813506s -- **Index Time**: 68.41701559s -- **Accuracy**: 78% -- **Avg Score**: 0.666 -- **False Positive Score**: 0.599 - -### nomic-ai/nomic-embed-text-v1.5 (quantized) - -- **Dimensions**: 768 -- **Quantized**: true -- **Model Load**: 3.38684491s -- **Index Time**: 59.927164593s -- **Accuracy**: 78% -- **Avg Score**: 0.662 -- **False Positive Score**: 0.596 - diff --git a/benchmarks/benchmark-20251125-103719.md b/benchmarks/benchmark-20251125-103719.md deleted file mode 100644 index 051a3df..0000000 --- a/benchmarks/benchmark-20251125-103719.md +++ /dev/null @@ -1,45 +0,0 @@ -# Embedding Model Benchmark Results - -**Date**: 2025-11-25 10:37 -**Chunks**: 608 - -## Summary - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | -|-------|------|----------|-----------|------------|------------| -| jina-code | 768 | 89% | 0.714 | 73.60s | 10.48ms | -| e5-multilingual | 384 | 89% | 0.886 | 27.97s | 3.42ms | -| mxbai-large | 1024 | 78% | 0.771 | 164.38s | 33.09ms | - -## Individual Results - -### jinaai/jina-embeddings-v2-base-code - -- **Dimensions**: 768 -- **Quantized**: false -- **Model Load**: 9.746430251s -- **Index Time**: 73.604087631s -- **Accuracy**: 89% -- **Avg Score**: 0.714 -- **False Positive Score**: 0.341 - -### intfloat/multilingual-e5-small - -- **Dimensions**: 384 -- **Quantized**: false -- **Model Load**: 7.576603176s -- **Index Time**: 27.968048427s -- **Accuracy**: 89% -- **Avg Score**: 0.886 -- **False Positive Score**: 0.897 - -### mixedbread-ai/mxbai-embed-large-v1 - -- **Dimensions**: 1024 -- 
**Quantized**: false -- **Model Load**: 16.951440748s -- **Index Time**: 164.379326267s -- **Accuracy**: 78% -- **Avg Score**: 0.771 -- **False Positive Score**: 0.705 - diff --git a/benchmarks/benchmark-20251125-104204.md b/benchmarks/benchmark-20251125-104204.md deleted file mode 100644 index 6657e93..0000000 --- a/benchmarks/benchmark-20251125-104204.md +++ /dev/null @@ -1,23 +0,0 @@ -# Embedding Model Benchmark Results - -**Date**: 2025-11-25 10:42 -**Chunks**: 609 - -## Summary - -| Model | Dims | Accuracy | Avg Score | Index Time | Query Time | -|-------|------|----------|-----------|------------|------------| -| modernbert-large | 1024 | 56% | 0.699 | 203.07s | 30.63ms | - -## Individual Results - -### lightonai/modernbert-embed-large - -- **Dimensions**: 1024 -- **Quantized**: false -- **Model Load**: 32.599241892s -- **Index Time**: 203.070309573s -- **Accuracy**: 56% -- **Avg Score**: 0.699 -- **False Positive Score**: 0.591 - diff --git a/benchmarks/demongrep_vs_osgrep.md b/benchmarks/demongrep_vs_osgrep.md deleted file mode 100644 index cae4c5a..0000000 --- a/benchmarks/demongrep_vs_osgrep.md +++ /dev/null @@ -1,79 +0,0 @@ -# Benchmark: codesearch vs osgrep - -**Date**: 2025-11-25 -**Test Repository**: sharkdp/bat (cat clone with syntax highlighting) -**Repository Size**: ~400 files, 2.6 MB - -## Tool Comparison - -| Feature | codesearch | osgrep | -|---------|-----------|--------| -| **Language** | Rust | TypeScript | -| **Embedding Library** | fastembed (ONNX) | transformers.js | -| **Vector Store** | arroy + LMDB | LanceDB | -| **Default Model** | BGE-small-en-v1.5 (384d) | mxbai-embed-xsmall-v1 | -| **Tested Model** | minilm-l6-q (384d) | mxbai-embed-xsmall-v1 | -| **Chunking** | tree-sitter (native) | tree-sitter (WASM) | -| **Reranking** | No (vector only) | Yes (RRF hybrid) | -| **Parallelism** | Single-threaded embed | 8 worker threads | - -## Indexing Performance - -| Tool | Files | Chunks | Index Time | Speed | 
-|------|-------|--------|------------|-------| -| **codesearch** (minilm-l6-q) | 396 | 3,518 | **69s** | 51 chunks/sec | -| **osgrep** | 426 | ? | **120s** | - | - -**codesearch is 1.7x faster** despite using single-threaded embedding vs osgrep's 8 workers. - -## Search Accuracy - -### Test Queries on bat repository - -| # | Query | Expected | codesearch | osgrep | -|---|-------|----------|-----------|--------| -| 1 | syntax highlighting theme | theme.rs | ✅ theme.rs | ❌ Makefile | -| 2 | read file from stdin input | input.rs | ✅ input.rs | ❌ output.rs | -| 3 | pager less integration | pager.rs | ✅ pager.rs | ❌ output.rs | -| 4 | git diff decorations | diff.rs | ✅ decorations.rs | ❌ requirements.txt | -| 5 | parse command line config | config.rs | ❌ 50-paru.toml | ❌ command.rs | -| 6 | error handling Result | error.rs | ✅ error.rs | ❌ output.rs | - -### Results Summary - -| Tool | Accuracy | Correct | Total | -|------|----------|---------|-------| -| **codesearch** (minilm-l6-q) | **83%** | 5 | 6 | -| **osgrep** | **0%** | 0 | 6 | - -## Analysis - -### Why codesearch outperforms osgrep: - -1. **Better embedding model**: minilm-l6-q (384 dims) appears to have better semantic understanding for code search than mxbai-embed-xsmall-v1 - -2. **Focused on source code**: codesearch correctly prioritizes `src/` files over test files, while osgrep frequently returns files from `tests/syntax-tests/` - -3. **Native performance**: Rust + ONNX (fastembed) is faster than JavaScript + transformers.js even with 8x parallelism - -4. 
**Chunk quality**: codesearch's semantic chunking creates 3,518 meaningful chunks vs osgrep's approach - -### osgrep's potential advantages (not measured): - -- Hybrid search (RRF) combining vector + FTS -- Reranking model for result refinement -- Live file watching with incremental updates -- Claude Code integration - -## Conclusion - -On this benchmark, **codesearch significantly outperforms osgrep** in both: -- **Speed**: 1.7x faster indexing -- **Accuracy**: 83% vs 0% on semantic code search queries - -The choice of embedding model appears to be the primary factor in accuracy differences. codesearch's minilm-l6-q model (which achieved 100% accuracy on its own codebase) proves to be excellent for code search tasks. - -## Recommendations - -1. **For codesearch**: Consider adding hybrid search (RRF) and reranking for potential accuracy improvements -2. **For osgrep users**: The mxbai-embed-xsmall model may not be optimal for code search tasks diff --git a/benchmarks/external_repo_bat.md b/benchmarks/external_repo_bat.md deleted file mode 100644 index ffb65a7..0000000 --- a/benchmarks/external_repo_bat.md +++ /dev/null @@ -1,65 +0,0 @@ -# External Repo Benchmark: bat (sharkdp/bat) - -**Date**: 2025-11-25 -**Repository**: https://github.com/sharkdp/bat -**Size**: 396 files, 3518 chunks, 2.6 MB - -## Test Queries - -| # | Query | Expected File | -|---|-------|---------------| -| 1 | syntax highlighting theme | theme.rs | -| 2 | read file from stdin input | input.rs | -| 3 | pager less integration | pager.rs or less.rs | -| 4 | git diff decorations | diff.rs | -| 5 | parse command line arguments config | config.rs | -| 6 | error handling Result type | error.rs | - -## Results - -### minilm-l6-q (384 dims, quantized) -- **Index Time**: 69s -- **Accuracy**: 5/6 (83%) - -| Query | Result | Correct | -|-------|--------|---------| -| 1 | theme.rs | ✅ | -| 2 | input.rs | ✅ | -| 3 | pager.rs | ✅ | -| 4 | decorations.rs | ✅ | -| 5 | 50-paru.toml | ❌ | -| 6 | error.rs | ✅ | - 
-### jina-code (768 dims, code-optimized) -- **Index Time**: 363s (~5x slower) -- **Accuracy**: 5/6 (83%) - -| Query | Result | Correct | -|-------|--------|---------| -| 1 | theme.rs | ✅ | -| 2 | input.rs | ✅ | -| 3 | pager.rs | ✅ | -| 4 | diff.rs | ✅ | -| 5 | config.rs | ✅ | -| 6 | numpy_test_multiarray.py | ❌ | - -## Analysis - -Both models achieved 83% accuracy but on different queries: -- **minilm-l6-q** correctly found error.rs but missed config.rs -- **jina-code** correctly found config.rs and diff.rs but missed error.rs - -### Performance Comparison - -| Model | Dims | Index Time | Query Time | Accuracy | -|-------|------|------------|------------|----------| -| minilm-l6-q | 384 | 69s | ~2ms | 83% | -| jina-code | 768 | 363s | ~10ms | 83% | - -### Recommendation - -For code search tasks: -- **minilm-l6-q** offers best speed/accuracy tradeoff (5x faster indexing) -- **jina-code** may be better for specific code-related queries but much slower - -The default BGE-small model (89% on codesearch codebase) is also a good balanced choice. diff --git a/benchmarks/improvement-plan.md b/benchmarks/improvement-plan.md deleted file mode 100644 index 40e31c6..0000000 --- a/benchmarks/improvement-plan.md +++ /dev/null @@ -1,22 +0,0 @@ -# Codesearch Improvement Plan — Lessons from Benchmark - -**Date:** 2026-02-12 -**Based on:** 20-query benchmark (BOIN.Aprimo C# + Codesearch Rust) -**Overall score:** Codesearch 0.61 vs Grep 0.52 — but with critical gaps - ---- - -## Executive Summary - -Codesearch wins 5/7 categories but has two glaring weaknesses: **exact name matching** (Cat A: 0.29 vs 0.99) and **structural patterns** (Cat B: 0.66 vs 1.00). Both are solvable without fundamental architecture changes. The root causes are: - -1. **FTS (Tantivy) is underutilized** — it indexes content but doesn't boost exact identifier matches -2. **No language-aware filtering** — JavaScript noise pollutes C# results -3. 
**RRF fusion treats all signals equally** — no special weight for exact matches -4. **No project-level language metadata** — the index knows `files_by_language` at walk time but doesn't persist or use it at search time - -Below are 7 concrete improvements, ordered by impact, with code-level guidance for your codebase. - ---- - -See full plan in the rendered markdown file. diff --git a/benchmarks/mcp-tool-description-improvements.md b/benchmarks/mcp-tool-description-improvements.md deleted file mode 100644 index f7a1334..0000000 --- a/benchmarks/mcp-tool-description-improvements.md +++ /dev/null @@ -1,7 +0,0 @@ -# MCP Tool Description Improvements for Codesearch - -**Problem:** Agents don't know which tool to use for which query type, leading to -semantic_search being called for exact name lookups (where it scores 0.00) instead -of find_references (which scores 0.90+). - -See full analysis in the outputs file. diff --git a/benchmarks/test_external_repo.sh b/benchmarks/test_external_repo.sh deleted file mode 100644 index bb61b51..0000000 --- a/benchmarks/test_external_repo.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# Benchmark demongrep on external repo (bat) with top 5 models - -REPO_PATH="/tmp/bat" -DEMONGREP="./target/release/demongrep" - -# Top 5 models from our benchmark -MODELS=("minilm-l6-q" "jina-code" "e5-multilingual" "bge-small" "bge-small-q") - -# Test queries for bat codebase (cat clone with syntax highlighting) -# Format: "query|expected_file_pattern" -QUERIES=( - "syntax highlighting theme|theme" - "read file from stdin|input" - "pager less integration|less" - "git diff decorations|diff" - "parse command line arguments|config" - "error handling Result|error" - "Controller print output|controller" - "asset loading syntaxes|assets" -) - -echo "==========================================" -echo "DEMONGREP EXTERNAL REPO BENCHMARK" -echo "Repository: bat (sharkdp/bat)" -echo "==========================================" -echo "" - -# Results file 
-RESULTS_FILE="benchmarks/external_repo_results.md" -echo "# External Repo Benchmark: bat" > $RESULTS_FILE -echo "" >> $RESULTS_FILE -echo "**Date**: $(date '+%Y-%m-%d %H:%M')" >> $RESULTS_FILE -echo "**Repository**: sharkdp/bat" >> $RESULTS_FILE -echo "" >> $RESULTS_FILE -echo "## Results Summary" >> $RESULTS_FILE -echo "" >> $RESULTS_FILE -echo "| Model | Accuracy | Index Time | Avg Query Time |" >> $RESULTS_FILE -echo "|-------|----------|------------|----------------|" >> $RESULTS_FILE - -for MODEL in "${MODELS[@]}"; do - echo "" - echo "==========================================" - echo "Testing model: $MODEL" - echo "==========================================" - - # Clear any existing index - rm -rf "$REPO_PATH/.demongrep.db" - - # Index with this model - echo "Indexing..." - INDEX_START=$(date +%s.%N) - $DEMONGREP --model $MODEL index --path $REPO_PATH 2>&1 | grep -E "(chunks|Embedding|Total:)" - INDEX_END=$(date +%s.%N) - INDEX_TIME=$(echo "$INDEX_END - $INDEX_START" | bc) - - # Run test queries - CORRECT=0 - TOTAL=${#QUERIES[@]} - QUERY_TIMES=() - - echo "" - echo "Running ${TOTAL} test queries..." 
- - for QUERY_PAIR in "${QUERIES[@]}"; do - QUERY=$(echo $QUERY_PAIR | cut -d'|' -f1) - EXPECTED=$(echo $QUERY_PAIR | cut -d'|' -f2) - - QUERY_START=$(date +%s.%N) - RESULT=$($DEMONGREP search "$QUERY" --path $REPO_PATH --compact 2>&1 | grep -v "INFO" | head -1) - QUERY_END=$(date +%s.%N) - QUERY_TIME=$(echo "$QUERY_END - $QUERY_START" | bc) - QUERY_TIMES+=($QUERY_TIME) - - if echo "$RESULT" | grep -qi "$EXPECTED"; then - echo " ✅ \"$QUERY\" -> $RESULT" - ((CORRECT++)) - else - echo " ❌ \"$QUERY\" -> $RESULT (expected: *$EXPECTED*)" - fi - done - - # Calculate average query time - TOTAL_QUERY_TIME=0 - for T in "${QUERY_TIMES[@]}"; do - TOTAL_QUERY_TIME=$(echo "$TOTAL_QUERY_TIME + $T" | bc) - done - AVG_QUERY_TIME=$(echo "scale=3; $TOTAL_QUERY_TIME / $TOTAL" | bc) - - ACCURACY=$(echo "scale=0; $CORRECT * 100 / $TOTAL" | bc) - - echo "" - echo "Results for $MODEL:" - echo " Accuracy: $ACCURACY% ($CORRECT/$TOTAL)" - echo " Index time: ${INDEX_TIME}s" - echo " Avg query time: ${AVG_QUERY_TIME}s" - - # Add to results file - echo "| $MODEL | $ACCURACY% | ${INDEX_TIME}s | ${AVG_QUERY_TIME}s |" >> $RESULTS_FILE -done - -echo "" -echo "==========================================" -echo "Benchmark complete! Results saved to $RESULTS_FILE" -echo "==========================================" From d29a820d87ec8b8affe023f3731bd8a2958af984 Mon Sep 17 00:00:00 2001 From: flupkede Date: Sat, 2 May 2026 00:13:17 +0200 Subject: [PATCH 3/6] docs(changelog): add 1.0.76 release notes --- CHANGELOG.md | 9 +++++++++ Cargo.lock | 2 +- Cargo.toml | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25fe694..1fdf8ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [1.0.77] - 2026-05-01 + +### Removed + +- Stale planning documents (`.docs/`) and old benchmark results (`benchmarks/`) + removed from the repository. These were internal working documents with no + value for contributors. + ## [1.0.74] - 2026-05-01 ### Fixed @@ -87,6 +95,7 @@ repositories. - `codesearch serve` keeps one writer per database (LMDB invariant). Concurrent reindex from a second process is rejected. +[1.0.77]: https://github.com/flupkede/codesearch/compare/v1.0.75...v1.0.77 [1.0.75]: https://github.com/flupkede/codesearch/compare/v1.0.74...v1.0.75 [1.0.74]: https://github.com/flupkede/codesearch/compare/v1.0.72...v1.0.74 [1.0.72]: https://github.com/flupkede/codesearch/releases/tag/v1.0.72 diff --git a/Cargo.lock b/Cargo.lock index 09a9020..3ff0d6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.76" +version = "1.0.78" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 085a4e3..03f354f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.76" +version = "1.0.78" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" From 2777e0cb1973a07542c404a24640b6e05472905c Mon Sep 17 00:00:00 2001 From: flupkede Date: Sat, 2 May 2026 13:09:49 +0200 Subject: [PATCH 4/6] fix(serve): start idle timer at warmup so unused warm repos get evicted `evict_idle_repos` only iterates `self.last_access`. `warmup_repo` deliberately did not write to `last_access` (its comment said "warmup is not a real query, idle timer should only reset on real query"), but the side effect was that warmed-but-never-queried repos never appeared in `last_access` at all and therefore stayed warm forever — holding open LMDB environments and embedder state.
Visible symptom in the TUI: aliases like KRKA.Aprimo, DPS, BAYR.Aprimo showed status `warm` with `Last Tool Call = -` indefinitely, while aliases that had been touched (e.g. aprimo_mcp, investing) correctly transitioned to `closed` after the idle window. Fix: have `warmup_repo` start the idle timer at warmup. A real query still resets the timer via `touch_access`. A repo that is never queried gets evicted after the idle timeout, which is the desired behaviour. --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/serve/mod.rs | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ff0d6d..0344a18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.78" +version = "1.0.79" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 03f354f..fbc7432 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.78" +version = "1.0.79" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" diff --git a/src/serve/mod.rs b/src/serve/mod.rs index e327c8e..5900e55 100644 --- a/src/serve/mod.rs +++ b/src/serve/mod.rs @@ -492,10 +492,14 @@ impl ServeState { }); // Store as Warm — FSW will be started lazily on first query. - // Do NOT touch_access: warmup is background activity, not a real query. - // The idle timer should only reset when a user/agent actually queries this repo. self.repos .insert(alias.to_string(), RepoState::Warm { stores: stores_arc }); + + // Start the idle timer at warmup. A real query will reset it via + // touch_access; without this, repos that are warmed but never queried + // would never appear in `last_access` and therefore never be evicted + // by `evict_idle_repos`, holding LMDB envs and embedder state forever. 
+ self.touch_access(alias); Ok(()) } From a8cbfaea51156e23b14cae6aaf391417855b1800 Mon Sep 17 00:00:00 2001 From: flupkede Date: Sat, 2 May 2026 14:21:01 +0200 Subject: [PATCH 5/6] fix(serve): evict warm-but-never-queried repos; drop Ctrl-C TUI quit - warmup_repo now calls touch_access so warmed-but-never-queried repos appear in last_access and get evicted by the idle reaper. Without this, background-warmed aliases stayed Warm forever. - is_quit_key drops the Ctrl-C arm. crossterm's raw mode delivers Ctrl-C as a key event (ENABLE_PROCESSED_INPUT off), so treating it as quit tore down the whole serve from a stray terminal Ctrl-C. Use q only. --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/serve/tui.rs | 15 ++++++++------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0344a18..6d33562 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.79" +version = "1.0.80" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index fbc7432..8eb2a47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.79" +version = "1.0.80" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" diff --git a/src/serve/tui.rs b/src/serve/tui.rs index 4a92f23..d3ae888 100644 --- a/src/serve/tui.rs +++ b/src/serve/tui.rs @@ -14,7 +14,7 @@ use ratatui::text::{Line, Span}; use ratatui::widgets::{Block, Borders, Cell, Row, Table, TableState}; use ratatui::Terminal; -use crossterm::event::{self, Event, KeyCode, KeyEvent, KeyModifiers}; +use crossterm::event::{self, Event, KeyCode, KeyEvent}; use crossterm::terminal::{self, EnterAlternateScreen, LeaveAlternateScreen}; use tokio_util::sync::CancellationToken; @@ -27,7 +27,7 @@ use super::ServeState; /// Run the fullscreen TUI. Spawns as a tokio task from `run_serve`. 
/// -/// Returns `Ok(())` when the user presses `q` / `Ctrl-C`, or when the +/// Returns `Ok(())` when the user presses `q`, or when the /// `cancel_token` is cancelled externally (e.g. Ctrl-C from the main task). /// /// Terminal restoration is guaranteed on normal exit and on errors. @@ -154,11 +154,12 @@ fn restore_terminal(terminal: &mut Terminal>) -> io // --------------------------------------------------------------------------- fn is_quit_key(key: KeyEvent) -> bool { - match key.code { - KeyCode::Char('q') => true, - KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => true, - _ => false, - } + // Ctrl-C is intentionally NOT a quit key here. crossterm's raw mode delivers + // it as a key event (ENABLE_PROCESSED_INPUT off on Windows / ISIG off on Unix), + // so the OS-level ctrlc::set_handler in main.rs is bypassed while the TUI runs. + // Treating Ctrl-C as quit was a foot-gun: a stray Ctrl-C in the wrong terminal + // would tear down the whole serve process. Use `q` instead. 
+ matches!(key.code, KeyCode::Char('q')) } fn handle_key(key: KeyEvent, table_state: &mut TableState, row_count: usize) { From 908697eb147a65c147dd3d3a0af6e6dfa1ae4100 Mon Sep 17 00:00:00 2001 From: flupkede Date: Sat, 2 May 2026 14:54:20 +0200 Subject: [PATCH 6/6] docs(unsafe): add SAFETY comments to LMDB env-open blocks --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/embed/cache.rs | 5 +++++ src/vectordb/store.rs | 13 +++++++++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d33562..00954d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,7 @@ dependencies = [ [[package]] name = "codesearch" -version = "1.0.80" +version = "1.0.81" dependencies = [ "anyhow", "arroy", diff --git a/Cargo.toml b/Cargo.toml index 8eb2a47..686e623 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codesearch" -version = "1.0.80" +version = "1.0.81" edition = "2021" authors = ["codesearch contributors"] license = "Apache-2.0" diff --git a/src/embed/cache.rs b/src/embed/cache.rs index 5d63e78..b50f4f9 100644 --- a/src/embed/cache.rs +++ b/src/embed/cache.rs @@ -308,6 +308,11 @@ impl PersistentEmbeddingCache { ) })?; + // SAFETY: heed's `EnvOpenOptions::open` is unsafe because the caller must + // ensure no other process maps this LMDB environment with incompatible options + // (different map_size or flags) at the same time. The cache directory is + // process-private under the user's codesearch state directory, and we open it + // exactly once per process via this constructor. let env = unsafe { EnvOpenOptions::new() .map_size(512 * 1024 * 1024) // 512MB — plenty for cache diff --git a/src/vectordb/store.rs b/src/vectordb/store.rs index 74c86c9..7816730 100644 --- a/src/vectordb/store.rs +++ b/src/vectordb/store.rs @@ -283,6 +283,12 @@ impl VectorStore { // one repo has been resized. Use the max of persisted, env-var, and // default to never shrink below what was previously allocated. 
let map_size_mb = resolve_map_size(db_path); + // SAFETY: heed's `EnvOpenOptions::open` is unsafe because the caller must + // ensure no other process maps this LMDB environment with incompatible options + // at the same time. codesearch enforces single-writer-per-DB at the application + // level (one `serve` process per machine, and the CLI rejects concurrent + // reindex). The map_size is reconciled across opens via `resolve_map_size` + // above, so we never reopen with a smaller map than was previously persisted. let env = unsafe { EnvOpenOptions::new() .map_size(map_size_mb * 1024 * 1024) @@ -361,6 +367,13 @@ impl VectorStore { // Open LMDB environment in read-only mode // Use same map-size resolution as new() for consistency let map_size_mb = resolve_map_size(db_path); + // SAFETY: heed's `EnvOpenOptions::open` is unsafe because of LMDB's mmap + // contract; see the SAFETY comment on the read-write `new()` above. This + // open is read-only (`EnvFlags::READ_ONLY`), so it cannot conflict with a + // concurrent writer's map_size, only with stale handles after a resize — + // which is acceptable because the writer's resize logic explicitly + // rebuilds the env (see `resize_map` below) before any reader is invited + // to reopen. let env = unsafe { EnvOpenOptions::new() .map_size(map_size_mb * 1024 * 1024)