Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0



## [1.0.156] - 2026-06-02

### Fixed

- **`reconcile_all_paths` no longer blocks the Tokio async runtime** — the
function is synchronous: it spawns git subprocesses and scans the filesystem
while holding the config `RwLock` write-guard. It now runs via
`tokio::task::spawn_blocking`, so Tokio worker threads stay responsive during
startup reconciliation.
- **Phase 1 auto-prune now honours `config_path_override`** — the prune path
wrote `repos.json` via `config.save()`, bypassing `ServeState::persist_config`
and therefore ignoring the override. All save sites in `ServeState` must route
through `persist_config` so the override (used in integration tests) is
respected; the prune path now calls `self.persist_config(&config)`.



## [1.0.154] - 2026-06-02

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "codesearch"
version = "1.0.154"
version = "1.0.156"
edition = "2021"
authors = ["codesearch contributors"]
license = "Apache-2.0"
Expand Down
115 changes: 95 additions & 20 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,34 @@ async fn index_with_options(

// If no changes and no deleted files, we're done
if changed_files.is_empty() && deleted_files.is_empty() {
// Safety net: if a previous run was cancelled/interrupted mid-way,
// the HNSW vector index may never have been built. Detect this and
// rebuild now so the database is usable without requiring --force.
{
let mut vs = VectorStore::new(&db_path, model_type.dimensions())?;
match vs.stats() {
Ok(s) if s.total_chunks > 0 && !s.indexed => {
log_print!(
"\n{}",
format!(
"🔨 Vector index not built ({} chunks found from previous run). Rebuilding...",
s.total_chunks
)
.yellow()
);
vs.build_index()?;
log_print!("{}", "✅ Vector index rebuilt successfully!".green());
}
Ok(_) => {} // already indexed or no chunks — all good
Err(e) => {
log_print!(
"{}",
format!("⚠️ Could not check vector index status: {}", e).yellow()
);
}
}
}

log_print!("\n{}", "✅ Database is up to date!".green());
return Ok(());
}
Expand Down Expand Up @@ -896,39 +924,86 @@ async fn index_with_options(
// Memory is freed here - chunks/embeddings dropped before next file
}

// Handle cancellation: exit quickly without blocking on build_index
// Handle cancellation: still finalize the index properly so the database
// remains usable. Skipping build_index() was the old behaviour — it left
// the database in a broken state that a subsequent incremental run could
// not recover from (no changed files → early return → index never built).
if cancelled {
pb.finish_with_message("Cancelled!");
log_print!("\n{}", "⚠️ Indexing cancelled by user".yellow());
log_print!(
"\n{}",
"⚠️ Indexing cancelled — finalising partial index...".yellow()
);

// Free ONNX model memory immediately
// Free ONNX model memory before build_index (releases hundreds of MB)
drop(embedding_service);
drop(chunker);

// Don't call build_index() — it blocks for 10-30 seconds on large datasets.
// The database is in a partially written state, user can re-run with --force.
// Commit FTS with retry to avoid index corruption on shutdown.
// Commit FTS
if total_chunks > 0 {
if let Err(e) = fts_store.commit() {
// Log the error - best-effort commit failed
log_print!(
"{} FTS commit warning: {} (index may need recovery)",
"⚠️ ".yellow(),
e
);
log_print!(
"{} Run {} to rebuild the index cleanly if needed",
"💡 ".cyan(),
"codesearch index -f".bright_cyan()
);
log_print!("{} FTS commit warning: {}", "⚠️ ".yellow(), e);
}
}
drop(fts_store);

// Build vector index from the chunks that were successfully inserted
if total_chunks > 0 {
log_print!(
" Building vector index for {} partial chunks...",
total_chunks
);
store.build_index()?;
log_print!(" ✅ Vector index built");
}

// Save metadata
std::fs::write(
db_path.join("metadata.json"),
serde_json::to_string_pretty(&serde_json::json!({
"model_short_name": model_type.short_name(),
"model_name": model_type.name(),
"dimensions": model_type.dimensions(),
"indexed_at": chrono::Utc::now().to_rfc3339(),
"partial": true,
}))?,
)?;

// Update FileMetaStore with the files that were actually processed
if !file_chunks.is_empty() {
if is_incremental {
let mut meta = file_meta_store.take().unwrap();
for (file_path, chunk_ids) in file_chunks {
meta.update_file(Path::new(&file_path), chunk_ids)?;
}
meta.save(&db_path)?;
} else {
log_print!(
" Partial progress: {} chunks written (re-run with --force for clean index)",
total_chunks
let mut meta = FileMetaStore::new(
model_type.short_name().to_string(),
model_type.dimensions(),
);
for (file_path, chunk_ids) in file_chunks {
meta.update_file(Path::new(&file_path), chunk_ids)?;
}
meta.save(&db_path)?;
}
}

// Persist stats
let db_stats = store.stats()?;
update_metadata_stats(&db_path, db_stats.total_chunks, db_stats.total_files);

log_print!(
" Partial index finalised: {} chunks, {} files",
db_stats.total_chunks,
db_stats.total_files
);
log_print!(
"{} Run {} to index the remaining files",
"💡 ".cyan(),
"codesearch index".bright_cyan()
);

return Ok(());
}

Expand Down
19 changes: 16 additions & 3 deletions src/serve/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -648,10 +648,12 @@ impl ServeState {
self.repos.remove(alias);
self.last_access.remove(alias);

// Unregister from repos.json
// Unregister from repos.json — route through persist_config
// so the config_path_override is honoured (same as all
// other save sites in ServeState).
if let Ok(mut config) = self.config.write() {
if config.unregister_alias(alias) {
if let Err(save_err) = config.save() {
if let Err(save_err) = self.persist_config(&config) {
warn!(
"phase-1: failed to save repos.json after pruning '{}': {}",
alias, save_err
Expand Down Expand Up @@ -3057,7 +3059,18 @@ pub async fn run_serve(
{
let phase_state = serve_state.clone();
tokio::spawn(async move {
phase_state.reconcile_all_paths();
// reconcile_all_paths is synchronous: it spawns git subprocesses and
// traverses the filesystem while holding the config RwLock write-guard.
// Running it directly on a Tokio worker thread would tie up that worker
// for the entire duration and starve other async tasks.
// spawn_blocking offloads the synchronous work to the blocking thread
// pool; we await the handle so reconciliation finishes before Phase 1.
// Note: concurrent config.read() callers still wait while the write-guard
// is held — offloading frees the worker thread, it does not shorten the lock.
let reconcile_state = phase_state.clone();
if let Err(e) =
tokio::task::spawn_blocking(move || reconcile_state.reconcile_all_paths()).await
{
warn!("reconcile: spawn_blocking panicked: {:?}", e);
}
phase_state.run_phase_1_warmup_all().await;
phase_state.run_phase_2_csharp_scip().await;
phase_state.run_phase_3_prewarm().await;
Expand Down
Loading