From a9dbece553768657e281c4d1d0c6192a1c9bc27f Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 29 Mar 2026 12:10:49 +0000 Subject: [PATCH 01/40] feat(db/libsql): fallback to brute-force cosine similarity if vector index missing Introduced a fallback mechanism for libSQL workspace vector search after the V9 migration dropped the fixed-dimension vector index. When the vector_top_k(...) indexed query is unavailable, the search now falls back to a brute-force cosine similarity calculation implemented in Rust. This change maintains full semantic retrieval capability and hybrid search parity with PostgreSQL, replacing silent loss of semantic results with a performance trade-off. Documentation updates clarify the backend behavioral differences and migration impacts. Co-authored-by: devboxerhub[bot] --- FEATURE_PARITY.md | 2 +- docs/configuration-guide.md | 7 ++ docs/database-integrations.md | 47 +++++++------ src/db/CLAUDE.md | 4 +- src/db/libsql/workspace.rs | 125 +++++++++++++++++++++++----------- src/workspace/README.md | 2 +- 6 files changed, 124 insertions(+), 63 deletions(-) diff --git a/FEATURE_PARITY.md b/FEATURE_PARITY.md index 634131fca..2a85da9ce 100644 --- a/FEATURE_PARITY.md +++ b/FEATURE_PARITY.md @@ -336,7 +336,7 @@ This document tracks feature parity between IronClaw (Rust implementation) and O | Feature | OpenClaw | IronClaw | Notes | |---------|----------|----------|-------| -| Vector memory | ✅ | ✅ | pgvector | +| Vector memory | ✅ | ✅ | PostgreSQL uses pgvector; libSQL uses indexed vector search when available and brute-force cosine fallback after V9 | | Session-based memory | ✅ | ✅ | | | Hybrid search (BM25 + vector) | ✅ | ✅ | RRF algorithm | | Temporal decay (hybrid search) | ✅ | ❌ | Opt-in time-based scoring factor | diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md index 46949040f..a7ac83e29 100644 --- a/docs/configuration-guide.md +++ b/docs/configuration-guide.md @@ -268,6 +268,13 @@ Table 18. Database and secrets environment variables. | `LIBSQL_AUTH_TOKEN` | Auth token for `LIBSQL_URL`. | Required when `LIBSQL_URL` is set. | | `SECRETS_MASTER_KEY` | Master key for encrypted secrets storage. | Optional, but must be at least 32 bytes when set. If omitted, axinite falls back to the operating-system keychain when available. | +When workspace memory search is enabled, backend choice affects how semantic +retrieval runs. PostgreSQL uses pgvector cosine-distance queries. libSQL uses +indexed `vector_top_k(...)` only when a compatible fixed-dimension vector +index exists; otherwise it falls back to brute-force cosine similarity in +Rust. `ironclaw doctor` and `ironclaw status` surface the active search mode. +See [database integrations](database-integrations.md) for the backend trade-offs. + ### 4.3 Agent runtime, safety, routines, heartbeat, hygiene, skills, and builder mode Table 19. Core runtime behaviour variables. diff --git a/docs/database-integrations.md b/docs/database-integrations.md index a7e286909..8e954076a 100644 --- a/docs/database-integrations.md +++ b/docs/database-integrations.md @@ -226,8 +226,9 @@ explicit `BEGIN IMMEDIATE` block. ### 4.4 Workspace search path -libSQL mirrors the same workspace concepts as PostgreSQL, but the implemented -search path is narrower. +libSQL mirrors the same workspace concepts as PostgreSQL, but the vector path +uses a different implementation strategy after the flexible-dimension +migration. Table 3. libSQL workspace search components. @@ -236,28 +237,32 @@ Table 3. libSQL workspace search components. | Document store | `memory_documents` table | | Chunk store | `memory_chunks` table with `embedding BLOB` | | Full-text search | `memory_chunks_fts` FTS5 virtual table plus maintenance triggers | -| Semantic search | Best-effort `vector_top_k(...)` query when a compatible vector index exists | +| Semantic search | `vector_top_k(...)` when a compatible index exists, otherwise brute-force cosine similarity in Rust | | Fusion strategy | RRF in Rust, same as PostgreSQL | -The current implementation quirk is important: +The important implementation detail is how libSQL behaves after the V9 +flexible-dimension migration: -- the libSQL schema and V9 migration comments describe a brute-force vector - fallback after flexible dimensions remove the fixed-dimension index -- the live code in `src/db/libsql/workspace.rs` does not implement that - brute-force fallback -- instead it attempts `vector_top_k('idx_memory_chunks_embedding', ...)` - and, when that query fails as expected after V9 drops the index, it logs a - debug message and returns no vector results +- the fixed-dimension `libsql_vector_idx` index is dropped because it cannot + support arbitrary embedding lengths +- the live code first attempts `vector_top_k('idx_memory_chunks_embedding', ...)` +- when that indexed query is unavailable, the repository logs that it is using + brute-force vector search and computes cosine similarity in Rust across the + stored embedding blobs +- the result stream still feeds the same Reciprocal Rank Fusion (RRF) path as + PostgreSQL In practical terms, a migrated or freshly bootstrapped libSQL workspace currently behaves as: - FTS5 keyword search always available -- vector results only when a compatible vector index exists -- FTS-only search after the normal flexible-dimension migration path +- semantic retrieval still available after V9 through brute-force cosine + similarity +- hybrid search still combines keyword and semantic results, but libSQL pays a + linear scan cost where PostgreSQL can use pgvector operations -That is the most significant behavioural gap between the two backends in the -current code. +That means the main backend difference is now performance and implementation +strategy, not silent loss of semantic recall. ### 4.5 Satellite stores on libSQL @@ -330,14 +335,15 @@ Table 4. Current backend comparison. | Migration engine | `refinery` over numbered SQL files | Consolidated schema plus `_migrations`-tracked incremental Rust-side runner | | Secrets and WASM satellite stores | Reuse cloned pool handles | Reuse shared database handle, then open fresh connections | | Keyword search | PostgreSQL `tsvector` plus GIN | FTS5 virtual table plus triggers | -| Vector search | pgvector cosine distance, now without the old HNSW index after V9 | Best-effort only; current migrated path is effectively FTS-only | +| Vector search | pgvector cosine distance, now without the old HNSW index after V9 | Indexed `vector_top_k(...)` when available, otherwise brute-force cosine similarity in Rust | | Best fit | Full default deployment with richer search parity | Embedded, local-first, or low-ops deployment where external PostgreSQL is undesirable | ### 6.1 When PostgreSQL is the safer choice Choose PostgreSQL when: -- full hybrid workspace search quality matters +- full hybrid workspace search quality matters and search latency under larger + workspaces matters too - the deployment already has PostgreSQL 15+ with pgvector available - query behaviour should match the default and most-tested path as closely as possible @@ -350,10 +356,9 @@ Choose libSQL when: - the deployment is local-first or edge-style - Turso replica mode is desirable, but a full PostgreSQL service is not -The main caveat is the current workspace-search trade-off. libSQL is not -merely "the same database API with a different wire protocol". In the current -implementation it is a simpler persistence backend with weaker semantic-search -behaviour after the flexible-dimension migration path. +The main caveat is now performance rather than capability. libSQL still offers +hybrid retrieval, but its post-V9 semantic path can require a brute-force scan +over all candidate embeddings for that workspace scope. ## 7. Current implementation caveats diff --git a/src/db/CLAUDE.md b/src/db/CLAUDE.md index 5e48bd38a..78a6d055c 100644 --- a/src/db/CLAUDE.md +++ b/src/db/CLAUDE.md @@ -103,7 +103,7 @@ The `Database` supertrait is composed of seven sub-traits. Leaf consumers can de **Timestamp write format:** Always write timestamps with `fmt_ts(dt)` (RFC 3339, millisecond precision). Read with `get_ts()` / `get_opt_ts()` which handle legacy naive formats too. -**Vector dimension:** PostgreSQL V9 migration changed the column to unbounded `vector` (removing the HNSW index). libSQL still uses `F32_BLOB(1536)` — if you use a different-dimension embedding model, the libSQL schema needs updating too. +**Vector dimension:** PostgreSQL V9 migration changed the column to unbounded `vector` (removing the HNSW index). libSQL V9 likewise moved `memory_chunks.embedding` to a plain `BLOB`, dropped the fixed-dimension vector index, and now falls back to brute-force cosine similarity in Rust when `vector_top_k(...)` is unavailable. **Connection per operation:** `LibSqlBackend::connect()` creates a fresh connection for every operation, sets `PRAGMA busy_timeout = 5000`, and closes it when the `Connection` is dropped. This is intentional — the libSQL SDK does not offer a pool. Avoid holding connections open across `await` points. @@ -147,7 +147,7 @@ The `Database` supertrait is composed of seven sub-traits. Leaf consumers can de - **Settings reload** — `Config::from_db` skipped (requires `Store`) - **No incremental migrations** — schema is idempotent CREATE IF NOT EXISTS; no ALTER TABLE support; column additions require a new versioned approach - **No encryption at rest** — only secrets (API tokens) are AES-256-GCM encrypted; all other data is plaintext SQLite -- **Hybrid search** — both FTS5 and vector search (`libsql_vector_idx`) are implemented; however, the vector index is fixed at `F32_BLOB(1536)` while PostgreSQL switched to unbounded `vector` in V9 +- **Hybrid search cost** — libSQL still provides hybrid search after V9, but semantic retrieval may use a brute-force cosine scan over candidate embeddings when no compatible vector index exists - **Write serialization** — WAL mode allows concurrent readers but only one writer at a time; busy timeout is 5 s, which may cause timeouts under high write concurrency ## Running Locally with libSQL diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index 37ca119f4..91db79d22 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -26,6 +26,10 @@ struct Candidate { similarity: f32, } +enum VectorSearchOutcome { + Indexed(Vec), + IndexUnavailable, +} fn rank_candidates(mut candidates: Vec, limit: usize) -> Vec { // Sort by similarity descending candidates.sort_by(|a, b| { @@ -226,18 +230,18 @@ async fn fts_ranked_results( Ok(results) } -/// Execute vector similarity search and return ranked results. +/// Execute vector similarity search via libSQL's vector index. /// -/// Queries using libsql's vector_top_k function. If the vector index is -/// missing (expected after V9 migration), logs at debug level and returns -/// an empty vector, preserving the existing graceful fallback behaviour. +/// Returns [`VectorSearchOutcome::IndexUnavailable`] when `vector_top_k(...)` +/// cannot run because the fixed-dimension vector index is missing, which is +/// the expected state after the V9 flexible-dimension migration. async fn vector_ranked_results( conn: &libsql::Connection, user_id: &str, agent_id: Option<&str>, embedding: &[f32], limit: i64, -) -> Result, WorkspaceError> { +) -> Result { let vector_json = format!( "[{}]", embedding @@ -249,7 +253,8 @@ async fn vector_ranked_results( // vector_top_k requires a libsql_vector_idx index. After the V9 // migration the index is dropped (to support flexible embedding - // dimensions), so this query may fail. Fall back to FTS-only. + // dimensions), so this query may fail. The caller must then fall + // back to brute-force cosine similarity. match conn .query( r#" @@ -265,13 +270,15 @@ async fn vector_ranked_results( { Ok(mut rows) => { let mut results = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Vector row fetch failed: {}", e), - })? - { + while let Some(row) = match rows.next().await { + Ok(row) => row, + Err(e) => { + tracing::debug!( + "Vector index row fetch failed, brute-force fallback required: {e}" + ); + return Ok(VectorSearchOutcome::IndexUnavailable); + } + } { results.push(RankedResult { chunk_id: get_text(&row, 0).parse().unwrap_or_default(), document_id: get_text(&row, 1).parse().unwrap_or_default(), @@ -280,14 +287,19 @@ async fn vector_ranked_results( rank: results.len() as u32 + 1, }); } - Ok(results) + tracing::debug!( + "libSQL vector index search returned {} results (pre-fusion limit: {})", + results.len(), + limit + ); + Ok(VectorSearchOutcome::Indexed(results)) } Err(e) => { tracing::debug!( "Vector index query failed (expected after V9 migration), \ - falling back to FTS-only: {e}" + brute-force fallback required: {e}" ); - Ok(Vec::new()) + Ok(VectorSearchOutcome::IndexUnavailable) } } } @@ -807,28 +819,19 @@ impl NativeWorkspaceStore for LibSqlBackend { let vector_results = if config.use_vector { if let Some(emb) = embedding { - // Try the vector index first; fall back to brute-force - // cosine similarity if the index is gone (post-V9 migration). - let indexed = - vector_ranked_results(&conn, user_id, agent_id_str.as_deref(), emb, pre_limit) - .await?; - if indexed.is_empty() { - tracing::info!( - "Vector index returned no results, using brute-force vector search" - ); - self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) - .await - .unwrap_or_else(|e| { - tracing::warn!("Brute-force vector search failed: {e}"); - Vec::new() - }) - } else { - tracing::debug!( - "Vector index search returned {} results (pre-fusion limit: {})", - indexed.len(), - pre_limit - ); - indexed + match vector_ranked_results(&conn, user_id, agent_id_str.as_deref(), emb, pre_limit) + .await? + { + VectorSearchOutcome::Indexed(results) => results, + VectorSearchOutcome::IndexUnavailable => { + tracing::info!("Using brute-force vector search (no vector index)"); + self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) + .await + .unwrap_or_else(|e| { + tracing::warn!("Brute-force vector search failed: {e}"); + Vec::new() + }) + } } } else { Vec::new() @@ -899,4 +902,50 @@ mod tests { assert!((result[1] - 0.0).abs() < 0.001); assert!((result[2] - 2.75).abs() < 0.001); } + + #[tokio::test] + async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { + use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; + use crate::workspace::SearchConfig; + + let tempdir = tempfile::tempdir().unwrap(); + let backend = LibSqlBackend::new_local(&tempdir.path().join("workspace.db")) + .await + .unwrap(); + backend.run_migrations().await.unwrap(); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/search.md") + .await + .unwrap(); + backend + .update_document(document.id, "semantic search fallback test") + .await + .unwrap(); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "semantic search fallback test", + embedding: Some(&[1.0, 0.0, 0.0]), + }) + .await + .unwrap(); + + let results = backend + .hybrid_search(HybridSearchParams { + user_id: "default", + agent_id: None, + query: "semantic", + embedding: Some(&[1.0, 0.0, 0.0]), + config: &SearchConfig::default().vector_only().with_limit(5), + }) + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].document_path, "notes/search.md"); + assert_eq!(results[0].vector_rank, Some(1)); + assert!(results[0].fts_rank.is_none()); + } } diff --git a/src/workspace/README.md b/src/workspace/README.md index 2b3ee5b48..4db6198bb 100644 --- a/src/workspace/README.md +++ b/src/workspace/README.md @@ -84,7 +84,7 @@ Default k=60. Results from both methods are combined, with documents appearing i **Backend differences:** - **PostgreSQL:** `ts_rank_cd` for FTS, pgvector cosine distance for vectors, full RRF -- **libSQL:** FTS5 for keyword search only (vector search via `libsql_vector_idx` not yet wired) +- **libSQL:** FTS5 plus vector search; uses `vector_top_k(...)` when a compatible fixed-dimension index exists, otherwise brute-force cosine similarity in Rust ## Heartbeat System From a32cb770d2d8096c0af0ea6d0bd9ab89c76a3d54 Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 20:50:23 +0200 Subject: [PATCH 02/40] fix: restore webhook listener test safety Restore the webhook server's resolved listener address handling so\n`current_addr()` and startup logs report the real bound socket,\nincluding ephemeral `:0` binds.\n\nReintroduce test-only listener-based start and restart helpers so\nwebhook tests can keep ephemeral ports reserved without a TOCTOU\nwindow between port discovery and server startup.\n\nReplace new libsql test `.unwrap()` calls with contextual\n`.expect(...)` messages to match the repository's test guidance\nand make failures easier to diagnose. --- src/channels/webhook_server.rs | 416 +++++++++++++++++++++++++++++++++ src/db/libsql/workspace.rs | 17 +- 2 files changed, 426 insertions(+), 7 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 2a43f9ff4..242755752 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -238,4 +238,420 @@ impl WebhookServer { let _ = handle.await; } } + + /// Accept a pre-bound listener, merge route fragments, and spawn the + /// server. Eliminates the TOCTOU window that `start()` has between port + /// discovery and the actual bind, making it suitable for tests that + /// allocate ephemeral ports. + #[cfg(test)] + pub async fn start_with_listener( + &mut self, + listener: tokio::net::TcpListener, + ) -> Result<(), ChannelError> { + let mut app = Router::new(); + for fragment in self.routes.drain(..) { + app = app.merge(fragment); + } + self.merged_router = Some(app.clone()); + + let local_addr = listener + .local_addr() + .map_err(|e| ChannelError::StartupFailed { + name: "webhook_server".to_string(), + reason: format!("Failed to get listener local address: {e}"), + })?; + self.config.addr = local_addr; + + self.spawn_with_listener(listener, app); + Ok(()) + } + + /// Spawn the axum server on an already-bound listener. + fn spawn_with_listener(&mut self, listener: tokio::net::TcpListener, app: Router) { + let addr = self.config.addr; + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + self.shutdown_tx = Some(shutdown_tx); + + let handle = tokio::spawn(async move { + if let Err(e) = axum::serve(listener, app) + .with_graceful_shutdown(async { + let _ = shutdown_rx.await; + tracing::debug!("Webhook server shutting down"); + }) + .await + { + tracing::error!("Webhook server error: {}", e); + } + }); + + tracing::info!("Webhook server listening on {}", addr); + self.handle = Some(handle); + } + + /// Gracefully shut down the current listener and rebind using a pre-bound + /// listener. Eliminates the TOCTOU window between port reservation and + /// bind, making it suitable for tests. + /// + /// Unlike [`restart_with_addr`], this test-only helper does not support + /// rollback. [`restart_with_addr`] binds the replacement first and can keep + /// the old listener alive if that bind fails. This method shuts down the + /// old listener before calling [`Self::spawn_with_listener`], so there is + /// no rollback path if spawning were to fail. That trade-off is acceptable + /// in tests because [`Self::spawn_with_listener`] is infallible. + #[cfg(test)] + pub async fn restart_with_listener( + &mut self, + listener: tokio::net::TcpListener, + ) -> Result<(), ChannelError> { + let app = self + .merged_router + .clone() + .ok_or_else(|| ChannelError::StartupFailed { + name: "webhook_server".to_string(), + reason: "restart_with_listener called before start()".to_string(), + })?; + + let new_addr = listener + .local_addr() + .map_err(|e| ChannelError::StartupFailed { + name: "webhook_server".to_string(), + reason: format!("Failed to get listener local address: {e}"), + })?; + + // Stop the old listener before spawning the new one. Unlike + // restart_with_addr, we do not provide rollback semantics because the + // new listener is already bound and assumed to be valid. + self.shutdown().await; + + self.config.addr = new_addr; + self.spawn_with_listener(listener, app); + Ok(()) + } +} +<<<<<<< ours +||||||| base + +#[cfg(test)] +mod tests { + use std::net::TcpListener as StdTcpListener; + + use axum::Json; + use rstest::{fixture, rstest}; + use serde_json::json; + + use super::*; + + /// A started webhook server with a `/health` route and a pre-built client. + struct StartedWebhookServer { + server: WebhookServer, + addr: SocketAddr, + client: reqwest::Client, + } + + /// Finds an available port, creates a [`WebhookServer`] with a `/health` + /// route, starts the server, and returns the address and a client. + #[fixture] + async fn started_webhook_server() + -> Result> { + let port = { + let listener = StdTcpListener::bind("127.0.0.1:0")?; + listener.local_addr()?.port() + }; + let addr: SocketAddr = format!("127.0.0.1:{}", port).parse()?; + let mut server = WebhookServer::new(WebhookServerConfig { addr }); + server.add_routes(Router::new().route( + "/health", + axum::routing::get(|| async { Json(json!({"status": "ok"})) }), + )); + server.start().await?; + Ok(StartedWebhookServer { + server, + addr, + client: reqwest::Client::new(), + }) + } + + #[rstest] + #[tokio::test] + async fn test_restart_with_addr_rebinds_listener( + #[future] started_webhook_server: Result< + StartedWebhookServer, + Box, + >, + ) -> Result<(), Box> { + let StartedWebhookServer { + mut server, + addr: addr1, + client, + } = started_webhook_server.await?; + + assert_eq!( + server.current_addr(), + addr1, + "Server should be bound to initial address" + ); + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "First server should respond to health check" + ); + + // Find a second available port and restart + let port2 = { + let listener = StdTcpListener::bind("127.0.0.1:0")?; + listener.local_addr()?.port() + }; + let addr2: SocketAddr = format!("127.0.0.1:{}", port2).parse()?; + + server.restart_with_addr(addr2).await?; + + assert_eq!( + server.current_addr(), + addr2, + "Server address should be updated after restart" + ); + assert_ne!( + addr1, addr2, + "Address should change after restart_with_addr" + ); + + let response = client + .get(format!("http://{}/health", addr2)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Restarted server should respond to health check on new address" + ); + + let old_result = tokio::time::timeout( + std::time::Duration::from_millis(200), + client.get(format!("http://{}/health", addr1)).send(), + ) + .await; + assert!( + old_result.is_err() || old_result.ok().and_then(|r| r.ok()).is_none(), + "Old address should not respond after server restarts" + ); + + server.shutdown().await; + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_restart_with_addr_rollback_on_bind_failure( + #[future] started_webhook_server: Result< + StartedWebhookServer, + Box, + >, + ) -> Result<(), Box> { + let StartedWebhookServer { + mut server, + addr: addr1, + client, + } = started_webhook_server.await?; + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!(response.status(), 200, "Server should be listening"); + + // Occupy a second port so the restart bind fails deterministically. + let occupied_listener = StdTcpListener::bind("127.0.0.1:0")?; + let conflict_addr = occupied_listener.local_addr()?; + + let result = server.restart_with_addr(conflict_addr).await; + assert!( + result.is_err(), + "Restart with already-bound address should fail" + ); + + drop(occupied_listener); + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Old listener should still be running after failed restart" + ); + + assert_eq!( + server.current_addr(), + addr1, + "Server address should be restored after failed restart" + ); + + server.shutdown().await; + Ok(()) + } +} +======= + +#[cfg(test)] +mod tests { + use axum::Json; + use rstest::{fixture, rstest}; + use serde_json::json; + + use super::*; + + /// A started webhook server with a `/health` route and a pre-built client. + struct StartedWebhookServer { + server: WebhookServer, + client: reqwest::Client, + } + + /// Bind an ephemeral localhost listener and keep it reserved until the + /// server takes ownership, eliminating port-probe races in tests. + async fn bind_ephemeral_listener() + -> Result<(tokio::net::TcpListener, SocketAddr), Box> { + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await?; + let addr = listener.local_addr()?; + Ok((listener, addr)) + } + + /// Create a [`WebhookServer`] with a `/health` route, start it on a + /// pre-bound ephemeral listener, and return the server and a client. + #[fixture] + async fn started_webhook_server() + -> Result> { + let (listener, _) = bind_ephemeral_listener().await?; + let mut server = WebhookServer::new(WebhookServerConfig { + addr: "127.0.0.1:0".parse()?, + }); + server.add_routes(Router::new().route( + "/health", + axum::routing::get(|| async { Json(json!({"status": "ok"})) }), + )); + server.start_with_listener(listener).await?; + Ok(StartedWebhookServer { + server, + client: reqwest::Client::new(), + }) + } + + #[rstest] + #[tokio::test] + async fn test_restart_with_addr_rebinds_listener( + #[future] started_webhook_server: Result< + StartedWebhookServer, + Box, + >, + ) -> Result<(), Box> { + let StartedWebhookServer { mut server, client } = started_webhook_server.await?; + let addr1 = server.current_addr(); + + assert_eq!( + server.current_addr(), + addr1, + "Server should be bound to initial address" + ); + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "First server should respond to health check" + ); + + let (listener, addr2) = bind_ephemeral_listener().await?; + server.restart_with_listener(listener).await?; + + assert_eq!( + server.current_addr(), + addr2, + "Server address should be updated after restart" + ); + assert_ne!( + addr1, addr2, + "Address should change after restart_with_addr" + ); + + let response = client + .get(format!("http://{}/health", addr2)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Restarted server should respond to health check on new address" + ); + + let old_result = tokio::time::timeout( + std::time::Duration::from_millis(200), + client.get(format!("http://{}/health", addr1)).send(), + ) + .await; + assert!( + old_result.is_err() || old_result.ok().and_then(|r| r.ok()).is_none(), + "Old address should not respond after server restarts" + ); + + server.shutdown().await; + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_restart_with_addr_rollback_on_bind_failure( + #[future] started_webhook_server: Result< + StartedWebhookServer, + Box, + >, + ) -> Result<(), Box> { + let StartedWebhookServer { mut server, client } = started_webhook_server.await?; + let addr1 = server.current_addr(); + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!(response.status(), 200, "Server should be listening"); + + // Occupy a second port so the restart bind fails deterministically. + let occupied_listener = StdTcpListener::bind("127.0.0.1:0")?; + let conflict_addr = occupied_listener.local_addr()?; + + let result = server.restart_with_addr(conflict_addr).await; + assert!( + result.is_err(), + "Restart with already-bound address should fail" + ); + + drop(occupied_listener); + + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Old listener should still be running after failed restart" + ); + + assert_eq!( + server.current_addr(), + addr1, + "Server address should be restored after failed restart" + ); + + server.shutdown().await; + Ok(()) + } } +>>>>>>> theirs diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index 91db79d22..2d7fb8e32 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -908,20 +908,23 @@ mod tests { use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; use crate::workspace::SearchConfig; - let tempdir = tempfile::tempdir().unwrap(); + let tempdir = tempfile::tempdir().expect("failed to create tempdir"); let backend = LibSqlBackend::new_local(&tempdir.path().join("workspace.db")) .await - .unwrap(); - backend.run_migrations().await.unwrap(); + .expect("failed to create local libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); let document = backend .get_or_create_document_by_path("default", None, "notes/search.md") .await - .unwrap(); + .expect("failed to create search test document"); backend .update_document(document.id, "semantic search fallback test") .await - .unwrap(); + .expect("failed to update search test document"); backend .insert_chunk(InsertChunkParams { document_id: document.id, @@ -930,7 +933,7 @@ mod tests { embedding: Some(&[1.0, 0.0, 0.0]), }) .await - .unwrap(); + .expect("failed to insert search test chunk"); let results = backend .hybrid_search(HybridSearchParams { @@ -941,7 +944,7 @@ mod tests { config: &SearchConfig::default().vector_only().with_limit(5), }) .await - .unwrap(); + .expect("failed to execute hybrid search"); assert_eq!(results.len(), 1); assert_eq!(results[0].document_path, "notes/search.md"); From 6b133454c665a32ad46cc87fefe218f79fa0f7f5 Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 21:05:05 +0200 Subject: [PATCH 03/40] fix: restore webhook test listener import Rebasing onto origin/main dropped the std listener import used by\nthe rollback test in src/channels/webhook_server.rs. Restore the\nimport so the listener-conflict regression test continues to compile\nand exercise the branch's race-free listener behaviour.\n\nValidated with:\n- make check-fmt\n- make test\n- make typecheck\n- make lint --- src/channels/webhook_server.rs | 169 --------------------------------- 1 file changed, 169 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 242755752..2e336790a 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -328,9 +328,6 @@ impl WebhookServer { Ok(()) } } -<<<<<<< ours -||||||| base - #[cfg(test)] mod tests { use std::net::TcpListener as StdTcpListener; @@ -341,171 +338,6 @@ mod tests { use super::*; - /// A started webhook server with a `/health` route and a pre-built client. - struct StartedWebhookServer { - server: WebhookServer, - addr: SocketAddr, - client: reqwest::Client, - } - - /// Finds an available port, creates a [`WebhookServer`] with a `/health` - /// route, starts the server, and returns the address and a client. - #[fixture] - async fn started_webhook_server() - -> Result> { - let port = { - let listener = StdTcpListener::bind("127.0.0.1:0")?; - listener.local_addr()?.port() - }; - let addr: SocketAddr = format!("127.0.0.1:{}", port).parse()?; - let mut server = WebhookServer::new(WebhookServerConfig { addr }); - server.add_routes(Router::new().route( - "/health", - axum::routing::get(|| async { Json(json!({"status": "ok"})) }), - )); - server.start().await?; - Ok(StartedWebhookServer { - server, - addr, - client: reqwest::Client::new(), - }) - } - - #[rstest] - #[tokio::test] - async fn test_restart_with_addr_rebinds_listener( - #[future] started_webhook_server: Result< - StartedWebhookServer, - Box, - >, - ) -> Result<(), Box> { - let StartedWebhookServer { - mut server, - addr: addr1, - client, - } = started_webhook_server.await?; - - assert_eq!( - server.current_addr(), - addr1, - "Server should be bound to initial address" - ); - - let response = client - .get(format!("http://{}/health", addr1)) - .send() - .await?; - assert_eq!( - response.status(), - 200, - "First server should respond to health check" - ); - - // Find a second available port and restart - let port2 = { - let listener = StdTcpListener::bind("127.0.0.1:0")?; - listener.local_addr()?.port() - }; - let addr2: SocketAddr = format!("127.0.0.1:{}", port2).parse()?; - - server.restart_with_addr(addr2).await?; - - assert_eq!( - server.current_addr(), - addr2, - "Server address should be updated after restart" - ); - assert_ne!( - addr1, addr2, - "Address should change after restart_with_addr" - ); - - let response = client - .get(format!("http://{}/health", addr2)) - .send() - .await?; - assert_eq!( - response.status(), - 200, - "Restarted server should respond to health check on new address" - ); - - let old_result = tokio::time::timeout( - std::time::Duration::from_millis(200), - client.get(format!("http://{}/health", addr1)).send(), - ) - .await; - assert!( - old_result.is_err() || old_result.ok().and_then(|r| r.ok()).is_none(), - "Old address should not respond after server restarts" - ); - - server.shutdown().await; - Ok(()) - } - - #[rstest] - #[tokio::test] - async fn test_restart_with_addr_rollback_on_bind_failure( - #[future] started_webhook_server: Result< - StartedWebhookServer, - Box, - >, - ) -> Result<(), Box> { - let StartedWebhookServer { - mut server, - addr: addr1, - client, - } = started_webhook_server.await?; - - let response = client - .get(format!("http://{}/health", addr1)) - .send() - .await?; - assert_eq!(response.status(), 200, "Server should be listening"); - - // Occupy a second port so the restart bind fails deterministically. - let occupied_listener = StdTcpListener::bind("127.0.0.1:0")?; - let conflict_addr = occupied_listener.local_addr()?; - - let result = server.restart_with_addr(conflict_addr).await; - assert!( - result.is_err(), - "Restart with already-bound address should fail" - ); - - drop(occupied_listener); - - let response = client - .get(format!("http://{}/health", addr1)) - .send() - .await?; - assert_eq!( - response.status(), - 200, - "Old listener should still be running after failed restart" - ); - - assert_eq!( - server.current_addr(), - addr1, - "Server address should be restored after failed restart" - ); - - server.shutdown().await; - Ok(()) - } -} -======= - -#[cfg(test)] -mod tests { - use axum::Json; - use rstest::{fixture, rstest}; - use serde_json::json; - - use super::*; - /// A started webhook server with a `/health` route and a pre-built client. struct StartedWebhookServer { server: WebhookServer, @@ -654,4 +486,3 @@ mod tests { Ok(()) } } ->>>>>>> theirs From b95bf7d01c32f56c7b66bb5b7746c1d82135a25b Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 21:32:54 +0200 Subject: [PATCH 04/40] Fix verified review findings Verify the latest review findings against the rebased branch and only\napply the ones that still reproduced.\n\nTighten libSQL vector fallback handling so index-related failures still\nfall back to brute-force search while unrelated query errors bubble up,\nand cover the reciprocal-rank-fusion path in the regression test.\n\nAlign the catalogue version test with the intended invariant by keeping\nthe tool name stable while mutating only the payload, and refresh the\ndocs and wording fixes that were still stale after the rebase. --- docs/database-integrations.md | 5 +- ...r-schema-fidelity-and-execution-routing.md | 18 ++++--- src/channels/webhook_server.rs | 2 +- src/db/libsql/workspace.rs | 51 ++++++++++++++----- .../api/tests/catalogue_fidelity.rs | 6 ++- .../api/tests/fixtures/remote_tool_mocks.rs | 19 +++++-- src/test_support.rs | 2 +- src/workspace/README.md | 32 ++++++++---- 8 files changed, 94 insertions(+), 41 deletions(-) diff --git a/docs/database-integrations.md b/docs/database-integrations.md index 8e954076a..7236fd3a7 100644 --- a/docs/database-integrations.md +++ b/docs/database-integrations.md @@ -367,8 +367,9 @@ over all candidate embeddings for that workspace scope. handshake. 2. PostgreSQL still supports vector search after V9, but no longer through the old fixed-dimension HNSW index. -3. libSQL migration and schema comments still describe a brute-force vector - fallback that the current code does not implement. +3. libSQL falls back to brute-force cosine similarity in Rust when + `vector_top_k(...)` cannot run because the fixed-dimension vector index is + absent after the V9 migration. 4. Workspace memory and memory tools are absent in `--no-db` mode because the host does not build a workspace without a database. diff --git a/docs/execplans/1-1-4-tests-for-schema-fidelity-and-execution-routing.md b/docs/execplans/1-1-4-tests-for-schema-fidelity-and-execution-routing.md index af1b9fd9e..f89734e5a 100644 --- a/docs/execplans/1-1-4-tests-for-schema-fidelity-and-execution-routing.md +++ b/docs/execplans/1-1-4-tests-for-schema-fidelity-and-execution-routing.md @@ -855,7 +855,7 @@ addressed in a subsequent pass (see progress checklist above). - `src/orchestrator/api/tests/fixtures/remote_tool_mocks.rs`: added `complex_tool_definition()` and `complex_tool_stub()` fixtures for testing full payload fidelity with nested JSON Schema and special characters. -- `src/orchestrator/api/tests/remote_tools.rs`: added three new tests: +- `src/orchestrator/api/tests/catalogue_fidelity.rs`: added three new tests: `remote_tool_catalog_preserves_full_tool_definition_payload`, `remote_tool_catalog_version_is_deterministic_and_sensitive_to_content`, and `orchestrator_responses_deserialize_into_worker_shared_types`. @@ -903,9 +903,11 @@ The implementation added 9 new test functions covering: (milestone 4). All tests use in-process mock servers and fixtures, avoiding external -dependencies. All tests follow existing `rstest` patterns and naming conventions. -The format check (`make check-fmt`) passed after running `cargo fmt --all`. All -validation gates have been run and passed successfully. +dependencies. All tests follow existing `rstest` patterns and naming +conventions. The format check (`make check-fmt`) passed after running +`cargo fmt --all`. The format check, git whitespace check, and full test suite +passed successfully. Markdown linting remained partially blocked by +pre-existing issues in `docs/roadmap.md`. ### Validation evidence @@ -928,10 +930,10 @@ Full test suite passed: 3076 tests passed; 0 failed; 2 ignored (webhook server test fixed to use already-bound address instead of privileged port; worker API types test split into three focused tests per code review). -Markdown linting revealed pre-existing issues in `docs/roadmap.md` unrelated to -this implementation (multiple consecutive blank lines at lines 1342, 1408, 1450, -1489, 1512). The ExecPlan, RFC 0001, and `docs/contents.md` changes introduced -no new Markdown issues. +Markdown linting was only partially green because `docs/roadmap.md` still had +pre-existing issues unrelated to this implementation (multiple consecutive +blank lines at lines 1342, 1408, 1450, 1489, 1512). The ExecPlan, RFC 0001, +and `docs/contents.md` changes introduced no new Markdown issues. ### Retrospective observations diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 2e336790a..f7c8cbb8c 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -429,7 +429,7 @@ mod tests { ) .await; assert!( - old_result.is_err() || old_result.ok().and_then(|r| r.ok()).is_none(), + matches!(old_result, Err(_) | Ok(Err(_))), "Old address should not respond after server restarts" ); diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index 2d7fb8e32..a00b72e6c 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -30,6 +30,16 @@ enum VectorSearchOutcome { Indexed(Vec), IndexUnavailable, } + +fn is_missing_vector_index_error(error: &libsql::Error) -> bool { + let error_message = error.to_string().to_ascii_lowercase(); + + error_message.contains("vector_top_k") + || error_message.contains("no such function") + || error_message.contains("idx_memory_chunks_embedding") + || error_message.contains("failed to parse vector index parameters") +} + fn rank_candidates(mut candidates: Vec, limit: usize) -> Vec { // Sort by similarity descending candidates.sort_by(|a, b| { @@ -273,10 +283,16 @@ async fn vector_ranked_results( while let Some(row) = match rows.next().await { Ok(row) => row, Err(e) => { - tracing::debug!( - "Vector index row fetch failed, brute-force fallback required: {e}" - ); - return Ok(VectorSearchOutcome::IndexUnavailable); + if is_missing_vector_index_error(&e) { + tracing::debug!( + "Vector index row fetch failed, brute-force fallback required: {e}" + ); + return Ok(VectorSearchOutcome::IndexUnavailable); + } + + return Err(WorkspaceError::SearchFailed { + reason: format!("Vector index row fetch failed: {e}"), + }); } } { results.push(RankedResult { @@ -295,11 +311,17 @@ async fn vector_ranked_results( Ok(VectorSearchOutcome::Indexed(results)) } Err(e) => { - tracing::debug!( - "Vector index query failed (expected after V9 migration), \ - brute-force fallback required: {e}" - ); - Ok(VectorSearchOutcome::IndexUnavailable) + if is_missing_vector_index_error(&e) { + tracing::debug!( + "Vector index query failed (expected after V9 migration), \ + brute-force fallback required: {e}" + ); + Ok(VectorSearchOutcome::IndexUnavailable) + } else { + Err(WorkspaceError::SearchFailed { + reason: format!("Vector index query failed: {e}"), + }) + } } } } @@ -827,10 +849,10 @@ impl NativeWorkspaceStore for LibSqlBackend { tracing::info!("Using brute-force vector search (no vector index)"); self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) .await - .unwrap_or_else(|e| { + .map_err(|e| { tracing::warn!("Brute-force vector search failed: {e}"); - Vec::new() - }) + e + })? } } } else { @@ -941,14 +963,15 @@ mod tests { agent_id: None, query: "semantic", embedding: Some(&[1.0, 0.0, 0.0]), - config: &SearchConfig::default().vector_only().with_limit(5), + config: &SearchConfig::default().with_limit(5), }) .await .expect("failed to execute hybrid search"); assert_eq!(results.len(), 1); assert_eq!(results[0].document_path, "notes/search.md"); + assert_eq!(results[0].fts_rank, Some(1)); assert_eq!(results[0].vector_rank, Some(1)); - assert!(results[0].fts_rank.is_none()); + assert!(results[0].is_hybrid()); } } diff --git a/src/orchestrator/api/tests/catalogue_fidelity.rs b/src/orchestrator/api/tests/catalogue_fidelity.rs index 8ce1b47c4..c5ae1480c 100644 --- a/src/orchestrator/api/tests/catalogue_fidelity.rs +++ b/src/orchestrator/api/tests/catalogue_fidelity.rs @@ -76,7 +76,9 @@ async fn remote_tool_catalog_version_is_deterministic_and_sensitive_to_content() let registry_c = Arc::new(ToolRegistry::new()); registry_c - .register(build_tool_fixture(ToolFixture::CatalogBeta)) + .register(build_tool_fixture( + ToolFixture::CatalogAlphaWithDifferentPayload, + )) .await; let (_tools_a, _instructions_a, version_a) = hosted_remote_tool_catalog(®istry_a).await; @@ -89,7 +91,7 @@ async fn remote_tool_catalog_version_is_deterministic_and_sensitive_to_content() ); assert_ne!( version_a, version_c, - "different tool sets must produce different catalog versions" + "different catalogue payloads must produce different catalog versions" ); } diff --git a/src/orchestrator/api/tests/fixtures/remote_tool_mocks.rs b/src/orchestrator/api/tests/fixtures/remote_tool_mocks.rs index a7019ee6e..7c4c98517 100644 --- a/src/orchestrator/api/tests/fixtures/remote_tool_mocks.rs +++ b/src/orchestrator/api/tests/fixtures/remote_tool_mocks.rs @@ -113,11 +113,12 @@ impl NativeTool for StubTool { #[derive(Clone, Copy, Debug)] /// Shared hosted-remote-tool fixture presets for catalogue and execute tests. /// -/// `CatalogAlpha`, `CatalogBeta`, and `CatalogWasm` model hosted-safe -/// catalogue entries. `ApprovalGated` models a hosted tool that must never -/// execute without approval. +/// `CatalogAlpha`, `CatalogAlphaWithDifferentPayload`, `CatalogBeta`, and +/// `CatalogWasm` model hosted-safe catalogue entries. `ApprovalGated` models +/// a hosted tool that must never execute without approval. pub(crate) enum ToolFixture { CatalogAlpha, + CatalogAlphaWithDifferentPayload, CatalogBeta, CatalogWasm, ApprovalGated, @@ -139,6 +140,18 @@ pub(crate) fn build_tool_fixture(kind: ToolFixture) -> Arc { "required":["query"] }), )) as Arc, + ToolFixture::CatalogAlphaWithDifferentPayload => Arc::new(StubTool::hosted( + "remote_tool_catalog_fixture", + "Hosted-safe tool for catalog tests with updated payload", + serde_json::json!({ + "type":"object", + "properties":{ + "query":{"type":"string","description":"search query"}, + "limit":{"type":"integer","minimum":1} + }, + "required":["query", "limit"] + }), + )) as Arc, ToolFixture::CatalogBeta => Arc::new(StubTool::hosted( "remote_tool_catalog_fixture_beta", "Second hosted-safe tool for catalog tests", diff --git a/src/test_support.rs b/src/test_support.rs index 86711c46b..37db41365 100644 --- a/src/test_support.rs +++ b/src/test_support.rs @@ -9,7 +9,7 @@ use crate::llm::ToolDefinition; /// Returns the canonical complex parameters JSON schema used for fidelity testing. /// /// This schema exercises nested objects, arrays, enums, constraints, and various -/// JSON Schema features to validate that tool definitions survive serialization, +/// JSON Schema features to validate that tool definitions survive serialisation, /// transport, and reconstruction without data loss. pub fn complex_tool_definition_parameters() -> serde_json::Value { serde_json::json!({ diff --git a/src/workspace/README.md b/src/workspace/README.md index 4db6198bb..0fe1a07c0 100644 --- a/src/workspace/README.md +++ b/src/workspace/README.md @@ -1,17 +1,21 @@ # Workspace & Memory System -Inspired by [OpenClaw](https://github.com/openclaw/openclaw), the workspace provides persistent memory for agents with a flexible filesystem-like structure. +Inspired by [OpenClaw](https://github.com/openclaw/openclaw), the workspace +provides persistent memory for agents with a flexible filesystem-like +structure. ## Key Principles -1. **"Memory is database, not RAM"** - If you want to remember something, write it explicitly +1. **"Memory is database, not RAM"** - If you want to remember something, + write it explicitly 2. **Flexible structure** - Create any directory/file hierarchy you need 3. **Self-documenting** - Use README.md files to describe directory structure -4. **Hybrid search** - Combines FTS (keyword) + vector (semantic) via Reciprocal Rank Fusion +4. **Hybrid search** - Combines FTS (keyword) + vector (semantic) via + Reciprocal Rank Fusion ## Filesystem Structure -``` +```text workspace/ ├── README.md <- Root runbook/index ├── MEMORY.md <- Long-term curated memory @@ -67,24 +71,31 @@ let prompt = workspace.system_prompt().await?; Four tools for LLM use: -- **`memory_search`** - Hybrid search, MUST be called before answering questions about prior work +- **`memory_search`** - Hybrid search, MUST be called before answering + questions about prior work - **`memory_write`** - Write to any path (memory, daily_log, or custom paths) - **`memory_read`** - Read any file by path -- **`memory_tree`** - View workspace structure as a tree (depth parameter, default 1) +- **`memory_tree`** - View workspace structure as a tree (depth parameter, + default 1) ## Hybrid Search (RRF) Combines full-text search and vector similarity using Reciprocal Rank Fusion: -``` +```text score(d) = Σ 1/(k + rank(d)) for each method where d appears ``` -Default k=60. Results from both methods are combined, with documents appearing in both getting boosted scores. +Default k=60. Results from both methods are combined, with documents +appearing in both getting boosted scores. **Backend differences:** -- **PostgreSQL:** `ts_rank_cd` for FTS, pgvector cosine distance for vectors, full RRF -- **libSQL:** FTS5 plus vector search; uses `vector_top_k(...)` when a compatible fixed-dimension index exists, otherwise brute-force cosine similarity in Rust + +- **PostgreSQL:** `ts_rank_cd` for FTS, pgvector cosine distance for vectors, + full RRF +- **libSQL:** FTS5 plus vector search; uses `vector_top_k(...)` when a + compatible fixed-dimension index exists, otherwise brute-force cosine + similarity in Rust ## Heartbeat System @@ -108,6 +119,7 @@ spawn_heartbeat(config, workspace, llm, response_tx); ## Chunking Strategy Documents are chunked for search indexing: + - Default: 800 words per chunk (roughly 800 tokens for English) - 15% overlap between chunks for context preservation - Minimum chunk size: 50 words (tiny trailing chunks merge with previous) From a4f99ccb147f14a990380f46d826d24daa5fa6d5 Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 21:38:52 +0200 Subject: [PATCH 05/40] Clarify webhook address contract Document the current_addr() lifecycle contract, add a behavioural\ntest that exercises the production start() path with an ephemeral bind,\nand align the restart_with_listener test naming and diagnostics with the\nAPI it actually exercises.\n\nFor libSQL vector fallback handling, narrow the index-unavailable\nclassifier to SQLite-specific libsql error variants before inspecting\nvector-index capability messages so unrelated libsql errors continue to\npropagate. --- src/channels/webhook_server.rs | 178 ++++++++++++++++----------------- src/db/libsql/workspace.rs | 8 +- 2 files changed, 92 insertions(+), 94 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index f7c8cbb8c..12f6a0cdc 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -58,26 +58,31 @@ impl WebhookServer { self.bind_and_spawn(app).await } - /// Bind using an already-bound [`tokio::net::TcpListener`], merge all route - /// fragments, and spawn the server. The listener's local address is stored - /// in `config.addr` so `current_addr()` stays accurate. + /// Accept a pre-bound listener, merge route fragments, and spawn the + /// server. Eliminates the TOCTOU window that `start()` has between port + /// discovery and the actual bind, making it suitable for tests that + /// allocate ephemeral ports. + #[cfg(test)] pub async fn start_with_listener( &mut self, listener: tokio::net::TcpListener, ) -> Result<(), ChannelError> { - let addr = listener - .local_addr() - .map_err(|e| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: format!("local_addr failed: {e}"), - })?; - self.config.addr = addr; let mut app = Router::new(); for fragment in self.routes.drain(..) { app = app.merge(fragment); } self.merged_router = Some(app.clone()); - self.spawn_on_listener(listener, app).await + + let local_addr = listener + .local_addr() + .map_err(|e| ChannelError::StartupFailed { + name: "webhook_server".to_string(), + reason: format!("Failed to get listener local address: {e}"), + })?; + self.config.addr = local_addr; + + self.spawn_with_listener(listener, app); + Ok(()) } /// Bind a listener to the configured address and spawn the server task. @@ -188,9 +193,17 @@ impl WebhookServer { self.swap_listener(addr, listener, app).await } - /// Shut down the running server and restart it on the already-bound - /// `listener`, inheriting all previously added routes from - /// `self.merged_router`. + /// Gracefully shut down the current listener and rebind using a pre-bound + /// listener. Eliminates the TOCTOU window between port reservation and + /// bind, making it suitable for tests. + /// + /// Unlike [`restart_with_addr`], this test-only helper does not support + /// rollback. [`restart_with_addr`] binds the replacement first and can keep + /// the old listener alive if that bind fails. This method shuts down the + /// old listener before calling [`Self::spawn_with_listener`], so there is + /// no rollback path if spawning were to fail. That trade-off is acceptable + /// in tests because [`Self::spawn_with_listener`] is infallible. + #[cfg(test)] pub async fn restart_with_listener( &mut self, listener: tokio::net::TcpListener, @@ -203,20 +216,31 @@ impl WebhookServer { reason: "restart_with_listener called before start()".to_string(), })?; - // Extract address from the provided listener before mutating self, - // so that old_addr, old_shutdown_tx and old_handle remain intact - // until we know local_addr() succeeds. - let addr = listener + let new_addr = listener .local_addr() .map_err(|e| ChannelError::StartupFailed { name: "webhook_server".to_string(), - reason: format!("local_addr failed: {e}"), + reason: format!("Failed to get listener local address: {e}"), })?; - self.swap_listener(addr, listener, app).await + // Stop the old listener before spawning the new one. Unlike + // restart_with_addr, we do not provide rollback semantics because the + // new listener is already bound and assumed to be valid. + self.shutdown().await; + + self.config.addr = new_addr; + self.spawn_with_listener(listener, app); + Ok(()) } - /// Return the current bind address. + /// Return the server address currently stored in `self.config.addr`. + /// + /// Before the first successful [`Self::start`], [`Self::start_with_listener`], + /// [`Self::restart_with_addr`], or [`Self::restart_with_listener`] call, + /// this is only the configured address and may not correspond to a live + /// bound listener. After a successful start or restart, it reflects the + /// actual bound address, including any OS-assigned port chosen for `:0` + /// binds. pub fn current_addr(&self) -> SocketAddr { self.config.addr } @@ -239,33 +263,6 @@ impl WebhookServer { } } - /// Accept a pre-bound listener, merge route fragments, and spawn the - /// server. Eliminates the TOCTOU window that `start()` has between port - /// discovery and the actual bind, making it suitable for tests that - /// allocate ephemeral ports. - #[cfg(test)] - pub async fn start_with_listener( - &mut self, - listener: tokio::net::TcpListener, - ) -> Result<(), ChannelError> { - let mut app = Router::new(); - for fragment in self.routes.drain(..) { - app = app.merge(fragment); - } - self.merged_router = Some(app.clone()); - - let local_addr = listener - .local_addr() - .map_err(|e| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: format!("Failed to get listener local address: {e}"), - })?; - self.config.addr = local_addr; - - self.spawn_with_listener(listener, app); - Ok(()) - } - /// Spawn the axum server on an already-bound listener. fn spawn_with_listener(&mut self, listener: tokio::net::TcpListener, app: Router) { let addr = self.config.addr; @@ -287,46 +284,6 @@ impl WebhookServer { tracing::info!("Webhook server listening on {}", addr); self.handle = Some(handle); } - - /// Gracefully shut down the current listener and rebind using a pre-bound - /// listener. Eliminates the TOCTOU window between port reservation and - /// bind, making it suitable for tests. - /// - /// Unlike [`restart_with_addr`], this test-only helper does not support - /// rollback. [`restart_with_addr`] binds the replacement first and can keep - /// the old listener alive if that bind fails. This method shuts down the - /// old listener before calling [`Self::spawn_with_listener`], so there is - /// no rollback path if spawning were to fail. That trade-off is acceptable - /// in tests because [`Self::spawn_with_listener`] is infallible. - #[cfg(test)] - pub async fn restart_with_listener( - &mut self, - listener: tokio::net::TcpListener, - ) -> Result<(), ChannelError> { - let app = self - .merged_router - .clone() - .ok_or_else(|| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: "restart_with_listener called before start()".to_string(), - })?; - - let new_addr = listener - .local_addr() - .map_err(|e| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: format!("Failed to get listener local address: {e}"), - })?; - - // Stop the old listener before spawning the new one. Unlike - // restart_with_addr, we do not provide rollback semantics because the - // new listener is already bound and assumed to be valid. - self.shutdown().await; - - self.config.addr = new_addr; - self.spawn_with_listener(listener, app); - Ok(()) - } } #[cfg(test)] mod tests { @@ -375,7 +332,42 @@ mod tests { #[rstest] #[tokio::test] - async fn test_restart_with_addr_rebinds_listener( + async fn test_start_binds_ephemeral_addr_and_serves_health_check() + -> Result<(), Box> { + let mut server = WebhookServer::new(WebhookServerConfig { + addr: "127.0.0.1:0".parse()?, + }); + server.add_routes(Router::new().route( + "/health", + axum::routing::get(|| async { Json(json!({"status": "ok"})) }), + )); + + server.start().await?; + let addr = server.current_addr(); + + assert_ne!( + addr.port(), + 0, + "Server should resolve an ephemeral bind to a concrete port" + ); + + let response = reqwest::Client::new() + .get(format!("http://{}/health", addr)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Server should respond to health check after start()" + ); + + server.shutdown().await; + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_restart_with_listener( #[future] started_webhook_server: Result< StartedWebhookServer, Box, @@ -387,7 +379,7 @@ mod tests { assert_eq!( server.current_addr(), addr1, - "Server should be bound to initial address" + "Server should be bound to the initial address before restart_with_listener" ); let response = client @@ -397,7 +389,7 @@ mod tests { assert_eq!( response.status(), 200, - "First server should respond to health check" + "Server should respond to health check before restart_with_listener" ); let (listener, addr2) = bind_ephemeral_listener().await?; @@ -406,11 +398,11 @@ mod tests { assert_eq!( server.current_addr(), addr2, - "Server address should be updated after restart" + "Server address should be updated after restart_with_listener" ); assert_ne!( addr1, addr2, - "Address should change after restart_with_addr" + "Address should change after restart_with_listener" ); let response = client diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index a00b72e6c..fadb7e196 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -32,7 +32,13 @@ enum VectorSearchOutcome { } fn is_missing_vector_index_error(error: &libsql::Error) -> bool { - let error_message = error.to_string().to_ascii_lowercase(); + let sqlite_message = match error { + libsql::Error::SqliteFailure(_, message) + | libsql::Error::RemoteSqliteFailure(_, _, message) => message, + _ => return false, + }; + + let error_message = sqlite_message.to_ascii_lowercase(); error_message.contains("vector_top_k") || error_message.contains("no such function") From 2976267dc008f70e59194789b8ba773765300a41 Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 21:44:20 +0200 Subject: [PATCH 06/40] Cover production webhook restart path Verify the latest review findings against the current branch and only\napply the remaining live items.\n\nAdd behavioural coverage for the production webhook start/restart_with_addr\npath, clarify the start_with_listener Rustdoc, and update the libSQL\nbackend guidance to match the post-V9 embedding storage and fallback\nbehaviour.\n\nFor libSQL brute-force vector search, skip candidate embeddings whose\ndimension does not match the query embedding and cover that regression\nwith a focused mixed-dimension test. --- src/channels/webhook_server.rs | 69 ++++++++++++++++++++++++++++++++-- src/db/CLAUDE.md | 2 +- src/db/libsql/workspace.rs | 58 ++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 4 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 12f6a0cdc..fbbee2690 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -59,9 +59,13 @@ impl WebhookServer { } /// Accept a pre-bound listener, merge route fragments, and spawn the - /// server. Eliminates the TOCTOU window that `start()` has between port - /// discovery and the actual bind, making it suitable for tests that - /// allocate ephemeral ports. + /// server. + /// + /// Unlike [`Self::start`], this test-only entrypoint accepts a listener + /// that is already bound, eliminating the TOCTOU window between external + /// test port allocation and the server bind. That makes it suitable for + /// tests that reserve ephemeral ports before handing ownership to the + /// server. #[cfg(test)] pub async fn start_with_listener( &mut self, @@ -365,6 +369,65 @@ mod tests { Ok(()) } + #[rstest] + #[tokio::test] + async fn test_start_and_restart_with_addr_use_production_bind_path() + -> Result<(), Box> { + let mut server = WebhookServer::new(WebhookServerConfig { + addr: "127.0.0.1:0".parse()?, + }); + server.add_routes(Router::new().route( + "/health", + axum::routing::get(|| async { Json(json!({"status": "ok"})) }), + )); + + server.start().await?; + let addr1 = server.current_addr(); + assert_ne!( + addr1.port(), + 0, + "Server should resolve the initial ephemeral bind to a concrete port" + ); + + let client = reqwest::Client::new(); + let response = client + .get(format!("http://{}/health", addr1)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Server should respond to health check after start()" + ); + + let restart_addr: SocketAddr = "127.0.0.1:0".parse()?; + server.restart_with_addr(restart_addr).await?; + let addr2 = server.current_addr(); + + assert_ne!( + addr2.port(), + 0, + "Server should resolve the restarted ephemeral bind to a concrete port" + ); + assert_ne!( + addr1, addr2, + "Address should change after restart_with_addr" + ); + + let response = client + .get(format!("http://{}/health", addr2)) + .send() + .await?; + assert_eq!( + response.status(), + 200, + "Server should respond to health check after restart_with_addr" + ); + + server.shutdown().await; + Ok(()) + } + #[rstest] #[tokio::test] async fn test_restart_with_listener( diff --git a/src/db/CLAUDE.md b/src/db/CLAUDE.md index 78a6d055c..2424b7afe 100644 --- a/src/db/CLAUDE.md +++ b/src/db/CLAUDE.md @@ -88,7 +88,7 @@ The `Database` supertrait is composed of seven sub-traits. Leaf consumers can de | Numeric/Decimal | `NUMERIC` | `TEXT` (preserves `rust_decimal` precision) | | Arrays | `TEXT[]` | `TEXT` (JSON-encoded array) | | Booleans | `BOOLEAN` | `INTEGER` (0/1) | -| Vector embeddings | `VECTOR` (any dim, V9 removed fixed 1536) | `F32_BLOB(1536)` via `libsql_vector_idx` | +| Vector embeddings | `VECTOR` (any dim, V9 removed fixed 1536) | `BLOB`; V9 dropped `libsql_vector_idx`, so `vector_top_k(...)` falls back to brute-force cosine similarity in Rust when unavailable | | Full-text search | `tsvector` + `ts_rank_cd` | FTS5 virtual table + sync triggers | | JSON path update | `jsonb_set(col, '{key}', val)` | `json_patch(col, '{"key": val}')` | | PL/pgSQL | Functions | Triggers (no stored procs in SQLite) | diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index fadb7e196..578d94f26 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -139,6 +139,15 @@ impl LibSqlBackend { if chunk_embedding.is_empty() { continue; } + if chunk_embedding.len() != query_embedding.len() { + tracing::debug!( + "Skipping chunk {} because embedding dimension {} does not match query dimension {}", + chunk_id, + chunk_embedding.len(), + query_embedding.len() + ); + continue; + } // Compute cosine similarity let similarity = cosine_similarity(query_embedding, &chunk_embedding); @@ -980,4 +989,53 @@ mod tests { assert_eq!(results[0].vector_rank, Some(1)); assert!(results[0].is_hybrid()); } + + #[tokio::test] + async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { + use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; + + let tempdir = tempfile::tempdir().expect("failed to create tempdir"); + let backend = LibSqlBackend::new_local(&tempdir.path().join("workspace.db")) + .await + .expect("failed to create local libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/mixed-dim.md") + .await + .expect("failed to create mixed-dimension search document"); + backend + .update_document(document.id, "mixed dimension vector search test") + .await + .expect("failed to update mixed-dimension search document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "same-dimension chunk", + embedding: Some(&[1.0, 0.0, 0.0]), + }) + .await + .expect("failed to insert same-dimension chunk"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 1, + content: "different-dimension chunk", + embedding: Some(&[1.0, 0.0]), + }) + .await + .expect("failed to insert different-dimension chunk"); + + let results = backend + .brute_force_vector_search("default", None, &[1.0, 0.0, 0.0], 10) + .await + .expect("failed to run brute-force vector search"); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].content, "same-dimension chunk"); + } } From 97e2e85cc965441cb9d793b1ccd33ba9a518229c Mon Sep 17 00:00:00 2001 From: leynos Date: Mon, 13 Apr 2026 23:22:05 +0200 Subject: [PATCH 07/40] Fix libsql in-memory workspace tests Verify the latest review findings against the current tree and only\napply the changes that were still live.\n\nReplace cfg(test)-only rustdoc links in the webhook server docs,\naggregate mismatched embedding-dimension logging, and make the\nhybrid-search regression test prove the vector-index-unavailable path\nbefore asserting fused ranks.\n\nSwitch the affected libSQL workspace tests to new_memory() and update\nthat constructor to use an ephemeral temp-backed database so schema and\ndata remain visible across fresh connections during tests. Update the\ndatabase guidance to match the current memory_chunks fallback behaviour. --- src/channels/webhook_server.rs | 4 ++-- src/db/CLAUDE.md | 4 +++- src/db/libsql/mod.rs | 33 +++++++++++++++++++++++----- src/db/libsql/workspace.rs | 39 +++++++++++++++++++++++----------- 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index fbbee2690..8ad407f1c 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -239,8 +239,8 @@ impl WebhookServer { /// Return the server address currently stored in `self.config.addr`. /// - /// Before the first successful [`Self::start`], [`Self::start_with_listener`], - /// [`Self::restart_with_addr`], or [`Self::restart_with_listener`] call, + /// Before the first successful [`Self::start`], `start_with_listener`, + /// [`Self::restart_with_addr`], or `restart_with_listener` call, /// this is only the configured address and may not correspond to a live /// bound listener. After a successful start or restart, it reflects the /// actual bound address, including any OS-assigned port chosen for `:0` diff --git a/src/db/CLAUDE.md b/src/db/CLAUDE.md index 2424b7afe..bb102fb55 100644 --- a/src/db/CLAUDE.md +++ b/src/db/CLAUDE.md @@ -122,7 +122,9 @@ The `Database` supertrait is composed of seven sub-traits. Leaf consumers can de **Workspace/Memory:** - `memory_documents` — flexible path-based files -- `memory_chunks` — chunked content with FTS + vector indexes +- `memory_chunks` — chunked content with FTS; embeddings are stored as plain + BLOBs after V9, so `vector_top_k(...)` may fall back to brute-force cosine + similarity when the fixed-dimension vector index is unavailable - `memory_chunks_fts` — FTS5 virtual table (libSQL) / `tsvector` column (PostgreSQL) - `heartbeat_state` — periodic execution tracking diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index fd1446dda..53c93aa50 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -15,7 +15,7 @@ mod sandbox; mod settings; mod tool_failures; mod workspace; - +use std::path::PathBuf; use std::path::Path; use std::sync::Arc; @@ -30,6 +30,7 @@ pub(crate) use helpers::{ get_ts, opt_text, opt_text_owned, parse_job_state, }; pub(crate) use row_conversion::row_to_memory_document; +use uuid::Uuid; /// libSQL/Turso database backend. /// @@ -38,6 +39,7 @@ pub(crate) use row_conversion::row_to_memory_document; /// create their own connections per-operation. pub struct LibSqlBackend { db: Arc, + temp_path: Option, } impl LibSqlBackend { @@ -59,19 +61,27 @@ impl LibSqlBackend { .build() .await .map_err(|e| DatabaseError::Pool(format!("Failed to open libSQL database: {e}")))?; - Ok(Self { db: Arc::new(db) }) + Ok(Self { + db: Arc::new(db), + temp_path: None, + }) } /// Create a new in-memory database (for testing). pub async fn new_memory() -> Result { - let db = libsql::Builder::new_local(":memory:") + let temp_path = + std::env::temp_dir().join(format!("axinite-libsql-memory-{}.db", Uuid::new_v4())); + let db = libsql::Builder::new_local(&temp_path) .build() .await .map_err(|e| { DatabaseError::Pool(format!("Failed to create in-memory database: {}", e)) })?; - Ok(Self { db: Arc::new(db) }) + Ok(Self { + db: Arc::new(db), + temp_path: Some(temp_path), + }) } /// Create with Turso cloud sync (embedded replica). @@ -85,7 +95,10 @@ impl LibSqlBackend { .build() .await .map_err(|e| DatabaseError::Pool(format!("Failed to open remote replica: {e}")))?; - Ok(Self { db: Arc::new(db) }) + Ok(Self { + db: Arc::new(db), + temp_path: None, + }) } /// Get a shared reference to the underlying database handle. @@ -135,6 +148,15 @@ impl LibSqlBackend { } } +impl Drop for LibSqlBackend { + fn drop(&mut self) { + if let Some(path) = &self.temp_path { + let _ = std::fs::remove_file(path); + let _ = std::fs::remove_file(path.with_extension("db-wal")); + let _ = std::fs::remove_file(path.with_extension("db-shm")); + } + } +} impl NativeDatabase for LibSqlBackend { async fn persist_terminal_result_and_status( &self, @@ -355,6 +377,7 @@ mod tests { for _ in 0..10 { let b = LibSqlBackend { db: backend.shared_db(), + temp_path: None, }; handles.push(tokio::spawn(async move { b.connect().await })); } diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index 578d94f26..b91ffa564 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -106,6 +106,7 @@ impl LibSqlBackend { query_embedding: &[f32], ) -> Result, WorkspaceError> { let mut candidates = Vec::new(); + let mut skipped_mismatched_dims = 0usize; while let Some(row) = rows .next() .await @@ -140,12 +141,7 @@ impl LibSqlBackend { continue; } if chunk_embedding.len() != query_embedding.len() { - tracing::debug!( - "Skipping chunk {} because embedding dimension {} does not match query dimension {}", - chunk_id, - chunk_embedding.len(), - query_embedding.len() - ); + skipped_mismatched_dims += 1; continue; } @@ -160,6 +156,15 @@ impl LibSqlBackend { similarity, }); } + + if skipped_mismatched_dims > 0 { + tracing::debug!( + "Brute-force vector search skipped {} candidates with embedding dimension mismatches (query dimension: {})", + skipped_mismatched_dims, + query_embedding.len() + ); + } + Ok(candidates) } @@ -945,10 +950,9 @@ mod tests { use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; use crate::workspace::SearchConfig; - let tempdir = tempfile::tempdir().expect("failed to create tempdir"); - let backend = LibSqlBackend::new_local(&tempdir.path().join("workspace.db")) + let backend = LibSqlBackend::new_memory() .await - .expect("failed to create local libsql backend"); + .expect("failed to create in-memory libsql backend"); backend .run_migrations() .await @@ -972,6 +976,18 @@ mod tests { .await .expect("failed to insert search test chunk"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection for vector precondition"); + let vector_outcome = vector_ranked_results(&conn, "default", None, &[1.0, 0.0, 0.0], 5) + .await + .expect("failed to run vector search precondition"); + assert!( + matches!(vector_outcome, VectorSearchOutcome::IndexUnavailable), + "Test requires the vector-index-unavailable path before hybrid fallback assertions" + ); + let results = backend .hybrid_search(HybridSearchParams { user_id: "default", @@ -994,10 +1010,9 @@ mod tests { async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; - let tempdir = tempfile::tempdir().expect("failed to create tempdir"); - let backend = LibSqlBackend::new_local(&tempdir.path().join("workspace.db")) + let backend = LibSqlBackend::new_memory() .await - .expect("failed to create local libsql backend"); + .expect("failed to create in-memory libsql backend"); backend .run_migrations() .await From 51bf6b9c6b068551a4b25b0d43e9ea0116c0b028 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 14:47:32 +0200 Subject: [PATCH 08/40] Extract libsql vector search helpers Refactor vector_ranked_results with two local helpers so the\nfunction stays below the requested size limit without changing its\nbehaviour, error mapping, or tracing output.\n\nMove embedding JSON serialisation into embedding_to_vector_json()\nand the indexed-row collection path into collect_vector_index_rows().\nThis keeps the vector query control flow intact while making the main\nfunction easier to scan and test. --- src/db/libsql/workspace.rs | 89 +++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs index b91ffa564..eb1edf60b 100644 --- a/src/db/libsql/workspace.rs +++ b/src/db/libsql/workspace.rs @@ -260,6 +260,53 @@ async fn fts_ranked_results( Ok(results) } +fn embedding_to_vector_json(embedding: &[f32]) -> String { + format!( + "[{}]", + embedding + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ) +} + +async fn collect_vector_index_rows( + mut rows: libsql::Rows, + limit: i64, +) -> Result { + let mut results = Vec::new(); + while let Some(row) = match rows.next().await { + Ok(row) => row, + Err(e) => { + if is_missing_vector_index_error(&e) { + tracing::debug!( + "Vector index row fetch failed, brute-force fallback required: {e}" + ); + return Ok(VectorSearchOutcome::IndexUnavailable); + } + + return Err(WorkspaceError::SearchFailed { + reason: format!("Vector index row fetch failed: {e}"), + }); + } + } { + results.push(RankedResult { + chunk_id: get_text(&row, 0).parse().unwrap_or_default(), + document_id: get_text(&row, 1).parse().unwrap_or_default(), + document_path: get_text(&row, 2), + content: get_text(&row, 3), + rank: results.len() as u32 + 1, + }); + } + tracing::debug!( + "libSQL vector index search returned {} results (pre-fusion limit: {})", + results.len(), + limit + ); + Ok(VectorSearchOutcome::Indexed(results)) +} + /// Execute vector similarity search via libSQL's vector index. /// /// Returns [`VectorSearchOutcome::IndexUnavailable`] when `vector_top_k(...)` @@ -272,14 +319,7 @@ async fn vector_ranked_results( embedding: &[f32], limit: i64, ) -> Result { - let vector_json = format!( - "[{}]", - embedding - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); + let vector_json = embedding_to_vector_json(embedding); // vector_top_k requires a libsql_vector_idx index. After the V9 // migration the index is dropped (to support flexible embedding @@ -298,38 +338,7 @@ async fn vector_ranked_results( ) .await { - Ok(mut rows) => { - let mut results = Vec::new(); - while let Some(row) = match rows.next().await { - Ok(row) => row, - Err(e) => { - if is_missing_vector_index_error(&e) { - tracing::debug!( - "Vector index row fetch failed, brute-force fallback required: {e}" - ); - return Ok(VectorSearchOutcome::IndexUnavailable); - } - - return Err(WorkspaceError::SearchFailed { - reason: format!("Vector index row fetch failed: {e}"), - }); - } - } { - results.push(RankedResult { - chunk_id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), - document_path: get_text(&row, 2), - content: get_text(&row, 3), - rank: results.len() as u32 + 1, - }); - } - tracing::debug!( - "libSQL vector index search returned {} results (pre-fusion limit: {})", - results.len(), - limit - ); - Ok(VectorSearchOutcome::Indexed(results)) - } + Ok(rows) => collect_vector_index_rows(rows, limit).await, Err(e) => { if is_missing_vector_index_error(&e) { tracing::debug!( From 090aa6fb1afa3dab7652696d9ac20bb506f95669 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 14:51:41 +0200 Subject: [PATCH 09/40] Extract libsql backend constructor helpers Remove the duplicated parent-directory guard and constructor tail\nfrom the local and remote-replica libSQL constructors.\n\nAdd ensure_parent_dir() for the shared filesystem setup and from_db()\nfor the common non-temp database wrapper, while leaving new_memory()\nunchanged because it owns a temp-file path for cleanup. --- src/db/libsql/mod.rs | 49 +++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index 53c93aa50..66033b255 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -15,14 +15,13 @@ mod sandbox; mod settings; mod tool_failures; mod workspace; -use std::path::PathBuf; use std::path::Path; +use std::path::PathBuf; use std::sync::Arc; use crate::db::NativeDatabase; use crate::error::DatabaseError; use libsql::{Connection, Database as LibSqlDatabase}; -use tokio::fs; use crate::db::libsql_migrations; pub(crate) use helpers::{ @@ -43,28 +42,33 @@ pub struct LibSqlBackend { } impl LibSqlBackend { - /// Ensure the parent directory of `path` exists, creating it and all - /// ancestors if necessary. - async fn ensure_parent_dir(path: &Path) -> Result<(), DatabaseError> { + fn ensure_parent_dir(path: &Path) -> Result<(), DatabaseError> { if let Some(parent) = path.parent() { - fs::create_dir_all(parent).await.map_err(|e| { - DatabaseError::Pool(format!("Failed to create database directory: {e}")) + std::fs::create_dir_all(parent).map_err(|e| { + DatabaseError::Pool(format!("Failed to create database directory: {}", e)) })?; } Ok(()) } + /// Wraps a built `libsql::Database` in `Self` with no temp-file path. + fn from_db(db: libsql::Database) -> Self { + Self { + db: Arc::new(db), + temp_path: None, + } + } + /// Create a new local embedded database. pub async fn new_local(path: &Path) -> Result { - Self::ensure_parent_dir(path).await?; + Self::ensure_parent_dir(path)?; + let db = libsql::Builder::new_local(path) .build() .await - .map_err(|e| DatabaseError::Pool(format!("Failed to open libSQL database: {e}")))?; - Ok(Self { - db: Arc::new(db), - temp_path: None, - }) + .map_err(|e| DatabaseError::Pool(format!("Failed to open libSQL database: {}", e)))?; + + Ok(Self::from_db(db)) } /// Create a new in-memory database (for testing). @@ -90,15 +94,14 @@ impl LibSqlBackend { url: &str, auth_token: &str, ) -> Result { - Self::ensure_parent_dir(path).await?; + Self::ensure_parent_dir(path)?; + let db = libsql::Builder::new_remote_replica(path, url.to_string(), auth_token.to_string()) .build() .await - .map_err(|e| DatabaseError::Pool(format!("Failed to open remote replica: {e}")))?; - Ok(Self { - db: Arc::new(db), - temp_path: None, - }) + .map_err(|e| DatabaseError::Pool(format!("Failed to open remote replica: {}", e)))?; + + Ok(Self::from_db(db)) } /// Get a shared reference to the underlying database handle. @@ -146,6 +149,14 @@ impl LibSqlBackend { last_err.map(|e| e.to_string()).unwrap_or_default() ))) } + + /// Wraps a built `libsql::Database` in `Self` with no temp-file path. + fn from_db(db: libsql::Database) -> Self { + Self { + db: Arc::new(db), + temp_path: None, + } + } } impl Drop for LibSqlBackend { From a44907a99335a4daa07c07d02ab9c849443e3221 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 16:17:08 +0200 Subject: [PATCH 10/40] Fix database integrations wording Correct the hybrid-search bullet in docs/database-integrations.md\nso the sentence reads grammatically and no longer repeats\n"matters". --- docs/database-integrations.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/database-integrations.md b/docs/database-integrations.md index 7236fd3a7..4f5acf194 100644 --- a/docs/database-integrations.md +++ b/docs/database-integrations.md @@ -342,8 +342,8 @@ Table 4. Current backend comparison. Choose PostgreSQL when: -- full hybrid workspace search quality matters and search latency under larger - workspaces matters too +- full hybrid workspace search quality and search latency under larger + workspaces matter - the deployment already has PostgreSQL 15+ with pgvector available - query behaviour should match the default and most-tested path as closely as possible From 5d74e76474a2bbe843e570f9d2b8aab9c9399cb5 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 16:30:15 +0200 Subject: [PATCH 11/40] Fix webhook bind state and libsql cleanup ownership Verify the latest review findings against the current branch and only\napply the ones that were still live.\n\nKeep the configured webhook bind address separate from the resolved\nlistener address so ephemeral :0 binds no longer rewrite config state,\nupdate reload matching to treat port 0 as a same-host wildcard, and\ntighten the restart test to prove the old listener is gone.\n\nMove libSQL temp-file cleanup onto the shared database handle so the\nfile is deleted when the final Arc owner drops, then update the shared\nhandle consumers and developer guidance to match the temp-file-backed\nnew_memory() behaviour. --- src/channels/wasm/storage.rs | 4 +- src/channels/webhook_server.rs | 271 ++++++++++------------ src/db/CLAUDE.md | 13 +- src/db/libsql/mod.rs | 79 ++++--- src/db/mod.rs | 2 +- src/reload/manager.rs | 12 +- src/reload/manager/tests/restart_tests.rs | 32 +++ src/secrets/store.rs | 4 +- src/tools/wasm/storage.rs | 4 +- 9 files changed, 233 insertions(+), 188 deletions(-) diff --git a/src/channels/wasm/storage.rs b/src/channels/wasm/storage.rs index 3f3798642..9ad51a26d 100644 --- a/src/channels/wasm/storage.rs +++ b/src/channels/wasm/storage.rs @@ -351,12 +351,12 @@ fn pg_row_to_channel( /// matching the connection-per-request pattern used by the main `LibSqlBackend`. #[cfg(feature = "libsql")] pub struct LibSqlWasmChannelStore { - db: std::sync::Arc, + db: std::sync::Arc, } #[cfg(feature = "libsql")] impl LibSqlWasmChannelStore { - pub fn new(db: std::sync::Arc) -> Self { + pub fn new(db: std::sync::Arc) -> Self { Self { db } } diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 8ad407f1c..3d9f08293 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -23,6 +23,7 @@ pub struct WebhookServerConfig { /// `start()` call binds the listener and spawns the server task. pub struct WebhookServer { config: WebhookServerConfig, + resolved_addr: Option, routes: Vec, /// Merged router saved after start() for restart_with_addr(). merged_router: Option, @@ -35,6 +36,7 @@ impl WebhookServer { pub fn new(config: WebhookServerConfig) -> Self { Self { config, + resolved_addr: None, routes: Vec::new(), merged_router: None, shutdown_tx: None, @@ -58,37 +60,6 @@ impl WebhookServer { self.bind_and_spawn(app).await } - /// Accept a pre-bound listener, merge route fragments, and spawn the - /// server. - /// - /// Unlike [`Self::start`], this test-only entrypoint accepts a listener - /// that is already bound, eliminating the TOCTOU window between external - /// test port allocation and the server bind. That makes it suitable for - /// tests that reserve ephemeral ports before handing ownership to the - /// server. - #[cfg(test)] - pub async fn start_with_listener( - &mut self, - listener: tokio::net::TcpListener, - ) -> Result<(), ChannelError> { - let mut app = Router::new(); - for fragment in self.routes.drain(..) { - app = app.merge(fragment); - } - self.merged_router = Some(app.clone()); - - let local_addr = listener - .local_addr() - .map_err(|e| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: format!("Failed to get listener local address: {e}"), - })?; - self.config.addr = local_addr; - - self.spawn_with_listener(listener, app); - Ok(()) - } - /// Bind a listener to the configured address and spawn the server task. /// Private helper used by both start() and restart_with_addr(). async fn bind_and_spawn(&mut self, app: Router) -> Result<(), ChannelError> { @@ -96,59 +67,47 @@ impl WebhookServer { .await .map_err(|e| ChannelError::StartupFailed { name: "webhook_server".to_string(), - reason: format!("Failed to bind to {}: {e}", self.config.addr), + reason: format!("Failed to bind to {}: {}", self.config.addr, e), })?; - let addr = listener + + let resolved_addr = listener .local_addr() .map_err(|e| ChannelError::StartupFailed { name: "webhook_server".to_string(), - reason: format!("local_addr failed: {e}"), + reason: format!("Failed to get listener local address: {e}"), })?; - self.config.addr = addr; - self.spawn_on_listener(listener, app).await - } + self.resolved_addr = Some(resolved_addr); - /// Spawn the server on an already-bound listener. - /// Private helper that contains the common shutdown-channel and task-spawn logic. - async fn spawn_on_listener( - &mut self, - listener: tokio::net::TcpListener, - app: Router, - ) -> Result<(), ChannelError> { - tracing::info!("Webhook server listening on {}", self.config.addr); - let (shutdown_tx, shutdown_rx) = oneshot::channel(); - self.shutdown_tx = Some(shutdown_tx); - let handle = tokio::spawn(async move { - if let Err(e) = axum::serve(listener, app) - .with_graceful_shutdown(async { - let _ = shutdown_rx.await; - tracing::debug!("Webhook server shutting down"); - }) - .await - { - tracing::error!("Webhook server error: {e}"); - } - }); - self.handle = Some(handle); + self.spawn_with_listener(listener, app, resolved_addr); Ok(()) } - /// Shared restart kernel. Saves current listener state, spawns the server on - /// `listener` bound at `new_addr`, shuts down the old server on success, or - /// restores the previous state on failure. - async fn swap_listener( - &mut self, - new_addr: SocketAddr, - listener: tokio::net::TcpListener, - app: Router, - ) -> Result<(), ChannelError> { + /// Gracefully shut down the current listener and rebind to a new address. + /// The merged router from the original `start()` call is reused. + /// + /// If binding to the new address fails, the old listener remains active and + /// state is restored. This prevents a denial-of-service if the new address + /// is invalid or already in use. + pub async fn restart_with_addr(&mut self, new_addr: SocketAddr) -> Result<(), ChannelError> { + let app = self + .merged_router + .clone() + .ok_or_else(|| ChannelError::StartupFailed { + name: "webhook_server".to_string(), + reason: "restart_with_addr called before start()".to_string(), + })?; + + // Save old state for rollback if new bind fails let old_addr = self.config.addr; + let old_resolved_addr = self.resolved_addr; let old_shutdown_tx = self.shutdown_tx.take(); let old_handle = self.handle.take(); + // Update config to new address and try to bind self.config.addr = new_addr; - match self.spawn_on_listener(listener, app).await { + match self.bind_and_spawn(app).await { Ok(()) => { + // New listener is running, gracefully shut down the old one if let Some(tx) = old_shutdown_tx { let _ = tx.send(()); } @@ -158,7 +117,9 @@ impl WebhookServer { Ok(()) } Err(e) => { + // Restore old state; old listener remains active self.config.addr = old_addr; + self.resolved_addr = old_resolved_addr; self.shutdown_tx = old_shutdown_tx; self.handle = old_handle; Err(e) @@ -166,35 +127,91 @@ impl WebhookServer { } } - /// Gracefully shut down the current listener and rebind to a new address. - /// The merged router from the original `start()` call is reused. + /// Return the current listener address. /// - /// If binding to the new address fails, the old listener remains active and - /// state is restored. This prevents a denial-of-service if the new address - /// is invalid or already in use. - pub async fn restart_with_addr(&mut self, new_addr: SocketAddr) -> Result<(), ChannelError> { - let app = self - .merged_router - .clone() - .ok_or_else(|| ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: "restart_with_addr called before start()".to_string(), - })?; + /// Before the first successful [`Self::start`], start_with_listener, + /// [`Self::restart_with_addr`], or restart_with_listener call, this + /// returns the configured bind address from `self.config.addr` and it may + /// not correspond to a live listener. After a successful start or restart, + /// it returns the resolved listener address, including any OS-assigned port + /// chosen for `:0` binds, while leaving `self.config.addr` unchanged. + pub fn current_addr(&self) -> SocketAddr { + self.resolved_addr.unwrap_or(self.config.addr) + } - let listener = tokio::net::TcpListener::bind(new_addr).await.map_err(|e| { - ChannelError::StartupFailed { - name: "webhook_server".to_string(), - reason: format!("Failed to bind to {new_addr}: {e}"), - } - })?; - let addr = listener + /// Returns whether the server currently has a running listener task. + pub fn is_running(&self) -> bool { + self.handle + .as_ref() + .map(|handle| !handle.is_finished()) + .unwrap_or(false) + } + + /// Signal graceful shutdown and wait for the server task to finish. + pub async fn shutdown(&mut self) { + if let Some(tx) = self.shutdown_tx.take() { + let _ = tx.send(()); + } + if let Some(handle) = self.handle.take() { + let _ = handle.await; + } + } + + /// Accept a pre-bound listener, merge route fragments, and spawn the + /// server. + /// + /// Unlike [`Self::start`], this test-only entrypoint accepts a listener + /// that is already bound, eliminating the TOCTOU window between external + /// test port allocation and the server bind. That makes it suitable for + /// tests that reserve ephemeral ports before handing ownership to the + /// server. + #[cfg(test)] + pub async fn start_with_listener( + &mut self, + listener: tokio::net::TcpListener, + ) -> Result<(), ChannelError> { + let mut app = Router::new(); + for fragment in self.routes.drain(..) { + app = app.merge(fragment); + } + self.merged_router = Some(app.clone()); + + let local_addr = listener .local_addr() .map_err(|e| ChannelError::StartupFailed { name: "webhook_server".to_string(), - reason: format!("local_addr failed: {e}"), + reason: format!("Failed to get listener local address: {e}"), })?; + self.resolved_addr = Some(local_addr); + + self.spawn_with_listener(listener, app, local_addr); + Ok(()) + } - self.swap_listener(addr, listener, app).await + /// Spawn the axum server on an already-bound listener. + fn spawn_with_listener( + &mut self, + listener: tokio::net::TcpListener, + app: Router, + addr: SocketAddr, + ) { + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + self.shutdown_tx = Some(shutdown_tx); + + let handle = tokio::spawn(async move { + if let Err(e) = axum::serve(listener, app) + .with_graceful_shutdown(async { + let _ = shutdown_rx.await; + tracing::debug!("Webhook server shutting down"); + }) + .await + { + tracing::error!("Webhook server error: {}", e); + } + }); + + tracing::info!("Webhook server listening on {}", addr); + self.handle = Some(handle); } /// Gracefully shut down the current listener and rebind using a pre-bound @@ -232,63 +249,12 @@ impl WebhookServer { // new listener is already bound and assumed to be valid. self.shutdown().await; - self.config.addr = new_addr; - self.spawn_with_listener(listener, app); + self.resolved_addr = Some(new_addr); + self.spawn_with_listener(listener, app, new_addr); Ok(()) } - - /// Return the server address currently stored in `self.config.addr`. - /// - /// Before the first successful [`Self::start`], `start_with_listener`, - /// [`Self::restart_with_addr`], or `restart_with_listener` call, - /// this is only the configured address and may not correspond to a live - /// bound listener. After a successful start or restart, it reflects the - /// actual bound address, including any OS-assigned port chosen for `:0` - /// binds. - pub fn current_addr(&self) -> SocketAddr { - self.config.addr - } - - /// Returns whether the server currently has a running listener task. - pub fn is_running(&self) -> bool { - self.handle - .as_ref() - .map(|handle| !handle.is_finished()) - .unwrap_or(false) - } - - /// Signal graceful shutdown and wait for the server task to finish. - pub async fn shutdown(&mut self) { - if let Some(tx) = self.shutdown_tx.take() { - let _ = tx.send(()); - } - if let Some(handle) = self.handle.take() { - let _ = handle.await; - } - } - - /// Spawn the axum server on an already-bound listener. - fn spawn_with_listener(&mut self, listener: tokio::net::TcpListener, app: Router) { - let addr = self.config.addr; - let (shutdown_tx, shutdown_rx) = oneshot::channel(); - self.shutdown_tx = Some(shutdown_tx); - - let handle = tokio::spawn(async move { - if let Err(e) = axum::serve(listener, app) - .with_graceful_shutdown(async { - let _ = shutdown_rx.await; - tracing::debug!("Webhook server shutting down"); - }) - .await - { - tracing::error!("Webhook server error: {}", e); - } - }); - - tracing::info!("Webhook server listening on {}", addr); - self.handle = Some(handle); - } } + #[cfg(test)] mod tests { use std::net::TcpListener as StdTcpListener; @@ -414,6 +380,21 @@ mod tests { "Address should change after restart_with_addr" ); + let old_result = tokio::time::timeout( + std::time::Duration::from_millis(200), + client.get(format!("http://{}/health", addr1)).send(), + ) + .await; + assert!( + old_result.is_err() + || old_result.as_ref().is_ok_and(|request_result| { + request_result.as_ref().map_or(true, |response| { + response.status() != reqwest::StatusCode::OK + }) + }), + "Old address should not respond after restart_with_addr" + ); + let response = client .get(format!("http://{}/health", addr2)) .send() diff --git a/src/db/CLAUDE.md b/src/db/CLAUDE.md index bb102fb55..c32e5b6f8 100644 --- a/src/db/CLAUDE.md +++ b/src/db/CLAUDE.md @@ -161,13 +161,17 @@ DATABASE_BACKEND=libsql LIBSQL_PATH=~/.ironclaw/test.db cargo run # Use Turso cloud (embedded replica syncs local file to cloud) DATABASE_BACKEND=libsql LIBSQL_URL=libsql://xxx.turso.io LIBSQL_AUTH_TOKEN=xxx cargo run -# In-memory (tests only — data is lost when the process exits) +# Temp-file-backed test database (data is lost when the last shared handle drops) # Use LibSqlBackend::new_memory() directly in test code ``` ## Testing the libSQL Backend -Use `LibSqlBackend::new_memory()` in unit tests — no files, no cleanup required: +Use `LibSqlBackend::new_memory()` in unit tests when fresh connections need to +share state. Despite the name, it creates a temp-file-backed database and +stores the `temp_path` on the shared database handle so the file persists until +the final `Arc` clone is dropped. Tests do not need manual cleanup, but they +should assume a temp file exists for the lifetime of the shared handle: ```rust #[tokio::test] @@ -178,7 +182,10 @@ async fn test_my_feature() { } ``` -For concurrency tests that require multiple connections sharing state, use `LibSqlBackend::new_local(&tmp_path)` with a `tempfile::tempdir()`. In-memory databases do not share state between connections. +`LibSqlBackend::new_memory()` is appropriate for multi-connection tests because +all fresh connections share the same temp-file-backed state. Use +`LibSqlBackend::new_local(&tmp_path)` with a `tempfile::tempdir()` when a test +needs an explicit on-disk path it can inspect or control directly. ## Sharing the libSQL Database Handle diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index 66033b255..d4940e58c 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -4,7 +4,7 @@ //! Supports three modes: //! - Local embedded (file-based, no server needed) //! - Turso cloud with embedded replica (sync to cloud) -//! - In-memory (for testing) +//! - Temp-file-backed test mode (for testing) mod conversations; pub(crate) mod helpers; @@ -15,30 +15,27 @@ mod sandbox; mod settings; mod tool_failures; mod workspace; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; -use crate::db::NativeDatabase; -use crate::error::DatabaseError; -use libsql::{Connection, Database as LibSqlDatabase}; - -use crate::db::libsql_migrations; +/// libSQL/Turso database backend. +/// +/// Stores the `Database` handle in an `Arc` so that the same underlying +/// database can be shared with stores (SecretsStore, WasmToolStore) that +/// create their own connections per-operation. +/// +/// When constructed through [`LibSqlBackend::new_memory`], the optional +/// `temp_path` keeps test-database cleanup tied to the final shared owner. +pub struct LibSqlDatabase { + db: RawLibSqlDatabase, + temp_path: Option, +} pub(crate) use helpers::{ fmt_opt_ts, fmt_ts, get_i64, get_json, get_opt_bool, get_opt_text, get_opt_ts, get_text, get_ts, opt_text, opt_text_owned, parse_job_state, }; pub(crate) use row_conversion::row_to_memory_document; -use uuid::Uuid; -/// libSQL/Turso database backend. -/// -/// Stores the `Database` handle in an `Arc` so that the same underlying -/// database can be shared with stores (SecretsStore, WasmToolStore) that -/// create their own connections per-operation. pub struct LibSqlBackend { db: Arc, - temp_path: Option, } impl LibSqlBackend { @@ -52,10 +49,9 @@ impl LibSqlBackend { } /// Wraps a built `libsql::Database` in `Self` with no temp-file path. - fn from_db(db: libsql::Database) -> Self { + fn from_db(db: RawLibSqlDatabase) -> Self { Self { - db: Arc::new(db), - temp_path: None, + db: Arc::new(LibSqlDatabase::new(db, None)), } } @@ -71,7 +67,13 @@ impl LibSqlBackend { Ok(Self::from_db(db)) } - /// Create a new in-memory database (for testing). + /// Create a temporary file-backed database for tests. + /// + /// Despite the name, this does not use SQLite's `:memory:` mode. It + /// creates a temp file whose path is retained for the lifetime of the + /// returned database so multiple fresh connections share the same state. + /// The temp file and its sidecar files are cleaned up when the final + /// shared database handle is dropped. pub async fn new_memory() -> Result { let temp_path = std::env::temp_dir().join(format!("axinite-libsql-memory-{}.db", Uuid::new_v4())); @@ -83,8 +85,7 @@ impl LibSqlBackend { })?; Ok(Self { - db: Arc::new(db), - temp_path: Some(temp_path), + db: Arc::new(LibSqlDatabase::new(db, Some(temp_path))), }) } @@ -158,8 +159,7 @@ impl LibSqlBackend { } } } - -impl Drop for LibSqlBackend { +impl Drop for LibSqlDatabase { fn drop(&mut self) { if let Some(path) = &self.temp_path { let _ = std::fs::remove_file(path); @@ -258,13 +258,8 @@ mod tests { let mut rows = conn.query("PRAGMA journal_mode", ()).await.unwrap(); let row = rows.next().await.unwrap().unwrap(); let mode: String = row.get(0).unwrap(); - // In-memory databases use "memory" journal mode (WAL doesn't apply to :memory:), - // but the PRAGMA still executes without error. For file-based databases it returns "wal". - assert!( - mode == "wal" || mode == "memory", - "expected wal or memory, got: {}", - mode, - ); + // The temp-file-backed test database should still enable WAL mode. + assert!(mode == "wal", "expected wal, got: {}", mode,); } #[tokio::test] @@ -388,7 +383,6 @@ mod tests { for _ in 0..10 { let b = LibSqlBackend { db: backend.shared_db(), - temp_path: None, }; handles.push(tokio::spawn(async move { b.connect().await })); } @@ -403,3 +397,24 @@ mod tests { } } } + +impl Drop for LibSqlDatabase { + fn drop(&mut self) { + if let Some(path) = &self.temp_path { + let _ = std::fs::remove_file(path); + let _ = std::fs::remove_file(path.with_extension("db-wal")); + let _ = std::fs::remove_file(path.with_extension("db-shm")); + } + } +} + +impl LibSqlDatabase { + fn new(db: RawLibSqlDatabase, temp_path: Option) -> Self { + Self { db, temp_path } + } + + /// Create a fresh libSQL connection from the shared database handle. + pub fn connect(&self) -> Result { + self.db.connect() + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs index d1933761c..5cb87d253 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -88,7 +88,7 @@ pub struct DatabaseHandles { #[cfg(feature = "postgres")] pub pg_pool: Option, #[cfg(feature = "libsql")] - pub libsql_db: Option>, + pub libsql_db: Option>, } /// Connect to the database, run migrations, and return both the generic diff --git a/src/reload/manager.rs b/src/reload/manager.rs index c384f6eee..24ecbc29f 100644 --- a/src/reload/manager.rs +++ b/src/reload/manager.rs @@ -175,7 +175,7 @@ impl HotReloadManager { let is_running = controller.is_running().await; let old_addr = controller.current_addr().await; - if is_running && resolved_addrs.contains(&old_addr) { + if is_running && Self::listener_matches_resolved_addr(old_addr, &resolved_addrs) { tracing::debug!("HTTP listener address unchanged, skipping restart"); return Ok(()); } @@ -220,6 +220,16 @@ impl HotReloadManager { Err(last_error.expect("at least one candidate address").into()) } + fn listener_matches_resolved_addr( + current_addr: SocketAddr, + resolved_addrs: &[SocketAddr], + ) -> bool { + resolved_addrs.iter().any(|resolved_addr| { + *resolved_addr == current_addr + || (resolved_addr.port() == 0 && resolved_addr.ip() == current_addr.ip()) + }) + } + async fn update_channel_secrets(&self, http: &crate::config::HttpConfig) { let new_secret = http.webhook_secret.clone(); diff --git a/src/reload/manager/tests/restart_tests.rs b/src/reload/manager/tests/restart_tests.rs index 163372ec3..71267b169 100644 --- a/src/reload/manager/tests/restart_tests.rs +++ b/src/reload/manager/tests/restart_tests.rs @@ -281,6 +281,38 @@ async fn maybe_restart_listener_skips_restart_when_current_addr_matches_non_firs Ok(()) } +#[tokio::test] +async fn maybe_restart_listener_skips_restart_for_ephemeral_bind_on_same_host() +-> Result<(), Box> { + let current_addr: SocketAddr = "127.0.0.1:43123".parse().expect("valid socket address"); + let controller = Arc::new(StubListenerController::new(current_addr)); + let controller_clone = Arc::clone(&controller); + + let http_cfg = http_config("127.0.0.1", 0, None); + + let (_temp_dir, config) = test_config_with_http(None).await?; + let loader = Arc::new(StubConfigLoader::new_success(config)); + + let manager = HotReloadManager::new( + loader as Arc, + Some(controller as Arc), + None, + Vec::new(), + ); + + manager + .maybe_restart_listener(&http_cfg) + .await + .expect("ephemeral binds on the same host should not force a restart"); + + assert_eq!( + controller_clone.restart_calls().await.len(), + 0, + "listener should not restart when config still requests the same host on port 0" + ); + Ok(()) +} + #[tokio::test] async fn maybe_restart_listener_retries_multiple_candidates_until_success() -> Result<(), Box> { diff --git a/src/secrets/store.rs b/src/secrets/store.rs index 5d1fcf378..aea3a51de 100644 --- a/src/secrets/store.rs +++ b/src/secrets/store.rs @@ -472,14 +472,14 @@ fn row_to_secret(row: &tokio_postgres::Row) -> Secret { /// matching the connection-per-request pattern used by the main `LibSqlBackend`. #[cfg(feature = "libsql")] pub struct LibSqlSecretsStore { - db: Arc, + db: Arc, crypto: Arc, } #[cfg(feature = "libsql")] impl LibSqlSecretsStore { /// Create a new store with the given shared libsql database handle and crypto instance. - pub fn new(db: Arc, crypto: Arc) -> Self { + pub fn new(db: Arc, crypto: Arc) -> Self { Self { db, crypto } } diff --git a/src/tools/wasm/storage.rs b/src/tools/wasm/storage.rs index 33c7d5bce..a0dbba7aa 100644 --- a/src/tools/wasm/storage.rs +++ b/src/tools/wasm/storage.rs @@ -703,12 +703,12 @@ fn row_to_tool(row: &tokio_postgres::Row) -> Result, + db: std::sync::Arc, } #[cfg(feature = "libsql")] impl LibSqlWasmToolStore { - pub fn new(db: std::sync::Arc) -> Self { + pub fn new(db: std::sync::Arc) -> Self { Self { db } } From dad7c16805bc14dd9867062ea9706ceb256de875 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 16:40:14 +0200 Subject: [PATCH 12/40] Test libsql temp-file cleanup Add a regression test for the temp-file-backed new_memory() path\nthat verifies dropping the backend removes the database file and its\nWAL/SHM sidecars after they have been created.\n\nThe explicit package test command cargo test -p ironclaw still exposes\npre-existing doctest failures outside this change, but the new cleanup\ntest itself passes and the repository gate make all is green. --- src/db/libsql/mod.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index d4940e58c..789c07b0f 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -274,6 +274,35 @@ mod tests { assert_eq!(timeout, 5000); } + #[tokio::test] + async fn new_memory_drop_removes_temp_files() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + + let path = backend + .db + .temp_path + .clone() + .expect("new_memory must set temp_path"); + let wal = path.with_extension("db-wal"); + let shm = path.with_extension("db-shm"); + + // Touch the database and sidecar files so the drop handler has + // something concrete to delete. + std::fs::write(&path, b"").unwrap_or_default(); + std::fs::write(&wal, b"").unwrap_or_default(); + std::fs::write(&shm, b"").unwrap_or_default(); + + assert!(path.exists(), "temp db file must exist before drop"); + + drop(backend); + + assert!(!path.exists(), "temp db file must be removed after drop"); + assert!(!wal.exists(), "WAL sidecar must be removed after drop"); + assert!(!shm.exists(), "SHM sidecar must be removed after drop"); + } + /// Regression test: save_job must persist user_id and get_job must return it. #[tokio::test] async fn test_save_job_persists_user_id() { From 755993ea63400da93f0215e5316f196a3aab1992 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 16:41:25 +0200 Subject: [PATCH 13/40] Document workspace memory search modes Add a user-guide section describing backend-specific workspace\nmemory search behaviour, the libSQL fallback from indexed vector_top_k\nto brute-force cosine similarity, and how operators can inspect the\nactive search mode with ironclaw doctor and ironclaw status. --- docs/users-guide.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/users-guide.md b/docs/users-guide.md index 2a1352577..b8f0b960f 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -91,3 +91,28 @@ These notices are advisory. A success message means the runtime moved the job back into its normal retry path. A permanent failure or manual-intervention message means the runtime could not finish recovery automatically, and the operator should inspect the job or tool state before retrying work. + + +## Workspace memory search + +When workspace memory is enabled, the search backend differs by database: + +- **PostgreSQL** — performs pgvector cosine-distance queries directly. +- **libSQL / Turso** — attempts an indexed `vector_top_k(...)` query when a + compatible fixed-dimension vector index exists. After the V9 migration + (which removed the fixed-dimension index in favour of flexible-dimension + BLOB storage), the backend automatically falls back to brute-force cosine + similarity computed in Rust. Results from both paths feed into the same + Reciprocal Rank Fusion (RRF) pipeline, so hybrid FTS + vector retrieval is + preserved. + +To determine which search mode is active for a workspace, run: + +```text +ironclaw doctor +ironclaw status +``` + +Both commands report whether indexed or brute-force vector retrieval is +currently in use. See `docs/database-integrations.md` for backend trade-offs +and performance considerations. From 82b0bd4056f2ead3fd2b4d8f1aedfcbd67157d2e Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 16:43:40 +0200 Subject: [PATCH 14/40] Clarify temp-file-backed libsql testing docs Update libSQL developer-facing documentation to describe the\ntemp-file-backed new_memory() path accurately in both Rustdoc and the\ndevelopers guide, including shared-connection semantics, automatic\ncleanup, and the preferred WebhookServer test helpers for race-free\nlistener setup.\n\nValidated with cargo check -p ironclaw plus markdownlint on the changed\ndevelopers guide section. --- docs/developers-guide.md | 27 +++++++++++++++++++++++++++ src/db/libsql/mod.rs | 24 +++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/docs/developers-guide.md b/docs/developers-guide.md index 287a30cf3..329b35bda 100644 --- a/docs/developers-guide.md +++ b/docs/developers-guide.md @@ -319,6 +319,19 @@ Meaning: PostgreSQL connection URL used by the app. Default or rule: +### libSQL test databases + +Unit tests that exercise the libSQL backend call +`LibSqlBackend::new_memory()` rather than `new_local()`. `new_memory()` +creates a UUID-named file in the OS temp directory so that multiple +connections within a single test share state, matching production semantics. +The shared database handle removes that file and its `-wal`/`-shm` sidecars +automatically when the final clone is dropped, so tests should not leave +artefacts behind on disk. + +Do **not** use `new_local()` in unit tests; reserve it for integration tests +or tests that specifically require filesystem-path behaviour. + ## Dispatcher Architecture The dispatcher orchestrates interactive chat turns by preparing an LLM @@ -725,6 +738,20 @@ When those changes land, this guide must be updated in the same branch so local setup instructions stay truthful. +### WebhookServer test helpers + +`WebhookServer` exposes two `#[cfg(test)]`-only methods to eliminate +port-allocation races: + +- `start_with_listener(listener: TcpListener)` — accepts a pre-bound + listener, merges queued route fragments, resolves the live listener + address, and spawns the server. +- `restart_with_listener(listener: TcpListener)` — shuts the current server + down, resolves the new listener's address, and spawns a fresh server. + +Tests should pre-bind via `TcpListener::bind("127.0.0.1:0")` and pass the +result to these helpers instead of relying on `start()` / +`restart_with_addr()` to pick a free port. ### Key APIs - `RunLoopCtx`: per-run container that carries the session handle, diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index 789c07b0f..b86da1f12 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -4,7 +4,10 @@ //! Supports three modes: //! - Local embedded (file-based, no server needed) //! - Turso cloud with embedded replica (sync to cloud) -//! - Temp-file-backed test mode (for testing) +//! - Temp-file-backed (for testing) — creates a UUID-named `.db` file in the +//! OS temp directory; fresh connections share state via the file; the file +//! and its WAL/SHM sidecars are deleted automatically when the backend is +//! dropped. mod conversations; pub(crate) mod helpers; @@ -26,6 +29,11 @@ mod workspace; /// `temp_path` keeps test-database cleanup tied to the final shared owner. pub struct LibSqlDatabase { db: RawLibSqlDatabase, + /// Path to the ephemeral database file created by + /// [`LibSqlBackend::new_memory`]. + /// `None` for persistent (`new_local` / `new_remote_replica`) backends. + /// When `Some`, the file and its `-wal`/`-shm` sidecars are removed in + /// [`Drop`]. temp_path: Option, } pub(crate) use helpers::{ @@ -67,13 +75,15 @@ impl LibSqlBackend { Ok(Self::from_db(db)) } - /// Create a temporary file-backed database for tests. + /// Create a temp-file-backed database for testing. /// - /// Despite the name, this does not use SQLite's `:memory:` mode. It - /// creates a temp file whose path is retained for the lifetime of the - /// returned database so multiple fresh connections share the same state. - /// The temp file and its sidecar files are cleaned up when the final - /// shared database handle is dropped. + /// Creates a UUID-named `.db` file in [`std::env::temp_dir`]. Multiple + /// calls to [`Self::connect`] share state through that file, matching the + /// behaviour of the production `new_local` path without requiring a + /// caller-supplied path. + /// + /// The file and its `-wal`/`-shm` sidecars are removed automatically when + /// the final shared database handle created for this backend is dropped. pub async fn new_memory() -> Result { let temp_path = std::env::temp_dir().join(format!("axinite-libsql-memory-{}.db", Uuid::new_v4())); From 63919d7292cf312c5f4d890f49e33b88dd985a1f Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 17:15:17 +0200 Subject: [PATCH 15/40] Fix workspace search docs wording Correct the users-guide description of the libSQL V9 search path\nso it refers to flexible vector storage rather than BLOB storage,\nexpand the first FTS acronym in that section, and update the new_memory\nconstructor error string to match its temp-file-backed semantics.\n\nValidated with cargo check -p ironclaw, markdownlint on the changed\nuser guide, and git diff --check. --- docs/users-guide.md | 9 +++++---- src/db/libsql/mod.rs | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/users-guide.md b/docs/users-guide.md index b8f0b960f..aac88ff5b 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -101,10 +101,11 @@ When workspace memory is enabled, the search backend differs by database: - **libSQL / Turso** — attempts an indexed `vector_top_k(...)` query when a compatible fixed-dimension vector index exists. After the V9 migration (which removed the fixed-dimension index in favour of flexible-dimension - BLOB storage), the backend automatically falls back to brute-force cosine - similarity computed in Rust. Results from both paths feed into the same - Reciprocal Rank Fusion (RRF) pipeline, so hybrid FTS + vector retrieval is - preserved. + vector storage, with `memory_chunks.embedding` stored as a flexible vector), + the backend automatically falls back to brute-force cosine similarity + computed in Rust. Results from both paths feed into the same Reciprocal + Rank Fusion (RRF) pipeline, so hybrid full-text search (FTS) + vector + retrieval is preserved. To determine which search mode is active for a workspace, run: diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index b86da1f12..754154606 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -91,7 +91,7 @@ impl LibSqlBackend { .build() .await .map_err(|e| { - DatabaseError::Pool(format!("Failed to create in-memory database: {}", e)) + DatabaseError::Pool(format!("Failed to create temp-file-backed database: {}", e)) })?; Ok(Self { From 315913ea0e9c9eaf2fb438bc79de1ab32bcad51e Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 19:04:54 +0200 Subject: [PATCH 16/40] Unify libsql shared-handle connection setup Move libSQL connection retry and busy-timeout configuration into LibSqlDatabase::connect so every shared-handle consumer uses the same behaviour. This removes duplicated setup from the secrets and WASM stores, clarifies the LibSqlDatabase and LibSqlBackend Rustdoc split, adds a shared-handle cleanup regression, and simplifies the webhook restart test assertion. --- src/channels/wasm/storage.rs | 11 +--- src/channels/webhook_server.rs | 7 +- src/db/libsql/mod.rs | 115 +++++++++++++++++++++------------ src/secrets/store.rs | 4 +- src/tools/wasm/storage.rs | 6 +- 5 files changed, 78 insertions(+), 65 deletions(-) diff --git a/src/channels/wasm/storage.rs b/src/channels/wasm/storage.rs index 9ad51a26d..63e71ed86 100644 --- a/src/channels/wasm/storage.rs +++ b/src/channels/wasm/storage.rs @@ -361,14 +361,9 @@ impl LibSqlWasmChannelStore { } async fn connect(&self) -> Result { - let conn = self - .db - .connect() - .map_err(|e| WasmChannelStoreError::Database(format!("Connection failed: {}", e)))?; - conn.query("PRAGMA busy_timeout = 5000", ()) - .await - .map_err(|e| { - WasmChannelStoreError::Database(format!("Failed to set busy_timeout: {}", e)) + let conn = + self.db.connect().await.map_err(|e| { + WasmChannelStoreError::Database(format!("Connection failed: {}", e)) })?; Ok(conn) } diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 3d9f08293..90464f064 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -386,12 +386,7 @@ mod tests { ) .await; assert!( - old_result.is_err() - || old_result.as_ref().is_ok_and(|request_result| { - request_result.as_ref().map_or(true, |response| { - response.status() != reqwest::StatusCode::OK - }) - }), + matches!(old_result, Err(_) | Ok(Err(_))), "Old address should not respond after restart_with_addr" ); diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index 754154606..bc1b152e8 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -19,14 +19,14 @@ mod settings; mod tool_failures; mod workspace; -/// libSQL/Turso database backend. +/// Shared libSQL database handle. /// -/// Stores the `Database` handle in an `Arc` so that the same underlying -/// database can be shared with stores (SecretsStore, WasmToolStore) that -/// create their own connections per-operation. -/// -/// When constructed through [`LibSqlBackend::new_memory`], the optional -/// `temp_path` keeps test-database cleanup tied to the final shared owner. +/// Wraps the underlying [`RawLibSqlDatabase`] plus optional temp-file metadata +/// used by test-only temp-file-backed databases. Stores such as +/// `LibSqlSecretsStore`, `LibSqlWasmChannelStore`, and `LibSqlWasmToolStore` +/// share this handle via `Arc` so they all create connections against the same +/// underlying database and so temp-file cleanup runs when the last shared owner +/// is dropped. pub struct LibSqlDatabase { db: RawLibSqlDatabase, /// Path to the ephemeral database file created by @@ -42,6 +42,13 @@ pub(crate) use helpers::{ }; pub(crate) use row_conversion::row_to_memory_document; +/// libSQL/Turso backend implementation of [`NativeDatabase`]. +/// +/// Owns one shared [`LibSqlDatabase`] handle and exposes constructors for the +/// local, remote-replica, and temp-file-backed test modes. Callers that need +/// backend-specific sharing can clone the underlying handle with +/// [`LibSqlBackend::shared_db`], while normal database operations go through +/// [`LibSqlBackend::connect`] and the trait implementations on this type. pub struct LibSqlBackend { db: Arc, } @@ -133,32 +140,7 @@ impl LibSqlBackend { /// "unable to open database file" errors from concurrent connection /// creation (e.g. cron ticker vs main thread). pub async fn connect(&self) -> Result { - let mut last_err = None; - for attempt in 0..3u32 { - match self.db.connect() { - Ok(conn) => { - conn.query("PRAGMA busy_timeout = 5000", ()) - .await - .map_err(|e| { - DatabaseError::Pool(format!("Failed to set busy_timeout: {}", e)) - })?; - return Ok(conn); - } - Err(e) => { - last_err = Some(e); - if attempt < 2 { - tokio::time::sleep(std::time::Duration::from_millis( - 50 * 2u64.pow(attempt), - )) - .await; - } - } - } - } - Err(DatabaseError::Pool(format!( - "Failed to create connection after 3 attempts: {}", - last_err.map(|e| e.to_string()).unwrap_or_default() - ))) + self.db.connect().await } /// Wraps a built `libsql::Database` in `Self` with no temp-file path. @@ -216,6 +198,8 @@ impl NativeDatabase for LibSqlBackend { #[cfg(test)] mod tests { + use std::sync::Arc; + use chrono::{TimeZone, Utc}; use crate::db::Database; @@ -284,16 +268,16 @@ mod tests { assert_eq!(timeout, 5000); } - #[tokio::test] - async fn new_memory_drop_removes_temp_files() { - let backend = LibSqlBackend::new_memory() - .await + #[test] + fn shared_libsql_database_drop_removes_temp_files() { + let runtime = tokio::runtime::Runtime::new().expect("create runtime for libsql test"); + let backend = runtime + .block_on(LibSqlBackend::new_memory()) .expect("failed to create temp-file-backed backend"); + let shared_db = backend.shared_db(); - let path = backend - .db - .temp_path - .clone() + let path = shared_db + .temp_path() .expect("new_memory must set temp_path"); let wal = path.with_extension("db-wal"); let shm = path.with_extension("db-shm"); @@ -307,6 +291,17 @@ mod tests { assert!(path.exists(), "temp db file must exist before drop"); drop(backend); + assert!( + path.exists(), + "shared handle should keep temp db file alive after backend drop" + ); + + let shared_db = match Arc::try_unwrap(shared_db) { + Ok(shared_db) => shared_db, + Err(_) => panic!("test should hold the final shared libsql database handle"), + }; + + drop(shared_db); assert!(!path.exists(), "temp db file must be removed after drop"); assert!(!wal.exists(), "WAL sidecar must be removed after drop"); @@ -452,8 +447,42 @@ impl LibSqlDatabase { Self { db, temp_path } } + #[cfg(test)] + pub fn temp_path(&self) -> Option { + self.temp_path.clone() + } + /// Create a fresh libSQL connection from the shared database handle. - pub fn connect(&self) -> Result { - self.db.connect() + /// + /// Applies the same retry and `busy_timeout` setup used by + /// [`LibSqlBackend::connect`] so all shared-handle consumers behave + /// consistently. + pub async fn connect(&self) -> Result { + let mut last_err = None; + for attempt in 0..3u32 { + match self.db.connect() { + Ok(conn) => { + conn.query("PRAGMA busy_timeout = 5000", ()) + .await + .map_err(|e| { + DatabaseError::Pool(format!("Failed to set busy_timeout: {}", e)) + })?; + return Ok(conn); + } + Err(e) => { + last_err = Some(e); + if attempt < 2 { + tokio::time::sleep(std::time::Duration::from_millis( + 50 * 2u64.pow(attempt), + )) + .await; + } + } + } + } + Err(DatabaseError::Pool(format!( + "Failed to create connection after 3 attempts: {}", + last_err.map(|e| e.to_string()).unwrap_or_default() + ))) } } diff --git a/src/secrets/store.rs b/src/secrets/store.rs index aea3a51de..4d8962f9a 100644 --- a/src/secrets/store.rs +++ b/src/secrets/store.rs @@ -487,10 +487,8 @@ impl LibSqlSecretsStore { let conn = self .db .connect() - .map_err(|e| SecretError::Database(format!("Connection failed: {}", e)))?; - conn.query("PRAGMA busy_timeout = 5000", ()) .await - .map_err(|e| SecretError::Database(format!("Failed to set busy_timeout: {}", e)))?; + .map_err(|e| SecretError::Database(format!("Connection failed: {}", e)))?; Ok(conn) } } diff --git a/src/tools/wasm/storage.rs b/src/tools/wasm/storage.rs index a0dbba7aa..0495eb014 100644 --- a/src/tools/wasm/storage.rs +++ b/src/tools/wasm/storage.rs @@ -716,12 +716,8 @@ impl LibSqlWasmToolStore { let conn = self .db .connect() - .map_err(|e| WasmStorageError::Database(format!("Connection failed: {}", e)))?; - conn.query("PRAGMA busy_timeout = 5000", ()) .await - .map_err(|e| { - WasmStorageError::Database(format!("Failed to set busy_timeout: {}", e)) - })?; + .map_err(|e| WasmStorageError::Database(format!("Connection failed: {}", e)))?; Ok(conn) } } From 2bbdc2dff9db0a0f420ab22b6031464a52322202 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 19:19:14 +0200 Subject: [PATCH 17/40] Split libsql workspace module Decompose the oversized libSQL workspace module into focused submodules for vector search, FTS, document operations, chunk operations, and tests. Keep the NativeWorkspaceStore implementation in workspace/mod.rs while delegating the heavier CRUD logic to internal helpers so every workspace module file stays under the repository's 400-line ceiling without changing behaviour or signatures. --- src/db/libsql/workspace.rs | 1065 ---------------------- src/db/libsql/workspace/chunk_ops.rs | 153 ++++ src/db/libsql/workspace/document_ops.rs | 358 ++++++++ src/db/libsql/workspace/fts.rs | 57 ++ src/db/libsql/workspace/mod.rs | 179 ++++ src/db/libsql/workspace/tests.rs | 159 ++++ src/db/libsql/workspace/vector_search.rs | 283 ++++++ 7 files changed, 1189 insertions(+), 1065 deletions(-) delete mode 100644 src/db/libsql/workspace.rs create mode 100644 src/db/libsql/workspace/chunk_ops.rs create mode 100644 src/db/libsql/workspace/document_ops.rs create mode 100644 src/db/libsql/workspace/fts.rs create mode 100644 src/db/libsql/workspace/mod.rs create mode 100644 src/db/libsql/workspace/tests.rs create mode 100644 src/db/libsql/workspace/vector_search.rs diff --git a/src/db/libsql/workspace.rs b/src/db/libsql/workspace.rs deleted file mode 100644 index eb1edf60b..000000000 --- a/src/db/libsql/workspace.rs +++ /dev/null @@ -1,1065 +0,0 @@ -//! Workspace-related WorkspaceStore implementation for LibSqlBackend. - -use std::collections::HashMap; - -use libsql::params; -use uuid::Uuid; - -use super::{ - LibSqlBackend, fmt_ts, get_i64, get_opt_text, get_opt_ts, get_text, get_ts, - row_to_memory_document, -}; -use crate::db::{HybridSearchParams, InsertChunkParams, NativeWorkspaceStore}; -use crate::error::WorkspaceError; -use crate::workspace::{ - MemoryChunk, MemoryDocument, RankedResult, SearchResult, WorkspaceEntry, cosine_similarity, - reciprocal_rank_fusion, -}; - -use chrono::Utc; - -struct Candidate { - chunk_id: Uuid, - document_id: Uuid, - document_path: String, - content: String, - similarity: f32, -} - -enum VectorSearchOutcome { - Indexed(Vec), - IndexUnavailable, -} - -fn is_missing_vector_index_error(error: &libsql::Error) -> bool { - let sqlite_message = match error { - libsql::Error::SqliteFailure(_, message) - | libsql::Error::RemoteSqliteFailure(_, _, message) => message, - _ => return false, - }; - - let error_message = sqlite_message.to_ascii_lowercase(); - - error_message.contains("vector_top_k") - || error_message.contains("no such function") - || error_message.contains("idx_memory_chunks_embedding") - || error_message.contains("failed to parse vector index parameters") -} - -fn rank_candidates(mut candidates: Vec, limit: usize) -> Vec { - // Sort by similarity descending - candidates.sort_by(|a, b| { - b.similarity - .partial_cmp(&a.similarity) - .unwrap_or(std::cmp::Ordering::Equal) - }); - - let total_candidates = candidates.len(); - - // Take top N and convert to RankedResult with 1-based rank - let results: Vec<_> = candidates - .into_iter() - .take(limit) - .enumerate() - .map(|(idx, c)| RankedResult { - chunk_id: c.chunk_id, - document_id: c.document_id, - document_path: c.document_path, - content: c.content, - rank: (idx + 1) as u32, - }) - .collect(); - - tracing::debug!( - "Brute-force vector search scanned {} candidates, returned {} results", - total_candidates, - results.len() - ); - - results -} - -/// Deserialize an embedding from a BLOB (4-byte little-endian f32 values). -/// -/// Returns an empty vector if the blob length is not a multiple of 4. -fn deserialize_embedding(blob: &[u8]) -> Vec { - if !blob.len().is_multiple_of(4) { - tracing::warn!( - "Embedding blob length {} is not a multiple of 4; skipping", - blob.len() - ); - return Vec::new(); - } - - blob.chunks_exact(4) - .map(|chunk| { - let bytes = [chunk[0], chunk[1], chunk[2], chunk[3]]; - f32::from_le_bytes(bytes) - }) - .collect() -} - -impl LibSqlBackend { - async fn collect_candidates( - &self, - rows: &mut libsql::Rows, - query_embedding: &[f32], - ) -> Result, WorkspaceError> { - let mut candidates = Vec::new(); - let mut skipped_mismatched_dims = 0usize; - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Row fetch failed: {}", e), - })? - { - let chunk_id: Uuid = match get_text(&row, 0).parse() { - Ok(id) => id, - Err(e) => { - tracing::warn!("Invalid chunk_id UUID in memory_chunks: {e}"); - continue; - } - }; - let document_id: Uuid = match get_text(&row, 1).parse() { - Ok(id) => id, - Err(e) => { - tracing::warn!("Invalid document_id UUID in memory_chunks: {e}"); - continue; - } - }; - let document_path = get_text(&row, 2); - let content = get_text(&row, 3); - - // Deserialize the embedding BLOB - let embedding_blob = match row.get_value(4) { - Ok(libsql::Value::Blob(bytes)) => bytes, - _ => continue, - }; - let chunk_embedding = deserialize_embedding(&embedding_blob); - if chunk_embedding.is_empty() { - continue; - } - if chunk_embedding.len() != query_embedding.len() { - skipped_mismatched_dims += 1; - continue; - } - - // Compute cosine similarity - let similarity = cosine_similarity(query_embedding, &chunk_embedding); - - candidates.push(Candidate { - chunk_id, - document_id, - document_path, - content, - similarity, - }); - } - - if skipped_mismatched_dims > 0 { - tracing::debug!( - "Brute-force vector search skipped {} candidates with embedding dimension mismatches (query dimension: {})", - skipped_mismatched_dims, - query_embedding.len() - ); - } - - Ok(candidates) - } - - /// Brute-force vector search using cosine similarity in Rust. - /// - /// Loads all chunks with embeddings for the given user/agent, computes - /// cosine similarity against the query embedding, and returns the top matches. - /// This is used as a fallback when the vector index is not available (post-V9 migration). - async fn brute_force_vector_search( - &self, - user_id: &str, - agent_id: Option, - embedding: &[f32], - limit: usize, - ) -> Result, WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - - // Load all chunks with embeddings - let mut rows = conn - .query( - r#" - SELECT c.id, c.document_id, d.path, c.content, c.embedding - FROM memory_chunks c - JOIN memory_documents d ON d.id = c.document_id - WHERE d.user_id = ?1 AND d.agent_id IS ?2 - AND c.embedding IS NOT NULL - "#, - params![user_id, agent_id_str.as_deref()], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })?; - - let candidates = self.collect_candidates(&mut rows, embedding).await?; - Ok(rank_candidates(candidates, limit)) - } -} - -/// Execute full-text search and return ranked results. -/// -/// Queries the memory_chunks_fts virtual table, joining with memory_chunks -/// and memory_documents to fetch chunk content and document paths. Assigns -/// rank based on result order. -async fn fts_ranked_results( - conn: &libsql::Connection, - user_id: &str, - agent_id: Option<&str>, - query: &str, - limit: i64, -) -> Result, WorkspaceError> { - let mut rows = conn - .query( - r#" - SELECT c.id, c.document_id, d.path, c.content - FROM memory_chunks_fts fts - JOIN memory_chunks c ON c._rowid = fts.rowid - JOIN memory_documents d ON d.id = c.document_id - WHERE d.user_id = ?1 AND d.agent_id IS ?2 - AND memory_chunks_fts MATCH ?3 - ORDER BY rank - LIMIT ?4 - "#, - params![user_id, agent_id, query, limit], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("FTS query failed: {}", e), - })?; - - let mut results = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("FTS row fetch failed: {}", e), - })? - { - results.push(RankedResult { - chunk_id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), - document_path: get_text(&row, 2), - content: get_text(&row, 3), - rank: results.len() as u32 + 1, - }); - } - Ok(results) -} - -fn embedding_to_vector_json(embedding: &[f32]) -> String { - format!( - "[{}]", - embedding - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ) -} - -async fn collect_vector_index_rows( - mut rows: libsql::Rows, - limit: i64, -) -> Result { - let mut results = Vec::new(); - while let Some(row) = match rows.next().await { - Ok(row) => row, - Err(e) => { - if is_missing_vector_index_error(&e) { - tracing::debug!( - "Vector index row fetch failed, brute-force fallback required: {e}" - ); - return Ok(VectorSearchOutcome::IndexUnavailable); - } - - return Err(WorkspaceError::SearchFailed { - reason: format!("Vector index row fetch failed: {e}"), - }); - } - } { - results.push(RankedResult { - chunk_id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), - document_path: get_text(&row, 2), - content: get_text(&row, 3), - rank: results.len() as u32 + 1, - }); - } - tracing::debug!( - "libSQL vector index search returned {} results (pre-fusion limit: {})", - results.len(), - limit - ); - Ok(VectorSearchOutcome::Indexed(results)) -} - -/// Execute vector similarity search via libSQL's vector index. -/// -/// Returns [`VectorSearchOutcome::IndexUnavailable`] when `vector_top_k(...)` -/// cannot run because the fixed-dimension vector index is missing, which is -/// the expected state after the V9 flexible-dimension migration. -async fn vector_ranked_results( - conn: &libsql::Connection, - user_id: &str, - agent_id: Option<&str>, - embedding: &[f32], - limit: i64, -) -> Result { - let vector_json = embedding_to_vector_json(embedding); - - // vector_top_k requires a libsql_vector_idx index. After the V9 - // migration the index is dropped (to support flexible embedding - // dimensions), so this query may fail. The caller must then fall - // back to brute-force cosine similarity. - match conn - .query( - r#" - SELECT c.id, c.document_id, d.path, c.content - FROM vector_top_k('idx_memory_chunks_embedding', vector(?1), ?2) AS top_k - JOIN memory_chunks c ON c._rowid = top_k.id - JOIN memory_documents d ON d.id = c.document_id - WHERE d.user_id = ?3 AND d.agent_id IS ?4 - "#, - params![vector_json, limit, user_id, agent_id], - ) - .await - { - Ok(rows) => collect_vector_index_rows(rows, limit).await, - Err(e) => { - if is_missing_vector_index_error(&e) { - tracing::debug!( - "Vector index query failed (expected after V9 migration), \ - brute-force fallback required: {e}" - ); - Ok(VectorSearchOutcome::IndexUnavailable) - } else { - Err(WorkspaceError::SearchFailed { - reason: format!("Vector index query failed: {e}"), - }) - } - } - } -} - -impl NativeWorkspaceStore for LibSqlBackend { - async fn get_document_by_path( - &self, - user_id: &str, - agent_id: Option, - path: &str, - ) -> Result { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - let mut rows = conn - .query( - r#" - SELECT id, user_id, agent_id, path, content, - created_at, updated_at, metadata - FROM memory_documents - WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3 - "#, - params![user_id, agent_id_str.as_deref(), path], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })?; - - match rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? { - Some(row) => Ok(row_to_memory_document(&row)), - None => Err(WorkspaceError::DocumentNotFound { - doc_type: path.to_string(), - user_id: user_id.to_string(), - }), - } - } - - async fn get_document_by_id(&self, id: Uuid) -> Result { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let mut rows = conn - .query( - r#" - SELECT id, user_id, agent_id, path, content, - created_at, updated_at, metadata - FROM memory_documents WHERE id = ?1 - "#, - params![id.to_string()], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })?; - - match rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? { - Some(row) => Ok(row_to_memory_document(&row)), - None => Err(WorkspaceError::DocumentNotFound { - doc_type: "unknown".to_string(), - user_id: "unknown".to_string(), - }), - } - } - - async fn get_or_create_document_by_path( - &self, - user_id: &str, - agent_id: Option, - path: &str, - ) -> Result { - // Try get - match NativeWorkspaceStore::get_document_by_path(self, user_id, agent_id, path).await { - Ok(doc) => return Ok(doc), - Err(WorkspaceError::DocumentNotFound { .. }) => {} - Err(e) => return Err(e), - } - - // Create - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let id = Uuid::new_v4(); - let agent_id_str = agent_id.map(|id| id.to_string()); - conn.execute( - r#" - INSERT INTO memory_documents (id, user_id, agent_id, path, content, metadata) - VALUES (?1, ?2, ?3, ?4, '', '{}') - ON CONFLICT DO NOTHING - "#, - params![id.to_string(), user_id, agent_id_str.as_deref(), path], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Insert failed: {}", e), - })?; - - NativeWorkspaceStore::get_document_by_path(self, user_id, agent_id, path).await - } - - async fn update_document(&self, id: Uuid, content: &str) -> Result<(), WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let now = fmt_ts(&Utc::now()); - conn.execute( - "UPDATE memory_documents SET content = ?2, updated_at = ?3 WHERE id = ?1", - params![id.to_string(), content, now], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Update failed: {}", e), - })?; - Ok(()) - } - - async fn delete_document_by_path( - &self, - user_id: &str, - agent_id: Option, - path: &str, - ) -> Result<(), WorkspaceError> { - let doc = NativeWorkspaceStore::get_document_by_path(self, user_id, agent_id, path).await?; - NativeWorkspaceStore::delete_chunks(self, doc.id).await?; - - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - conn.execute( - "DELETE FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3", - params![user_id, agent_id_str.as_deref(), path], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Delete failed: {}", e), - })?; - Ok(()) - } - - async fn list_directory( - &self, - user_id: &str, - agent_id: Option, - directory: &str, - ) -> Result, WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let dir = if !directory.is_empty() && !directory.ends_with('/') { - format!("{}/", directory) - } else { - directory.to_string() - }; - - let agent_id_str = agent_id.map(|id| id.to_string()); - let pattern = if dir.is_empty() { - "%".to_string() - } else { - format!("{}%", dir) - }; - - let mut rows = conn - .query( - r#" - SELECT path, updated_at, substr(content, 1, 200) as content_preview - FROM memory_documents - WHERE user_id = ?1 AND agent_id IS ?2 - AND (?3 = '%' OR path LIKE ?3) - ORDER BY path - "#, - params![user_id, agent_id_str.as_deref(), pattern], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("List directory failed: {}", e), - })?; - - let mut entries_map: HashMap = HashMap::new(); - - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - let full_path = get_text(&row, 0); - let updated_at = get_opt_ts(&row, 1); - let content_preview = get_opt_text(&row, 2); - - let relative = if dir.is_empty() { - &full_path - } else if let Some(stripped) = full_path.strip_prefix(&dir) { - stripped - } else { - continue; - }; - - let child_name = if let Some(slash_pos) = relative.find('/') { - &relative[..slash_pos] - } else { - relative - }; - - if child_name.is_empty() { - continue; - } - - let is_dir = relative.contains('/'); - let entry_path = if dir.is_empty() { - child_name.to_string() - } else { - format!("{}{}", dir, child_name) - }; - - entries_map - .entry(child_name.to_string()) - .and_modify(|e| { - if is_dir { - e.is_directory = true; - e.content_preview = None; - } - if let (Some(existing), Some(new)) = (&e.updated_at, &updated_at) - && new > existing - { - e.updated_at = Some(*new); - } - }) - .or_insert(WorkspaceEntry { - path: entry_path, - is_directory: is_dir, - updated_at, - content_preview: if is_dir { None } else { content_preview }, - }); - } - - let mut entries: Vec = entries_map.into_values().collect(); - entries.sort_by(|a, b| a.path.cmp(&b.path)); - Ok(entries) - } - - async fn list_all_paths( - &self, - user_id: &str, - agent_id: Option, - ) -> Result, WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - let mut rows = conn - .query( - "SELECT path FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 ORDER BY path", - params![user_id, agent_id_str.as_deref()], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("List paths failed: {}", e), - })?; - - let mut paths = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - paths.push(get_text(&row, 0)); - } - Ok(paths) - } - - async fn list_documents( - &self, - user_id: &str, - agent_id: Option, - ) -> Result, WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - let mut rows = conn - .query( - r#" - SELECT id, user_id, agent_id, path, content, - created_at, updated_at, metadata - FROM memory_documents - WHERE user_id = ?1 AND agent_id IS ?2 - ORDER BY updated_at DESC - "#, - params![user_id, agent_id_str.as_deref()], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })?; - - let mut docs = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - docs.push(row_to_memory_document(&row)); - } - Ok(docs) - } - - async fn delete_chunks(&self, document_id: Uuid) -> Result<(), WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::ChunkingFailed { - reason: e.to_string(), - })?; - conn.execute( - "DELETE FROM memory_chunks WHERE document_id = ?1", - params![document_id.to_string()], - ) - .await - .map_err(|e| WorkspaceError::ChunkingFailed { - reason: format!("Delete failed: {}", e), - })?; - Ok(()) - } - - async fn insert_chunk(&self, params: InsertChunkParams<'_>) -> Result { - let InsertChunkParams { - document_id, - chunk_index, - content, - embedding, - } = params; - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::ChunkingFailed { - reason: e.to_string(), - })?; - let id = Uuid::new_v4(); - let chunk_index = i64::from(chunk_index); - let embedding_blob = embedding.map(|e| { - let bytes: Vec = e.iter().flat_map(|f| f.to_le_bytes()).collect(); - bytes - }); - - conn.execute( - r#" - INSERT INTO memory_chunks (id, document_id, chunk_index, content, embedding) - VALUES (?1, ?2, ?3, ?4, ?5) - "#, - params![ - id.to_string(), - document_id.to_string(), - chunk_index, - content, - embedding_blob.map(libsql::Value::Blob), - ], - ) - .await - .map_err(|e| WorkspaceError::ChunkingFailed { - reason: format!("Insert failed: {}", e), - })?; - Ok(id) - } - - async fn update_chunk_embedding( - &self, - chunk_id: Uuid, - embedding: &[f32], - ) -> Result<(), WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::EmbeddingFailed { - reason: e.to_string(), - })?; - let bytes: Vec = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); - - conn.execute( - "UPDATE memory_chunks SET embedding = ?2 WHERE id = ?1", - params![chunk_id.to_string(), libsql::Value::Blob(bytes)], - ) - .await - .map_err(|e| WorkspaceError::EmbeddingFailed { - reason: format!("Update failed: {}", e), - })?; - Ok(()) - } - - async fn get_chunks_without_embeddings( - &self, - user_id: &str, - agent_id: Option, - limit: usize, - ) -> Result, WorkspaceError> { - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - let mut rows = conn - .query( - r#" - SELECT c.id, c.document_id, c.chunk_index, c.content, c.created_at - FROM memory_chunks c - JOIN memory_documents d ON d.id = c.document_id - WHERE d.user_id = ?1 AND d.agent_id IS ?2 - AND c.embedding IS NULL - LIMIT ?3 - "#, - params![user_id, agent_id_str.as_deref(), limit as i64], - ) - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })?; - - let mut chunks = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - let chunk_index = - u32::try_from(get_i64(&row, 2)).map_err(|_| WorkspaceError::SearchFailed { - reason: "memory_chunks.chunk_index must be non-negative".to_string(), - })?; - chunks.push(MemoryChunk { - id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), - chunk_index, - content: get_text(&row, 3), - embedding: None, - created_at: get_ts(&row, 4), - }); - } - Ok(chunks) - } - - async fn hybrid_search( - &self, - params: HybridSearchParams<'_>, - ) -> Result, WorkspaceError> { - let HybridSearchParams { - user_id, - agent_id, - query, - embedding, - config, - } = params; - let conn = self - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let agent_id_str = agent_id.map(|id| id.to_string()); - let pre_limit = config.pre_fusion_limit as i64; - - let fts_results = if config.use_fts { - let results = - fts_ranked_results(&conn, user_id, agent_id_str.as_deref(), query, pre_limit) - .await?; - tracing::debug!( - "FTS search returned {} results (pre-fusion limit: {})", - results.len(), - pre_limit - ); - results - } else { - Vec::new() - }; - - let vector_results = if config.use_vector { - if let Some(emb) = embedding { - match vector_ranked_results(&conn, user_id, agent_id_str.as_deref(), emb, pre_limit) - .await? - { - VectorSearchOutcome::Indexed(results) => results, - VectorSearchOutcome::IndexUnavailable => { - tracing::info!("Using brute-force vector search (no vector index)"); - self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) - .await - .map_err(|e| { - tracing::warn!("Brute-force vector search failed: {e}"); - e - })? - } - } - } else { - Vec::new() - } - } else { - if embedding.is_some() { - tracing::warn!( - "Embedding provided but vector search is disabled in config; using FTS-only results" - ); - } - Vec::new() - }; - - Ok(reciprocal_rank_fusion(fts_results, vector_results, config)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_deserialize_embedding_valid() { - let floats = [1.0f32, 2.0, 3.0]; - let bytes: Vec = floats.iter().flat_map(|f| f.to_le_bytes()).collect(); - - let result = deserialize_embedding(&bytes); - - assert_eq!(result.len(), 3); - assert!((result[0] - 1.0).abs() < 0.001); - assert!((result[1] - 2.0).abs() < 0.001); - assert!((result[2] - 3.0).abs() < 0.001); - } - - #[test] - fn test_deserialize_embedding_empty() { - let result = deserialize_embedding(&[]); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_deserialize_embedding_invalid_length() { - // 7 bytes is not a multiple of 4 - let result = deserialize_embedding(&[1, 2, 3, 4, 5, 6, 7]); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_deserialize_embedding_single_value() { - let value = 42.5f32; - let bytes = value.to_le_bytes(); - - let result = deserialize_embedding(&bytes); - - assert_eq!(result.len(), 1); - assert!((result[0] - 42.5).abs() < 0.001); - } - - #[test] - fn test_deserialize_embedding_negative_values() { - let floats = [-1.5f32, 0.0, 2.75]; - let bytes: Vec = floats.iter().flat_map(|f| f.to_le_bytes()).collect(); - - let result = deserialize_embedding(&bytes); - - assert_eq!(result.len(), 3); - assert!((result[0] - (-1.5)).abs() < 0.001); - assert!((result[1] - 0.0).abs() < 0.001); - assert!((result[2] - 2.75).abs() < 0.001); - } - - #[tokio::test] - async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { - use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; - use crate::workspace::SearchConfig; - - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory libsql backend"); - backend - .run_migrations() - .await - .expect("failed to run libsql migrations"); - - let document = backend - .get_or_create_document_by_path("default", None, "notes/search.md") - .await - .expect("failed to create search test document"); - backend - .update_document(document.id, "semantic search fallback test") - .await - .expect("failed to update search test document"); - backend - .insert_chunk(InsertChunkParams { - document_id: document.id, - chunk_index: 0, - content: "semantic search fallback test", - embedding: Some(&[1.0, 0.0, 0.0]), - }) - .await - .expect("failed to insert search test chunk"); - - let conn = backend - .connect() - .await - .expect("failed to open libsql connection for vector precondition"); - let vector_outcome = vector_ranked_results(&conn, "default", None, &[1.0, 0.0, 0.0], 5) - .await - .expect("failed to run vector search precondition"); - assert!( - matches!(vector_outcome, VectorSearchOutcome::IndexUnavailable), - "Test requires the vector-index-unavailable path before hybrid fallback assertions" - ); - - let results = backend - .hybrid_search(HybridSearchParams { - user_id: "default", - agent_id: None, - query: "semantic", - embedding: Some(&[1.0, 0.0, 0.0]), - config: &SearchConfig::default().with_limit(5), - }) - .await - .expect("failed to execute hybrid search"); - - assert_eq!(results.len(), 1); - assert_eq!(results[0].document_path, "notes/search.md"); - assert_eq!(results[0].fts_rank, Some(1)); - assert_eq!(results[0].vector_rank, Some(1)); - assert!(results[0].is_hybrid()); - } - - #[tokio::test] - async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { - use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; - - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory libsql backend"); - backend - .run_migrations() - .await - .expect("failed to run libsql migrations"); - - let document = backend - .get_or_create_document_by_path("default", None, "notes/mixed-dim.md") - .await - .expect("failed to create mixed-dimension search document"); - backend - .update_document(document.id, "mixed dimension vector search test") - .await - .expect("failed to update mixed-dimension search document"); - backend - .insert_chunk(InsertChunkParams { - document_id: document.id, - chunk_index: 0, - content: "same-dimension chunk", - embedding: Some(&[1.0, 0.0, 0.0]), - }) - .await - .expect("failed to insert same-dimension chunk"); - backend - .insert_chunk(InsertChunkParams { - document_id: document.id, - chunk_index: 1, - content: "different-dimension chunk", - embedding: Some(&[1.0, 0.0]), - }) - .await - .expect("failed to insert different-dimension chunk"); - - let results = backend - .brute_force_vector_search("default", None, &[1.0, 0.0, 0.0], 10) - .await - .expect("failed to run brute-force vector search"); - - assert_eq!(results.len(), 1); - assert_eq!(results[0].content, "same-dimension chunk"); - } -} diff --git a/src/db/libsql/workspace/chunk_ops.rs b/src/db/libsql/workspace/chunk_ops.rs new file mode 100644 index 000000000..7f360452c --- /dev/null +++ b/src/db/libsql/workspace/chunk_ops.rs @@ -0,0 +1,153 @@ +//! Chunk-oriented workspace-store helpers for the libSQL backend. + +use libsql::params; +use uuid::Uuid; + +use super::super::{LibSqlBackend, get_i64, get_text, get_ts}; +use crate::db::InsertChunkParams; +use crate::error::WorkspaceError; +use crate::workspace::MemoryChunk; + +pub(super) async fn delete_chunks( + backend: &LibSqlBackend, + document_id: Uuid, +) -> Result<(), WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::ChunkingFailed { + reason: e.to_string(), + })?; + conn.execute( + "DELETE FROM memory_chunks WHERE document_id = ?1", + params![document_id.to_string()], + ) + .await + .map_err(|e| WorkspaceError::ChunkingFailed { + reason: format!("Delete failed: {}", e), + })?; + Ok(()) +} + +pub(super) async fn insert_chunk( + backend: &LibSqlBackend, + params: InsertChunkParams<'_>, +) -> Result { + let InsertChunkParams { + document_id, + chunk_index, + content, + embedding, + } = params; + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::ChunkingFailed { + reason: e.to_string(), + })?; + let id = Uuid::new_v4(); + let chunk_index = i64::from(chunk_index); + let embedding_blob = embedding.map(|values| { + values + .iter() + .flat_map(|f| f.to_le_bytes()) + .collect::>() + }); + + conn.execute( + r#" + INSERT INTO memory_chunks (id, document_id, chunk_index, content, embedding) + VALUES (?1, ?2, ?3, ?4, ?5) + "#, + params![ + id.to_string(), + document_id.to_string(), + chunk_index, + content, + embedding_blob.map(libsql::Value::Blob), + ], + ) + .await + .map_err(|e| WorkspaceError::ChunkingFailed { + reason: format!("Insert failed: {}", e), + })?; + Ok(id) +} + +pub(super) async fn update_chunk_embedding( + backend: &LibSqlBackend, + chunk_id: Uuid, + embedding: &[f32], +) -> Result<(), WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::EmbeddingFailed { + reason: e.to_string(), + })?; + let bytes: Vec = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); + + conn.execute( + "UPDATE memory_chunks SET embedding = ?2 WHERE id = ?1", + params![chunk_id.to_string(), libsql::Value::Blob(bytes)], + ) + .await + .map_err(|e| WorkspaceError::EmbeddingFailed { + reason: format!("Update failed: {}", e), + })?; + Ok(()) +} + +pub(super) async fn get_chunks_without_embeddings( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, + limit: usize, +) -> Result, WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let mut rows = conn + .query( + r#" + SELECT c.id, c.document_id, c.chunk_index, c.content, c.created_at + FROM memory_chunks c + JOIN memory_documents d ON d.id = c.document_id + WHERE d.user_id = ?1 AND d.agent_id IS ?2 + AND c.embedding IS NULL + LIMIT ?3 + "#, + params![user_id, agent_id_str.as_deref(), limit as i64], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + + let mut chunks = Vec::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? + { + let chunk_index = + u32::try_from(get_i64(&row, 2)).map_err(|_| WorkspaceError::SearchFailed { + reason: "memory_chunks.chunk_index must be non-negative".to_string(), + })?; + chunks.push(MemoryChunk { + id: get_text(&row, 0).parse().unwrap_or_default(), + document_id: get_text(&row, 1).parse().unwrap_or_default(), + chunk_index, + content: get_text(&row, 3), + embedding: None, + created_at: get_ts(&row, 4), + }); + } + Ok(chunks) +} diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs new file mode 100644 index 000000000..3588e8c84 --- /dev/null +++ b/src/db/libsql/workspace/document_ops.rs @@ -0,0 +1,358 @@ +//! Document-oriented workspace-store helpers for the libSQL backend. + +use std::collections::HashMap; + +use chrono::Utc; +use libsql::params; +use uuid::Uuid; + +use super::super::{ + LibSqlBackend, fmt_ts, get_opt_text, get_opt_ts, get_text, row_to_memory_document, +}; +use crate::db::NativeWorkspaceStore; +use crate::error::WorkspaceError; +use crate::workspace::{MemoryDocument, WorkspaceEntry}; + +pub(super) async fn get_document_by_path( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, + path: &str, +) -> Result { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let mut rows = conn + .query( + r#" + SELECT id, user_id, agent_id, path, content, + created_at, updated_at, metadata + FROM memory_documents + WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3 + "#, + params![user_id, agent_id_str.as_deref(), path], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + + match rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? { + Some(row) => Ok(row_to_memory_document(&row)), + None => Err(WorkspaceError::DocumentNotFound { + doc_type: path.to_string(), + user_id: user_id.to_string(), + }), + } +} + +pub(super) async fn get_document_by_id( + backend: &LibSqlBackend, + id: Uuid, +) -> Result { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let mut rows = conn + .query( + r#" + SELECT id, user_id, agent_id, path, content, + created_at, updated_at, metadata + FROM memory_documents WHERE id = ?1 + "#, + params![id.to_string()], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + + match rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? { + Some(row) => Ok(row_to_memory_document(&row)), + None => Err(WorkspaceError::DocumentNotFound { + doc_type: "unknown".to_string(), + user_id: "unknown".to_string(), + }), + } +} + +pub(super) async fn get_or_create_document_by_path( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, + path: &str, +) -> Result { + match NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await { + Ok(doc) => return Ok(doc), + Err(WorkspaceError::DocumentNotFound { .. }) => {} + Err(e) => return Err(e), + } + + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let id = Uuid::new_v4(); + let agent_id_str = agent_id.map(|id| id.to_string()); + conn.execute( + r#" + INSERT INTO memory_documents (id, user_id, agent_id, path, content, metadata) + VALUES (?1, ?2, ?3, ?4, '', '{}') + ON CONFLICT DO NOTHING + "#, + params![id.to_string(), user_id, agent_id_str.as_deref(), path], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Insert failed: {}", e), + })?; + + NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await +} + +pub(super) async fn update_document( + backend: &LibSqlBackend, + id: Uuid, + content: &str, +) -> Result<(), WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let now = fmt_ts(&Utc::now()); + conn.execute( + "UPDATE memory_documents SET content = ?2, updated_at = ?3 WHERE id = ?1", + params![id.to_string(), content, now], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Update failed: {}", e), + })?; + Ok(()) +} + +pub(super) async fn delete_document_by_path( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, + path: &str, +) -> Result<(), WorkspaceError> { + let doc = NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await?; + NativeWorkspaceStore::delete_chunks(backend, doc.id).await?; + + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + conn.execute( + "DELETE FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3", + params![user_id, agent_id_str.as_deref(), path], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Delete failed: {}", e), + })?; + Ok(()) +} + +pub(super) async fn list_directory( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, + directory: &str, +) -> Result, WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let dir = if !directory.is_empty() && !directory.ends_with('/') { + format!("{}/", directory) + } else { + directory.to_string() + }; + + let agent_id_str = agent_id.map(|id| id.to_string()); + let pattern = if dir.is_empty() { + "%".to_string() + } else { + format!("{}%", dir) + }; + + let mut rows = conn + .query( + r#" + SELECT path, updated_at, substr(content, 1, 200) as content_preview + FROM memory_documents + WHERE user_id = ?1 AND agent_id IS ?2 + AND (?3 = '%' OR path LIKE ?3) + ORDER BY path + "#, + params![user_id, agent_id_str.as_deref(), pattern], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("List directory failed: {}", e), + })?; + + let mut entries_map: HashMap = HashMap::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? + { + let full_path = get_text(&row, 0); + let updated_at = get_opt_ts(&row, 1); + let content_preview = get_opt_text(&row, 2); + let relative = if dir.is_empty() { + &full_path + } else if let Some(stripped) = full_path.strip_prefix(&dir) { + stripped + } else { + continue; + }; + + let child_name = if let Some(slash_pos) = relative.find('/') { + &relative[..slash_pos] + } else { + relative + }; + if child_name.is_empty() { + continue; + } + + let is_dir = relative.contains('/'); + let entry_path = if dir.is_empty() { + child_name.to_string() + } else { + format!("{}{}", dir, child_name) + }; + + entries_map + .entry(child_name.to_string()) + .and_modify(|entry| { + if is_dir { + entry.is_directory = true; + entry.content_preview = None; + } + if let (Some(existing), Some(new)) = (&entry.updated_at, &updated_at) + && new > existing + { + entry.updated_at = Some(*new); + } + }) + .or_insert(WorkspaceEntry { + path: entry_path, + is_directory: is_dir, + updated_at, + content_preview: if is_dir { None } else { content_preview }, + }); + } + + let mut entries: Vec = entries_map.into_values().collect(); + entries.sort_by(|a, b| a.path.cmp(&b.path)); + Ok(entries) +} + +pub(super) async fn list_all_paths( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, +) -> Result, WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let mut rows = conn + .query( + "SELECT path FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 ORDER BY path", + params![user_id, agent_id_str.as_deref()], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("List paths failed: {}", e), + })?; + + let mut paths = Vec::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? + { + paths.push(get_text(&row, 0)); + } + Ok(paths) +} + +pub(super) async fn list_documents( + backend: &LibSqlBackend, + user_id: &str, + agent_id: Option, +) -> Result, WorkspaceError> { + let conn = backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let mut rows = conn + .query( + r#" + SELECT id, user_id, agent_id, path, content, + created_at, updated_at, metadata + FROM memory_documents + WHERE user_id = ?1 AND agent_id IS ?2 + ORDER BY updated_at DESC + "#, + params![user_id, agent_id_str.as_deref()], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + + let mut docs = Vec::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? + { + docs.push(row_to_memory_document(&row)); + } + Ok(docs) +} diff --git a/src/db/libsql/workspace/fts.rs b/src/db/libsql/workspace/fts.rs new file mode 100644 index 000000000..2ee400fd9 --- /dev/null +++ b/src/db/libsql/workspace/fts.rs @@ -0,0 +1,57 @@ +//! Full-text-search helpers for libSQL workspace retrieval. + +use libsql::params; + +use super::super::get_text; +use crate::error::WorkspaceError; +use crate::workspace::RankedResult; + +/// Execute full-text search and return ranked results. +/// +/// Queries the memory_chunks_fts virtual table, joining with memory_chunks +/// and memory_documents to fetch chunk content and document paths. Assigns +/// rank based on result order. +pub(super) async fn fts_ranked_results( + conn: &libsql::Connection, + user_id: &str, + agent_id: Option<&str>, + query: &str, + limit: i64, +) -> Result, WorkspaceError> { + let mut rows = conn + .query( + r#" + SELECT c.id, c.document_id, d.path, c.content + FROM memory_chunks_fts fts + JOIN memory_chunks c ON c._rowid = fts.rowid + JOIN memory_documents d ON d.id = c.document_id + WHERE d.user_id = ?1 AND d.agent_id IS ?2 + AND memory_chunks_fts MATCH ?3 + ORDER BY rank + LIMIT ?4 + "#, + params![user_id, agent_id, query, limit], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("FTS query failed: {}", e), + })?; + + let mut results = Vec::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("FTS row fetch failed: {}", e), + })? + { + results.push(RankedResult { + chunk_id: get_text(&row, 0).parse().unwrap_or_default(), + document_id: get_text(&row, 1).parse().unwrap_or_default(), + document_path: get_text(&row, 2), + content: get_text(&row, 3), + rank: results.len() as u32 + 1, + }); + } + Ok(results) +} diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs new file mode 100644 index 000000000..818dbf095 --- /dev/null +++ b/src/db/libsql/workspace/mod.rs @@ -0,0 +1,179 @@ +//! Workspace-store operations for the libSQL backend. + +mod chunk_ops; +mod document_ops; +mod fts; +#[cfg(test)] +mod tests; +mod vector_search; + +use uuid::Uuid; + +use super::LibSqlBackend; +use crate::db::{HybridSearchParams, InsertChunkParams, NativeWorkspaceStore}; +use crate::error::WorkspaceError; +use crate::workspace::{ + MemoryChunk, MemoryDocument, SearchResult, WorkspaceEntry, reciprocal_rank_fusion, +}; +use chunk_ops::{ + delete_chunks, get_chunks_without_embeddings, insert_chunk, update_chunk_embedding, +}; +use document_ops::{ + delete_document_by_path, get_document_by_id, get_document_by_path, + get_or_create_document_by_path, list_all_paths, list_directory, list_documents, + update_document, +}; +use fts::fts_ranked_results; +use vector_search::{VectorSearchOutcome, vector_ranked_results}; + +impl NativeWorkspaceStore for LibSqlBackend { + async fn get_document_by_path( + &self, + user_id: &str, + agent_id: Option, + path: &str, + ) -> Result { + get_document_by_path(self, user_id, agent_id, path).await + } + + async fn get_document_by_id(&self, id: Uuid) -> Result { + get_document_by_id(self, id).await + } + + async fn get_or_create_document_by_path( + &self, + user_id: &str, + agent_id: Option, + path: &str, + ) -> Result { + get_or_create_document_by_path(self, user_id, agent_id, path).await + } + + async fn update_document(&self, id: Uuid, content: &str) -> Result<(), WorkspaceError> { + update_document(self, id, content).await + } + + async fn delete_document_by_path( + &self, + user_id: &str, + agent_id: Option, + path: &str, + ) -> Result<(), WorkspaceError> { + delete_document_by_path(self, user_id, agent_id, path).await + } + + async fn list_directory( + &self, + user_id: &str, + agent_id: Option, + directory: &str, + ) -> Result, WorkspaceError> { + list_directory(self, user_id, agent_id, directory).await + } + + async fn list_all_paths( + &self, + user_id: &str, + agent_id: Option, + ) -> Result, WorkspaceError> { + list_all_paths(self, user_id, agent_id).await + } + + async fn list_documents( + &self, + user_id: &str, + agent_id: Option, + ) -> Result, WorkspaceError> { + list_documents(self, user_id, agent_id).await + } + + async fn delete_chunks(&self, document_id: Uuid) -> Result<(), WorkspaceError> { + delete_chunks(self, document_id).await + } + + async fn insert_chunk(&self, params: InsertChunkParams<'_>) -> Result { + insert_chunk(self, params).await + } + + async fn update_chunk_embedding( + &self, + chunk_id: Uuid, + embedding: &[f32], + ) -> Result<(), WorkspaceError> { + update_chunk_embedding(self, chunk_id, embedding).await + } + + async fn get_chunks_without_embeddings( + &self, + user_id: &str, + agent_id: Option, + limit: usize, + ) -> Result, WorkspaceError> { + get_chunks_without_embeddings(self, user_id, agent_id, limit).await + } + + async fn hybrid_search( + &self, + params: HybridSearchParams<'_>, + ) -> Result, WorkspaceError> { + let HybridSearchParams { + user_id, + agent_id, + query, + embedding, + config, + } = params; + let conn = self + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let pre_limit = config.pre_fusion_limit as i64; + + let fts_results = if config.use_fts { + let results = + fts_ranked_results(&conn, user_id, agent_id_str.as_deref(), query, pre_limit) + .await?; + tracing::debug!( + "FTS search returned {} results (pre-fusion limit: {})", + results.len(), + pre_limit + ); + results + } else { + Vec::new() + }; + + let vector_results = if config.use_vector { + if let Some(emb) = embedding { + match vector_ranked_results(&conn, user_id, agent_id_str.as_deref(), emb, pre_limit) + .await? + { + VectorSearchOutcome::Indexed(results) => results, + VectorSearchOutcome::IndexUnavailable => { + tracing::info!("Using brute-force vector search (no vector index)"); + self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) + .await + .map_err(|e| { + tracing::warn!("Brute-force vector search failed: {e}"); + e + })? + } + } + } else { + Vec::new() + } + } else { + if embedding.is_some() { + tracing::warn!( + "Embedding provided but vector search is disabled in config; using FTS-only results" + ); + } + Vec::new() + }; + + Ok(reciprocal_rank_fusion(fts_results, vector_results, config)) + } +} diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs new file mode 100644 index 000000000..210886196 --- /dev/null +++ b/src/db/libsql/workspace/tests.rs @@ -0,0 +1,159 @@ +//! Tests for the libSQL workspace-store module split. + +use super::super::LibSqlBackend; +use super::vector_search::{VectorSearchOutcome, deserialize_embedding, vector_ranked_results}; +use crate::db::{HybridSearchParams, InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; +use crate::workspace::SearchConfig; + +#[test] +fn test_deserialize_embedding_valid() { + let floats = [1.0f32, 2.0, 3.0]; + let bytes: Vec = floats.iter().flat_map(|f| f.to_le_bytes()).collect(); + + let result = deserialize_embedding(&bytes); + + assert_eq!(result.len(), 3); + assert!((result[0] - 1.0).abs() < 0.001); + assert!((result[1] - 2.0).abs() < 0.001); + assert!((result[2] - 3.0).abs() < 0.001); +} + +#[test] +fn test_deserialize_embedding_empty() { + let result = deserialize_embedding(&[]); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_deserialize_embedding_invalid_length() { + let result = deserialize_embedding(&[1, 2, 3, 4, 5, 6, 7]); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_deserialize_embedding_single_value() { + let value = 42.5f32; + let bytes = value.to_le_bytes(); + + let result = deserialize_embedding(&bytes); + + assert_eq!(result.len(), 1); + assert!((result[0] - 42.5).abs() < 0.001); +} + +#[test] +fn test_deserialize_embedding_negative_values() { + let floats = [-1.5f32, 0.0, 2.75]; + let bytes: Vec = floats.iter().flat_map(|f| f.to_le_bytes()).collect(); + + let result = deserialize_embedding(&bytes); + + assert_eq!(result.len(), 3); + assert!((result[0] - (-1.5)).abs() < 0.001); + assert!((result[1] - 0.0).abs() < 0.001); + assert!((result[2] - 2.75).abs() < 0.001); +} + +#[tokio::test] +async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/search.md") + .await + .expect("failed to create search test document"); + backend + .update_document(document.id, "semantic search fallback test") + .await + .expect("failed to update search test document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "semantic search fallback test", + embedding: Some(&[1.0, 0.0, 0.0]), + }) + .await + .expect("failed to insert search test chunk"); + + let conn = backend + .connect() + .await + .expect("failed to open libsql connection for vector precondition"); + let vector_outcome = vector_ranked_results(&conn, "default", None, &[1.0, 0.0, 0.0], 5) + .await + .expect("failed to run vector search precondition"); + assert!( + matches!(vector_outcome, VectorSearchOutcome::IndexUnavailable), + "Test requires the vector-index-unavailable path before hybrid fallback assertions" + ); + + let results = backend + .hybrid_search(HybridSearchParams { + user_id: "default", + agent_id: None, + query: "semantic", + embedding: Some(&[1.0, 0.0, 0.0]), + config: &SearchConfig::default().with_limit(5), + }) + .await + .expect("failed to execute hybrid search"); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].document_path, "notes/search.md"); + assert_eq!(results[0].fts_rank, Some(1)); + assert_eq!(results[0].vector_rank, Some(1)); + assert!(results[0].is_hybrid()); +} + +#[tokio::test] +async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/mixed-dim.md") + .await + .expect("failed to create mixed-dimension search document"); + backend + .update_document(document.id, "mixed dimension vector search test") + .await + .expect("failed to update mixed-dimension search document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "same-dimension chunk", + embedding: Some(&[1.0, 0.0, 0.0]), + }) + .await + .expect("failed to insert same-dimension chunk"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 1, + content: "different-dimension chunk", + embedding: Some(&[1.0, 0.0]), + }) + .await + .expect("failed to insert different-dimension chunk"); + + let results = backend + .brute_force_vector_search("default", None, &[1.0, 0.0, 0.0], 10) + .await + .expect("failed to run brute-force vector search"); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].content, "same-dimension chunk"); +} diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs new file mode 100644 index 000000000..56833c4df --- /dev/null +++ b/src/db/libsql/workspace/vector_search.rs @@ -0,0 +1,283 @@ +//! Vector-search helpers for libSQL workspace retrieval. + +use libsql::params; +use uuid::Uuid; + +use super::super::{LibSqlBackend, get_text}; +use crate::error::WorkspaceError; +use crate::workspace::{RankedResult, cosine_similarity}; + +struct Candidate { + chunk_id: Uuid, + document_id: Uuid, + document_path: String, + content: String, + similarity: f32, +} + +pub(super) enum VectorSearchOutcome { + Indexed(Vec), + IndexUnavailable, +} + +fn is_missing_vector_index_error(error: &libsql::Error) -> bool { + let sqlite_message = match error { + libsql::Error::SqliteFailure(_, message) + | libsql::Error::RemoteSqliteFailure(_, _, message) => message, + _ => return false, + }; + + let error_message = sqlite_message.to_ascii_lowercase(); + + error_message.contains("vector_top_k") + || error_message.contains("no such function") + || error_message.contains("idx_memory_chunks_embedding") + || error_message.contains("failed to parse vector index parameters") +} + +fn rank_candidates(mut candidates: Vec, limit: usize) -> Vec { + candidates.sort_by(|a, b| { + b.similarity + .partial_cmp(&a.similarity) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + let total_candidates = candidates.len(); + let results: Vec<_> = candidates + .into_iter() + .take(limit) + .enumerate() + .map(|(idx, c)| RankedResult { + chunk_id: c.chunk_id, + document_id: c.document_id, + document_path: c.document_path, + content: c.content, + rank: (idx + 1) as u32, + }) + .collect(); + + tracing::debug!( + "Brute-force vector search scanned {} candidates, returned {} results", + total_candidates, + results.len() + ); + + results +} + +/// Deserialize an embedding from a BLOB (4-byte little-endian f32 values). +/// +/// Returns an empty vector if the blob length is not a multiple of 4. +pub(super) fn deserialize_embedding(blob: &[u8]) -> Vec { + if !blob.len().is_multiple_of(4) { + tracing::warn!( + "Embedding blob length {} is not a multiple of 4; skipping", + blob.len() + ); + return Vec::new(); + } + + blob.chunks_exact(4) + .map(|chunk| { + let bytes = [chunk[0], chunk[1], chunk[2], chunk[3]]; + f32::from_le_bytes(bytes) + }) + .collect() +} + +impl LibSqlBackend { + async fn collect_candidates( + &self, + rows: &mut libsql::Rows, + query_embedding: &[f32], + ) -> Result, WorkspaceError> { + let mut candidates = Vec::new(); + let mut skipped_mismatched_dims = 0usize; + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Row fetch failed: {}", e), + })? + { + let chunk_id: Uuid = match get_text(&row, 0).parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!("Invalid chunk_id UUID in memory_chunks: {e}"); + continue; + } + }; + let document_id: Uuid = match get_text(&row, 1).parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!("Invalid document_id UUID in memory_chunks: {e}"); + continue; + } + }; + let document_path = get_text(&row, 2); + let content = get_text(&row, 3); + let embedding_blob = match row.get_value(4) { + Ok(libsql::Value::Blob(bytes)) => bytes, + _ => continue, + }; + let chunk_embedding = deserialize_embedding(&embedding_blob); + if chunk_embedding.is_empty() { + continue; + } + if chunk_embedding.len() != query_embedding.len() { + skipped_mismatched_dims += 1; + continue; + } + + let similarity = cosine_similarity(query_embedding, &chunk_embedding); + candidates.push(Candidate { + chunk_id, + document_id, + document_path, + content, + similarity, + }); + } + + if skipped_mismatched_dims > 0 { + tracing::debug!( + "Brute-force vector search skipped {} candidates with embedding dimension mismatches (query dimension: {})", + skipped_mismatched_dims, + query_embedding.len() + ); + } + + Ok(candidates) + } + + /// Brute-force vector search using cosine similarity in Rust. + /// + /// Loads all chunks with embeddings for the given user/agent, computes + /// cosine similarity against the query embedding, and returns the top + /// matches. This is used as a fallback when the vector index is not + /// available (post-V9 migration). + pub(super) async fn brute_force_vector_search( + &self, + user_id: &str, + agent_id: Option, + embedding: &[f32], + limit: usize, + ) -> Result, WorkspaceError> { + let conn = self + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + })?; + let agent_id_str = agent_id.map(|id| id.to_string()); + let mut rows = conn + .query( + r#" + SELECT c.id, c.document_id, d.path, c.content, c.embedding + FROM memory_chunks c + JOIN memory_documents d ON d.id = c.document_id + WHERE d.user_id = ?1 AND d.agent_id IS ?2 + AND c.embedding IS NOT NULL + "#, + params![user_id, agent_id_str.as_deref()], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + + let candidates = self.collect_candidates(&mut rows, embedding).await?; + Ok(rank_candidates(candidates, limit)) + } +} + +fn embedding_to_vector_json(embedding: &[f32]) -> String { + format!( + "[{}]", + embedding + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ) +} + +async fn collect_vector_index_rows( + mut rows: libsql::Rows, + limit: i64, +) -> Result { + let mut results = Vec::new(); + while let Some(row) = match rows.next().await { + Ok(row) => row, + Err(e) => { + if is_missing_vector_index_error(&e) { + tracing::debug!( + "Vector index row fetch failed, brute-force fallback required: {e}" + ); + return Ok(VectorSearchOutcome::IndexUnavailable); + } + + return Err(WorkspaceError::SearchFailed { + reason: format!("Vector index row fetch failed: {e}"), + }); + } + } { + results.push(RankedResult { + chunk_id: get_text(&row, 0).parse().unwrap_or_default(), + document_id: get_text(&row, 1).parse().unwrap_or_default(), + document_path: get_text(&row, 2), + content: get_text(&row, 3), + rank: results.len() as u32 + 1, + }); + } + tracing::debug!( + "libSQL vector index search returned {} results (pre-fusion limit: {})", + results.len(), + limit + ); + Ok(VectorSearchOutcome::Indexed(results)) +} + +/// Execute vector similarity search via libSQL's vector index. +/// +/// Returns [`VectorSearchOutcome::IndexUnavailable`] when `vector_top_k(...)` +/// cannot run because the fixed-dimension vector index is missing, which is +/// the expected state after the V9 flexible-dimension migration. +pub(super) async fn vector_ranked_results( + conn: &libsql::Connection, + user_id: &str, + agent_id: Option<&str>, + embedding: &[f32], + limit: i64, +) -> Result { + let vector_json = embedding_to_vector_json(embedding); + + match conn + .query( + r#" + SELECT c.id, c.document_id, d.path, c.content + FROM vector_top_k('idx_memory_chunks_embedding', vector(?1), ?2) AS top_k + JOIN memory_chunks c ON c._rowid = top_k.id + JOIN memory_documents d ON d.id = c.document_id + WHERE d.user_id = ?3 AND d.agent_id IS ?4 + "#, + params![vector_json, limit, user_id, agent_id], + ) + .await + { + Ok(rows) => collect_vector_index_rows(rows, limit).await, + Err(e) => { + if is_missing_vector_index_error(&e) { + tracing::debug!( + "Vector index query failed (expected after V9 migration), \ + brute-force fallback required: {e}" + ); + Ok(VectorSearchOutcome::IndexUnavailable) + } else { + Err(WorkspaceError::SearchFailed { + reason: format!("Vector index query failed: {e}"), + }) + } + } + } +} From c344d3b70b5afa67c24e5b8d3e84ee872c84b34e Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 20:52:07 +0200 Subject: [PATCH 18/40] Introduce AgentScope for libsql documents Bundle the repeated user and agent identifiers into a private\nAgentScope helper for libsql document operations. This reduces\nstring-heavy helper signatures without changing the public\nworkspace-store trait surface or any query behaviour.\n\nThe NativeWorkspaceStore impl now constructs AgentScope values at\nits document-operation call sites and forwards the unchanged trait\narguments where required. --- src/db/libsql/workspace/document_ops.rs | 65 ++++++++++++++----------- src/db/libsql/workspace/mod.rs | 14 +++--- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index 3588e8c84..a8945e0b7 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -13,10 +13,20 @@ use crate::db::NativeWorkspaceStore; use crate::error::WorkspaceError; use crate::workspace::{MemoryDocument, WorkspaceEntry}; +/// Identifies the user/agent context for a workspace document query. +/// +/// Bundles the `user_id` + `agent_id` pair that every document-scoped +/// helper requires, reducing per-function arity and making call sites +/// self-documenting. +#[derive(Clone, Copy)] +pub(super) struct AgentScope<'a> { + pub(super) user_id: &'a str, + pub(super) agent_id: Option, +} + pub(super) async fn get_document_by_path( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, path: &str, ) -> Result { let conn = backend @@ -25,7 +35,7 @@ pub(super) async fn get_document_by_path( .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); let mut rows = conn .query( r#" @@ -34,7 +44,7 @@ pub(super) async fn get_document_by_path( FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3 "#, - params![user_id, agent_id_str.as_deref(), path], + params![scope.user_id, agent_id_str.as_deref(), path], ) .await .map_err(|e| WorkspaceError::SearchFailed { @@ -50,7 +60,7 @@ pub(super) async fn get_document_by_path( Some(row) => Ok(row_to_memory_document(&row)), None => Err(WorkspaceError::DocumentNotFound { doc_type: path.to_string(), - user_id: user_id.to_string(), + user_id: scope.user_id.to_string(), }), } } @@ -95,11 +105,12 @@ pub(super) async fn get_document_by_id( pub(super) async fn get_or_create_document_by_path( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, path: &str, ) -> Result { - match NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await { + match NativeWorkspaceStore::get_document_by_path(backend, scope.user_id, scope.agent_id, path) + .await + { Ok(doc) => return Ok(doc), Err(WorkspaceError::DocumentNotFound { .. }) => {} Err(e) => return Err(e), @@ -112,21 +123,21 @@ pub(super) async fn get_or_create_document_by_path( reason: e.to_string(), })?; let id = Uuid::new_v4(); - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); conn.execute( r#" INSERT INTO memory_documents (id, user_id, agent_id, path, content, metadata) VALUES (?1, ?2, ?3, ?4, '', '{}') ON CONFLICT DO NOTHING "#, - params![id.to_string(), user_id, agent_id_str.as_deref(), path], + params![id.to_string(), scope.user_id, agent_id_str.as_deref(), path], ) .await .map_err(|e| WorkspaceError::SearchFailed { reason: format!("Insert failed: {}", e), })?; - NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await + NativeWorkspaceStore::get_document_by_path(backend, scope.user_id, scope.agent_id, path).await } pub(super) async fn update_document( @@ -154,11 +165,12 @@ pub(super) async fn update_document( pub(super) async fn delete_document_by_path( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, path: &str, ) -> Result<(), WorkspaceError> { - let doc = NativeWorkspaceStore::get_document_by_path(backend, user_id, agent_id, path).await?; + let doc = + NativeWorkspaceStore::get_document_by_path(backend, scope.user_id, scope.agent_id, path) + .await?; NativeWorkspaceStore::delete_chunks(backend, doc.id).await?; let conn = backend @@ -167,10 +179,10 @@ pub(super) async fn delete_document_by_path( .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); conn.execute( "DELETE FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3", - params![user_id, agent_id_str.as_deref(), path], + params![scope.user_id, agent_id_str.as_deref(), path], ) .await .map_err(|e| WorkspaceError::SearchFailed { @@ -181,8 +193,7 @@ pub(super) async fn delete_document_by_path( pub(super) async fn list_directory( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, directory: &str, ) -> Result, WorkspaceError> { let conn = backend @@ -197,7 +208,7 @@ pub(super) async fn list_directory( directory.to_string() }; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); let pattern = if dir.is_empty() { "%".to_string() } else { @@ -213,7 +224,7 @@ pub(super) async fn list_directory( AND (?3 = '%' OR path LIKE ?3) ORDER BY path "#, - params![user_id, agent_id_str.as_deref(), pattern], + params![scope.user_id, agent_id_str.as_deref(), pattern], ) .await .map_err(|e| WorkspaceError::SearchFailed { @@ -283,8 +294,7 @@ pub(super) async fn list_directory( pub(super) async fn list_all_paths( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, ) -> Result, WorkspaceError> { let conn = backend .connect() @@ -292,11 +302,11 @@ pub(super) async fn list_all_paths( .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); let mut rows = conn .query( "SELECT path FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 ORDER BY path", - params![user_id, agent_id_str.as_deref()], + params![scope.user_id, agent_id_str.as_deref()], ) .await .map_err(|e| WorkspaceError::SearchFailed { @@ -318,8 +328,7 @@ pub(super) async fn list_all_paths( pub(super) async fn list_documents( backend: &LibSqlBackend, - user_id: &str, - agent_id: Option, + scope: &AgentScope<'_>, ) -> Result, WorkspaceError> { let conn = backend .connect() @@ -327,7 +336,7 @@ pub(super) async fn list_documents( .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = scope.agent_id.map(|id| id.to_string()); let mut rows = conn .query( r#" @@ -337,7 +346,7 @@ pub(super) async fn list_documents( WHERE user_id = ?1 AND agent_id IS ?2 ORDER BY updated_at DESC "#, - params![user_id, agent_id_str.as_deref()], + params![scope.user_id, agent_id_str.as_deref()], ) .await .map_err(|e| WorkspaceError::SearchFailed { diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs index 818dbf095..0b4aa4ad9 100644 --- a/src/db/libsql/workspace/mod.rs +++ b/src/db/libsql/workspace/mod.rs @@ -19,7 +19,7 @@ use chunk_ops::{ delete_chunks, get_chunks_without_embeddings, insert_chunk, update_chunk_embedding, }; use document_ops::{ - delete_document_by_path, get_document_by_id, get_document_by_path, + AgentScope, delete_document_by_path, get_document_by_id, get_document_by_path, get_or_create_document_by_path, list_all_paths, list_directory, list_documents, update_document, }; @@ -33,7 +33,7 @@ impl NativeWorkspaceStore for LibSqlBackend { agent_id: Option, path: &str, ) -> Result { - get_document_by_path(self, user_id, agent_id, path).await + get_document_by_path(self, &AgentScope { user_id, agent_id }, path).await } async fn get_document_by_id(&self, id: Uuid) -> Result { @@ -46,7 +46,7 @@ impl NativeWorkspaceStore for LibSqlBackend { agent_id: Option, path: &str, ) -> Result { - get_or_create_document_by_path(self, user_id, agent_id, path).await + get_or_create_document_by_path(self, &AgentScope { user_id, agent_id }, path).await } async fn update_document(&self, id: Uuid, content: &str) -> Result<(), WorkspaceError> { @@ -59,7 +59,7 @@ impl NativeWorkspaceStore for LibSqlBackend { agent_id: Option, path: &str, ) -> Result<(), WorkspaceError> { - delete_document_by_path(self, user_id, agent_id, path).await + delete_document_by_path(self, &AgentScope { user_id, agent_id }, path).await } async fn list_directory( @@ -68,7 +68,7 @@ impl NativeWorkspaceStore for LibSqlBackend { agent_id: Option, directory: &str, ) -> Result, WorkspaceError> { - list_directory(self, user_id, agent_id, directory).await + list_directory(self, &AgentScope { user_id, agent_id }, directory).await } async fn list_all_paths( @@ -76,7 +76,7 @@ impl NativeWorkspaceStore for LibSqlBackend { user_id: &str, agent_id: Option, ) -> Result, WorkspaceError> { - list_all_paths(self, user_id, agent_id).await + list_all_paths(self, &AgentScope { user_id, agent_id }).await } async fn list_documents( @@ -84,7 +84,7 @@ impl NativeWorkspaceStore for LibSqlBackend { user_id: &str, agent_id: Option, ) -> Result, WorkspaceError> { - list_documents(self, user_id, agent_id).await + list_documents(self, &AgentScope { user_id, agent_id }).await } async fn delete_chunks(&self, document_id: Uuid) -> Result<(), WorkspaceError> { From 24fb21b9f4f611da91b37fbabdc3164286b54599 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 20:58:42 +0200 Subject: [PATCH 19/40] Extract libsql document row helpers Factor shared connection and row-draining logic out of the\nlibsql document helpers. This removes the repeated backend\nconnection and row iteration code in the document lookup and list\npaths without changing queries, error text, or observable\nbehaviour. --- src/db/libsql/workspace/document_ops.rs | 106 ++++++++++-------------- 1 file changed, 44 insertions(+), 62 deletions(-) diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index a8945e0b7..bd14c2fa5 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -24,19 +24,46 @@ pub(super) struct AgentScope<'a> { pub(super) agent_id: Option, } +async fn connect_backend(backend: &LibSqlBackend) -> Result { + backend + .connect() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: e.to_string(), + }) +} + +async fn fetch_first_row(mut rows: libsql::Rows) -> Result, WorkspaceError> { + rows.next().await.map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + }) +} + +async fn drain_rows(mut rows: libsql::Rows, map_row: F) -> Result, WorkspaceError> +where + F: Fn(libsql::Row) -> T, +{ + let mut out = Vec::new(); + while let Some(row) = rows + .next() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })? + { + out.push(map_row(row)); + } + Ok(out) +} + pub(super) async fn get_document_by_path( backend: &LibSqlBackend, scope: &AgentScope<'_>, path: &str, ) -> Result { - let conn = backend - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; + let conn = connect_backend(backend).await?; let agent_id_str = scope.agent_id.map(|id| id.to_string()); - let mut rows = conn + let rows = conn .query( r#" SELECT id, user_id, agent_id, path, content, @@ -51,12 +78,7 @@ pub(super) async fn get_document_by_path( reason: format!("Query failed: {}", e), })?; - match rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? { + match fetch_first_row(rows).await? { Some(row) => Ok(row_to_memory_document(&row)), None => Err(WorkspaceError::DocumentNotFound { doc_type: path.to_string(), @@ -69,13 +91,8 @@ pub(super) async fn get_document_by_id( backend: &LibSqlBackend, id: Uuid, ) -> Result { - let conn = backend - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; - let mut rows = conn + let conn = connect_backend(backend).await?; + let rows = conn .query( r#" SELECT id, user_id, agent_id, path, content, @@ -89,12 +106,7 @@ pub(super) async fn get_document_by_id( reason: format!("Query failed: {}", e), })?; - match rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? { + match fetch_first_row(rows).await? { Some(row) => Ok(row_to_memory_document(&row)), None => Err(WorkspaceError::DocumentNotFound { doc_type: "unknown".to_string(), @@ -296,14 +308,9 @@ pub(super) async fn list_all_paths( backend: &LibSqlBackend, scope: &AgentScope<'_>, ) -> Result, WorkspaceError> { - let conn = backend - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; + let conn = connect_backend(backend).await?; let agent_id_str = scope.agent_id.map(|id| id.to_string()); - let mut rows = conn + let rows = conn .query( "SELECT path FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 ORDER BY path", params![scope.user_id, agent_id_str.as_deref()], @@ -313,31 +320,16 @@ pub(super) async fn list_all_paths( reason: format!("List paths failed: {}", e), })?; - let mut paths = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - paths.push(get_text(&row, 0)); - } - Ok(paths) + drain_rows(rows, |row| get_text(&row, 0)).await } pub(super) async fn list_documents( backend: &LibSqlBackend, scope: &AgentScope<'_>, ) -> Result, WorkspaceError> { - let conn = backend - .connect() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), - })?; + let conn = connect_backend(backend).await?; let agent_id_str = scope.agent_id.map(|id| id.to_string()); - let mut rows = conn + let rows = conn .query( r#" SELECT id, user_id, agent_id, path, content, @@ -353,15 +345,5 @@ pub(super) async fn list_documents( reason: format!("Query failed: {}", e), })?; - let mut docs = Vec::new(); - while let Some(row) = rows - .next() - .await - .map_err(|e| WorkspaceError::SearchFailed { - reason: format!("Query failed: {}", e), - })? - { - docs.push(row_to_memory_document(&row)); - } - Ok(docs) + drain_rows(rows, |row| row_to_memory_document(&row)).await } From 5c19d393c7acc86ee2e94f8d9e844b5f142af1c6 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 21:04:28 +0200 Subject: [PATCH 20/40] Introduce FtsSearchParams Reduce the libsql full-text search helper argument count by\nbundling its query inputs into a private parameter object. This\nkeeps the FTS query behaviour unchanged while making the single\nhybrid-search call site clearer and less string-heavy. --- src/db/libsql/workspace/fts.rs | 15 ++++++++++----- src/db/libsql/workspace/mod.rs | 15 +++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/db/libsql/workspace/fts.rs b/src/db/libsql/workspace/fts.rs index 2ee400fd9..d47f77fd0 100644 --- a/src/db/libsql/workspace/fts.rs +++ b/src/db/libsql/workspace/fts.rs @@ -6,6 +6,14 @@ use super::super::get_text; use crate::error::WorkspaceError; use crate::workspace::RankedResult; +/// Parameters for a full-text search query. +pub(super) struct FtsSearchParams<'a> { + pub(super) user_id: &'a str, + pub(super) agent_id: Option<&'a str>, + pub(super) query: &'a str, + pub(super) limit: i64, +} + /// Execute full-text search and return ranked results. /// /// Queries the memory_chunks_fts virtual table, joining with memory_chunks @@ -13,10 +21,7 @@ use crate::workspace::RankedResult; /// rank based on result order. pub(super) async fn fts_ranked_results( conn: &libsql::Connection, - user_id: &str, - agent_id: Option<&str>, - query: &str, - limit: i64, + params: FtsSearchParams<'_>, ) -> Result, WorkspaceError> { let mut rows = conn .query( @@ -30,7 +35,7 @@ pub(super) async fn fts_ranked_results( ORDER BY rank LIMIT ?4 "#, - params![user_id, agent_id, query, limit], + params![params.user_id, params.agent_id, params.query, params.limit], ) .await .map_err(|e| WorkspaceError::SearchFailed { diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs index 0b4aa4ad9..d7c28114e 100644 --- a/src/db/libsql/workspace/mod.rs +++ b/src/db/libsql/workspace/mod.rs @@ -23,7 +23,7 @@ use document_ops::{ get_or_create_document_by_path, list_all_paths, list_directory, list_documents, update_document, }; -use fts::fts_ranked_results; +use fts::{FtsSearchParams, fts_ranked_results}; use vector_search::{VectorSearchOutcome, vector_ranked_results}; impl NativeWorkspaceStore for LibSqlBackend { @@ -133,9 +133,16 @@ impl NativeWorkspaceStore for LibSqlBackend { let pre_limit = config.pre_fusion_limit as i64; let fts_results = if config.use_fts { - let results = - fts_ranked_results(&conn, user_id, agent_id_str.as_deref(), query, pre_limit) - .await?; + let results = fts_ranked_results( + &conn, + FtsSearchParams { + user_id, + agent_id: agent_id_str.as_deref(), + query, + limit: pre_limit, + }, + ) + .await?; tracing::debug!( "FTS search returned {} results (pre-fusion limit: {})", results.len(), From 920f5d4bbebb309c1fb89dd569987f661d6a68e9 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 21:10:43 +0200 Subject: [PATCH 21/40] Introduce VectorSearchQuery Bundle the user, agent, and embedding inputs shared by the\nlibsql vector-search helpers into a private query object. This\nreduces helper argument counts and keeps the hybrid-search and test\ncall sites aligned without changing search behaviour or logging. --- src/db/libsql/workspace/mod.rs | 33 +++++++++++++++++------- src/db/libsql/workspace/tests.rs | 27 +++++++++++++++---- src/db/libsql/workspace/vector_search.rs | 29 +++++++++++++-------- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs index d7c28114e..4d5652ca6 100644 --- a/src/db/libsql/workspace/mod.rs +++ b/src/db/libsql/workspace/mod.rs @@ -24,7 +24,7 @@ use document_ops::{ update_document, }; use fts::{FtsSearchParams, fts_ranked_results}; -use vector_search::{VectorSearchOutcome, vector_ranked_results}; +use vector_search::{VectorSearchOutcome, VectorSearchQuery, vector_ranked_results}; impl NativeWorkspaceStore for LibSqlBackend { async fn get_document_by_path( @@ -155,18 +155,33 @@ impl NativeWorkspaceStore for LibSqlBackend { let vector_results = if config.use_vector { if let Some(emb) = embedding { - match vector_ranked_results(&conn, user_id, agent_id_str.as_deref(), emb, pre_limit) - .await? + match vector_ranked_results( + &conn, + VectorSearchQuery { + user_id, + agent_id, + embedding: emb, + }, + pre_limit, + ) + .await? { VectorSearchOutcome::Indexed(results) => results, VectorSearchOutcome::IndexUnavailable => { tracing::info!("Using brute-force vector search (no vector index)"); - self.brute_force_vector_search(user_id, agent_id, emb, pre_limit as usize) - .await - .map_err(|e| { - tracing::warn!("Brute-force vector search failed: {e}"); - e - })? + self.brute_force_vector_search( + VectorSearchQuery { + user_id, + agent_id, + embedding: emb, + }, + pre_limit as usize, + ) + .await + .map_err(|e| { + tracing::warn!("Brute-force vector search failed: {e}"); + e + })? } } } else { diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 210886196..4d80aec42 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -1,7 +1,9 @@ //! Tests for the libSQL workspace-store module split. use super::super::LibSqlBackend; -use super::vector_search::{VectorSearchOutcome, deserialize_embedding, vector_ranked_results}; +use super::vector_search::{ + VectorSearchOutcome, VectorSearchQuery, deserialize_embedding, vector_ranked_results, +}; use crate::db::{HybridSearchParams, InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; use crate::workspace::SearchConfig; @@ -86,9 +88,17 @@ async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { .connect() .await .expect("failed to open libsql connection for vector precondition"); - let vector_outcome = vector_ranked_results(&conn, "default", None, &[1.0, 0.0, 0.0], 5) - .await - .expect("failed to run vector search precondition"); + let vector_outcome = vector_ranked_results( + &conn, + VectorSearchQuery { + user_id: "default", + agent_id: None, + embedding: &[1.0, 0.0, 0.0], + }, + 5, + ) + .await + .expect("failed to run vector search precondition"); assert!( matches!(vector_outcome, VectorSearchOutcome::IndexUnavailable), "Test requires the vector-index-unavailable path before hybrid fallback assertions" @@ -150,7 +160,14 @@ async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { .expect("failed to insert different-dimension chunk"); let results = backend - .brute_force_vector_search("default", None, &[1.0, 0.0, 0.0], 10) + .brute_force_vector_search( + VectorSearchQuery { + user_id: "default", + agent_id: None, + embedding: &[1.0, 0.0, 0.0], + }, + 10, + ) .await .expect("failed to run brute-force vector search"); diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index 56833c4df..0dd3dd38e 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -20,6 +20,16 @@ pub(super) enum VectorSearchOutcome { IndexUnavailable, } +/// Scoped query parameters shared by vector-search helpers. +/// +/// Bundles the user/agent scope with the query embedding so callers +/// pass a single cohesive object rather than three separate arguments. +pub(super) struct VectorSearchQuery<'a> { + pub(super) user_id: &'a str, + pub(super) agent_id: Option, + pub(super) embedding: &'a [f32], +} + fn is_missing_vector_index_error(error: &libsql::Error) -> bool { let sqlite_message = match error { libsql::Error::SqliteFailure(_, message) @@ -158,9 +168,7 @@ impl LibSqlBackend { /// available (post-V9 migration). pub(super) async fn brute_force_vector_search( &self, - user_id: &str, - agent_id: Option, - embedding: &[f32], + query: VectorSearchQuery<'_>, limit: usize, ) -> Result, WorkspaceError> { let conn = self @@ -169,7 +177,7 @@ impl LibSqlBackend { .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let agent_id_str = agent_id.map(|id| id.to_string()); + let agent_id_str = query.agent_id.map(|id| id.to_string()); let mut rows = conn .query( r#" @@ -179,14 +187,14 @@ impl LibSqlBackend { WHERE d.user_id = ?1 AND d.agent_id IS ?2 AND c.embedding IS NOT NULL "#, - params![user_id, agent_id_str.as_deref()], + params![query.user_id, agent_id_str.as_deref()], ) .await .map_err(|e| WorkspaceError::SearchFailed { reason: format!("Query failed: {}", e), })?; - let candidates = self.collect_candidates(&mut rows, embedding).await?; + let candidates = self.collect_candidates(&mut rows, query.embedding).await?; Ok(rank_candidates(candidates, limit)) } } @@ -245,12 +253,11 @@ async fn collect_vector_index_rows( /// the expected state after the V9 flexible-dimension migration. pub(super) async fn vector_ranked_results( conn: &libsql::Connection, - user_id: &str, - agent_id: Option<&str>, - embedding: &[f32], + query: VectorSearchQuery<'_>, limit: i64, ) -> Result { - let vector_json = embedding_to_vector_json(embedding); + let agent_id_str = query.agent_id.map(|id| id.to_string()); + let vector_json = embedding_to_vector_json(query.embedding); match conn .query( @@ -261,7 +268,7 @@ pub(super) async fn vector_ranked_results( JOIN memory_documents d ON d.id = c.document_id WHERE d.user_id = ?3 AND d.agent_id IS ?4 "#, - params![vector_json, limit, user_id, agent_id], + params![vector_json, limit, query.user_id, agent_id_str.as_deref()], ) .await { From 159f4195e03b02fb53783e4ef5949d65d538c8cc Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 21:16:18 +0200 Subject: [PATCH 22/40] Extract list_directory helpers Reduce the libsql list_directory complexity by moving its\ndirectory normalisation, pattern construction, entry resolution,\nand map-merging logic into private helpers. This keeps the SQL and\nworkspace-entry behaviour unchanged while bringing the loop body\ndown to the essential row handling. --- src/db/libsql/workspace/document_ops.rs | 132 ++++++++++++++---------- 1 file changed, 79 insertions(+), 53 deletions(-) diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index bd14c2fa5..bb88a2243 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -203,6 +203,74 @@ pub(super) async fn delete_document_by_path( Ok(()) } +fn normalise_dir_prefix(directory: &str) -> String { + if !directory.is_empty() && !directory.ends_with('/') { + format!("{}/", directory) + } else { + directory.to_string() + } +} + +fn dir_like_pattern(dir: &str) -> String { + if dir.is_empty() { + "%".to_string() + } else { + format!("{}%", dir) + } +} + +fn resolve_entry(full_path: &str, dir: &str) -> Option<(String, bool, String)> { + let relative = if dir.is_empty() { + full_path + } else { + full_path.strip_prefix(dir)? + }; + let child_name = if let Some(slash_pos) = relative.find('/') { + &relative[..slash_pos] + } else { + relative + }; + if child_name.is_empty() { + return None; + } + let is_dir = relative.contains('/'); + let entry_path = if dir.is_empty() { + child_name.to_string() + } else { + format!("{}{}", dir, child_name) + }; + Some((child_name.to_string(), is_dir, entry_path)) +} + +fn merge_entry( + entries_map: &mut HashMap, + child_name: String, + entry_path: String, + is_dir: bool, + updated_at: Option>, + content_preview: Option, +) { + entries_map + .entry(child_name) + .and_modify(|entry| { + if is_dir { + entry.is_directory = true; + entry.content_preview = None; + } + if let (Some(existing), Some(new)) = (&entry.updated_at, &updated_at) + && new > existing + { + entry.updated_at = Some(*new); + } + }) + .or_insert(WorkspaceEntry { + path: entry_path, + is_directory: is_dir, + updated_at, + content_preview: if is_dir { None } else { content_preview }, + }); +} + pub(super) async fn list_directory( backend: &LibSqlBackend, scope: &AgentScope<'_>, @@ -214,18 +282,10 @@ pub(super) async fn list_directory( .map_err(|e| WorkspaceError::SearchFailed { reason: e.to_string(), })?; - let dir = if !directory.is_empty() && !directory.ends_with('/') { - format!("{}/", directory) - } else { - directory.to_string() - }; + let dir = normalise_dir_prefix(directory); let agent_id_str = scope.agent_id.map(|id| id.to_string()); - let pattern = if dir.is_empty() { - "%".to_string() - } else { - format!("{}%", dir) - }; + let pattern = dir_like_pattern(&dir); let mut rows = conn .query( @@ -252,51 +312,17 @@ pub(super) async fn list_directory( })? { let full_path = get_text(&row, 0); - let updated_at = get_opt_ts(&row, 1); - let content_preview = get_opt_text(&row, 2); - let relative = if dir.is_empty() { - &full_path - } else if let Some(stripped) = full_path.strip_prefix(&dir) { - stripped - } else { + let Some((child_name, is_dir, entry_path)) = resolve_entry(&full_path, &dir) else { continue; }; - - let child_name = if let Some(slash_pos) = relative.find('/') { - &relative[..slash_pos] - } else { - relative - }; - if child_name.is_empty() { - continue; - } - - let is_dir = relative.contains('/'); - let entry_path = if dir.is_empty() { - child_name.to_string() - } else { - format!("{}{}", dir, child_name) - }; - - entries_map - .entry(child_name.to_string()) - .and_modify(|entry| { - if is_dir { - entry.is_directory = true; - entry.content_preview = None; - } - if let (Some(existing), Some(new)) = (&entry.updated_at, &updated_at) - && new > existing - { - entry.updated_at = Some(*new); - } - }) - .or_insert(WorkspaceEntry { - path: entry_path, - is_directory: is_dir, - updated_at, - content_preview: if is_dir { None } else { content_preview }, - }); + merge_entry( + &mut entries_map, + child_name, + entry_path, + is_dir, + get_opt_ts(&row, 1), + get_opt_text(&row, 2), + ); } let mut entries: Vec = entries_map.into_values().collect(); From 1f6965da0ff9ff270391048bf970b8f8cf71a17c Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 21:21:59 +0200 Subject: [PATCH 23/40] Introduce VectorIndexQuery Reduce the vector_ranked_results argument count by moving its\nindex-query inputs into a dedicated private parameter object. This\nkeeps the vector-index SQL and fallback behaviour unchanged while\nresolving the remaining high-arity call site in the libsql search\npath. --- src/db/libsql/workspace/mod.rs | 10 ++++++---- src/db/libsql/workspace/tests.rs | 7 ++++--- src/db/libsql/workspace/vector_search.rs | 19 ++++++++++++++----- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs index 4d5652ca6..040dd080f 100644 --- a/src/db/libsql/workspace/mod.rs +++ b/src/db/libsql/workspace/mod.rs @@ -24,7 +24,9 @@ use document_ops::{ update_document, }; use fts::{FtsSearchParams, fts_ranked_results}; -use vector_search::{VectorSearchOutcome, VectorSearchQuery, vector_ranked_results}; +use vector_search::{ + VectorIndexQuery, VectorSearchOutcome, VectorSearchQuery, vector_ranked_results, +}; impl NativeWorkspaceStore for LibSqlBackend { async fn get_document_by_path( @@ -157,12 +159,12 @@ impl NativeWorkspaceStore for LibSqlBackend { if let Some(emb) = embedding { match vector_ranked_results( &conn, - VectorSearchQuery { + &VectorIndexQuery { user_id, - agent_id, + agent_id: agent_id_str.as_deref(), embedding: emb, + limit: pre_limit, }, - pre_limit, ) .await? { diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 4d80aec42..cf210f1d1 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -2,7 +2,8 @@ use super::super::LibSqlBackend; use super::vector_search::{ - VectorSearchOutcome, VectorSearchQuery, deserialize_embedding, vector_ranked_results, + VectorIndexQuery, VectorSearchOutcome, VectorSearchQuery, deserialize_embedding, + vector_ranked_results, }; use crate::db::{HybridSearchParams, InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; use crate::workspace::SearchConfig; @@ -90,12 +91,12 @@ async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { .expect("failed to open libsql connection for vector precondition"); let vector_outcome = vector_ranked_results( &conn, - VectorSearchQuery { + &VectorIndexQuery { user_id: "default", agent_id: None, embedding: &[1.0, 0.0, 0.0], + limit: 5, }, - 5, ) .await .expect("failed to run vector search precondition"); diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index 0dd3dd38e..db0ced3c1 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -246,6 +246,17 @@ async fn collect_vector_index_rows( Ok(VectorSearchOutcome::Indexed(results)) } +/// Parameters for a vector-index similarity query. +/// +/// Groups the search-intent arguments for [`vector_ranked_results`] to keep +/// its arity within the project limit of four. +pub(super) struct VectorIndexQuery<'a> { + pub(super) user_id: &'a str, + pub(super) agent_id: Option<&'a str>, + pub(super) embedding: &'a [f32], + pub(super) limit: i64, +} + /// Execute vector similarity search via libSQL's vector index. /// /// Returns [`VectorSearchOutcome::IndexUnavailable`] when `vector_top_k(...)` @@ -253,10 +264,8 @@ async fn collect_vector_index_rows( /// the expected state after the V9 flexible-dimension migration. pub(super) async fn vector_ranked_results( conn: &libsql::Connection, - query: VectorSearchQuery<'_>, - limit: i64, + query: &VectorIndexQuery<'_>, ) -> Result { - let agent_id_str = query.agent_id.map(|id| id.to_string()); let vector_json = embedding_to_vector_json(query.embedding); match conn @@ -268,11 +277,11 @@ pub(super) async fn vector_ranked_results( JOIN memory_documents d ON d.id = c.document_id WHERE d.user_id = ?3 AND d.agent_id IS ?4 "#, - params![vector_json, limit, query.user_id, agent_id_str.as_deref()], + params![vector_json, query.limit, query.user_id, query.agent_id], ) .await { - Ok(rows) => collect_vector_index_rows(rows, limit).await, + Ok(rows) => collect_vector_index_rows(rows, query.limit).await, Err(e) => { if is_missing_vector_index_error(&e) { tracing::debug!( From f2149604fdbb2871db4dd593ccf132d916462312 Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 21:27:24 +0200 Subject: [PATCH 24/40] Clarify libsql temp-file cleanup docs Fix the users-guide wrapping regression and update the libsql\nmodule docs to describe cleanup ownership accurately. The shared\nhandle cleanup test now fails loudly if it cannot create the temp\nsidecar files and asserts they exist before drop so the cleanup\nbehaviour is exercised directly. --- docs/users-guide.md | 10 +++++----- src/db/libsql/mod.rs | 14 +++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/users-guide.md b/docs/users-guide.md index aac88ff5b..9b566eada 100644 --- a/docs/users-guide.md +++ b/docs/users-guide.md @@ -101,11 +101,11 @@ When workspace memory is enabled, the search backend differs by database: - **libSQL / Turso** — attempts an indexed `vector_top_k(...)` query when a compatible fixed-dimension vector index exists. After the V9 migration (which removed the fixed-dimension index in favour of flexible-dimension - vector storage, with `memory_chunks.embedding` stored as a flexible vector), - the backend automatically falls back to brute-force cosine similarity - computed in Rust. Results from both paths feed into the same Reciprocal - Rank Fusion (RRF) pipeline, so hybrid full-text search (FTS) + vector - retrieval is preserved. + vector storage, with `memory_chunks.embedding` stored as a + flexible vector), the backend automatically falls back to brute-force + cosine similarity computed in Rust. Results from both paths feed into the + same Reciprocal Rank Fusion (RRF) pipeline, so hybrid full-text search + (FTS) + vector retrieval is preserved. To determine which search mode is active for a workspace, run: diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index bc1b152e8..197a9d613 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -6,8 +6,10 @@ //! - Turso cloud with embedded replica (sync to cloud) //! - Temp-file-backed (for testing) — creates a UUID-named `.db` file in the //! OS temp directory; fresh connections share state via the file; the file -//! and its WAL/SHM sidecars are deleted automatically when the backend is -//! dropped. +//! and its WAL/SHM sidecars are deleted automatically when the final shared +//! [`LibSqlDatabase`] handle is dropped. Clones returned by `shared_db()` +//! can outlive the [`LibSqlBackend`], so cleanup follows the last shared +//! handle rather than the backend wrapper. mod conversations; pub(crate) mod helpers; @@ -284,11 +286,13 @@ mod tests { // Touch the database and sidecar files so the drop handler has // something concrete to delete. - std::fs::write(&path, b"").unwrap_or_default(); - std::fs::write(&wal, b"").unwrap_or_default(); - std::fs::write(&shm, b"").unwrap_or_default(); + std::fs::write(&path, b"").expect("failed to create temp db file"); + std::fs::write(&wal, b"").expect("failed to create sidecar file"); + std::fs::write(&shm, b"").expect("failed to create sidecar file"); assert!(path.exists(), "temp db file must exist before drop"); + assert!(wal.exists(), "WAL sidecar must exist before drop"); + assert!(shm.exists(), "SHM sidecar must exist before drop"); drop(backend); assert!( From 6d7aff61c51094de5da639a766ef511f9b93840f Mon Sep 17 00:00:00 2001 From: leynos Date: Tue, 14 Apr 2026 23:08:40 +0200 Subject: [PATCH 25/40] Fix libsql workspace search edge cases Store empty embeddings as NULL so semantic search does not see zero-length blobs, make document deletion atomic across chunk and document rows, and stabilise brute-force vector ranking ties by chunk ID. This preserves the existing SQL and error surface while addressing the review findings in the libSQL workspace helpers. --- src/db/libsql/workspace/chunk_ops.rs | 26 ++++++++---- src/db/libsql/workspace/document_ops.rs | 53 +++++++++++++++++++----- src/db/libsql/workspace/vector_search.rs | 1 + 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/db/libsql/workspace/chunk_ops.rs b/src/db/libsql/workspace/chunk_ops.rs index 7f360452c..5d2d0669a 100644 --- a/src/db/libsql/workspace/chunk_ops.rs +++ b/src/db/libsql/workspace/chunk_ops.rs @@ -47,12 +47,17 @@ pub(super) async fn insert_chunk( })?; let id = Uuid::new_v4(); let chunk_index = i64::from(chunk_index); - let embedding_blob = embedding.map(|values| { - values - .iter() - .flat_map(|f| f.to_le_bytes()) - .collect::>() + let embedding_blob = embedding.and_then(|values| { + (!values.is_empty()).then(|| { + values + .iter() + .flat_map(|f| f.to_le_bytes()) + .collect::>() + }) }); + let embedding_value = embedding_blob + .map(libsql::Value::Blob) + .unwrap_or(libsql::Value::Null); conn.execute( r#" @@ -64,7 +69,7 @@ pub(super) async fn insert_chunk( document_id.to_string(), chunk_index, content, - embedding_blob.map(libsql::Value::Blob), + embedding_value, ], ) .await @@ -85,11 +90,16 @@ pub(super) async fn update_chunk_embedding( .map_err(|e| WorkspaceError::EmbeddingFailed { reason: e.to_string(), })?; - let bytes: Vec = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); + let embedding_value = if embedding.is_empty() { + libsql::Value::Null + } else { + let bytes: Vec = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); + libsql::Value::Blob(bytes) + }; conn.execute( "UPDATE memory_chunks SET embedding = ?2 WHERE id = ?1", - params![chunk_id.to_string(), libsql::Value::Blob(bytes)], + params![chunk_id.to_string(), embedding_value], ) .await .map_err(|e| WorkspaceError::EmbeddingFailed { diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index bb88a2243..70e6b45d2 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -180,19 +180,47 @@ pub(super) async fn delete_document_by_path( scope: &AgentScope<'_>, path: &str, ) -> Result<(), WorkspaceError> { - let doc = - NativeWorkspaceStore::get_document_by_path(backend, scope.user_id, scope.agent_id, path) - .await?; - NativeWorkspaceStore::delete_chunks(backend, doc.id).await?; - - let conn = backend - .connect() + let conn = connect_backend(backend).await?; + let agent_id_str = scope.agent_id.map(|id| id.to_string()); + let tx = conn + .transaction() .await .map_err(|e| WorkspaceError::SearchFailed { - reason: e.to_string(), + reason: format!("Delete failed: {}", e), })?; - let agent_id_str = scope.agent_id.map(|id| id.to_string()); - conn.execute( + let rows = tx + .query( + r#" + SELECT id, user_id, agent_id, path, content, + created_at, updated_at, metadata + FROM memory_documents + WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3 + "#, + params![scope.user_id, agent_id_str.as_deref(), path], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Query failed: {}", e), + })?; + let doc = match fetch_first_row(rows).await? { + Some(row) => row_to_memory_document(&row), + None => { + return Err(WorkspaceError::DocumentNotFound { + doc_type: path.to_string(), + user_id: scope.user_id.to_string(), + }); + } + }; + + tx.execute( + "DELETE FROM memory_chunks WHERE document_id = ?1", + params![doc.id.to_string()], + ) + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Delete failed: {}", e), + })?; + tx.execute( "DELETE FROM memory_documents WHERE user_id = ?1 AND agent_id IS ?2 AND path = ?3", params![scope.user_id, agent_id_str.as_deref(), path], ) @@ -200,6 +228,11 @@ pub(super) async fn delete_document_by_path( .map_err(|e| WorkspaceError::SearchFailed { reason: format!("Delete failed: {}", e), })?; + tx.commit() + .await + .map_err(|e| WorkspaceError::SearchFailed { + reason: format!("Delete failed: {}", e), + })?; Ok(()) } diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index db0ced3c1..595450d3d 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -50,6 +50,7 @@ fn rank_candidates(mut candidates: Vec, limit: usize) -> Vec Date: Tue, 14 Apr 2026 23:14:22 +0200 Subject: [PATCH 26/40] Skip invalid UUID rows in vector index results Mirror the brute-force path when libSQL vector-index rows contain invalid UUIDs by warning and skipping those rows instead of silently materialising nil UUIDs. This keeps ranked result ordering stable and avoids leaking malformed IDs into hybrid search results while leaving existing row-fetch and fallback behaviour unchanged. --- src/db/libsql/workspace/vector_search.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index 595450d3d..e0dd10ed1 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -231,9 +231,23 @@ async fn collect_vector_index_rows( }); } } { + let chunk_id: Uuid = match get_text(&row, 0).parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!("Invalid chunk_id UUID in memory_chunks: {e}"); + continue; + } + }; + let document_id: Uuid = match get_text(&row, 1).parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!("Invalid document_id UUID in memory_documents: {e}"); + continue; + } + }; results.push(RankedResult { - chunk_id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), + chunk_id, + document_id, document_path: get_text(&row, 2), content: get_text(&row, 3), rank: results.len() as u32 + 1, From 4f1a4a635387bd6218eb13de3825c7ade0f9ca36 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 01:02:13 +0200 Subject: [PATCH 27/40] Extract document row mapping helper Reduce duplication between the libSQL document lookup helpers by extracting a private row-to-document mapper that preserves the existing not-found behaviour. The change is limited to get_document_by_path and get_document_by_id and leaves their SQL, error messages, and call signatures unchanged. --- src/db/libsql/workspace/document_ops.rs | 32 +++++++++++++++++-------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index 70e6b45d2..6309ec58f 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -39,6 +39,18 @@ async fn fetch_first_row(mut rows: libsql::Rows) -> Result, }) } +/// Maps an optional row to a [`MemoryDocument`], returning `not_found` when +/// the row is absent. +fn document_from_row_or_not_found( + row: Option, + not_found: WorkspaceError, +) -> Result { + match row { + Some(row) => Ok(row_to_memory_document(&row)), + None => Err(not_found), + } +} + async fn drain_rows(mut rows: libsql::Rows, map_row: F) -> Result, WorkspaceError> where F: Fn(libsql::Row) -> T, @@ -78,13 +90,13 @@ pub(super) async fn get_document_by_path( reason: format!("Query failed: {}", e), })?; - match fetch_first_row(rows).await? { - Some(row) => Ok(row_to_memory_document(&row)), - None => Err(WorkspaceError::DocumentNotFound { + document_from_row_or_not_found( + fetch_first_row(rows).await?, + WorkspaceError::DocumentNotFound { doc_type: path.to_string(), user_id: scope.user_id.to_string(), - }), - } + }, + ) } pub(super) async fn get_document_by_id( @@ -106,13 +118,13 @@ pub(super) async fn get_document_by_id( reason: format!("Query failed: {}", e), })?; - match fetch_first_row(rows).await? { - Some(row) => Ok(row_to_memory_document(&row)), - None => Err(WorkspaceError::DocumentNotFound { + document_from_row_or_not_found( + fetch_first_row(rows).await?, + WorkspaceError::DocumentNotFound { doc_type: "unknown".to_string(), user_id: "unknown".to_string(), - }), - } + }, + ) } pub(super) async fn get_or_create_document_by_path( From 1e9d35d4ddd4157408fdb0da95c338b336026b76 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 01:10:32 +0200 Subject: [PATCH 28/40] Extract libsql hybrid search helpers Reduce the size of NativeWorkspaceStore::hybrid_search by extracting dedicated FTS and vector-result helpers in the libSQL workspace module. The refactor preserves the existing SQL, fallback behaviour, and tracing messages while moving the pre-fusion result logging to the helper boundaries. --- src/db/libsql/workspace/mod.rs | 88 ++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/src/db/libsql/workspace/mod.rs b/src/db/libsql/workspace/mod.rs index 040dd080f..caf7bfc92 100644 --- a/src/db/libsql/workspace/mod.rs +++ b/src/db/libsql/workspace/mod.rs @@ -13,7 +13,7 @@ use super::LibSqlBackend; use crate::db::{HybridSearchParams, InsertChunkParams, NativeWorkspaceStore}; use crate::error::WorkspaceError; use crate::workspace::{ - MemoryChunk, MemoryDocument, SearchResult, WorkspaceEntry, reciprocal_rank_fusion, + MemoryChunk, MemoryDocument, RankedResult, SearchResult, WorkspaceEntry, reciprocal_rank_fusion, }; use chunk_ops::{ delete_chunks, get_chunks_without_embeddings, insert_chunk, update_chunk_embedding, @@ -28,6 +28,58 @@ use vector_search::{ VectorIndexQuery, VectorSearchOutcome, VectorSearchQuery, vector_ranked_results, }; +/// Execute full-text search and log the result count. +/// +/// Delegates to [`fts_ranked_results`] and emits a `DEBUG` trace with the +/// pre-fusion limit for observability. +async fn fetch_fts_results( + conn: &libsql::Connection, + params: FtsSearchParams<'_>, +) -> Result, WorkspaceError> { + let limit = params.limit; + let results = fts_ranked_results(conn, params).await?; + tracing::debug!( + "FTS search returned {} results (pre-fusion limit: {})", + results.len(), + limit, + ); + Ok(results) +} + +impl LibSqlBackend { + /// Execute vector similarity search, falling back to brute-force cosine + /// similarity when the fixed-dimension vector index is unavailable. + async fn fetch_vector_results( + &self, + conn: &libsql::Connection, + index_query: VectorIndexQuery<'_>, + agent_id: Option, + ) -> Result, WorkspaceError> { + let brute_limit = index_query.limit as usize; + let user_id = index_query.user_id; + let embedding = index_query.embedding; + match vector_ranked_results(conn, &index_query).await? { + VectorSearchOutcome::Indexed(results) => Ok(results), + VectorSearchOutcome::IndexUnavailable => { + tracing::info!("Using brute-force vector search (no vector index)"); + self.brute_force_vector_search( + VectorSearchQuery { + user_id, + agent_id, + embedding, + }, + brute_limit, + ) + .await + .map_err(|e| { + tracing::warn!("Brute-force vector search failed: {e}"); + e + }) + } + } + } +} + impl NativeWorkspaceStore for LibSqlBackend { async fn get_document_by_path( &self, @@ -135,7 +187,7 @@ impl NativeWorkspaceStore for LibSqlBackend { let pre_limit = config.pre_fusion_limit as i64; let fts_results = if config.use_fts { - let results = fts_ranked_results( + fetch_fts_results( &conn, FtsSearchParams { user_id, @@ -144,48 +196,24 @@ impl NativeWorkspaceStore for LibSqlBackend { limit: pre_limit, }, ) - .await?; - tracing::debug!( - "FTS search returned {} results (pre-fusion limit: {})", - results.len(), - pre_limit - ); - results + .await? } else { Vec::new() }; let vector_results = if config.use_vector { if let Some(emb) = embedding { - match vector_ranked_results( + self.fetch_vector_results( &conn, - &VectorIndexQuery { + VectorIndexQuery { user_id, agent_id: agent_id_str.as_deref(), embedding: emb, limit: pre_limit, }, + agent_id, ) .await? - { - VectorSearchOutcome::Indexed(results) => results, - VectorSearchOutcome::IndexUnavailable => { - tracing::info!("Using brute-force vector search (no vector index)"); - self.brute_force_vector_search( - VectorSearchQuery { - user_id, - agent_id, - embedding: emb, - }, - pre_limit as usize, - ) - .await - .map_err(|e| { - tracing::warn!("Brute-force vector search failed: {e}"); - e - })? - } - } } else { Vec::new() } From 70384ff2e36ff70663e289897584b6f8df7d5c40 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 01:18:12 +0200 Subject: [PATCH 29/40] Harden libsql workspace row parsing Skip malformed chunk rows with explicit UUID warnings and align the document lookup helper with the requested doc_type/user_id signature. This preserves existing query behaviour and not-found semantics while avoiding silent nil-UUID fallbacks in libSQL workspace chunk reads. --- src/db/libsql/workspace/chunk_ops.rs | 26 +++++++++++++++++++++++-- src/db/libsql/workspace/document_ops.rs | 24 ++++++++--------------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/db/libsql/workspace/chunk_ops.rs b/src/db/libsql/workspace/chunk_ops.rs index 5d2d0669a..2f3459a05 100644 --- a/src/db/libsql/workspace/chunk_ops.rs +++ b/src/db/libsql/workspace/chunk_ops.rs @@ -146,13 +146,35 @@ pub(super) async fn get_chunks_without_embeddings( reason: format!("Query failed: {}", e), })? { + let raw_chunk_id = get_text(&row, 0); + let id: Uuid = match raw_chunk_id.parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!( + "Invalid chunk_id UUID in memory_chunks ('{}'): {e}", + raw_chunk_id + ); + continue; + } + }; + let raw_document_id = get_text(&row, 1); + let document_id: Uuid = match raw_document_id.parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!( + "Invalid document_id UUID in memory_chunks ('{}'): {e}", + raw_document_id + ); + continue; + } + }; let chunk_index = u32::try_from(get_i64(&row, 2)).map_err(|_| WorkspaceError::SearchFailed { reason: "memory_chunks.chunk_index must be non-negative".to_string(), })?; chunks.push(MemoryChunk { - id: get_text(&row, 0).parse().unwrap_or_default(), - document_id: get_text(&row, 1).parse().unwrap_or_default(), + id, + document_id, chunk_index, content: get_text(&row, 3), embedding: None, diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index 6309ec58f..bb8075641 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -43,11 +43,15 @@ async fn fetch_first_row(mut rows: libsql::Rows) -> Result, /// the row is absent. fn document_from_row_or_not_found( row: Option, - not_found: WorkspaceError, + doc_type: &str, + user_id: &str, ) -> Result { match row { Some(row) => Ok(row_to_memory_document(&row)), - None => Err(not_found), + None => Err(WorkspaceError::DocumentNotFound { + doc_type: doc_type.to_string(), + user_id: user_id.to_string(), + }), } } @@ -90,13 +94,7 @@ pub(super) async fn get_document_by_path( reason: format!("Query failed: {}", e), })?; - document_from_row_or_not_found( - fetch_first_row(rows).await?, - WorkspaceError::DocumentNotFound { - doc_type: path.to_string(), - user_id: scope.user_id.to_string(), - }, - ) + document_from_row_or_not_found(fetch_first_row(rows).await?, path, scope.user_id) } pub(super) async fn get_document_by_id( @@ -118,13 +116,7 @@ pub(super) async fn get_document_by_id( reason: format!("Query failed: {}", e), })?; - document_from_row_or_not_found( - fetch_first_row(rows).await?, - WorkspaceError::DocumentNotFound { - doc_type: "unknown".to_string(), - user_id: "unknown".to_string(), - }, - ) + document_from_row_or_not_found(fetch_first_row(rows).await?, "unknown", "unknown") } pub(super) async fn get_or_create_document_by_path( From 7b269bf110091c4d4108bdcebf0e6b0a8c2f7347 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 03:27:04 +0200 Subject: [PATCH 30/40] Extract libsql chunk row parser Reduce the size of get_chunks_without_embeddings by extracting the row-to-MemoryChunk parsing logic into a private helper. The refactor preserves the existing warning messages, integrity error text, and query behaviour while keeping malformed UUID rows skippable and negative chunk indexes fatal. --- src/db/libsql/workspace/chunk_ops.rs | 83 ++++++++++++++++------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/src/db/libsql/workspace/chunk_ops.rs b/src/db/libsql/workspace/chunk_ops.rs index 2f3459a05..a50f3e687 100644 --- a/src/db/libsql/workspace/chunk_ops.rs +++ b/src/db/libsql/workspace/chunk_ops.rs @@ -108,6 +108,52 @@ pub(super) async fn update_chunk_embedding( Ok(()) } +/// Parse a single `memory_chunks` row into a [`MemoryChunk`]. +/// +/// Returns `Ok(None)` and emits a `WARN` log when either UUID column +/// contains an invalid value (the row is silently skipped). +/// Returns `Err` when `chunk_index` is negative (a fatal data-integrity +/// violation). +fn parse_chunk_row(row: libsql::Row) -> Result, WorkspaceError> { + let raw_chunk_id = get_text(&row, 0); + let id: Uuid = match raw_chunk_id.parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!( + "Invalid chunk_id UUID in memory_chunks ('{}'): {e}", + raw_chunk_id + ); + return Ok(None); + } + }; + + let raw_document_id = get_text(&row, 1); + let document_id: Uuid = match raw_document_id.parse() { + Ok(id) => id, + Err(e) => { + tracing::warn!( + "Invalid document_id UUID in memory_chunks ('{}'): {e}", + raw_document_id + ); + return Ok(None); + } + }; + + let chunk_index = + u32::try_from(get_i64(&row, 2)).map_err(|_| WorkspaceError::SearchFailed { + reason: "memory_chunks.chunk_index must be non-negative".to_string(), + })?; + + Ok(Some(MemoryChunk { + id, + document_id, + chunk_index, + content: get_text(&row, 3), + embedding: None, + created_at: get_ts(&row, 4), + })) +} + pub(super) async fn get_chunks_without_embeddings( backend: &LibSqlBackend, user_id: &str, @@ -146,40 +192,9 @@ pub(super) async fn get_chunks_without_embeddings( reason: format!("Query failed: {}", e), })? { - let raw_chunk_id = get_text(&row, 0); - let id: Uuid = match raw_chunk_id.parse() { - Ok(id) => id, - Err(e) => { - tracing::warn!( - "Invalid chunk_id UUID in memory_chunks ('{}'): {e}", - raw_chunk_id - ); - continue; - } - }; - let raw_document_id = get_text(&row, 1); - let document_id: Uuid = match raw_document_id.parse() { - Ok(id) => id, - Err(e) => { - tracing::warn!( - "Invalid document_id UUID in memory_chunks ('{}'): {e}", - raw_document_id - ); - continue; - } - }; - let chunk_index = - u32::try_from(get_i64(&row, 2)).map_err(|_| WorkspaceError::SearchFailed { - reason: "memory_chunks.chunk_index must be non-negative".to_string(), - })?; - chunks.push(MemoryChunk { - id, - document_id, - chunk_index, - content: get_text(&row, 3), - embedding: None, - created_at: get_ts(&row, 4), - }); + if let Some(chunk) = parse_chunk_row(row)? { + chunks.push(chunk); + } } Ok(chunks) } From a54340eb047c47408656ffdb4d0f7ccf1a85afa5 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 04:36:22 +0200 Subject: [PATCH 31/40] Add libsql workspace module test coverage Add colocated module tests for the libSQL workspace helpers and\nexpand the developer guide with the internal patterns that now\nshape this backend.\n\nThis closes the coverage gap called out in review without growing the\ncentral integration test file further, and it documents the shared\ndatabase handle, workspace module layout, and parameter-object pattern\nthat recent refactors introduced. --- docs/developers-guide.md | 49 ++++++ src/db/libsql/workspace/chunk_ops.rs | 4 + src/db/libsql/workspace/chunk_ops_tests.rs | 164 ++++++++++++++++++ src/db/libsql/workspace/document_listing.rs | 73 ++++++++ src/db/libsql/workspace/document_ops.rs | 75 +------- src/db/libsql/workspace/document_ops_tests.rs | 113 ++++++++++++ src/db/libsql/workspace/fts.rs | 4 + src/db/libsql/workspace/fts_tests.rs | 79 +++++++++ src/db/libsql/workspace/tests.rs | 41 +++++ src/db/libsql/workspace/vector_search.rs | 4 + .../libsql/workspace/vector_search_tests.rs | 83 +++++++++ 11 files changed, 621 insertions(+), 68 deletions(-) create mode 100644 src/db/libsql/workspace/chunk_ops_tests.rs create mode 100644 src/db/libsql/workspace/document_listing.rs create mode 100644 src/db/libsql/workspace/document_ops_tests.rs create mode 100644 src/db/libsql/workspace/fts_tests.rs create mode 100644 src/db/libsql/workspace/vector_search_tests.rs diff --git a/docs/developers-guide.md b/docs/developers-guide.md index 329b35bda..8fc2242a7 100644 --- a/docs/developers-guide.md +++ b/docs/developers-guide.md @@ -332,6 +332,24 @@ artefacts behind on disk. Do **not** use `new_local()` in unit tests; reserve it for integration tests or tests that specifically require filesystem-path behaviour. + +### LibSqlDatabase shared handles + +`LibSqlBackend` owns an `Arc` rather than a raw libSQL +database handle. That wrapper exists for two reasons: + +- satellite stores such as the secrets and WASM stores can call + `shared_db()` and open their own per-operation connections without + reopening a different database +- temp-file-backed test databases created by `new_memory()` keep their + cleanup metadata on the shared handle, so the `.db`, `-wal`, and `-shm` + files live until the final shared owner is dropped + +If a constructor or store used to accept a backend directly and now accepts +`Arc`, that is usually a signal that it should share the same +underlying file while creating its own connections via +`LibSqlDatabase::connect()`. + ## Dispatcher Architecture The dispatcher orchestrates interactive chat turns by preparing an LLM @@ -752,6 +770,26 @@ port-allocation races: Tests should pre-bind via `TcpListener::bind("127.0.0.1:0")` and pass the result to these helpers instead of relying on `start()` / `restart_with_addr()` to pick a free port. + + +### Workspace store module structure + +The libSQL workspace store is split by concern under +`src/db/libsql/workspace/`: + +- `mod.rs` owns the `NativeWorkspaceStore` implementation and hybrid-search + orchestration +- `document_ops.rs` owns document CRUD and directory-style listing helpers +- `chunk_ops.rs` owns chunk insertion, embedding updates, and chunk polling +- `fts.rs` owns FTS-only ranking queries +- `vector_search.rs` owns vector-index and brute-force similarity helpers +- `tests.rs` keeps cross-module integration coverage for the hybrid pipeline + +Prefer adding logic beside the feature it serves rather than growing +`mod.rs`. Module-local tests should live with the module they exercise, while +pipeline tests belong in `workspace/tests.rs`. + + ### Key APIs - `RunLoopCtx`: per-run container that carries the session handle, @@ -788,3 +826,14 @@ export DATABASE_URL=postgres://localhost/ironclaw Adjust the connection string if the local PostgreSQL instance requires a different host, user, or password. + +### Parameter-object structs in store helpers + +The workspace helpers use small parameter structs such as `AgentScope`, +`FtsSearchParams`, `VectorSearchQuery`, and `VectorIndexQuery` to keep helper +arity below the repository limit and to make call sites describe intent. + +Use this pattern when a helper repeatedly threads the same related values +through several internal calls. Keep these structs private or `pub(super)` +unless a wider API boundary genuinely needs them, and prefer names that +describe the query or scope they model instead of generic `Options` suffixes. diff --git a/src/db/libsql/workspace/chunk_ops.rs b/src/db/libsql/workspace/chunk_ops.rs index a50f3e687..250f0f025 100644 --- a/src/db/libsql/workspace/chunk_ops.rs +++ b/src/db/libsql/workspace/chunk_ops.rs @@ -1,5 +1,9 @@ //! Chunk-oriented workspace-store helpers for the libSQL backend. +#[cfg(test)] +#[path = "chunk_ops_tests.rs"] +mod tests; + use libsql::params; use uuid::Uuid; diff --git a/src/db/libsql/workspace/chunk_ops_tests.rs b/src/db/libsql/workspace/chunk_ops_tests.rs new file mode 100644 index 000000000..c599d017e --- /dev/null +++ b/src/db/libsql/workspace/chunk_ops_tests.rs @@ -0,0 +1,164 @@ +//! Tests for libSQL workspace chunk helpers. + +use chrono::{TimeZone, Utc}; +use libsql::params; +use uuid::Uuid; + +use super::*; +use crate::db::NativeDatabase; + +#[tokio::test] +async fn parse_chunk_row_maps_valid_rows() { + let chunk_id = Uuid::new_v4(); + let document_id = Uuid::new_v4(); + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + let mut rows = conn + .query( + "SELECT ?1, ?2, ?3, ?4, ?5", + params![ + chunk_id.to_string(), + document_id.to_string(), + 7i64, + "chunk body", + "2026-03-07T12:34:56.000Z" + ], + ) + .await + .expect("failed to query literal chunk row"); + let row = rows + .next() + .await + .expect("failed to fetch literal chunk row") + .expect("expected one literal chunk row"); + + let chunk = parse_chunk_row(row) + .expect("valid chunk row should parse") + .expect("valid chunk row should not be skipped"); + + assert_eq!(chunk.id, chunk_id); + assert_eq!(chunk.document_id, document_id); + assert_eq!(chunk.chunk_index, 7); + assert_eq!(chunk.content, "chunk body"); + assert_eq!( + chunk.created_at, + Utc.with_ymd_and_hms(2026, 3, 7, 12, 34, 56) + .single() + .expect("timestamp should be valid"), + ); +} + +#[tokio::test] +async fn parse_chunk_row_skips_invalid_chunk_uuid() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + let mut rows = conn + .query( + "SELECT 'not-a-uuid', ?1, 0, 'chunk body', '2026-03-07T12:34:56.000Z'", + params![Uuid::new_v4().to_string()], + ) + .await + .expect("failed to query literal chunk row"); + let row = rows + .next() + .await + .expect("failed to fetch literal chunk row") + .expect("expected one literal chunk row"); + + assert!( + parse_chunk_row(row) + .expect("invalid chunk id should be skipped cleanly") + .is_none() + ); +} + +#[tokio::test] +async fn parse_chunk_row_rejects_negative_chunk_index() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + let mut rows = conn + .query( + "SELECT ?1, ?2, -1, 'chunk body', '2026-03-07T12:34:56.000Z'", + params![Uuid::new_v4().to_string(), Uuid::new_v4().to_string()], + ) + .await + .expect("failed to query literal chunk row"); + let row = rows + .next() + .await + .expect("failed to fetch literal chunk row") + .expect("expected one literal chunk row"); + + let error = parse_chunk_row(row).expect_err("negative chunk index must fail"); + assert!(matches!( + error, + WorkspaceError::SearchFailed { reason } + if reason == "memory_chunks.chunk_index must be non-negative" + )); +} + +#[tokio::test] +async fn insert_chunk_stores_empty_embeddings_as_null() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + let document_id = Uuid::new_v4(); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + conn.execute( + "INSERT INTO memory_documents (id, user_id, agent_id, path, content, metadata) VALUES (?1, 'default', NULL, 'notes/chunk.md', '', '{}')", + params![document_id.to_string()], + ) + .await + .expect("failed to insert document"); + + let chunk_id = insert_chunk( + &backend, + crate::db::InsertChunkParams { + document_id, + chunk_index: 0, + content: "chunk body", + embedding: Some(&[]), + }, + ) + .await + .expect("failed to insert chunk with empty embedding"); + + let mut rows = conn + .query( + "SELECT embedding FROM memory_chunks WHERE id = ?1", + params![chunk_id.to_string()], + ) + .await + .expect("failed to query inserted chunk"); + let row = rows + .next() + .await + .expect("failed to fetch inserted chunk row") + .expect("expected inserted chunk row"); + assert!(matches!( + row.get_value(0).expect("failed to read embedding value"), + libsql::Value::Null + )); +} diff --git a/src/db/libsql/workspace/document_listing.rs b/src/db/libsql/workspace/document_listing.rs new file mode 100644 index 000000000..3e3ce7f4c --- /dev/null +++ b/src/db/libsql/workspace/document_listing.rs @@ -0,0 +1,73 @@ +//! Listing helpers for libSQL workspace document queries. + +use std::collections::HashMap; + +use crate::workspace::WorkspaceEntry; + +pub(super) fn normalise_dir_prefix(directory: &str) -> String { + if !directory.is_empty() && !directory.ends_with('/') { + format!("{}/", directory) + } else { + directory.to_string() + } +} + +pub(super) fn dir_like_pattern(dir: &str) -> String { + if dir.is_empty() { + "%".to_string() + } else { + format!("{}%", dir) + } +} + +pub(super) fn resolve_entry(full_path: &str, dir: &str) -> Option<(String, bool, String)> { + let relative = if dir.is_empty() { + full_path + } else { + full_path.strip_prefix(dir)? + }; + let child_name = if let Some(slash_pos) = relative.find('/') { + &relative[..slash_pos] + } else { + relative + }; + if child_name.is_empty() { + return None; + } + let is_dir = relative.contains('/'); + let entry_path = if dir.is_empty() { + child_name.to_string() + } else { + format!("{}{}", dir, child_name) + }; + Some((child_name.to_string(), is_dir, entry_path)) +} + +pub(super) fn merge_entry( + entries_map: &mut HashMap, + child_name: String, + entry_path: String, + is_dir: bool, + updated_at: Option>, + content_preview: Option, +) { + entries_map + .entry(child_name) + .and_modify(|entry| { + if is_dir { + entry.is_directory = true; + entry.content_preview = None; + } + if let (Some(existing), Some(new)) = (&entry.updated_at, &updated_at) + && new > existing + { + entry.updated_at = Some(*new); + } + }) + .or_insert(WorkspaceEntry { + path: entry_path, + is_directory: is_dir, + updated_at, + content_preview: if is_dir { None } else { content_preview }, + }); +} diff --git a/src/db/libsql/workspace/document_ops.rs b/src/db/libsql/workspace/document_ops.rs index bb8075641..2103dfbd0 100644 --- a/src/db/libsql/workspace/document_ops.rs +++ b/src/db/libsql/workspace/document_ops.rs @@ -1,5 +1,11 @@ //! Document-oriented workspace-store helpers for the libSQL backend. +#[path = "document_listing.rs"] +mod listing; +#[cfg(test)] +#[path = "document_ops_tests.rs"] +mod tests; + use std::collections::HashMap; use chrono::Utc; @@ -12,6 +18,7 @@ use super::super::{ use crate::db::NativeWorkspaceStore; use crate::error::WorkspaceError; use crate::workspace::{MemoryDocument, WorkspaceEntry}; +use listing::{dir_like_pattern, merge_entry, normalise_dir_prefix, resolve_entry}; /// Identifies the user/agent context for a workspace document query. /// @@ -240,74 +247,6 @@ pub(super) async fn delete_document_by_path( Ok(()) } -fn normalise_dir_prefix(directory: &str) -> String { - if !directory.is_empty() && !directory.ends_with('/') { - format!("{}/", directory) - } else { - directory.to_string() - } -} - -fn dir_like_pattern(dir: &str) -> String { - if dir.is_empty() { - "%".to_string() - } else { - format!("{}%", dir) - } -} - -fn resolve_entry(full_path: &str, dir: &str) -> Option<(String, bool, String)> { - let relative = if dir.is_empty() { - full_path - } else { - full_path.strip_prefix(dir)? - }; - let child_name = if let Some(slash_pos) = relative.find('/') { - &relative[..slash_pos] - } else { - relative - }; - if child_name.is_empty() { - return None; - } - let is_dir = relative.contains('/'); - let entry_path = if dir.is_empty() { - child_name.to_string() - } else { - format!("{}{}", dir, child_name) - }; - Some((child_name.to_string(), is_dir, entry_path)) -} - -fn merge_entry( - entries_map: &mut HashMap, - child_name: String, - entry_path: String, - is_dir: bool, - updated_at: Option>, - content_preview: Option, -) { - entries_map - .entry(child_name) - .and_modify(|entry| { - if is_dir { - entry.is_directory = true; - entry.content_preview = None; - } - if let (Some(existing), Some(new)) = (&entry.updated_at, &updated_at) - && new > existing - { - entry.updated_at = Some(*new); - } - }) - .or_insert(WorkspaceEntry { - path: entry_path, - is_directory: is_dir, - updated_at, - content_preview: if is_dir { None } else { content_preview }, - }); -} - pub(super) async fn list_directory( backend: &LibSqlBackend, scope: &AgentScope<'_>, diff --git a/src/db/libsql/workspace/document_ops_tests.rs b/src/db/libsql/workspace/document_ops_tests.rs new file mode 100644 index 000000000..acd8ed919 --- /dev/null +++ b/src/db/libsql/workspace/document_ops_tests.rs @@ -0,0 +1,113 @@ +//! Tests for libSQL workspace document helpers. + +use libsql::params; +use uuid::Uuid; + +use super::*; +use crate::db::{NativeDatabase, NativeWorkspaceStore}; + +#[tokio::test] +async fn document_from_row_or_not_found_maps_present_rows() { + let id = Uuid::new_v4(); + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + let mut rows = conn + .query( + "SELECT ?1, 'default', NULL, 'notes/doc.md', 'hello', '2026-03-07T12:34:56.000Z', '2026-03-07T12:35:56.000Z', '{\"kind\":\"note\"}'", + params![id.to_string()], + ) + .await + .expect("failed to query literal document row"); + let row = rows + .next() + .await + .expect("failed to fetch literal document row") + .expect("expected one literal document row"); + + let document = document_from_row_or_not_found(Some(row), "notes/doc.md", "default") + .expect("present row should map to document"); + + assert_eq!(document.id, id); + assert_eq!(document.user_id, "default"); + assert_eq!(document.path, "notes/doc.md"); + assert_eq!(document.content, "hello"); +} + +#[test] +fn document_from_row_or_not_found_returns_not_found_error() { + let error = document_from_row_or_not_found(None, "notes/missing.md", "default") + .expect_err("missing row should become not-found"); + assert!(matches!( + error, + WorkspaceError::DocumentNotFound { doc_type, user_id } + if doc_type == "notes/missing.md" && user_id == "default" + )); +} + +#[tokio::test] +async fn get_document_by_path_returns_not_found_for_missing_document() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + + let error = get_document_by_path( + &backend, + &AgentScope { + user_id: "default", + agent_id: None, + }, + "notes/missing.md", + ) + .await + .expect_err("missing document lookup should fail"); + assert!(matches!( + error, + WorkspaceError::DocumentNotFound { doc_type, user_id } + if doc_type == "notes/missing.md" && user_id == "default" + )); +} + +#[tokio::test] +async fn list_directory_merges_file_and_directory_entries() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + backend + .get_or_create_document_by_path("default", None, "notes/alpha.md") + .await + .expect("failed to create alpha doc"); + backend + .get_or_create_document_by_path("default", None, "notes/nested/beta.md") + .await + .expect("failed to create beta doc"); + + let entries = list_directory( + &backend, + &AgentScope { + user_id: "default", + agent_id: None, + }, + "notes", + ) + .await + .expect("failed to list directory"); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].path, "notes/alpha.md"); + assert!(!entries[0].is_directory); + assert_eq!(entries[1].path, "notes/nested"); + assert!(entries[1].is_directory); +} diff --git a/src/db/libsql/workspace/fts.rs b/src/db/libsql/workspace/fts.rs index d47f77fd0..bd56549d4 100644 --- a/src/db/libsql/workspace/fts.rs +++ b/src/db/libsql/workspace/fts.rs @@ -1,5 +1,9 @@ //! Full-text-search helpers for libSQL workspace retrieval. +#[cfg(test)] +#[path = "fts_tests.rs"] +mod tests; + use libsql::params; use super::super::get_text; diff --git a/src/db/libsql/workspace/fts_tests.rs b/src/db/libsql/workspace/fts_tests.rs new file mode 100644 index 000000000..255802369 --- /dev/null +++ b/src/db/libsql/workspace/fts_tests.rs @@ -0,0 +1,79 @@ +//! Tests for libSQL workspace full-text search helpers. + +use super::super::LibSqlBackend; +use super::*; +use crate::db::{InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; + +#[tokio::test] +async fn fts_ranked_results_returns_ranked_matches() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + let document = backend + .get_or_create_document_by_path("default", None, "notes/fts.md") + .await + .expect("failed to create FTS document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "semantic retrieval through full text", + embedding: None, + }) + .await + .expect("failed to insert FTS chunk"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + + let results = fts_ranked_results( + &conn, + FtsSearchParams { + user_id: "default", + agent_id: None, + query: "semantic", + limit: 5, + }, + ) + .await + .expect("FTS query should succeed"); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].document_path, "notes/fts.md"); + assert_eq!(results[0].content, "semantic retrieval through full text"); + assert_eq!(results[0].rank, 1); +} + +#[tokio::test] +async fn fts_ranked_results_surfaces_query_errors() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + + let error = fts_ranked_results( + &conn, + FtsSearchParams { + user_id: "default", + agent_id: None, + query: "semantic", + limit: 5, + }, + ) + .await + .expect_err("FTS query should fail before migrations"); + + assert!(matches!( + error, + crate::error::WorkspaceError::SearchFailed { reason } + if reason.starts_with("FTS query failed:") + )); +} diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index cf210f1d1..3691cbc61 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -175,3 +175,44 @@ async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { assert_eq!(results.len(), 1); assert_eq!(results[0].content, "same-dimension chunk"); } + +#[tokio::test] +async fn hybrid_search_returns_fts_only_results_without_embedding() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/fts-only.md") + .await + .expect("failed to create FTS-only search document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "keyword only workspace search", + embedding: None, + }) + .await + .expect("failed to insert FTS-only chunk"); + + let results = backend + .hybrid_search(HybridSearchParams { + user_id: "default", + agent_id: None, + query: "keyword", + embedding: None, + config: &SearchConfig::default().with_limit(5), + }) + .await + .expect("failed to execute FTS-only hybrid search"); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].document_path, "notes/fts-only.md"); + assert_eq!(results[0].fts_rank, Some(1)); + assert_eq!(results[0].vector_rank, None); +} diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index e0dd10ed1..a83de3d3e 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -1,5 +1,9 @@ //! Vector-search helpers for libSQL workspace retrieval. +#[cfg(test)] +#[path = "vector_search_tests.rs"] +mod tests; + use libsql::params; use uuid::Uuid; diff --git a/src/db/libsql/workspace/vector_search_tests.rs b/src/db/libsql/workspace/vector_search_tests.rs new file mode 100644 index 000000000..03f7ef79c --- /dev/null +++ b/src/db/libsql/workspace/vector_search_tests.rs @@ -0,0 +1,83 @@ +//! Tests for libSQL workspace vector-search helpers. + +use libsql::params; +use uuid::Uuid; + +use super::super::LibSqlBackend; +use super::*; + +#[test] +fn embedding_to_vector_json_serialises_embeddings_in_index_format() { + assert_eq!( + embedding_to_vector_json(&[1.0, -2.5, 0.25]), + "[1,-2.5,0.25]" + ); +} + +#[test] +fn rank_candidates_breaks_similarity_ties_by_chunk_id() { + let earlier_chunk = Uuid::from_u128(1); + let later_chunk = Uuid::from_u128(2); + + let results = rank_candidates( + vec![ + Candidate { + chunk_id: later_chunk, + document_id: Uuid::new_v4(), + document_path: "notes/later.md".to_string(), + content: "later".to_string(), + similarity: 0.9, + }, + Candidate { + chunk_id: earlier_chunk, + document_id: Uuid::new_v4(), + document_path: "notes/earlier.md".to_string(), + content: "earlier".to_string(), + similarity: 0.9, + }, + ], + 2, + ); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].chunk_id, earlier_chunk); + assert_eq!(results[0].rank, 1); + assert_eq!(results[1].chunk_id, later_chunk); + assert_eq!(results[1].rank, 2); +} + +#[tokio::test] +async fn collect_vector_index_rows_skips_rows_with_invalid_uuids() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create temp-file-backed backend"); + let conn = backend + .connect() + .await + .expect("failed to open libsql connection"); + let valid_chunk_id = Uuid::new_v4(); + let valid_document_id = Uuid::new_v4(); + let rows = conn + .query( + "SELECT ?1, ?2, 'notes/good.md', 'good chunk' UNION ALL SELECT 'bad-uuid', ?3, 'notes/bad.md', 'bad chunk'", + params![ + valid_chunk_id.to_string(), + valid_document_id.to_string(), + valid_document_id.to_string() + ], + ) + .await + .expect("failed to query synthetic vector rows"); + + let outcome = collect_vector_index_rows(rows, 5) + .await + .expect("vector row collection should succeed"); + + let VectorSearchOutcome::Indexed(results) = outcome else { + panic!("expected indexed vector-search outcome"); + }; + assert_eq!(results.len(), 1); + assert_eq!(results[0].chunk_id, valid_chunk_id); + assert_eq!(results[0].document_id, valid_document_id); + assert_eq!(results[0].rank, 1); +} From 54ad178fbb8f58215ea98c0bcb11b337c486ca94 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 15:19:47 +0200 Subject: [PATCH 32/40] Add central libsql workspace error-path tests Add the missing central workspace-store tests for invalid chunk\nUUID handling, negative chunk indices, and document-not-found\nlookups.\n\nThis keeps the review-requested coverage in workspace/tests.rs\nwithout changing production code, and documents that the existing\nvector-index fallback test also covers the IndexUnavailable path. --- src/db/libsql/workspace/tests.rs | 129 +++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 3691cbc61..0989dda60 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -1,5 +1,7 @@ //! Tests for the libSQL workspace-store module split. +use libsql::params; + use super::super::LibSqlBackend; use super::vector_search::{ VectorIndexQuery, VectorSearchOutcome, VectorSearchQuery, deserialize_embedding, @@ -57,6 +59,133 @@ fn test_deserialize_embedding_negative_values() { assert!((result[2] - 2.75).abs() < 0.001); } +#[tokio::test] +async fn get_chunks_without_embeddings_skips_invalid_chunk_id_uuid() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory backend"); + backend + .run_migrations() + .await + .expect("failed to run migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/bad-chunk-uuid.md") + .await + .expect("failed to create document"); + + let conn = backend.connect().await.expect("failed to connect"); + conn.execute( + "INSERT INTO memory_chunks (id, document_id, chunk_index, content, created_at) \ + VALUES ('not-a-uuid', ?1, 0, 'bad chunk', datetime('now'))", + params![document.id.to_string()], + ) + .await + .expect("failed to insert bad-chunk-id row"); + + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 1, + content: "valid chunk", + embedding: None, + }) + .await + .expect("failed to insert valid chunk"); + + let chunks = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("get_chunks_without_embeddings must not fail on invalid UUIDs"); + + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].content, "valid chunk"); +} + +#[tokio::test] +async fn get_chunks_without_embeddings_errors_on_negative_chunk_index() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory backend"); + backend + .run_migrations() + .await + .expect("failed to run migrations"); + + let document = backend + .get_or_create_document_by_path("default", None, "notes/neg-idx.md") + .await + .expect("failed to create document"); + + let conn = backend.connect().await.expect("failed to connect"); + conn.execute( + "INSERT INTO memory_chunks (id, document_id, chunk_index, content, created_at) \ + VALUES (?1, ?2, -1, 'negative index', datetime('now'))", + params![uuid::Uuid::new_v4().to_string(), document.id.to_string()], + ) + .await + .expect("failed to insert negative-index row"); + + let result = backend + .get_chunks_without_embeddings("default", None, 10) + .await; + + assert!( + result.is_err(), + "get_chunks_without_embeddings must return Err for negative chunk_index" + ); +} + +#[tokio::test] +async fn get_document_by_path_returns_not_found_for_missing_document() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory backend"); + backend + .run_migrations() + .await + .expect("failed to run migrations"); + + let result = backend + .get_document_by_path("default", None, "does/not/exist.md") + .await; + + assert!( + matches!( + result, + Err(crate::error::WorkspaceError::DocumentNotFound { .. }) + ), + "expected DocumentNotFound, got {:?}", + result + ); +} + +#[tokio::test] +async fn get_document_by_id_returns_not_found_for_unknown_id() { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory backend"); + backend + .run_migrations() + .await + .expect("failed to run migrations"); + + let result = backend.get_document_by_id(uuid::Uuid::new_v4()).await; + + assert!( + matches!( + result, + Err(crate::error::WorkspaceError::DocumentNotFound { .. }) + ), + "expected DocumentNotFound, got {:?}", + result + ); +} + +// This test also validates the `collect_vector_index_rows` → +// IndexUnavailable path: the pre-condition assertion confirms +// vector_ranked_results returns IndexUnavailable before the brute-force +// fallback assertions begin. #[tokio::test] async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { let backend = LibSqlBackend::new_memory() From d6f0e137f70ae4faacff93b0a4a7357dced3127c Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 15:28:45 +0200 Subject: [PATCH 33/40] Extract libsql workspace assertion helpers Reduce large assertion blocks in the central libSQL workspace tests\nby extracting shared assertion helpers for embedding comparisons\nand single-result hybrid-search checks. This keeps the test\nbehaviour unchanged while addressing the CodeScene finding. --- src/db/libsql/workspace/tests.rs | 58 ++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 0989dda60..7263eb1a9 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -10,6 +10,39 @@ use super::vector_search::{ use crate::db::{HybridSearchParams, InsertChunkParams, NativeDatabase, NativeWorkspaceStore}; use crate::workspace::SearchConfig; +/// Assert that `actual` has the same length as `expected` and that every +/// element is within floating-point tolerance. +fn assert_embedding_approx_eq(actual: &[f32], expected: &[f32]) { + assert_eq!( + actual.len(), + expected.len(), + "embedding length mismatch: got {}, expected {}", + actual.len(), + expected.len(), + ); + for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() { + assert!( + (a - e).abs() < 0.001, + "embedding[{i}]: got {a}, expected {e} (tolerance 0.001)", + ); + } +} + +/// Assert that `results` contains exactly one entry whose `document_path`, +/// `fts_rank`, and `vector_rank` match the supplied values. +fn assert_sole_search_result( + results: &[crate::workspace::SearchResult], + expected_path: &str, + expected_fts_rank: Option, + expected_vector_rank: Option, +) { + assert_eq!(results.len(), 1, "expected exactly one search result"); + let r = &results[0]; + assert_eq!(r.document_path, expected_path, "document_path mismatch"); + assert_eq!(r.fts_rank, expected_fts_rank, "fts_rank mismatch"); + assert_eq!(r.vector_rank, expected_vector_rank, "vector_rank mismatch"); +} + #[test] fn test_deserialize_embedding_valid() { let floats = [1.0f32, 2.0, 3.0]; @@ -17,10 +50,7 @@ fn test_deserialize_embedding_valid() { let result = deserialize_embedding(&bytes); - assert_eq!(result.len(), 3); - assert!((result[0] - 1.0).abs() < 0.001); - assert!((result[1] - 2.0).abs() < 0.001); - assert!((result[2] - 3.0).abs() < 0.001); + assert_embedding_approx_eq(&result, &[1.0, 2.0, 3.0]); } #[test] @@ -53,10 +83,7 @@ fn test_deserialize_embedding_negative_values() { let result = deserialize_embedding(&bytes); - assert_eq!(result.len(), 3); - assert!((result[0] - (-1.5)).abs() < 0.001); - assert!((result[1] - 0.0).abs() < 0.001); - assert!((result[2] - 2.75).abs() < 0.001); + assert_embedding_approx_eq(&result, &[-1.5, 0.0, 2.75]); } #[tokio::test] @@ -245,11 +272,11 @@ async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { .await .expect("failed to execute hybrid search"); - assert_eq!(results.len(), 1); - assert_eq!(results[0].document_path, "notes/search.md"); - assert_eq!(results[0].fts_rank, Some(1)); - assert_eq!(results[0].vector_rank, Some(1)); - assert!(results[0].is_hybrid()); + assert_sole_search_result(&results, "notes/search.md", Some(1), Some(1)); + assert!( + results[0].is_hybrid(), + "brute-force result must be flagged as hybrid" + ); } #[tokio::test] @@ -340,8 +367,5 @@ async fn hybrid_search_returns_fts_only_results_without_embedding() { .await .expect("failed to execute FTS-only hybrid search"); - assert_eq!(results.len(), 1); - assert_eq!(results[0].document_path, "notes/fts-only.md"); - assert_eq!(results[0].fts_rank, Some(1)); - assert_eq!(results[0].vector_rank, None); + assert_sole_search_result(&results, "notes/fts-only.md", Some(1), None); } From 32cd332c92dd86ad4738d3e1bb9f29ac59165aec Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 15:33:58 +0200 Subject: [PATCH 34/40] Extract libsql workspace test setup helpers Reduce duplication in the central libSQL workspace tests by\nextracting shared backend setup and DocumentNotFound\nassertion helpers. This keeps the existing test coverage and\nbehaviour intact while addressing the repeated test boilerplate. --- src/db/libsql/workspace/tests.rs | 103 ++++++++++--------------------- 1 file changed, 34 insertions(+), 69 deletions(-) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 7263eb1a9..303a75885 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -43,6 +43,31 @@ fn assert_sole_search_result( assert_eq!(r.vector_rank, expected_vector_rank, "vector_rank mismatch"); } +/// Create a temp-file-backed [`LibSqlBackend`] with migrations applied, +/// ready for use in unit tests. +async fn setup_backend() -> LibSqlBackend { + let backend = LibSqlBackend::new_memory() + .await + .expect("failed to create in-memory libsql backend"); + backend + .run_migrations() + .await + .expect("failed to run libsql migrations"); + backend +} + +/// Assert that `result` is the `DocumentNotFound` error variant. +fn assert_document_not_found(result: Result) { + assert!( + matches!( + result, + Err(crate::error::WorkspaceError::DocumentNotFound { .. }) + ), + "expected DocumentNotFound, got {:?}", + result + ); +} + #[test] fn test_deserialize_embedding_valid() { let floats = [1.0f32, 2.0, 3.0]; @@ -88,13 +113,7 @@ fn test_deserialize_embedding_negative_values() { #[tokio::test] async fn get_chunks_without_embeddings_skips_invalid_chunk_id_uuid() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory backend"); - backend - .run_migrations() - .await - .expect("failed to run migrations"); + let backend = setup_backend().await; let document = backend .get_or_create_document_by_path("default", None, "notes/bad-chunk-uuid.md") @@ -131,13 +150,7 @@ async fn get_chunks_without_embeddings_skips_invalid_chunk_id_uuid() { #[tokio::test] async fn get_chunks_without_embeddings_errors_on_negative_chunk_index() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory backend"); - backend - .run_migrations() - .await - .expect("failed to run migrations"); + let backend = setup_backend().await; let document = backend .get_or_create_document_by_path("default", None, "notes/neg-idx.md") @@ -165,48 +178,18 @@ async fn get_chunks_without_embeddings_errors_on_negative_chunk_index() { #[tokio::test] async fn get_document_by_path_returns_not_found_for_missing_document() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory backend"); - backend - .run_migrations() - .await - .expect("failed to run migrations"); - + let backend = setup_backend().await; let result = backend .get_document_by_path("default", None, "does/not/exist.md") .await; - - assert!( - matches!( - result, - Err(crate::error::WorkspaceError::DocumentNotFound { .. }) - ), - "expected DocumentNotFound, got {:?}", - result - ); + assert_document_not_found(result); } #[tokio::test] async fn get_document_by_id_returns_not_found_for_unknown_id() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory backend"); - backend - .run_migrations() - .await - .expect("failed to run migrations"); - + let backend = setup_backend().await; let result = backend.get_document_by_id(uuid::Uuid::new_v4()).await; - - assert!( - matches!( - result, - Err(crate::error::WorkspaceError::DocumentNotFound { .. }) - ), - "expected DocumentNotFound, got {:?}", - result - ); + assert_document_not_found(result); } // This test also validates the `collect_vector_index_rows` → @@ -215,13 +198,7 @@ async fn get_document_by_id_returns_not_found_for_unknown_id() { // fallback assertions begin. #[tokio::test] async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory libsql backend"); - backend - .run_migrations() - .await - .expect("failed to run libsql migrations"); + let backend = setup_backend().await; let document = backend .get_or_create_document_by_path("default", None, "notes/search.md") @@ -281,13 +258,7 @@ async fn hybrid_search_uses_brute_force_when_vector_index_is_unavailable() { #[tokio::test] async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory libsql backend"); - backend - .run_migrations() - .await - .expect("failed to run libsql migrations"); + let backend = setup_backend().await; let document = backend .get_or_create_document_by_path("default", None, "notes/mixed-dim.md") @@ -334,13 +305,7 @@ async fn brute_force_vector_search_skips_mismatched_embedding_dimensions() { #[tokio::test] async fn hybrid_search_returns_fts_only_results_without_embedding() { - let backend = LibSqlBackend::new_memory() - .await - .expect("failed to create in-memory libsql backend"); - backend - .run_migrations() - .await - .expect("failed to run libsql migrations"); + let backend = setup_backend().await; let document = backend .get_or_create_document_by_path("default", None, "notes/fts-only.md") From ee23d0d00bcd20051b9ec2977fa9ad7179680b96 Mon Sep 17 00:00:00 2001 From: leynos Date: Wed, 15 Apr 2026 20:04:31 +0200 Subject: [PATCH 35/40] Fix rebased libsql and webhook helpers Restore the libsql backend module shape that this branch expects after\nrebasing onto origin/main, and add the missing terminal persistence\nforwarder required by the updated NativeDatabase trait.\n\nAlso expose the webhook listener test helpers under the test-helpers\nfeature so integration tests can keep using the pre-bound listener path\nafter the rebase. --- src/channels/webhook_server.rs | 4 +- src/db/libsql/mod.rs | 150 ++++++++++++++++----------------- 2 files changed, 75 insertions(+), 79 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 90464f064..00a322572 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -165,7 +165,7 @@ impl WebhookServer { /// test port allocation and the server bind. That makes it suitable for /// tests that reserve ephemeral ports before handing ownership to the /// server. - #[cfg(test)] + #[cfg(any(test, feature = "test-helpers"))] pub async fn start_with_listener( &mut self, listener: tokio::net::TcpListener, @@ -224,7 +224,7 @@ impl WebhookServer { /// old listener before calling [`Self::spawn_with_listener`], so there is /// no rollback path if spawning were to fail. That trade-off is acceptable /// in tests because [`Self::spawn_with_listener`] is infallible. - #[cfg(test)] + #[cfg(any(test, feature = "test-helpers"))] pub async fn restart_with_listener( &mut self, listener: tokio::net::TcpListener, diff --git a/src/db/libsql/mod.rs b/src/db/libsql/mod.rs index 197a9d613..4f62f047c 100644 --- a/src/db/libsql/mod.rs +++ b/src/db/libsql/mod.rs @@ -21,6 +21,22 @@ mod settings; mod tool_failures; mod workspace; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::db::NativeDatabase; +use crate::error::DatabaseError; +use libsql::{Connection, Database as RawLibSqlDatabase}; +use uuid::Uuid; + +use crate::db::libsql_migrations; +pub(crate) use helpers::{ + fmt_opt_ts, fmt_ts, get_i64, get_json, get_opt_bool, get_opt_text, get_opt_ts, get_text, + get_ts, opt_text, opt_text_owned, parse_job_state, +}; +pub(crate) use row_conversion::row_to_memory_document; + /// Shared libSQL database handle. /// /// Wraps the underlying [`RawLibSqlDatabase`] plus optional temp-file metadata @@ -38,11 +54,61 @@ pub struct LibSqlDatabase { /// [`Drop`]. temp_path: Option, } -pub(crate) use helpers::{ - fmt_opt_ts, fmt_ts, get_i64, get_json, get_opt_bool, get_opt_text, get_opt_ts, get_text, - get_ts, opt_text, opt_text_owned, parse_job_state, -}; -pub(crate) use row_conversion::row_to_memory_document; + +impl LibSqlDatabase { + fn new(db: RawLibSqlDatabase, temp_path: Option) -> Self { + Self { db, temp_path } + } + + #[cfg(test)] + pub fn temp_path(&self) -> Option { + self.temp_path.clone() + } + + /// Create a fresh libSQL connection from the shared database handle. + /// + /// Applies the same retry and `busy_timeout` setup used by + /// [`LibSqlBackend::connect`] so all shared-handle consumers behave + /// consistently. + pub async fn connect(&self) -> Result { + let mut last_err = None; + for attempt in 0..3u32 { + match self.db.connect() { + Ok(conn) => { + conn.query("PRAGMA busy_timeout = 5000", ()) + .await + .map_err(|e| { + DatabaseError::Pool(format!("Failed to set busy_timeout: {}", e)) + })?; + return Ok(conn); + } + Err(e) => { + last_err = Some(e); + if attempt < 2 { + tokio::time::sleep(std::time::Duration::from_millis( + 50 * 2u64.pow(attempt), + )) + .await; + } + } + } + } + Err(DatabaseError::Pool(format!( + "Failed to create connection after 3 attempts: {}", + last_err.map(|e| e.to_string()).unwrap_or_default() + ))) + } +} + +impl Drop for LibSqlDatabase { + fn drop(&mut self) { + if let Some(path) = &self.temp_path { + let _ = std::fs::remove_file(path); + let _ = std::fs::remove_file(path.with_extension("db-wal")); + let _ = std::fs::remove_file(path.with_extension("db-shm")); + } + } +} /// libSQL/Turso backend implementation of [`NativeDatabase`]. /// @@ -62,6 +128,7 @@ impl LibSqlBackend { DatabaseError::Pool(format!("Failed to create database directory: {}", e)) })?; } + Ok(()) } @@ -144,24 +211,8 @@ impl LibSqlBackend { pub async fn connect(&self) -> Result { self.db.connect().await } - - /// Wraps a built `libsql::Database` in `Self` with no temp-file path. - fn from_db(db: libsql::Database) -> Self { - Self { - db: Arc::new(db), - temp_path: None, - } - } -} -impl Drop for LibSqlDatabase { - fn drop(&mut self) { - if let Some(path) = &self.temp_path { - let _ = std::fs::remove_file(path); - let _ = std::fs::remove_file(path.with_extension("db-wal")); - let _ = std::fs::remove_file(path.with_extension("db-shm")); - } - } } + impl NativeDatabase for LibSqlBackend { async fn persist_terminal_result_and_status( &self, @@ -435,58 +486,3 @@ mod tests { } } } - -impl Drop for LibSqlDatabase { - fn drop(&mut self) { - if let Some(path) = &self.temp_path { - let _ = std::fs::remove_file(path); - let _ = std::fs::remove_file(path.with_extension("db-wal")); - let _ = std::fs::remove_file(path.with_extension("db-shm")); - } - } -} - -impl LibSqlDatabase { - fn new(db: RawLibSqlDatabase, temp_path: Option) -> Self { - Self { db, temp_path } - } - - #[cfg(test)] - pub fn temp_path(&self) -> Option { - self.temp_path.clone() - } - - /// Create a fresh libSQL connection from the shared database handle. - /// - /// Applies the same retry and `busy_timeout` setup used by - /// [`LibSqlBackend::connect`] so all shared-handle consumers behave - /// consistently. - pub async fn connect(&self) -> Result { - let mut last_err = None; - for attempt in 0..3u32 { - match self.db.connect() { - Ok(conn) => { - conn.query("PRAGMA busy_timeout = 5000", ()) - .await - .map_err(|e| { - DatabaseError::Pool(format!("Failed to set busy_timeout: {}", e)) - })?; - return Ok(conn); - } - Err(e) => { - last_err = Some(e); - if attempt < 2 { - tokio::time::sleep(std::time::Duration::from_millis( - 50 * 2u64.pow(attempt), - )) - .await; - } - } - } - } - Err(DatabaseError::Pool(format!( - "Failed to create connection after 3 attempts: {}", - last_err.map(|e| e.to_string()).unwrap_or_default() - ))) - } -} From d2a7a0bde3726fbb74838bfbc402a425c1f5a0ce Mon Sep 17 00:00:00 2001 From: leynos Date: Thu, 16 Apr 2026 03:15:50 +0200 Subject: [PATCH 36/40] Remove webhook listener cfg gates Make start_with_listener and restart_with_listener available\nwithout the test-helpers cfg gate so integration-test support code\ncan call them reliably when ironclaw is compiled as a library\ndependency, including on the Windows CI matrix. --- src/channels/webhook_server.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/channels/webhook_server.rs b/src/channels/webhook_server.rs index 00a322572..eaa362934 100644 --- a/src/channels/webhook_server.rs +++ b/src/channels/webhook_server.rs @@ -165,7 +165,6 @@ impl WebhookServer { /// test port allocation and the server bind. That makes it suitable for /// tests that reserve ephemeral ports before handing ownership to the /// server. - #[cfg(any(test, feature = "test-helpers"))] pub async fn start_with_listener( &mut self, listener: tokio::net::TcpListener, @@ -224,7 +223,6 @@ impl WebhookServer { /// old listener before calling [`Self::spawn_with_listener`], so there is /// no rollback path if spawning were to fail. That trade-off is acceptable /// in tests because [`Self::spawn_with_listener`] is infallible. - #[cfg(any(test, feature = "test-helpers"))] pub async fn restart_with_listener( &mut self, listener: tokio::net::TcpListener, From c779cfbfffebfeca6bbf378ca9912a597ac594ab Mon Sep 17 00:00:00 2001 From: leynos Date: Thu, 16 Apr 2026 05:13:08 +0200 Subject: [PATCH 37/40] Document libsql shared-handle constructor changes Expand the developers guide with concrete examples showing how the\nArc to Arc change propagates\nthrough store constructors and shared_db() consumers. --- docs/developers-guide.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/developers-guide.md b/docs/developers-guide.md index 8fc2242a7..48156c7dc 100644 --- a/docs/developers-guide.md +++ b/docs/developers-guide.md @@ -350,6 +350,37 @@ If a constructor or store used to accept a backend directly and now accepts underlying file while creating its own connections via `LibSqlDatabase::connect()`. +### Type-change propagation through store constructors + +The `Arc` → `Arc` change propagates to +every store that previously held a raw `Arc`. Each +affected constructor now accepts `Arc`: + +| Store | Field | Constructor parameter | +| --- | --- | --- | +| `LibSqlSecretsStore` | `db: Arc` | `new(db: Arc, …)` | +| `LibSqlWasmChannelStore` | `db: Arc` | `new(db: Arc)` | +| `LibSqlWasmToolStore` | `db: Arc` | `new(db: Arc)` | + +The shared handle is obtained at startup via `LibSqlBackend::shared_db()`, +which now returns `Arc` instead of +`Arc`: + +```rust +// Obtaining the shared handle (unchanged call site): +let db: Arc = backend.shared_db(); + +// Constructing a store with the shared handle: +let secrets_store = LibSqlSecretsStore::new(Arc::clone(&db), crypto); +let channel_store = LibSqlWasmChannelStore::new(Arc::clone(&db)); +let tool_store = LibSqlWasmToolStore::new(Arc::clone(&db)); +``` + +The `busy_timeout` PRAGMA that each store previously ran after connecting +is now applied once inside `LibSqlDatabase::connect()`, so it is no longer +necessary — and must not be duplicated — in individual store +`connect()` methods. + ## Dispatcher Architecture The dispatcher orchestrates interactive chat turns by preparing an LLM From a573dda6f49933709cfb858c03c8ab271dbe6c12 Mon Sep 17 00:00:00 2001 From: leynos Date: Thu, 16 Apr 2026 05:16:48 +0200 Subject: [PATCH 38/40] Add libsql workspace success-path tests Expand the central libsql workspace test module with round-trip\nchunk operations and document-store success-path coverage, using\nsetup_backend for all async cases. --- src/db/libsql/workspace/tests.rs | 239 +++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 303a75885..4100c53cd 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -334,3 +334,242 @@ async fn hybrid_search_returns_fts_only_results_without_embedding() { assert_sole_search_result(&results, "notes/fts-only.md", Some(1), None); } + +#[tokio::test] +async fn insert_chunk_and_delete_chunks_round_trip() { + let backend = setup_backend().await; + + let document = backend + .get_or_create_document_by_path("default", None, "notes/chunks.md") + .await + .expect("failed to create document"); + + let chunk_id = backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "round-trip chunk", + embedding: None, + }) + .await + .expect("failed to insert chunk"); + + let before = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("failed to list chunks before delete"); + assert!( + before.iter().any(|c| c.id == chunk_id), + "inserted chunk must appear in get_chunks_without_embeddings" + ); + + backend + .delete_chunks(document.id) + .await + .expect("failed to delete chunks"); + + let after = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("failed to list chunks after delete"); + assert!( + after.iter().all(|c| c.id != chunk_id), + "deleted chunk must not appear after delete_chunks" + ); +} + +#[tokio::test] +async fn update_chunk_embedding_is_reflected_in_chunks_list() { + let backend = setup_backend().await; + + let document = backend + .get_or_create_document_by_path("default", None, "notes/embed-update.md") + .await + .expect("failed to create document"); + + let chunk_id = backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "embedding update test", + embedding: None, + }) + .await + .expect("failed to insert chunk"); + + let before = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("failed to list chunks before embedding update"); + assert!( + before.iter().any(|c| c.id == chunk_id), + "chunk without embedding must appear before update" + ); + + backend + .update_chunk_embedding(chunk_id, &[0.1, 0.2, 0.3]) + .await + .expect("failed to update chunk embedding"); + + let after = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("failed to list chunks after embedding update"); + assert!( + after.iter().all(|c| c.id != chunk_id), + "chunk with embedding must not appear in get_chunks_without_embeddings" + ); +} + +#[tokio::test] +async fn get_or_create_document_by_path_is_idempotent() { + let backend = setup_backend().await; + + let first = backend + .get_or_create_document_by_path("default", None, "notes/idempotent.md") + .await + .expect("failed to create document on first call"); + let second = backend + .get_or_create_document_by_path("default", None, "notes/idempotent.md") + .await + .expect("failed to get document on second call"); + + assert_eq!(first.id, second.id, "get_or_create must return the same id"); +} + +#[tokio::test] +async fn update_document_changes_content() { + let backend = setup_backend().await; + + let document = backend + .get_or_create_document_by_path("default", None, "notes/update.md") + .await + .expect("failed to create document"); + backend + .update_document(document.id, "updated content") + .await + .expect("failed to update document content"); + + let fetched = backend + .get_document_by_id(document.id) + .await + .expect("failed to fetch updated document"); + assert_eq!( + fetched.content, "updated content", + "document content must reflect update" + ); +} + +#[tokio::test] +async fn delete_document_by_path_removes_document_and_chunks() { + let backend = setup_backend().await; + + let document = backend + .get_or_create_document_by_path("default", None, "notes/delete-me.md") + .await + .expect("failed to create document"); + backend + .insert_chunk(InsertChunkParams { + document_id: document.id, + chunk_index: 0, + content: "to be deleted", + embedding: None, + }) + .await + .expect("failed to insert chunk"); + + backend + .delete_document_by_path("default", None, "notes/delete-me.md") + .await + .expect("failed to delete document"); + + let result = backend + .get_document_by_path("default", None, "notes/delete-me.md") + .await; + assert_document_not_found(result); + + let chunks = backend + .get_chunks_without_embeddings("default", None, 10) + .await + .expect("failed to list chunks after document deletion"); + assert!( + chunks.iter().all(|c| c.document_id != document.id), + "chunks belonging to deleted document must be removed" + ); +} + +#[tokio::test] +async fn list_all_paths_returns_inserted_document_path() { + let backend = setup_backend().await; + + backend + .get_or_create_document_by_path("default", None, "notes/listed.md") + .await + .expect("failed to create document"); + + let paths = backend + .list_all_paths("default", None) + .await + .expect("failed to list all paths"); + + assert!( + paths.contains(&"notes/listed.md".to_string()), + "list_all_paths must include inserted document path" + ); +} + +#[tokio::test] +async fn list_documents_returns_inserted_document() { + let backend = setup_backend().await; + + let document = backend + .get_or_create_document_by_path("default", None, "notes/listed-doc.md") + .await + .expect("failed to create document"); + + let docs = backend + .list_documents("default", None) + .await + .expect("failed to list documents"); + + assert!( + docs.iter().any(|d| d.id == document.id), + "list_documents must include inserted document" + ); +} + +#[tokio::test] +async fn list_directory_returns_immediate_children_only() { + let backend = setup_backend().await; + + backend + .get_or_create_document_by_path("default", None, "notes/dir/child.md") + .await + .expect("failed to create child document"); + backend + .get_or_create_document_by_path("default", None, "notes/dir/sub/deep.md") + .await + .expect("failed to create deeply nested document"); + + let entries = backend + .list_directory("default", None, "notes/dir") + .await + .expect("failed to list directory"); + + assert!( + entries + .iter() + .any(|e| !e.is_directory && e.path.ends_with("child.md")), + "list_directory must include the direct file child" + ); + assert!( + entries + .iter() + .any(|e| e.is_directory && e.path.ends_with("sub")), + "list_directory must include the sub-directory child" + ); + assert!( + entries.iter().all(|e| !e.path.ends_with("deep.md")), + "list_directory must not include deeply nested files" + ); +} From 32cadb2c5a181d19cb127068b78fd86c9ee02859 Mon Sep 17 00:00:00 2001 From: leynos Date: Thu, 16 Apr 2026 11:44:49 +0200 Subject: [PATCH 39/40] Extract libsql workspace test document helper Reduce duplicated document setup in the central libsql workspace\ntests by routing the repeated default-scope creation path through a\nprivate create_test_document helper. --- src/db/libsql/workspace/tests.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index 4100c53cd..ac7c1c127 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -56,6 +56,18 @@ async fn setup_backend() -> LibSqlBackend { backend } +/// Create a document at `path` for the default user scope, ready for use in +/// unit tests. +async fn create_test_document( + backend: &LibSqlBackend, + path: &str, +) -> crate::workspace::MemoryDocument { + backend + .get_or_create_document_by_path("default", None, path) + .await + .unwrap_or_else(|e| panic!("failed to create test document at '{path}': {e}")) +} + /// Assert that `result` is the `DocumentNotFound` error variant. fn assert_document_not_found(result: Result) { assert!( @@ -339,10 +351,7 @@ async fn hybrid_search_returns_fts_only_results_without_embedding() { async fn insert_chunk_and_delete_chunks_round_trip() { let backend = setup_backend().await; - let document = backend - .get_or_create_document_by_path("default", None, "notes/chunks.md") - .await - .expect("failed to create document"); + let document = create_test_document(&backend, "notes/chunks.md").await; let chunk_id = backend .insert_chunk(InsertChunkParams { @@ -382,10 +391,7 @@ async fn insert_chunk_and_delete_chunks_round_trip() { async fn update_chunk_embedding_is_reflected_in_chunks_list() { let backend = setup_backend().await; - let document = backend - .get_or_create_document_by_path("default", None, "notes/embed-update.md") - .await - .expect("failed to create document"); + let document = create_test_document(&backend, "notes/embed-update.md").await; let chunk_id = backend .insert_chunk(InsertChunkParams { @@ -502,10 +508,7 @@ async fn delete_document_by_path_removes_document_and_chunks() { async fn list_all_paths_returns_inserted_document_path() { let backend = setup_backend().await; - backend - .get_or_create_document_by_path("default", None, "notes/listed.md") - .await - .expect("failed to create document"); + create_test_document(&backend, "notes/listed.md").await; let paths = backend .list_all_paths("default", None) @@ -522,10 +525,7 @@ async fn list_all_paths_returns_inserted_document_path() { async fn list_documents_returns_inserted_document() { let backend = setup_backend().await; - let document = backend - .get_or_create_document_by_path("default", None, "notes/listed-doc.md") - .await - .expect("failed to create document"); + let document = create_test_document(&backend, "notes/listed-doc.md").await; let docs = backend .list_documents("default", None) From 71fb9bd8814fe094e5a80e72db5337af12c48e30 Mon Sep 17 00:00:00 2001 From: leynos Date: Thu, 16 Apr 2026 14:16:11 +0200 Subject: [PATCH 40/40] Test libsql embedding JSON helper Add a unit test for embedding_to_vector_json and expose the helper to sibling workspace tests via pub(super). The requested cargo test target names a non-existent package in this workspace, so I also validated the focused test against the real ironclaw crate. --- src/db/libsql/workspace/tests.rs | 25 ++++++++++++++++++++++++ src/db/libsql/workspace/vector_search.rs | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/db/libsql/workspace/tests.rs b/src/db/libsql/workspace/tests.rs index ac7c1c127..94b9dc9bb 100644 --- a/src/db/libsql/workspace/tests.rs +++ b/src/db/libsql/workspace/tests.rs @@ -123,6 +123,31 @@ fn test_deserialize_embedding_negative_values() { assert_embedding_approx_eq(&result, &[-1.5, 0.0, 2.75]); } +#[test] +fn test_embedding_to_vector_json_formats_floats_as_json_array() { + use super::vector_search::embedding_to_vector_json; + + let result = embedding_to_vector_json(&[1.0, -2.5, 0.0]); + + assert!( + result.starts_with('['), + "JSON array must start with '[', got: {result}" + ); + assert!( + result.ends_with(']'), + "JSON array must end with ']', got: {result}" + ); + // The negative float must be preserved faithfully. + assert!( + result.contains("-2.5") || result.contains("-2."), + "must serialise the negative float, got: {result}" + ); + + // An empty slice must produce "[]". + let empty = embedding_to_vector_json(&[]); + assert_eq!(empty, "[]", "empty embedding must serialise as '[]'"); +} + #[tokio::test] async fn get_chunks_without_embeddings_skips_invalid_chunk_id_uuid() { let backend = setup_backend().await; diff --git a/src/db/libsql/workspace/vector_search.rs b/src/db/libsql/workspace/vector_search.rs index a83de3d3e..35d08996c 100644 --- a/src/db/libsql/workspace/vector_search.rs +++ b/src/db/libsql/workspace/vector_search.rs @@ -204,7 +204,7 @@ impl LibSqlBackend { } } -fn embedding_to_vector_json(embedding: &[f32]) -> String { +pub(super) fn embedding_to_vector_json(embedding: &[f32]) -> String { format!( "[{}]", embedding