From 433e8512343608673b6c8b19705f968589c57ff3 Mon Sep 17 00:00:00 2001 From: Raphael Sourty Date: Wed, 17 Jun 2026 14:43:26 +0200 Subject: [PATCH] perf(filtering): make metadata delete re-sequencing fat-row-independent (#136) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleting a single file from a large fat metadata DB took seconds because `_subset_` is the `INTEGER PRIMARY KEY` (rowid): re-sequencing it on delete relocates every shifted row in the table b-tree, rewriting overflow pages for large TEXT columns. A `COUNT(*)` guard added a full-table scan on top. This keeps every column physically in METADATA (so a deployed next-plaid-api reads the index unchanged — `SELECT *` output is byte-identical) while removing the hotspot: - v1 layout: `_subset_` becomes a regular `INTEGER NOT NULL` column with a plain index instead of the rowid PK. Re-sequencing `UPDATE`s no longer relocate rows in the b-tree. All joins/FTS already key on the `_subset_` value, not the physical rowid, so behavior is unchanged. - Lazy, gated migration: legacy v0 indexes (where `_subset_` is the PK) rebuild to v1 once on the first delete; `PRAGMA user_version` makes the check O(1) thereafter. Pure row shuffle, no re-encoding. - Replace the `COUNT(*)` re-sequence guard with an O(1) `MAX(_subset_)`. Measured (single-file delete, code~24KB/row): - localized / recent deletes (high `_subset_`): ~100-180x faster (e.g. 50K rows 1.33GB: 1379ms -> 8ms) -- the common incremental-update case is now instant. - worst case (delete oldest content, shifts ~all rows): ~2-3x (10.8s -> 4.8s). Bounded by SQLite re-reading each shifted row's payload on UPDATE; going further needs a thin/fat split, which would break forward compatibility. Tests: dense re-sequencing, v0->v1 migration, and forward-compat column identity; full next-plaid suite green. Heavy benchmark added as an #[ignore]d test. Refs #136 --- next-plaid/src/filtering.rs | 333 ++++++++++++++++++++-- next-plaid/tests/metadata_delete_bench.rs | 122 ++++++++ 2 files changed, 439 insertions(+), 16 deletions(-) create mode 100644 next-plaid/tests/metadata_delete_bench.rs diff --git a/next-plaid/src/filtering.rs b/next-plaid/src/filtering.rs index 531d07a7..82fa7e41 100644 --- a/next-plaid/src/filtering.rs +++ b/next-plaid/src/filtering.rs @@ -49,6 +49,19 @@ const SQLITE_PARAM_LIMIT: usize = 900; /// Primary key column name (matches fast-plaid). pub(crate) const SUBSET_COLUMN: &str = "_subset_"; +/// Index over `_subset_` used by the fast-delete (v1) layout. +const SUBSET_INDEX_NAME: &str = "idx_metadata_subset"; + +/// `PRAGMA user_version` value marking the fast-delete metadata layout, in which +/// `_subset_` is a regular indexed column rather than the `INTEGER PRIMARY KEY` +/// (rowid). Re-sequencing on delete then updates only a small integer + its index +/// instead of relocating each (potentially multi-KB) row in the table b-tree. +/// +/// The `SELECT *` projection is unchanged versus the legacy (v0) layout — the +/// demoted `_subset_` still appears, and the implicit rowid is hidden — so older +/// binaries (e.g. a deployed next-plaid-api) read a v1 index without modification. +const METADATA_SCHEMA_V1: i64 = 1; + /// Validate that a column name is a safe SQL identifier. /// /// Column names must start with a letter or underscore, followed by @@ -686,15 +699,142 @@ fn validate_fixed_columns(columns: &[(&str, &str)]) -> Result<()> { Ok(()) } +/// Read the metadata schema version from `PRAGMA user_version` (0 = legacy). +fn metadata_schema_version(conn: &Connection) -> i64 { + conn.query_row("PRAGMA user_version", [], |row| row.get(0)) + .unwrap_or(0) +} + +/// Create the (non-unique) index over `_subset_` used by the v1 layout. +/// +/// Non-unique on purpose: the dense-0..N-1 uniqueness of `_subset_` is guaranteed +/// by construction (the caller's doc IDs and the delete re-sequencing math), and a +/// UNIQUE index could spuriously reject a transient state mid-`UPDATE` during the +/// range-shift re-sequencing. The index exists purely to make `WHERE _subset_ = ?` +/// / `IN (...)` and `MAX(_subset_)` cheap now that `_subset_` is not the rowid. +fn create_subset_index(conn: &Connection) -> Result<()> { + conn.execute( + &format!( + "CREATE INDEX IF NOT EXISTS \"{}\" ON METADATA (\"{}\")", + SUBSET_INDEX_NAME, SUBSET_COLUMN + ), + [], + )?; + Ok(()) +} + +/// Ensure the metadata DB uses the v1 fast-delete layout, migrating a legacy v0 +/// index in place if needed. +/// +/// A v0 index stores `_subset_` as the `INTEGER PRIMARY KEY` (rowid), so the +/// delete re-sequencing relocates every shifted row — rewriting overflow pages for +/// large TEXT columns. The migration rebuilds the table with `_subset_` demoted to +/// a regular indexed column. This is a one-time table copy (pure row shuffling, no +/// re-encoding) run lazily on the first delete; afterwards the layout is permanent +/// and `PRAGMA user_version` gates the check to O(1). +/// +/// Forward compatibility is preserved: the rebuilt table exposes exactly the same +/// columns under `SELECT *`, so an older binary reads it unchanged. +fn ensure_fast_subset_schema(conn: &Connection) -> Result<()> { + if metadata_schema_version(conn) >= METADATA_SCHEMA_V1 { + return Ok(()); + } + + let has_table: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='METADATA'", + [], + |row| row.get(0), + ) + .unwrap_or(0); + if has_table == 0 { + // Nothing to migrate; create() stamps the version for fresh indexes. + return Ok(()); + } + + // Inspect the live schema: is `_subset_` still the rowid PK (legacy)? + let mut cols: Vec<(String, String)> = Vec::new(); // (name, declared_type) + let mut subset_is_pk = false; + { + let mut stmt = conn.prepare("PRAGMA table_info(METADATA)")?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, String>(1)?, // name + row.get::<_, String>(2)?, // type + row.get::<_, i64>(5)?, // pk + )) + })?; + for r in rows { + let (name, ty, pk) = r?; + if name == SUBSET_COLUMN && pk > 0 { + subset_is_pk = true; + } + cols.push((name, ty)); + } + } + + if !subset_is_pk { + // Already non-PK; just ensure the index + stamp the version. + create_subset_index(conn)?; + conn.execute_batch(&format!("PRAGMA user_version={}", METADATA_SCHEMA_V1))?; + return Ok(()); + } + + // Rebuild with `_subset_` as a plain column, preserving column order/types. + let col_defs: Vec = cols + .iter() + .map(|(name, ty)| { + if name == SUBSET_COLUMN { + format!("\"{}\" INTEGER NOT NULL", SUBSET_COLUMN) + } else { + let ty = if ty.trim().is_empty() { + "TEXT" + } else { + ty.as_str() + }; + format!("\"{}\" {}", name, ty) + } + }) + .collect(); + let all_cols = cols + .iter() + .map(|(name, _)| format!("\"{}\"", name)) + .collect::>() + .join(", "); + + conn.execute_batch("BEGIN")?; + conn.execute("ALTER TABLE METADATA RENAME TO _METADATA_V0", [])?; + conn.execute( + &format!("CREATE TABLE METADATA ({})", col_defs.join(", ")), + [], + )?; + conn.execute( + &format!( + "INSERT INTO METADATA ({0}) SELECT {0} FROM _METADATA_V0", + all_cols + ), + [], + )?; + create_subset_index(conn)?; + conn.execute("DROP TABLE _METADATA_V0", [])?; + conn.execute_batch(&format!("PRAGMA user_version={}", METADATA_SCHEMA_V1))?; + conn.execute_batch("COMMIT")?; + Ok(()) +} + fn create_fixed_metadata_table(conn: &Connection, columns: &[(&str, &str)]) -> Result<()> { // The caller has already committed to a stable schema, so we can skip the // generic "discover columns as we go" logic and create the table directly. - let mut col_defs = vec![format!("\"{}\" INTEGER PRIMARY KEY", SUBSET_COLUMN)]; + // `_subset_` is a regular column (v1 layout) so delete re-sequencing never + // relocates fat rows; a separate index keeps lookups fast. + let mut col_defs = vec![format!("\"{}\" INTEGER NOT NULL", SUBSET_COLUMN)]; for (name, sql_type) in columns { col_defs.push(format!("\"{}\" {}", name, sql_type)); } let create_sql = format!("CREATE TABLE METADATA ({})", col_defs.join(", ")); conn.execute(&create_sql, [])?; + create_subset_index(conn)?; + conn.execute_batch(&format!("PRAGMA user_version={}", METADATA_SCHEMA_V1))?; Ok(()) } @@ -908,8 +1048,9 @@ pub fn create(index_path: &str, metadata: &[Value], doc_ids: &[i64]) -> Result Result = std::iter::repeat_n("?", columns.len() + 1).collect(); @@ -954,6 +1102,9 @@ pub fn create(index_path: &str, metadata: &[Value], doc_ids: &[i64]) -> Result Result { let conn = open_db_write(&db_path)?; + // Migrate a legacy (v0) index to the fast-delete layout on the first delete, so + // the re-sequencing below updates a plain indexed column instead of relocating + // each fat row. No-op once the index is v1. + ensure_fast_subset_schema(&conn)?; + // Start transaction conn.execute("BEGIN", [])?; + // `_subset_` is contiguous 0..N-1 before this call, so MAX+1 is the pre-delete + // row count. Read via the `_subset_` index (O(log n)) rather than a COUNT(*) + // full scan. Used to discard stray out-of-range/non-existent delete ids whose + // shift counts would otherwise corrupt every survivor's id. + let original_count: i64 = conn + .query_row( + &format!( + "SELECT COALESCE(MAX(\"{}\"), -1) FROM METADATA", + SUBSET_COLUMN + ), + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(-1) + + 1; + // Delete specified rows let (in_clause, in_params, temp_table) = crate::text_search::build_in_clause(&conn, subset)?; let delete_sql = format!( @@ -1154,22 +1326,13 @@ pub fn delete(index_path: &str, subset: &[i64]) -> Result { // Re-sequence _subset_ IDs to be contiguous 0-based. // Instead of copying the entire table (expensive for large tables with TEXT/BLOB - // columns), use range-based UPDATEs that only touch the integer primary key. - // Process gaps in ascending order so decremented values never collide. + // columns), use range-based UPDATEs. Because `_subset_` is now a regular column + // (not the rowid), each UPDATE rewrites only the small integer value and its + // index entry — the fat row stays put. Process gaps in ascending order so + // decremented values never collide. let mut sorted_ids: Vec = subset.to_vec(); sorted_ids.sort_unstable(); sorted_ids.dedup(); - // Keep ONLY the ids that were actually present and removed. The range-shift - // math below assumes `sorted_ids` is exactly the set of deleted `_subset_` - // values: a stray out-of-range or non-existent id would inflate the shift - // counts and corrupt every survivor's id. `_subset_` is contiguous 0..N-1 - // before this call, so the pre-delete row count is (survivors + deleted). - let original_count: i64 = conn - .query_row("SELECT COUNT(*) FROM METADATA", [], |row| { - row.get::<_, i64>(0) - }) - .unwrap_or(0) - + deleted as i64; sorted_ids.retain(|&id| id >= 0 && id < original_count); let max_id: i64 = conn @@ -2567,4 +2730,142 @@ mod tests { assert_eq!(count(&path).unwrap(), 80); } + + // ---- fast-delete (v1) layout: schema, re-sequencing, migration ---- + + fn meta_db(path: &str) -> Connection { + Connection::open(std::path::Path::new(path).join(METADATA_DB_NAME)).unwrap() + } + + fn user_version_of(path: &str) -> i64 { + meta_db(path) + .query_row("PRAGMA user_version", [], |r| r.get(0)) + .unwrap() + } + + fn subset_is_pk(path: &str) -> bool { + let c = meta_db(path); + let mut stmt = c.prepare("PRAGMA table_info(METADATA)").unwrap(); + let rows = stmt + .query_map([], |row| { + Ok((row.get::<_, String>(1)?, row.get::<_, i64>(5)?)) + }) + .unwrap(); + for r in rows { + let (name, pk) = r.unwrap(); + if name == SUBSET_COLUMN && pk > 0 { + return true; + } + } + false + } + + fn select_star_columns(path: &str) -> Vec { + let rows = get(path, None, &[], None).unwrap(); + rows[0].as_object().unwrap().keys().cloned().collect() + } + + #[test] + fn test_create_uses_v1_layout() { + let dir = setup_test_dir(); + let path = dir.path().to_str().unwrap(); + let meta: Vec = (0..5) + .map(|i| json!({"file": format!("f{i}.rs"), "code": format!("c{i}")})) + .collect(); + create(path, &meta, &(0..5).collect::>()).unwrap(); + + assert_eq!(user_version_of(path), 1); + assert!(!subset_is_pk(path), "v1: _subset_ must not be the PK/rowid"); + + // Forward-compat guarantee: SELECT * exposes exactly _subset_ + user + // columns and leaks no internal column, so older binaries read it unchanged. + let cols = select_star_columns(path); + assert!(cols.iter().any(|c| c == SUBSET_COLUMN)); + assert!(cols.iter().any(|c| c == "file") && cols.iter().any(|c| c == "code")); + assert!(!cols + .iter() + .any(|c| c == "rowid" || c.starts_with("_METADATA"))); + } + + #[test] + fn test_delete_resequences_dense() { + let dir = setup_test_dir(); + let path = dir.path().to_str().unwrap(); + let meta: Vec = (0..10) + .map(|i| json!({"file": format!("f{i}.rs")})) + .collect(); + create(path, &meta, &(0..10).collect::>()).unwrap(); + + assert_eq!(delete(path, &[2, 5, 7]).unwrap(), 3); + + let rows = get(path, None, &[], None).unwrap(); + assert_eq!(rows.len(), 7); + let expected = [ + "f0.rs", "f1.rs", "f3.rs", "f4.rs", "f6.rs", "f8.rs", "f9.rs", + ]; + for (i, row) in rows.iter().enumerate() { + assert_eq!( + row[SUBSET_COLUMN].as_i64().unwrap(), + i as i64, + "dense 0-based" + ); + assert_eq!( + row["file"].as_str().unwrap(), + expected[i], + "survivor order preserved" + ); + } + } + + #[test] + fn test_legacy_v0_index_migrates_on_delete() { + let dir = setup_test_dir(); + let path = dir.path().to_str().unwrap(); + + // Hand-build a legacy v0 index: `_subset_` is the INTEGER PRIMARY KEY and + // user_version is 0 — exactly what a deployed next-plaid-api produced. + { + let c = meta_db(path); + c.execute_batch("PRAGMA user_version=0;").unwrap(); + c.execute( + &format!( + "CREATE TABLE METADATA (\"{}\" INTEGER PRIMARY KEY, file TEXT, code TEXT)", + SUBSET_COLUMN + ), + [], + ) + .unwrap(); + for i in 0..10i64 { + c.execute( + "INSERT INTO METADATA VALUES (?, ?, ?)", + rusqlite::params![i, format!("f{i}.rs"), format!("c{i}")], + ) + .unwrap(); + } + } + assert!(subset_is_pk(path), "precondition: legacy v0 layout"); + assert_eq!(user_version_of(path), 0); + + // First delete migrates in place, then re-sequences. + assert_eq!(delete(path, &[3]).unwrap(), 1); + + assert_eq!(user_version_of(path), 1, "migrated to v1"); + assert!(!subset_is_pk(path), "_subset_ demoted from PK"); + + // Forward-compat: identical columns under SELECT *, nothing leaked. + let cols = select_star_columns(path); + assert!(!cols + .iter() + .any(|c| c == "rowid" || c.starts_with("_METADATA"))); + assert!(cols.iter().any(|c| c == "file") && cols.iter().any(|c| c == "code")); + + // Data intact, dense, survivor order preserved (f3 removed → f4 now at id 3). + let rows = get(path, None, &[], None).unwrap(); + assert_eq!(rows.len(), 9); + for (i, row) in rows.iter().enumerate() { + assert_eq!(row[SUBSET_COLUMN].as_i64().unwrap(), i as i64); + } + assert_eq!(rows[3]["file"].as_str().unwrap(), "f4.rs"); + assert_eq!(rows[3]["code"].as_str().unwrap(), "c4"); + } } diff --git a/next-plaid/tests/metadata_delete_bench.rs b/next-plaid/tests/metadata_delete_bench.rs new file mode 100644 index 00000000..eaba8d01 --- /dev/null +++ b/next-plaid/tests/metadata_delete_bench.rs @@ -0,0 +1,122 @@ +// Benchmark: real `filtering::delete` re-sequencing cost on a colgrep-shaped +// fat metadata DB, on current main (post-#139 range-UPDATE re-sequencing). +// +// Answers: did #139 already make single-file deletes acceptable, or does the +// fat-row relocation (because `_subset_` is the rowid) still dominate? +// +// cargo test --release --test metadata_delete_bench -- --nocapture +// +// METABENCH,,,,,, + +use next_plaid::filtering; +use serde_json::{json, Value}; +use std::time::Instant; +use tempfile::tempdir; + +fn fat_row(i: usize, code_bytes: usize) -> Value { + // `code` dominates row size and lands in SQLite overflow pages — the thing + // that gets rewritten when the rowid (_subset_) is relocated on re-sequence. + let code = format!( + "fn unit_{i} {{\n{}\n}}", + " let v = compute();\n".repeat(code_bytes / 22) + ); + json!({ + // thin / identity columns + "file": format!("src/mod_{}/file_{}.rs", i % 200, i), + "name": format!("unit_{i}"), + "qualified_name": format!("crate::mod_{}::unit_{i}", i % 200), + "line": (i % 2000) as i64, + "end_line": (i % 2000 + 40) as i64, + "language": "rust", + "unit_type": "function", + "complexity": (i % 30) as i64, + "has_loops": (i % 2) as i64, + "has_branches": i.is_multiple_of(3) as i64, + "has_error_handling": i.is_multiple_of(5) as i64, + // fat / content columns + "code": code, + "signature": format!("fn unit_{i}(a: u32, b: &str, c: Vec) -> Result"), + "docstring": "Performs the unit operation and returns its result. ".repeat(8), + "parameters": "[\"a: u32\", \"b: &str\", \"c: Vec\"]", + "calls": "[\"compute\",\"validate\",\"persist\",\"emit\"]", + "called_by": "[\"orchestrate\",\"main\"]", + "variables": "[\"v\",\"acc\",\"out\",\"err\"]", + "imports": "[\"std::fmt\",\"crate::error::Error\"]", + "return_type": "Result", + "extends": "", + "parent_class": "", + }) +} + +fn build_db(path: &str, n: usize, code_bytes: usize) { + let chunk = 1000; + let mut created = false; + for start in (0..n).step_by(chunk) { + let end = (start + chunk).min(n); + let meta: Vec = (start..end).map(|i| fat_row(i, code_bytes)).collect(); + let ids: Vec = (start..end).map(|i| i as i64).collect(); + if !created { + filtering::create(path, &meta, &ids).unwrap(); + created = true; + } else { + filtering::update(path, &meta, &ids).unwrap(); + } + } +} + +fn copy_db(from_dir: &std::path::Path, to_dir: &std::path::Path) { + for entry in std::fs::read_dir(from_dir).unwrap() { + let p = entry.unwrap().path(); + let name = p.file_name().unwrap(); + if name.to_string_lossy().starts_with("metadata.db") { + std::fs::copy(&p, to_dir.join(name)).unwrap(); + } + } +} + +#[test] +#[ignore = "heavy benchmark (builds multi-GB DBs); run manually: cargo test --release --test metadata_delete_bench -- --ignored --nocapture"] +fn bench_metadata_single_file_delete() { + // (N rows, code bytes/row) + let configs = [ + (20_000usize, 24 * 1024usize), + (50_000, 24 * 1024), + (50_000, 4 * 1024), + ]; + let file_units = 30usize; // a single file's worth of units to delete + + println!("METABENCH_HEADER,N,code_kb,db_mb,position,deleted,delete_ms"); + for &(n, code_bytes) in &configs { + let template = tempdir().unwrap(); + let tpath = template.path().to_str().unwrap(); + build_db(tpath, n, code_bytes); + let db_file = template.path().join("metadata.db"); + let db_mb = std::fs::metadata(&db_file) + .map(|m| m.len() as f64 / 1e6) + .unwrap_or(0.0); + + let positions: [(&str, usize); 3] = [("front", 0), ("mid", n / 2), ("end", n - file_units)]; + + for (label, start) in positions { + // fresh copy of the pristine fat DB so each position starts identical + let work = tempdir().unwrap(); + copy_db(template.path(), work.path()); + let wpath = work.path().to_str().unwrap(); + + let ids: Vec = (start..start + file_units).map(|i| i as i64).collect(); + let t = Instant::now(); + let deleted = filtering::delete(wpath, &ids).unwrap(); + let ms = t.elapsed().as_secs_f64() * 1000.0; + assert_eq!(deleted, file_units); + println!( + "METABENCH,{},{},{:.0},{},{},{:.2}", + n, + code_bytes / 1024, + db_mb, + label, + deleted, + ms + ); + } + } +}