diff --git a/.changeset/fts-corrupt-vtab-fix.md b/.changeset/fts-corrupt-vtab-fix.md new file mode 100644 index 000000000..16cb1b767 --- /dev/null +++ b/.changeset/fts-corrupt-vtab-fix.md @@ -0,0 +1,9 @@ +--- +"emdash": patch +--- + +Fixes `SQLITE_CORRUPT_VTAB` (`database disk image is malformed`) when editing or publishing content on collections that have search enabled, and on restore-from-trash, permanent-delete, and edit-while-trashed flows. + +The FTS5 sync triggers used the contentless-table form (`DELETE FROM fts WHERE rowid = OLD.rowid`) on what is actually an external-content FTS5 table. After an UPDATE on `ec_`, FTS5 then read NEW column values from the (already updated) content table while trying to remove OLD tokens from the inverted index, drifting the index out of sync until SQLite refused further reads. Rewrites the triggers to use the documented external-content-safe `INSERT INTO fts(fts, rowid, ...) VALUES('delete', OLD.rowid, OLD.col1, ...)` pattern, gated on `OLD.deleted_at IS NULL` so we don't try to remove rows that were never indexed (which would itself raise `SQLITE_CORRUPT_VTAB` on restore-from-trash and permanent-delete). + +Adds migration `039_fix_fts5_triggers` that rebuilds the FTS index for every search-enabled collection on upgrade, replacing the broken triggers and recovering from any latent index corruption left behind by earlier mutations. The migration runs once at startup before the first request can hit the affected paths, so upgrading sites get the fix on their next deploy without depending on a search-endpoint visit to trigger lazy auto-repair. diff --git a/packages/core/src/database/migrations/039_fix_fts5_triggers.ts b/packages/core/src/database/migrations/039_fix_fts5_triggers.ts new file mode 100644 index 000000000..4c27b3b14 --- /dev/null +++ b/packages/core/src/database/migrations/039_fix_fts5_triggers.ts @@ -0,0 +1,252 @@ +import type { Kysely } from "kysely"; +import { sql } from "kysely"; + +import { isSqlite } from "../dialect-helpers.js"; +import { validateIdentifier } from "../validate.js"; + +/** + * Migration: Rebuild FTS5 indexes with corruption-safe triggers + * + * Background: FTS5 virtual tables created with `content='ec_'` are + * *external content* tables. Pre-fix versions of EmDash used the + * *contentless*-table sync pattern in their triggers + * (`DELETE FROM fts WHERE rowid = OLD.rowid`), which is unsafe for + * external-content FTS5 because FTS5 then reads the *current* row from + * `ec_` to compute the tokens to remove. By the time + * `AFTER UPDATE` fires the row already holds NEW values, so the wrong + * tokens are removed and the inverted index drifts out of sync until + * SQLite raises `SQLITE_CORRUPT_VTAB` on the next mutation + * (see issue #649, PR #768). + * + * The fix that ships in the same release rewrites the triggers to use + * the documented external-content-safe form: + * + * INSERT INTO fts(fts, rowid, col1, col2) + * VALUES('delete', OLD.rowid, OLD.col1, OLD.col2); + * + * gated on `OLD.deleted_at IS NULL` so we don't try to remove rows that + * were never indexed (which would itself raise `SQLITE_CORRUPT_VTAB`). + * + * This migration finalises the fix for *upgrading* sites. New triggers + * alone are not enough: existing sites still carry the broken triggers in + * `sqlite_master` and, in many cases, an already-corrupted FTS shadow + * index. We: + * + * 1. Find every collection with FTS enabled + * (`_emdash_collections.search_config -> enabled = true`). + * 2. Drop its three sync triggers and the FTS5 virtual table itself + * (which removes the shadow tables `_data`, `_idx`, + * `_docsize`, `_config`, `_content`). + * 3. Recreate the FTS5 table and triggers with the corruption-safe + * `'delete'` form. + * 4. Repopulate from `ec_ WHERE deleted_at IS NULL`. + * + * The trigger SQL emitted here MUST stay in lock-step with + * `FTSManager.createTriggers` in `src/search/fts-manager.ts`. The two + * code paths build the same triggers from the same field list; keeping + * them aligned means a fresh install (via FTSManager) and an upgraded + * install (via this migration) end up with identical schemas. If + * `createTriggers` ever changes again, add a new migration rather than + * editing this one -- migrations are forward-only. + * + * Postgres: no-op. FTS5 is SQLite-only; the `FTSManager` already + * short-circuits on non-SQLite, and there are no FTS tables to rebuild. + * + * D1: the migration is idempotent at the granularity we care about + * (drop-then-create + repopulate). A partial re-apply that gets as far + * as dropping the FTS table but not recreating it leaves the collection + * without an index; the next runtime call to `verifyAndRepairIndex` + * (e.g. on a search request) detects the missing table and rebuilds. + * Re-running this migration in full is also safe -- it always replaces + * the index from `ec_` content. + */ + +interface CollectionRow { + slug: string; + search_config: string | null; +} + +interface FieldRow { + slug: string; +} + +export async function up(db: Kysely): Promise { + if (!isSqlite(db)) return; + + const collections = await sql` + SELECT slug, search_config FROM _emdash_collections + WHERE search_config IS NOT NULL + `.execute(db); + + for (const collection of collections.rows) { + if (!isSearchEnabled(collection.search_config)) continue; + + // Slug came from `_emdash_collections.slug`, where the schema + // registry validates it against the same identifier rules on + // write. Re-validate here defensively before interpolating it + // into raw SQL -- this is a migration, so a malformed slug + // that somehow landed in the DB must not become an injection + // vector. + try { + validateIdentifier(collection.slug, "collection slug"); + } catch (error) { + console.warn( + `[migration 039] skipping FTS rebuild for collection "${collection.slug}": ${ + error instanceof Error ? error.message : String(error) + }`, + ); + continue; + } + + const fields = await getSearchableFields(db, collection.slug); + if (fields.length === 0) { + // `search_config.enabled = true` but no searchable fields: + // disable search and drop any orphan FTS objects. This + // matches FTSManager's "no fields -> disable" behavior. + await dropFtsObjects(db, collection.slug); + await sql` + UPDATE _emdash_collections + SET search_config = json_set(search_config, '$.enabled', json('false')) + WHERE slug = ${collection.slug} + `.execute(db); + continue; + } + + await rebuildIndex(db, collection.slug, fields); + } +} + +/** + * Forward-only migration. Down is a no-op: we cannot meaningfully + * "restore the broken triggers" and there is no migration-level state + * to roll back. The FTS tables themselves are managed by `FTSManager` + * at runtime, not by this migration, so leaving them in their + * corruption-safe state on rollback is correct. + */ +export async function down(_db: Kysely): Promise { + // no-op +} + +function isSearchEnabled(searchConfig: string | null): boolean { + if (!searchConfig) return false; + try { + const parsed: unknown = JSON.parse(searchConfig); + return ( + typeof parsed === "object" && + parsed !== null && + "enabled" in parsed && + (parsed as { enabled: unknown }).enabled === true + ); + } catch { + return false; + } +} + +async function getSearchableFields(db: Kysely, collectionSlug: string): Promise { + const rows = await sql` + SELECT f.slug FROM _emdash_fields f + INNER JOIN _emdash_collections c ON c.id = f.collection_id + WHERE c.slug = ${collectionSlug} AND f.searchable = 1 + `.execute(db); + + const out: string[] = []; + for (const row of rows.rows) { + try { + validateIdentifier(row.slug, "searchable field name"); + out.push(row.slug); + } catch { + console.warn( + `[migration 039] skipping invalid searchable field "${row.slug}" on collection "${collectionSlug}"`, + ); + } + } + return out; +} + +async function rebuildIndex( + db: Kysely, + collectionSlug: string, + fields: string[], +): Promise { + const ftsTable = `_emdash_fts_${collectionSlug}`; + const contentTable = `ec_${collectionSlug}`; + const columnList = ["id UNINDEXED", "locale UNINDEXED", ...fields].join(", "); + const fieldList = fields.join(", "); + const newFieldList = fields.map((f) => `NEW.${f}`).join(", "); + const oldFieldList = fields.map((f) => `OLD.${f}`).join(", "); + + await dropFtsObjects(db, collectionSlug); + + await sql + .raw(` + CREATE VIRTUAL TABLE "${ftsTable}" USING fts5( + ${columnList}, + content='${contentTable}', + content_rowid='rowid', + tokenize='porter unicode61' + ) + `) + .execute(db); + + // Insert trigger -- only index non-deleted content. + await sql + .raw(` + CREATE TRIGGER "${ftsTable}_insert" + AFTER INSERT ON "${contentTable}" + WHEN NEW.deleted_at IS NULL + BEGIN + INSERT INTO "${ftsTable}"(rowid, id, locale, ${fieldList}) + VALUES (NEW.rowid, NEW.id, NEW.locale, ${newFieldList}); + END + `) + .execute(db); + + // Update trigger -- corruption-safe external-content `'delete'` form, + // gated on OLD.deleted_at IS NULL so we never issue `'delete'` for a + // rowid that was never indexed. + await sql + .raw(` + CREATE TRIGGER "${ftsTable}_update" + AFTER UPDATE ON "${contentTable}" + BEGIN + INSERT INTO "${ftsTable}"("${ftsTable}", rowid, id, locale, ${fieldList}) + SELECT 'delete', OLD.rowid, OLD.id, OLD.locale, ${oldFieldList} + WHERE OLD.deleted_at IS NULL; + INSERT INTO "${ftsTable}"(rowid, id, locale, ${fieldList}) + SELECT NEW.rowid, NEW.id, NEW.locale, ${newFieldList} + WHERE NEW.deleted_at IS NULL; + END + `) + .execute(db); + + // Delete trigger -- same corruption-safe form, same gate. + await sql + .raw(` + CREATE TRIGGER "${ftsTable}_delete" + AFTER DELETE ON "${contentTable}" + BEGIN + INSERT INTO "${ftsTable}"("${ftsTable}", rowid, id, locale, ${fieldList}) + SELECT 'delete', OLD.rowid, OLD.id, OLD.locale, ${oldFieldList} + WHERE OLD.deleted_at IS NULL; + END + `) + .execute(db); + + // Populate from existing content (non-deleted rows only). + await sql + .raw(` + INSERT INTO "${ftsTable}"(rowid, id, locale, ${fieldList}) + SELECT rowid, id, locale, ${fieldList} FROM "${contentTable}" + WHERE deleted_at IS NULL + `) + .execute(db); +} + +async function dropFtsObjects(db: Kysely, collectionSlug: string): Promise { + const ftsTable = `_emdash_fts_${collectionSlug}`; + + await sql.raw(`DROP TRIGGER IF EXISTS "${ftsTable}_insert"`).execute(db); + await sql.raw(`DROP TRIGGER IF EXISTS "${ftsTable}_update"`).execute(db); + await sql.raw(`DROP TRIGGER IF EXISTS "${ftsTable}_delete"`).execute(db); + await sql.raw(`DROP TABLE IF EXISTS "${ftsTable}"`).execute(db); +} diff --git a/packages/core/src/database/migrations/runner.ts b/packages/core/src/database/migrations/runner.ts index 81102b200..64aefda37 100644 --- a/packages/core/src/database/migrations/runner.ts +++ b/packages/core/src/database/migrations/runner.ts @@ -39,6 +39,7 @@ import * as m035 from "./035_bounded_404_log.js"; import * as m036 from "./036_i18n_menus_and_taxonomies.js"; import * as m037 from "./037_credential_algorithm.js"; import * as m038 from "./038_registry_plugin_state.js"; +import * as m039 from "./039_fix_fts5_triggers.js"; const MIGRATIONS: Readonly> = Object.freeze({ "001_initial": m001, @@ -78,6 +79,7 @@ const MIGRATIONS: Readonly> = Object.freeze({ "036_i18n_menus_and_taxonomies": m036, "037_credential_algorithm": m037, "038_registry_plugin_state": m038, + "039_fix_fts5_triggers": m039, }); /** Total number of registered migrations. Exported for use in tests. */ diff --git a/packages/core/src/search/fts-manager.ts b/packages/core/src/search/fts-manager.ts index e6fece262..85e53b8c1 100644 --- a/packages/core/src/search/fts-manager.ts +++ b/packages/core/src/search/fts-manager.ts @@ -81,8 +81,13 @@ export class FTSManager { // id and locale are UNINDEXED (used for joining/filtering, not searched) const columns = ["id UNINDEXED", "locale UNINDEXED", ...searchableFields].join(", "); - // Create the FTS5 virtual table - // Using content= to make it a contentless FTS table (we manage sync ourselves) + // Create the FTS5 virtual table. + // `content=''` makes this an *external content* FTS5 table: + // the inverted index lives in the FTS shadow tables, but the actual + // row data lives in the backing content table. The triggers in + // `createTriggers` keep the index in sync; they MUST use the + // external-content-safe `'delete'` command (see notes there) to + // avoid `SQLITE_CORRUPT_VTAB` on UPDATE/DELETE. // tokenize='porter unicode61' enables stemming (run matches running, ran, etc.) await sql .raw(` @@ -106,13 +111,51 @@ export class FTSManager { * `deleted_at IS NULL`. This keeps soft-deleted content out of the * search index and ensures the FTS row count matches the non-deleted * content count (which `verifyAndRepairIndex` relies on). + * + * IMPORTANT: The FTS5 virtual table is created with `content='ec_'` + * which makes it an *external content* FTS5 table. For external-content + * tables, removing a row must use the documented `'delete'` command and + * supply the OLD column values explicitly, e.g.: + * + * INSERT INTO fts(fts, rowid, col1, col2) + * VALUES('delete', OLD.rowid, OLD.col1, OLD.col2); + * + * Using `DELETE FROM fts WHERE rowid = OLD.rowid` is the correct form + * for *contentless* tables but is unsafe for external-content tables: + * FTS5 then reads column values from the backing content table, which + * in an AFTER UPDATE trigger already holds the NEW values. The wrong + * tokens get removed and the inverted index drifts out of sync until + * SQLite raises `SQLITE_CORRUPT_VTAB` on the next mutation. See + * https://www.sqlite.org/fts5.html#external_content_tables. + * + * The UPDATE and DELETE triggers gate the `'delete'` on + * `OLD.deleted_at IS NULL` because the INSERT trigger never indexed + * rows that were already soft-deleted. Issuing `'delete'` for a rowid + * that was never inserted into the FTS index is itself a corruption + * trigger -- FTS5's `'delete'` is not a no-op on missing rowids and + * raises `SQLITE_CORRUPT_VTAB`. Affected paths include restore-from- + * trash (UPDATE where `OLD.deleted_at IS NOT NULL`), permanent-delete + * from trash (DELETE on a soft-deleted row), and any edit on a row + * that's currently in the trash. */ private async createTriggers(collectionSlug: string, searchableFields: string[]): Promise { this.validateInputs(collectionSlug, searchableFields); + if (searchableFields.length === 0) { + throw new Error( + `Cannot create FTS triggers for collection "${collectionSlug}": no searchable fields. ` + + `Mark at least one field as searchable before enabling search.`, + ); + } const ftsTable = this.getFtsTableName(collectionSlug); const contentTable = this.getContentTableName(collectionSlug); const fieldList = searchableFields.join(", "); const newFieldList = searchableFields.map((f) => `NEW.${f}`).join(", "); + // `'delete'` takes the FTS5 virtual table name as the first column, + // then the rowid being removed, then the OLD value of every column + // declared on the FTS5 table (in declaration order: id, locale, + // then each searchable field). + const oldFieldList = searchableFields.map((f) => `OLD.${f}`).join(", "); + // Insert trigger - only index non-deleted content await sql .raw(` @@ -126,15 +169,24 @@ export class FTSManager { `) .execute(this.db); - // Update trigger - always remove the old FTS row, only re-insert - // if the row is not soft-deleted. This handles both content edits - // and soft-delete operations (UPDATE SET deleted_at = ...). + // Update trigger - remove the old row from the FTS index using the + // external-content-safe `'delete'` command (which uses OLD column + // values, captured before the row was modified), then re-insert + // the new values when the row is still visible. + // + // `'delete'` is gated on `OLD.deleted_at IS NULL` because rows that + // were soft-deleted are not in the FTS index (the INSERT trigger + // skips them). Issuing `'delete'` for a missing rowid raises + // `SQLITE_CORRUPT_VTAB`, which would break restore-from-trash and + // edits to soft-deleted rows. await sql .raw(` CREATE TRIGGER IF NOT EXISTS "${ftsTable}_update" AFTER UPDATE ON "${contentTable}" BEGIN - DELETE FROM "${ftsTable}" WHERE rowid = OLD.rowid; + INSERT INTO "${ftsTable}"("${ftsTable}", rowid, id, locale, ${fieldList}) + SELECT 'delete', OLD.rowid, OLD.id, OLD.locale, ${oldFieldList} + WHERE OLD.deleted_at IS NULL; INSERT INTO "${ftsTable}"(rowid, id, locale, ${fieldList}) SELECT NEW.rowid, NEW.id, NEW.locale, ${newFieldList} WHERE NEW.deleted_at IS NULL; @@ -142,13 +194,18 @@ export class FTSManager { `) .execute(this.db); - // Delete trigger + // Delete trigger - same external-content-safe `'delete'` form, + // gated on `OLD.deleted_at IS NULL` for the same reason as the + // UPDATE trigger: permanent-delete from trash hits a row whose + // `deleted_at` is already set and which was never indexed. await sql .raw(` CREATE TRIGGER IF NOT EXISTS "${ftsTable}_delete" AFTER DELETE ON "${contentTable}" BEGIN - DELETE FROM "${ftsTable}" WHERE rowid = OLD.rowid; + INSERT INTO "${ftsTable}"("${ftsTable}", rowid, id, locale, ${fieldList}) + SELECT 'delete', OLD.rowid, OLD.id, OLD.locale, ${oldFieldList} + WHERE OLD.deleted_at IS NULL; END `) .execute(this.db); @@ -372,9 +429,15 @@ export class FTSManager { } /** - * Verify FTS index integrity and rebuild if corrupted. + * Verify FTS index integrity and rebuild if drift is detected. + * + * Cheap belt-and-braces check, run lazily on the first search request + * per isolate. The expensive cases (corrupted indexes from pre-fix + * EmDash versions, broken legacy triggers) are handled at boot time by + * migration `039_fix_fts5_triggers`, not here. This routine sticks to: * - * Checks for row count mismatch between content table and FTS table. + * 1. FTS table missing while config says search is enabled -> rebuild. + * 2. Row count mismatch between content table and FTS docsize -> rebuild. * * Returns true if the index was rebuilt, false if it was healthy. */ @@ -397,16 +460,15 @@ export class FTSManager { return true; } - // Check 1: Row count mismatch + // Row count parity check. For external-content FTS tables, COUNT(*) + // on the virtual table is answered from the backing content table + // (including soft-deleted rows), so we use the docsize shadow table + // which tracks rows actually present in the full-text index. const contentCount = await sql<{ count: number }>` SELECT COUNT(*) as count FROM ${sql.ref(contentTable)} WHERE deleted_at IS NULL `.execute(this.db); - // For external-content FTS tables, COUNT(*) on the virtual table is - // answered from the backing content table, including soft-deleted rows. - // The docsize shadow table tracks the rows actually present in the - // full-text index, which is what we need for repair decisions. const ftsCount = await sql<{ count: number }>` SELECT COUNT(*) as count FROM "${sql.raw(ftsDocsizeTable)}" `.execute(this.db); diff --git a/packages/core/tests/integration/database/migrations.test.ts b/packages/core/tests/integration/database/migrations.test.ts index 72d6e1bc2..17b145579 100644 --- a/packages/core/tests/integration/database/migrations.test.ts +++ b/packages/core/tests/integration/database/migrations.test.ts @@ -118,6 +118,7 @@ describe("Database Migrations (Integration)", () => { "036_i18n_menus_and_taxonomies", "037_credential_algorithm", "038_registry_plugin_state", + "039_fix_fts5_triggers", ]; await db.deleteFrom("_emdash_migrations").where("name", "in", trailing).execute(); diff --git a/packages/core/tests/integration/search/fts-corruption.test.ts b/packages/core/tests/integration/search/fts-corruption.test.ts new file mode 100644 index 000000000..88a132cf9 --- /dev/null +++ b/packages/core/tests/integration/search/fts-corruption.test.ts @@ -0,0 +1,326 @@ +/** + * Regression test for SQLITE_CORRUPT_VTAB on publish. + * + * Reproduces the corruption pattern reported in the issue: create a content + * item, publish it, edit and publish again. The FTS5 update trigger uses the + * external-content-safe `'delete'` command, so the index stays consistent and + * `pragma integrity_check` (which FTS5 hooks via `'integrity-check'`) does not + * report a malformed disk image. + */ + +import type { Kysely } from "kysely"; +import { sql } from "kysely"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { ContentRepository } from "../../../src/database/repositories/content.js"; +import type { Database } from "../../../src/database/types.js"; +import { SchemaRegistry } from "../../../src/schema/registry.js"; +import { FTSManager } from "../../../src/search/fts-manager.js"; +import { setupTestDatabase, teardownTestDatabase } from "../../utils/test-db.js"; + +describe("FTS corruption on publish (SQLITE_CORRUPT_VTAB)", () => { + let db: Kysely; + let registry: SchemaRegistry; + let repo: ContentRepository; + let ftsManager: FTSManager; + + beforeEach(async () => { + db = await setupTestDatabase(); + registry = new SchemaRegistry(db); + repo = new ContentRepository(db); + ftsManager = new FTSManager(db); + + // Mirror the default `pages` collection: search enabled, title + content + // (portableText, stored as JSON) both searchable. + await registry.createCollection({ + slug: "pages", + label: "Pages", + labelSingular: "Page", + supports: ["drafts", "revisions", "search"], + }); + await registry.createField("pages", { + slug: "title", + label: "Title", + type: "string", + required: true, + searchable: true, + }); + await registry.createField("pages", { + slug: "content", + label: "Content", + type: "portableText", + searchable: true, + }); + + await ftsManager.enableSearch("pages"); + }); + + afterEach(async () => { + await teardownTestDatabase(db); + }); + + it("does not corrupt the FTS index when a published page is edited and re-published", async () => { + const created = await repo.create({ + type: "pages", + slug: "about", + status: "draft", + data: { + title: "About", + content: [ + { + _type: "block", + _key: "a", + style: "normal", + children: [{ _type: "span", _key: "s", text: "Initial about page body." }], + }, + ], + }, + }); + + // First publish — promotes draft into the live columns and fires the + // update trigger on `ec_pages`. With the broken triggers from + // pre-fix versions this is the operation that begins corrupting + // the index. + await repo.publish("pages", created.id); + + // Edit the content (this is what the `publish` API route does after + // a draft is saved — it issues an UPDATE against `ec_pages`). The + // previously broken AFTER UPDATE trigger then issues + // `DELETE FROM fts WHERE rowid = OLD.rowid`, which reads NEW + // column values out of the (already updated) content table and + // corrupts the inverted index. + await repo.update("pages", created.id, { + data: { + title: "About v2", + content: [ + { + _type: "block", + _key: "a", + style: "normal", + children: [ + { + _type: "span", + _key: "s", + text: "Revised body with different searchable terms.", + }, + ], + }, + ], + }, + }); + + // FTS5 exposes integrity-check as a special command. With the broken + // triggers it throws SQLITE_CORRUPT_VTAB after the UPDATE above. + // This assertion is the actual regression test for the reported bug. + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + // Republish — this is the call from the issue trace + // (`Content publish error: ... SQLITE_CORRUPT_VTAB`). With the + // broken triggers it throws on the first UPDATE inside `publish`. + await expect(repo.publish("pages", created.id)).resolves.toBeDefined(); + + // And one more integrity-check after the republish. + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + // FTS docsize must still match the content table (one non-deleted row). + const docsize = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsize.rows[0]?.count).toBe(1); + }); + + // Upgrade-from-pre-fix-version is covered by the migration test: + // `tests/unit/database/migrations/039_fix_fts5_triggers.test.ts`. + + it("survives the full trash lifecycle (soft-delete -> restore -> integrity-check)", async () => { + // The INSERT trigger only indexes rows where `deleted_at IS NULL`, so + // the UPDATE/DELETE triggers must gate `'delete'` on the same + // condition. Without that gate, restoring a row from trash issues + // `'delete'` for a rowid that's not in the FTS index, which raises + // `SQLITE_CORRUPT_VTAB`. Reproduces the regression introduced + // alongside the original fix. + const created = await repo.create({ + type: "pages", + slug: "trashable", + status: "draft", + data: { + title: "Trashable", + content: [ + { + _type: "block", + _key: "a", + style: "normal", + children: [{ _type: "span", _key: "s", text: "Body before trash." }], + }, + ], + }, + }); + + await repo.publish("pages", created.id); + + // Soft-delete -- this UPDATE moves OLD (in-index) -> NEW (deleted). + await expect(repo.delete("pages", created.id)).resolves.toBe(true); + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + // Restore -- OLD has `deleted_at IS NOT NULL` (not in index), NEW + // has `deleted_at IS NULL`. The UPDATE trigger must NOT issue + // `'delete'` here. + await expect(repo.restore("pages", created.id)).resolves.toBe(true); + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + // After restore the row should be back in the index. + const docsizeAfterRestore = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsizeAfterRestore.rows[0]?.count).toBe(1); + + // Edit after restore -- exercises the normal indexed-OLD path again. + await repo.update("pages", created.id, { + data: { + title: "Restored and edited", + content: [ + { + _type: "block", + _key: "a", + style: "normal", + children: [{ _type: "span", _key: "s", text: "Body after restore." }], + }, + ], + }, + }); + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + }); + + it("survives permanent delete of a soft-deleted row without corrupting the index", async () => { + // `permanentDelete` is a hard `DELETE FROM ec_pages WHERE id = ?` + // applied to a row that's already soft-deleted (i.e. not in the FTS + // index). The DELETE trigger must not issue `'delete'` for this + // rowid -- otherwise SQLITE_CORRUPT_VTAB. + const created = await repo.create({ + type: "pages", + slug: "purgeable", + status: "draft", + data: { + title: "Purgeable", + content: [ + { + _type: "block", + _key: "a", + style: "normal", + children: [{ _type: "span", _key: "s", text: "Will be purged." }], + }, + ], + }, + }); + await repo.publish("pages", created.id); + await expect(repo.delete("pages", created.id)).resolves.toBe(true); + + // Permanent delete of a soft-deleted row. + await expect(repo.permanentDelete("pages", created.id)).resolves.toBe(true); + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + const docsize = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsize.rows[0]?.count).toBe(0); + }); + + it("survives editing a row while it's in the trash without corrupting the index", async () => { + // An UPDATE on a row whose OLD.deleted_at is set should not issue + // `'delete'` to the FTS index (the row was never indexed). The + // regression hit any UPDATE on a soft-deleted row, including the + // ones the API issues when restoring metadata. + const created = await repo.create({ + type: "pages", + slug: "edit-while-trashed", + status: "draft", + data: { + title: "Trashed", + content: [], + }, + }); + await repo.publish("pages", created.id); + await expect(repo.delete("pages", created.id)).resolves.toBe(true); + + // Update directly via SQL -- repo.update filters out deleted rows, + // but other paths (admin tooling, migrations) can UPDATE in place. + await sql` + UPDATE ec_pages + SET title = 'edited while trashed' + WHERE id = ${created.id} + `.execute(db); + + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + }); + + it("survives many edit/publish cycles without corrupting the index", async () => { + const created = await repo.create({ + type: "pages", + slug: "stress", + status: "draft", + data: { title: "Stress", content: [] }, + }); + + await repo.publish("pages", created.id); + + // Many cycles is what tends to surface FTS5 corruption -- a single + // bad UPDATE leaves a small amount of garbage; repeated bad UPDATEs + // compound it until SQLite refuses to read the segment. + for (let i = 0; i < 25; i++) { + await repo.update("pages", created.id, { + data: { + title: `Stress ${i}`, + content: [ + { + _type: "block", + _key: `b${i}`, + style: "normal", + children: [ + { + _type: "span", + _key: `s${i}`, + text: `Iteration ${i} unique-token-${i} alpha beta gamma`, + }, + ], + }, + ], + }, + }); + await repo.publish("pages", created.id); + } + + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + }); +}); diff --git a/packages/core/tests/unit/database/migrations/039_fix_fts5_triggers.test.ts b/packages/core/tests/unit/database/migrations/039_fix_fts5_triggers.test.ts new file mode 100644 index 000000000..fd9ff4053 --- /dev/null +++ b/packages/core/tests/unit/database/migrations/039_fix_fts5_triggers.test.ts @@ -0,0 +1,299 @@ +import type { Kysely } from "kysely"; +import { sql } from "kysely"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { ContentRepository } from "../../../../src/database/repositories/content.js"; +import type { Database } from "../../../../src/database/types.js"; +import { SchemaRegistry } from "../../../../src/schema/registry.js"; +import { FTSManager } from "../../../../src/search/fts-manager.js"; +import { setupTestDatabase, teardownTestDatabase } from "../../../utils/test-db.js"; + +/** + * Migration 039 rebuilds every FTS5 index with corruption-safe triggers. + * + * The "happy path" cases (fresh install with the fixed triggers in place) + * are covered by `tests/integration/search/fts-corruption.test.ts`. These + * tests exercise the migration directly against pre-fix state — the case + * a real upgrade hits. + */ + +describe("migration 039: rebuild FTS5 triggers", () => { + let db: Kysely; + let registry: SchemaRegistry; + let repo: ContentRepository; + + beforeEach(async () => { + db = await setupTestDatabase(); + registry = new SchemaRegistry(db); + repo = new ContentRepository(db); + }); + + afterEach(async () => { + await teardownTestDatabase(db); + }); + + /** + * Replace the FTS update + delete triggers on a collection with the + * broken pre-fix form so we can verify migration 039 fixes them. + * + * Mirrors exactly what `sqlite_master` looked like on every site that + * ran a pre-fix EmDash version: the contentless-table sync pattern + * (`DELETE FROM "" WHERE rowid = OLD.rowid`) installed on what is + * actually an external-content FTS5 table. + */ + async function installPreFixTriggers(collectionSlug: string, fields: string[]): Promise { + const ftsTable = `_emdash_fts_${collectionSlug}`; + const contentTable = `ec_${collectionSlug}`; + const fieldList = fields.join(", "); + const newFieldList = fields.map((f) => `NEW.${f}`).join(", "); + + await sql.raw(`DROP TRIGGER IF EXISTS "${ftsTable}_update"`).execute(db); + await sql.raw(`DROP TRIGGER IF EXISTS "${ftsTable}_delete"`).execute(db); + + await sql + .raw(` + CREATE TRIGGER "${ftsTable}_update" + AFTER UPDATE ON "${contentTable}" + BEGIN + DELETE FROM "${ftsTable}" WHERE rowid = OLD.rowid; + INSERT INTO "${ftsTable}"(rowid, id, locale, ${fieldList}) + SELECT NEW.rowid, NEW.id, NEW.locale, ${newFieldList} + WHERE NEW.deleted_at IS NULL; + END + `) + .execute(db); + + await sql + .raw(` + CREATE TRIGGER "${ftsTable}_delete" + AFTER DELETE ON "${contentTable}" + BEGIN + DELETE FROM "${ftsTable}" WHERE rowid = OLD.rowid; + END + `) + .execute(db); + } + + async function setupSearchEnabledPages(): Promise { + await registry.createCollection({ + slug: "pages", + label: "Pages", + labelSingular: "Page", + supports: ["drafts", "revisions", "search"], + }); + await registry.createField("pages", { + slug: "title", + label: "Title", + type: "string", + searchable: true, + }); + await registry.createField("pages", { + slug: "body", + label: "Body", + type: "text", + searchable: true, + }); + + const fts = new FTSManager(db); + await fts.enableSearch("pages"); + } + + async function runMigration039(): Promise { + const { up } = await import("../../../../src/database/migrations/039_fix_fts5_triggers.js"); + await up(db as unknown as Kysely); + } + + it("is a no-op on databases with no search-enabled collections", async () => { + // Empty DB — just the system tables, no `_emdash_collections` rows + // with a `search_config`. Migration must not throw. + await expect(runMigration039()).resolves.toBeUndefined(); + }); + + it("rebuilds the FTS index for every search-enabled collection", async () => { + await setupSearchEnabledPages(); + + const created = await repo.create({ + type: "pages", + slug: "about", + status: "published", + publishedAt: new Date().toISOString(), + data: { title: "About", body: "Some searchable body text." }, + }); + + // Simulate the pre-fix state: broken triggers + a published row. + // The legacy triggers are functional on INSERT (the contentless and + // external-content forms agree there), so the row is in the index + // at this point. The migration must replace the triggers without + // losing that row. + await installPreFixTriggers("pages", ["title", "body"]); + + await runMigration039(); + + // New triggers in place (no `DELETE FROM "" WHERE rowid = OLD.rowid`). + const triggers = await sql<{ name: string; sql: string }>` + SELECT name, sql FROM sqlite_master + WHERE type = 'trigger' AND tbl_name = 'ec_pages' + `.execute(db); + for (const trigger of triggers.rows) { + expect(trigger.sql).not.toMatch(/DELETE\s+FROM\s+"_emdash_fts_pages"\s+WHERE\s+rowid/i); + } + // The new triggers use the `'delete'` command. + const updateTrigger = triggers.rows.find((r) => r.name === "_emdash_fts_pages_update"); + expect(updateTrigger?.sql).toMatch(/'delete'/); + + // Content row is still in the index after rebuild. + const docsize = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsize.rows[0]?.count).toBe(1); + + // And an edit + publish on the rebuilt index does not corrupt it + // (the regression PR #768 was originally trying to fix). + await repo.update("pages", created.id, { + data: { title: "About v2", body: "Revised body." }, + }); + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + }); + + it("repairs a database whose FTS index is already corrupted from earlier mutations", async () => { + await setupSearchEnabledPages(); + + const created = await repo.create({ + type: "pages", + slug: "corrupt-me", + status: "published", + publishedAt: new Date().toISOString(), + data: { title: "Corrupt me", body: "Original body." }, + }); + + // Install the broken triggers and then *fire them* by issuing the + // kind of UPDATE the publish path does. The broken trigger's + // `DELETE FROM fts WHERE rowid = OLD.rowid` on an external-content + // table reads NEW values from the content table and corrupts the + // inverted index. On vanilla SQLite this throws SQLITE_CORRUPT_VTAB + // directly; some builds let one bad update through silently and + // throw on the next. We accept both outcomes — the assertion is + // that the migration recovers the database either way. + await installPreFixTriggers("pages", ["title", "body"]); + try { + await sql` + UPDATE ec_pages SET title = 'Updated', body = 'Updated body.' WHERE id = ${created.id} + `.execute(db); + } catch { + // Expected on some SQLite builds — the corruption surfaces immediately. + } + + // Run the migration. This must succeed regardless of the corrupt state. + await runMigration039(); + + // After the migration the index is consistent. + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + const docsize = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsize.rows[0]?.count).toBe(1); + }); + + it("disables FTS cleanly when search_config.enabled is true but no fields are searchable", async () => { + // Edge case: search was enabled, then every searchable field was + // later unmarked, leaving an FTS table with no columns to index. + // The migration should drop the FTS table and flip search_config + // to enabled=false instead of recreating a useless index. + await registry.createCollection({ + slug: "items", + label: "Items", + labelSingular: "Item", + supports: ["search"], + }); + await registry.createField("items", { + slug: "title", + label: "Title", + type: "string", + searchable: true, + }); + + const fts = new FTSManager(db); + await fts.enableSearch("items"); + + // Now unmark the field as searchable directly in the DB so the + // rebuild path is exercised with `searchableFields.length === 0`. + await sql` + UPDATE _emdash_fields SET searchable = 0 + WHERE collection_id = (SELECT id FROM _emdash_collections WHERE slug = 'items') + `.execute(db); + + await runMigration039(); + + const ftsTableExists = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM sqlite_master + WHERE type = 'table' AND name = '_emdash_fts_items' + `.execute(db); + expect(ftsTableExists.rows[0]?.count).toBe(0); + + const config = await sql<{ search_config: string | null }>` + SELECT search_config FROM _emdash_collections WHERE slug = 'items' + `.execute(db); + const parsed = JSON.parse(config.rows[0]?.search_config ?? "{}"); + expect(parsed.enabled).toBe(false); + }); + + it("ignores collections whose search_config has enabled=false", async () => { + await registry.createCollection({ + slug: "drafts", + label: "Drafts", + labelSingular: "Draft", + supports: ["search"], + }); + await registry.createField("drafts", { + slug: "title", + label: "Title", + type: "string", + searchable: true, + }); + + // Explicitly disabled (the normal state for collections that + // declare `supports: ['search']` but the operator never flipped + // on). The migration should not touch them. + await sql` + UPDATE _emdash_collections SET search_config = '{"enabled":false}' + WHERE slug = 'drafts' + `.execute(db); + + await expect(runMigration039()).resolves.toBeUndefined(); + }); + + it("is safe to re-run", async () => { + await setupSearchEnabledPages(); + await repo.create({ + type: "pages", + slug: "idempotent", + status: "published", + publishedAt: new Date().toISOString(), + data: { title: "Idempotent", body: "Body." }, + }); + + await runMigration039(); + await runMigration039(); + await runMigration039(); + + await expect( + sql + .raw(`INSERT INTO _emdash_fts_pages(_emdash_fts_pages) VALUES('integrity-check')`) + .execute(db), + ).resolves.toBeDefined(); + + const docsize = await sql<{ count: number }>` + SELECT COUNT(*) as count FROM "_emdash_fts_pages_docsize" + `.execute(db); + expect(docsize.rows[0]?.count).toBe(1); + }); +});