Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,297 changes: 1,297 additions & 0 deletions docs/rfcs/recovery-snapshot-redesign.md

Large diffs are not rendered by default.

111 changes: 109 additions & 2 deletions server/src/routes/snapshots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import {
createSnapshot,
getSnapshotPayload,
listSnapshots,
applyRetention,
getLatestSnapshotIndex,
type SnapshotResult,
} from "../snapshot";
import type { Env, JsonResponse } from "./types";
Expand Down Expand Up @@ -80,10 +82,95 @@ export async function handleSnapshotRoute(
return json({ error: "snapshots_unavailable" }, 503);
}

const snapshots = await listSnapshots(vaultId, env.YAOS_BUCKET);
const url = new URL(req.url);
const limitParam = url.searchParams.get("limit");
const limit = limitParam ? Math.min(Math.max(1, parseInt(limitParam, 10) || 50), 200) : 50;
const format = url.searchParams.get("format");

const { snapshots, totalIndexKeys, limited } = await listSnapshots(vaultId, env.YAOS_BUCKET, limit);

// Legacy compatibility: default response is { snapshots: [...] }
// which old clients destructure as `result.snapshots`.
// New clients can request ?format=v2 for richer metadata.
if (format === "v2") {
return json({
snapshots,
totalIndexKeys,
fetchedCount: snapshots.length,
limited,
});
}

// Default: legacy-compatible shape (old clients expect { snapshots })
return json({ snapshots });
}

if (req.method === "GET" && rest.length === 1 && rest[0] === "status") {
if (!env.YAOS_BUCKET) {
return json({ error: "snapshots_unavailable" }, 503);
}

const latest = await getLatestSnapshotIndex(vaultId, env.YAOS_BUCKET);
// Use a high limit but be honest that it's a lower bound.
const { snapshots: all, totalIndexKeys, limited } = await listSnapshots(vaultId, env.YAOS_BUCKET, 200);
const fetchedBytes = all.reduce((sum, s) => sum + s.crdtSizeBytes, 0);

const pinnedCount = all.filter((s) => s.pinned).length;

return json({
// New honest fields (prefer these in new clients)
snapshotCountLowerBound: totalIndexKeys,
listedSnapshotCount: all.length,
listingLimited: limited,
estimatedStorageBytesLowerBound: fetchedBytes,
pinnedCountLowerBound: pinnedCount,
// Legacy aliases (kept for old clients — same values, less honest names)
snapshotCount: totalIndexKeys,
estimatedStorageBytes: fetchedBytes,
pinnedCount,
// Common fields
latestSnapshotId: latest?.snapshotId ?? null,
latestCreatedAt: latest?.createdAt ?? null,
});
}

if (req.method === "POST" && rest.length === 1 && rest[0] === "prune") {
if (!env.YAOS_BUCKET) {
return json({ error: "snapshots_unavailable" }, 503);
}

let body: { pruneLegacy?: boolean; confirmLegacyPrune?: string } = {};
try {
body = await req.json();
} catch {
body = {};
}

// Safety latch: pruneLegacy requires explicit confirmation string.
// Legacy snapshots have unknown origin — deleting them is destructive
// and irreversible. Make it ugly on purpose.
const pruneLegacy = body.pruneLegacy === true &&
body.confirmLegacyPrune === "DELETE_LEGACY_SNAPSHOTS";

if (body.pruneLegacy === true && !pruneLegacy) {
return json({
error: "pruneLegacy requires confirmLegacyPrune: 'DELETE_LEGACY_SNAPSHOTS'",
}, 400);
}

const result = await applyRetention(vaultId, env.YAOS_BUCKET, undefined, {
pruneLegacy,
});
await options.recordVaultTrace(env, vaultId, "snapshot-retention-applied", {
kept: result.kept,
pruned: result.pruned,
failed: result.failed,
pruneLegacy: body.pruneLegacy === true,
errors: result.errors.slice(0, 10),
});
return json({ kept: result.kept, pruned: result.pruned, failed: result.failed });
}

if (req.method === "GET" && rest.length === 1) {
if (!env.YAOS_BUCKET) {
return json({ error: "snapshots_unavailable" }, 503);
Expand Down Expand Up @@ -127,16 +214,36 @@ async function createSnapshotFromLiveDoc(
};
}

const previous = await getLatestSnapshotIndex(vaultId, env.YAOS_BUCKET);

const update = await fetchVaultDocument(env, vaultId);
const doc = new Y.Doc();
if (update.byteLength > 0) {
Y.applyUpdate(doc, update);
}

const index = await createSnapshot(doc, vaultId, env.YAOS_BUCKET, triggeredBy);
const index = await createSnapshot(doc, vaultId, env.YAOS_BUCKET, {
triggeredBy,
reason: "manual",
pinned: true,
});

// Use fullUpdateHash for the "identical" check. This is meaningful:
// it means the entire CRDT state (including content and delete set) is
// byte-for-byte identical to the latest snapshot. Only then do we say
// "snapshot identical to latest."
const snapshotIdenticalToLatest = !!(
previous?.fullUpdateHash &&
index.fullUpdateHash &&
previous.fullUpdateHash === index.fullUpdateHash
);

return {
status: "created",
snapshotId: index.snapshotId,
index,
snapshotIdenticalToLatest,
// Legacy alias for old clients that check this field
semanticUnchanged: snapshotIdenticalToLatest,
};
}
96 changes: 90 additions & 6 deletions server/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import { readRoomMeta, type RoomMeta, writeRoomMeta } from "./roomMeta";
import {
createSnapshot,
hasSnapshotForDay,
getLatestSnapshotIndex,
verifySnapshotExists,
computeFullUpdateHash,
applyRetention,
type SnapshotResult,
} from "./snapshot";
import {
Expand All @@ -20,6 +24,7 @@ import {
import { trySendSvEcho, type SvEchoSendResult } from "./svEcho";
import { isUpdateBearingSyncMessage } from "./syncMessageClassifier";
import { bytesToHex } from "./hex";
import { sha256Hex } from "./hex";
import {
PersistenceCoordinator,
type PersistenceHealth,
Expand Down Expand Up @@ -587,20 +592,99 @@ export class VaultSyncServer extends YServer {
} satisfies SnapshotResult;
}

const currentDay = new Date().toISOString().slice(0, 10);
if (await hasSnapshotForDay(this.getRoomId(), currentDay, bucket)) {
const vaultId = this.getRoomId();

// Dedup: skip if the full encoded CRDT (including delete set) is unchanged.
// We use fullUpdateHash because Yjs state vectors do NOT track deletions.
// A state-vector-only check would miss delete-only changes, which is
// catastrophic for a recovery system.
//
// Cost: O(doc size) to encode + hash. Acceptable at daily frequency.
const latest = await getLatestSnapshotIndex(vaultId, bucket);
if (latest?.fullUpdateHash) {
const rawUpdate = Y.encodeStateAsUpdate(this.document);
const currentHash = await sha256Hex(rawUpdate);
if (latest.fullUpdateHash === currentHash) {
// Before skipping: verify the pointed snapshot actually exists.
// A poisoned latest pointer (payload never written) would
// otherwise cause us to skip forever.
const exists = await verifySnapshotExists(vaultId, latest, bucket);
if (exists) {
return {
status: "noop",
reason: "No changes since last snapshot (full CRDT state identical)",
} satisfies SnapshotResult;
}
// Pointer is poisoned — fall through to create a new snapshot.
// The precomputed update is still valid, pass it along.
}
// Hash changed — create snapshot. Pass precomputed values to avoid re-encoding.
const index = await createSnapshot(
this.document,
vaultId,
bucket,
{
triggeredBy,
reason: "daily",
pinned: false,
precomputedRawUpdate: rawUpdate,
precomputedFullUpdateHash: currentHash,
},
);

// Retention: await so failures are observable.
try {
const retentionResult = await applyRetention(vaultId, bucket);
if (retentionResult.failed > 0) {
console.error(
`${LOG_PREFIX} retention: ${retentionResult.failed} delete(s) failed:`,
retentionResult.errors.slice(0, 5),
);
}
} catch (err) {
console.error(`${LOG_PREFIX} retention failed:`, err);
}

return {
status: "noop",
reason: `Snapshot already taken today (${currentDay})`,
status: "created",
snapshotId: index.snapshotId,
index,
} satisfies SnapshotResult;
} else if (latest?.stateVectorHash) {
// Transitional: old snapshot has stateVectorHash but no fullUpdateHash.
// Cannot safely skip — state vector misses deletes.
// Fall through to create a new snapshot with fullUpdateHash.
} else if (latest) {
// Ancient legacy path: no hash fields at all. Day-based dedup.
const currentDay = new Date().toISOString().slice(0, 10);
if (await hasSnapshotForDay(vaultId, currentDay, bucket)) {
return {
status: "noop",
reason: `Snapshot already taken today (${currentDay})`,
} satisfies SnapshotResult;
}
}

const index = await createSnapshot(
this.document,
this.getRoomId(),
vaultId,
bucket,
triggeredBy,
{ triggeredBy, reason: "daily", pinned: false },
);

// Retention: await so failures are observable.
try {
const retentionResult = await applyRetention(vaultId, bucket);
if (retentionResult.failed > 0) {
console.error(
`${LOG_PREFIX} retention: ${retentionResult.failed} delete(s) failed:`,
retentionResult.errors.slice(0, 5),
);
}
} catch (err) {
console.error(`${LOG_PREFIX} retention failed:`, err);
}

return {
status: "created",
snapshotId: index.snapshotId,
Expand Down
Loading
Loading