From ca5120ccb1873de1b02395f8f6004e14c08a1c84 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 01:45:24 +0000 Subject: [PATCH 1/5] Initial plan From ffd41edecbff68e593a1dcd2d276451ab79dae87 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 02:02:09 +0000 Subject: [PATCH 2/5] =?UTF-8?q?Fix=20build=20errors,=20rename=20MetroidNei?= =?UTF-8?q?ghbor=E2=86=92SemanticNeighbor,=20fix=20getAllVolumes=20syntax,?= =?UTF-8?q?=20fix=20FullNeighborRecalc=20budget=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com> --- daydreamer/ClusterStability.ts | 54 +++++++++----------- daydreamer/FullNeighborRecalc.ts | 19 +++---- sharing/SubgraphExporter.ts | 4 +- storage/IndexedDbMetadataStore.ts | 3 ++ tests/benchmarks/HotpathScaling.bench.ts | 3 ++ tests/daydreamer/ClusterStability.test.ts | 38 +++++++------- tests/daydreamer/FullNeighborRecalc.test.ts | 35 +++++++------ tests/daydreamer/HebbianUpdater.test.ts | 19 +++---- tests/daydreamer/PrototypeRecomputer.test.ts | 19 +++---- tests/integration/Daydreamer.test.ts | 6 +-- tests/sharing/SubgraphExchange.test.ts | 19 +++---- 11 files changed, 113 insertions(+), 106 deletions(-) diff --git a/daydreamer/ClusterStability.ts b/daydreamer/ClusterStability.ts index 1420df1..1e50d65 100644 --- a/daydreamer/ClusterStability.ts +++ b/daydreamer/ClusterStability.ts @@ -1,22 +1,36 @@ // --------------------------------------------------------------------------- -// ClusterStability — Community detection via label propagation (P2-F) +// ClusterStability — Community detection via label propagation (P2-F) and +// volume split/merge for balanced cluster maintenance (P2-F3) // --------------------------------------------------------------------------- // // Assigns community labels to 
pages by running lightweight label propagation -// on the semantic (Metroid) neighbor graph. Labels are stored in +// on the semantic neighbor graph. Labels are stored in // PageActivity.communityId and propagate into SalienceEngine community quotas. // // Label propagation terminates when assignments stabilise (no label changes) // or a maximum iteration limit is reached. +// +// The Daydreamer background worker also calls ClusterStability periodically to +// detect and fix unstable volumes: +// - HIGH-VARIANCE volumes are split into two balanced sub-volumes. +// - LOW-COUNT volumes are merged into the nearest neighbour volume. +// - Community labels are updated after structural changes. // --------------------------------------------------------------------------- -import type { Hash, MetadataStore, PageActivity } from "../core/types"; +import { hashText } from "../core/crypto/hash"; +import type { + Book, + Hash, + MetadataStore, + PageActivity, + Volume, +} from "../core/types"; // --------------------------------------------------------------------------- -// Options +// Label propagation options // --------------------------------------------------------------------------- -export interface ClusterStabilityOptions { +export interface LabelPropagationOptions { metadataStore: MetadataStore; /** Maximum number of label propagation iterations. Default: 20. */ maxIterations?: number; @@ -55,7 +69,7 @@ async function propagationPass( const sorted = [...pageIds].sort(); for (const pageId of sorted) { - const neighbors = await metadataStore.getMetroidNeighbors(pageId); + const neighbors = await metadataStore.getSemanticNeighbors(pageId); if (neighbors.length === 0) continue; // Count neighbor labels @@ -103,7 +117,7 @@ async function propagationPass( * `MetadataStore.putPageActivity`. 
*/ export async function runLabelPropagation( - options: ClusterStabilityOptions, + options: LabelPropagationOptions, ): Promise { const { metadataStore, @@ -200,32 +214,12 @@ export function detectEmptyCommunities( } } return empty; -// ClusterStability — Volume split/merge for balanced cluster maintenance +} + // --------------------------------------------------------------------------- -// -// The Daydreamer background worker calls ClusterStability periodically to -// detect and fix unstable volumes: -// -// - HIGH-VARIANCE volumes are split into two balanced sub-volumes using -// K-means with K=2 (one pass). -// - LOW-COUNT volumes are merged into the nearest neighbour volume -// (by medoid distance). -// - Community labels on PageActivity records are updated after structural -// changes so downstream salience computation stays coherent. -// -// All operations are idempotent: re-running on a stable set of volumes is a -// no-op. +// ClusterStability class — Volume split/merge configuration // --------------------------------------------------------------------------- -import { hashText } from "../core/crypto/hash"; -import type { - Book, - Hash, - MetadataStore, - PageActivity, - Volume, -} from "../core/types"; - // --------------------------------------------------------------------------- // Configuration // --------------------------------------------------------------------------- diff --git a/daydreamer/FullNeighborRecalc.ts b/daydreamer/FullNeighborRecalc.ts index a9e63e5..6c0e399 100644 --- a/daydreamer/FullNeighborRecalc.ts +++ b/daydreamer/FullNeighborRecalc.ts @@ -11,7 +11,7 @@ // pairwise comparisons (O(sqrt(t * log(1+t))) growth). 
// --------------------------------------------------------------------------- -import type { Hash, MetadataStore, MetroidNeighbor, Page, VectorStore } from "../core/types"; +import type { Hash, MetadataStore, SemanticNeighbor, Page, VectorStore } from "../core/types"; import { computeCapacity, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy"; import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine"; @@ -61,7 +61,7 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number { /** * Run one cycle of full neighbor graph recalculation. * - * Finds all volumes flagged as dirty (via `needsMetroidRecalc`), loads + * Finds all volumes flagged as dirty (via `needsNeighborRecalc`), loads * their pages, computes pairwise cosine similarities, and updates the * Metroid neighbor index. Processing is bounded by the Williams-Bound-derived * maintenance budget to avoid blocking the idle loop. @@ -86,7 +86,7 @@ export async function runFullNeighborRecalc( await Promise.all( allVolumes.map(async (v) => ({ volume: v, - dirty: await metadataStore.needsMetroidRecalc(v.volumeId), + dirty: await metadataStore.needsNeighborRecalc(v.volumeId), })), ) ) @@ -122,7 +122,7 @@ export async function runFullNeighborRecalc( } if (volumePages.length === 0) { - await metadataStore.clearMetroidRecalcFlag(volume.volumeId); + await metadataStore.clearNeighborRecalcFlag(volume.volumeId); totalVolumesProcessed++; continue; } @@ -137,8 +137,9 @@ export async function runFullNeighborRecalc( // Compute pairwise similarities and build neighbor lists const pairsInVolume = volumePages.length * (volumePages.length - 1); const remainingBudget = pairBudget - totalPairsComputed; - if (pairsInVolume > remainingBudget) { - // Budget exhausted — leave this volume dirty for next cycle + if (pairsInVolume > remainingBudget && totalVolumesProcessed > 0) { + // Budget exhausted after processing at least one volume — defer the rest. 
+ // We always process at least one volume per cycle to guarantee progress. break; } @@ -146,7 +147,7 @@ export async function runFullNeighborRecalc( const page = volumePages[i]; const vecI = vectors[i]; - const neighbors: MetroidNeighbor[] = []; + const neighbors: SemanticNeighbor[] = []; for (let j = 0; j < volumePages.length; j++) { if (i === j) continue; @@ -167,12 +168,12 @@ export async function runFullNeighborRecalc( ); const topNeighbors = neighbors.slice(0, maxNeighbors); - await metadataStore.putMetroidNeighbors(page.pageId, topNeighbors); + await metadataStore.putSemanticNeighbors(page.pageId, topNeighbors); affectedPageIds.add(page.pageId); } // Clear the dirty flag - await metadataStore.clearMetroidRecalcFlag(volume.volumeId); + await metadataStore.clearNeighborRecalcFlag(volume.volumeId); totalVolumesProcessed++; totalPagesProcessed += volumePages.length; } diff --git a/sharing/SubgraphExporter.ts b/sharing/SubgraphExporter.ts index 300852d..a32db9e 100644 --- a/sharing/SubgraphExporter.ts +++ b/sharing/SubgraphExporter.ts @@ -11,7 +11,7 @@ // --------------------------------------------------------------------------- import { randomUUID } from "../core/crypto/uuid"; -import type { Edge, Hash, MetadataStore, MetroidNeighbor, Page } from "../core/types"; +import type { Edge, Hash, MetadataStore, SemanticNeighbor, Page } from "../core/types"; import { filterEligible } from "./EligibilityClassifier"; import type { CuriosityProbe, SubgraphSlice } from "./types"; @@ -84,7 +84,7 @@ async function expandSeeds( if (collectedPages.length >= maxNodes) break; // Expand via Metroid (semantic) neighbors - const metroidNeighbors: MetroidNeighbor[] = await metadataStore.getMetroidNeighbors(pageId); + const metroidNeighbors: SemanticNeighbor[] = await metadataStore.getSemanticNeighbors(pageId); for (const n of metroidNeighbors) { if (!visited.has(n.neighborPageId) && collectedPages.length < maxNodes) { visited.add(n.neighborPageId); diff --git 
a/storage/IndexedDbMetadataStore.ts b/storage/IndexedDbMetadataStore.ts index ef1da1d..2ffb384 100644 --- a/storage/IndexedDbMetadataStore.ts +++ b/storage/IndexedDbMetadataStore.ts @@ -240,6 +240,9 @@ export class IndexedDbMetadataStore implements MetadataStore { const req = tx.objectStore(STORE.volumes).getAll(); req.onsuccess = () => resolve(req.result as Volume[]); req.onerror = () => reject(req.error); + }); + } + /** * Delete a volume and clean up its reverse-index entries: * - Removes the volume from the `bookToVolume` index for each of its books. diff --git a/tests/benchmarks/HotpathScaling.bench.ts b/tests/benchmarks/HotpathScaling.bench.ts index b38e13f..31a97a8 100644 --- a/tests/benchmarks/HotpathScaling.bench.ts +++ b/tests/benchmarks/HotpathScaling.bench.ts @@ -102,9 +102,12 @@ class BenchMetadataStore implements MetadataStore { async getBook(): Promise { return undefined; } async putVolume(): Promise { /* stub */ } async getVolume(): Promise { return undefined; } + async getAllVolumes(): Promise { return []; } async deleteVolume(): Promise { /* stub */ } async putShelf(): Promise { /* stub */ } async getShelf(): Promise { return undefined; } + async getAllShelves(): Promise { return []; } + async deleteEdge(): Promise { /* stub */ } async getBooksByPage(): Promise { return []; } async getVolumesByBook(): Promise { return []; } async getShelvesByVolume(): Promise { return []; } diff --git a/tests/daydreamer/ClusterStability.test.ts b/tests/daydreamer/ClusterStability.test.ts index 83cd31e..aa6ba8a 100644 --- a/tests/daydreamer/ClusterStability.test.ts +++ b/tests/daydreamer/ClusterStability.test.ts @@ -7,8 +7,6 @@ import { beforeEach, describe, expect, it } from "vitest"; -import { beforeEach, describe, expect, it } from "vitest"; - import { ClusterStability } from "../../daydreamer/ClusterStability"; import type { Book, @@ -16,8 +14,6 @@ import type { Hash, HotpathEntry, MetadataStore, - MetroidNeighbor, - MetroidSubgraph, SemanticNeighbor, 
SemanticNeighborSubgraph, Page, @@ -51,7 +47,7 @@ function makePage(pageId: Hash): Page { }; } -class MockMetadataStore implements MetadataStore { +class LabelPropMockStore implements MetadataStore { private pages = new Map(); private books = new Map(); private volumes = new Map(); @@ -59,7 +55,7 @@ class MockMetadataStore implements MetadataStore { private edgeMap = new Map(); private activities = new Map(); private hotpath = new Map(); - private metroidNeighbors = new Map(); + private semanticNeighbors = new Map(); private dirtyFlags = new Map(); async putPage(page: Page) { this.pages.set(page.pageId, page); } @@ -72,6 +68,7 @@ class MockMetadataStore implements MetadataStore { async putVolume(v: Volume) { this.volumes.set(v.volumeId, v); } async getVolume(id: Hash) { return this.volumes.get(id); } async getAllVolumes() { return [...this.volumes.values()]; } + async deleteVolume(id: Hash) { this.volumes.delete(id); } async putShelf(s: Shelf) { this.shelves.set(s.shelfId, s); } async getShelf(id: Hash) { return this.shelves.get(id); } @@ -87,15 +84,15 @@ class MockMetadataStore implements MetadataStore { async getVolumesByBook() { return []; } async getShelvesByVolume() { return []; } - async putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]) { - this.metroidNeighbors.set(pageId, [...neighbors]); + async putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]) { + this.semanticNeighbors.set(pageId, [...neighbors]); } - async getMetroidNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } - async getInducedMetroidSubgraph(): Promise { return { nodes: [], edges: [] }; } + async getSemanticNeighbors(pageId: Hash) { return this.semanticNeighbors.get(pageId) ?? 
[]; } + async getInducedNeighborSubgraph(): Promise { return { nodes: [], edges: [] }; } - async needsMetroidRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } - async flagVolumeForMetroidRecalc(id: Hash) { this.dirtyFlags.set(id, true); } - async clearMetroidRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } + async needsNeighborRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } + async flagVolumeForNeighborRecalc(id: Hash) { this.dirtyFlags.set(id, true); } + async clearNeighborRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } async putHotpathEntry(entry: HotpathEntry) { this.hotpath.set(entry.entityId, { ...entry }); } async getHotpathEntries(tier?: HotpathEntry["tier"]) { @@ -122,16 +119,16 @@ class MockMetadataStore implements MetadataStore { // --------------------------------------------------------------------------- function addNeighbors( - store: MockMetadataStore, + store: LabelPropMockStore, pageId: Hash, neighborIds: Hash[], ): void { - const neighbors: MetroidNeighbor[] = neighborIds.map((id) => ({ + const neighbors: SemanticNeighbor[] = neighborIds.map((id) => ({ neighborPageId: id, cosineSimilarity: 0.9, distance: 0.1, })); - void store.putMetroidNeighbors(pageId, neighbors); + void store.putSemanticNeighbors(pageId, neighbors); } // --------------------------------------------------------------------------- @@ -139,10 +136,10 @@ function addNeighbors( // --------------------------------------------------------------------------- describe("runLabelPropagation", () => { - let store: MockMetadataStore; + let store: LabelPropMockStore; beforeEach(() => { - store = new MockMetadataStore(); + store = new LabelPropMockStore(); }); it("returns empty communityMap for empty store", async () => { @@ -299,6 +296,8 @@ describe("detectEmptyCommunities", () => { const known = new Set(["c1", "c2"]); const empty = detectEmptyCommunities(known, new Set()); expect(empty).toEqual(new Set(["c1", "c2"])); + }); +}); // 
--------------------------------------------------------------------------- // In-memory MetadataStore mock @@ -325,14 +324,17 @@ class MockMetadataStore implements MetadataStore { // Volumes async putVolume(volume: Volume): Promise { this.volumes.set(volume.volumeId, { ...volume }); } async getVolume(id: Hash): Promise { return this.volumes.get(id); } + async getAllVolumes(): Promise { return [...this.volumes.values()]; } async deleteVolume(volumeId: Hash): Promise { this.volumes.delete(volumeId); } // Shelves async putShelf(shelf: Shelf): Promise { this.shelves.set(shelf.shelfId, { ...shelf }); } async getShelf(id: Hash): Promise { return this.shelves.get(id); } + async getAllShelves(): Promise { return [...this.shelves.values()]; } // Edges async putEdges(edges: Edge[]): Promise { this.edges.push(...edges); } + async deleteEdge(from: Hash, to: Hash): Promise { this.edges = this.edges.filter((e) => !(e.fromPageId === from && e.toPageId === to)); } async getNeighbors(pageId: Hash): Promise { return this.edges.filter((e) => e.fromPageId === pageId); } diff --git a/tests/daydreamer/FullNeighborRecalc.test.ts b/tests/daydreamer/FullNeighborRecalc.test.ts index b19d0ab..18c0f7d 100644 --- a/tests/daydreamer/FullNeighborRecalc.test.ts +++ b/tests/daydreamer/FullNeighborRecalc.test.ts @@ -13,8 +13,8 @@ import type { Hash, HotpathEntry, MetadataStore, - MetroidNeighbor, - MetroidSubgraph, + SemanticNeighbor, + SemanticNeighborSubgraph, Page, PageActivity, Shelf, @@ -71,7 +71,7 @@ class FullMockMetadataStore implements MetadataStore { private edgeMap = new Map(); private activities = new Map(); private hotpath = new Map(); - private metroidNeighbors = new Map(); + private metroidNeighbors = new Map(); private dirtyFlags = new Map(); async putPage(page: Page) { this.pages.set(page.pageId, page); } @@ -84,6 +84,7 @@ class FullMockMetadataStore implements MetadataStore { async putVolume(v: Volume) { this.volumes.set(v.volumeId, v); } async getVolume(id: Hash) { return 
this.volumes.get(id); } async getAllVolumes() { return [...this.volumes.values()]; } + async deleteVolume(id: Hash) { this.volumes.delete(id); } async putShelf(s: Shelf) { this.shelves.set(s.shelfId, s); } async getShelf(id: Hash) { return this.shelves.get(id); } @@ -101,17 +102,17 @@ class FullMockMetadataStore implements MetadataStore { async getVolumesByBook() { return []; } async getShelvesByVolume() { return []; } - async putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]) { + async putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]) { this.metroidNeighbors.set(pageId, [...neighbors]); } - async getMetroidNeighbors(pageId: Hash) { + async getSemanticNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } - async getInducedMetroidSubgraph(): Promise { return { nodes: [], edges: [] }; } + async getInducedNeighborSubgraph(): Promise { return { nodes: [], edges: [] }; } - async needsMetroidRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } - async flagVolumeForMetroidRecalc(id: Hash) { this.dirtyFlags.set(id, true); } - async clearMetroidRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } + async needsNeighborRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } + async flagVolumeForNeighborRecalc(id: Hash) { this.dirtyFlags.set(id, true); } + async clearNeighborRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } async putHotpathEntry(entry: HotpathEntry) { this.hotpath.set(entry.entityId, { ...entry }); } async getHotpathEntries(tier?: HotpathEntry["tier"]) { @@ -131,7 +132,7 @@ class FullMockMetadataStore implements MetadataStore { async getPageActivity(id: Hash) { return this.activities.get(id); } isDirty(volumeId: Hash): boolean { return this.dirtyFlags.get(volumeId) === true; } - getMetroidNeighborsSync(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } + getSemanticNeighborsSync(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? 
[]; } } // --------------------------------------------------------------------------- @@ -181,7 +182,7 @@ async function buildStoreWithVolume( await store.putVolume(volume); if (dirty) { - await store.flagVolumeForMetroidRecalc(volumeId); + await store.flagVolumeForNeighborRecalc(volumeId); } return { store, vectorStore, volumeId }; @@ -206,12 +207,12 @@ describe("FullNeighborRecalc", () => { const { store, vectorStore } = await buildStoreWithVolume(3, true); // Initially no neighbors - expect(store.getMetroidNeighborsSync("page-0")).toHaveLength(0); + expect(store.getSemanticNeighborsSync("page-0")).toHaveLength(0); await runFullNeighborRecalc({ metadataStore: store, vectorStore, now: NOW }); // After recalc, each page should have neighbors - const neighbors = store.getMetroidNeighborsSync("page-0"); + const neighbors = store.getSemanticNeighborsSync("page-0"); expect(neighbors.length).toBeGreaterThan(0); expect(neighbors.length).toBeLessThanOrEqual(2); // 3 pages → max 2 neighbors each }); @@ -220,7 +221,7 @@ describe("FullNeighborRecalc", () => { const { store, vectorStore } = await buildStoreWithVolume(4, true); await runFullNeighborRecalc({ metadataStore: store, vectorStore, now: NOW }); - const neighbors = store.getMetroidNeighborsSync("page-0"); + const neighbors = store.getSemanticNeighborsSync("page-0"); for (let i = 1; i < neighbors.length; i++) { expect(neighbors[i - 1].cosineSimilarity).toBeGreaterThanOrEqual( neighbors[i].cosineSimilarity, @@ -238,7 +239,7 @@ describe("FullNeighborRecalc", () => { now: NOW, }); - const neighbors = store.getMetroidNeighborsSync("page-0"); + const neighbors = store.getSemanticNeighborsSync("page-0"); expect(neighbors.length).toBeLessThanOrEqual(2); }); @@ -253,7 +254,7 @@ describe("FullNeighborRecalc", () => { expect(result.volumesProcessed).toBe(0); expect(store.isDirty(volumeId)).toBe(false); - expect(store.getMetroidNeighborsSync("page-0")).toHaveLength(0); + 
expect(store.getSemanticNeighborsSync("page-0")).toHaveLength(0); }); it("batch pairsComputed does not exceed computeCapacity(graphMass)", async () => { @@ -286,7 +287,7 @@ describe("FullNeighborRecalc", () => { const { store, vectorStore } = await buildStoreWithVolume(2, true); await runFullNeighborRecalc({ metadataStore: store, vectorStore, now: NOW }); - const neighbors = store.getMetroidNeighborsSync("page-0"); + const neighbors = store.getSemanticNeighborsSync("page-0"); for (const n of neighbors) { expect(n.distance).toBeCloseTo(1 - n.cosineSimilarity); } diff --git a/tests/daydreamer/HebbianUpdater.test.ts b/tests/daydreamer/HebbianUpdater.test.ts index c9f9d84..d7d8488 100644 --- a/tests/daydreamer/HebbianUpdater.test.ts +++ b/tests/daydreamer/HebbianUpdater.test.ts @@ -13,8 +13,8 @@ import type { Hash, HotpathEntry, MetadataStore, - MetroidNeighbor, - MetroidSubgraph, + SemanticNeighbor, + SemanticNeighborSubgraph, Page, PageActivity, Shelf, @@ -58,7 +58,7 @@ class FullMockMetadataStore implements MetadataStore { private edgeMap = new Map(); private activities = new Map(); private hotpath = new Map(); - private metroidNeighbors = new Map(); + private metroidNeighbors = new Map(); private dirtyFlags = new Map(); async putPage(page: Page) { this.pages.set(page.pageId, page); } @@ -71,6 +71,7 @@ class FullMockMetadataStore implements MetadataStore { async putVolume(v: Volume) { this.volumes.set(v.volumeId, v); } async getVolume(id: Hash) { return this.volumes.get(id); } async getAllVolumes() { return [...this.volumes.values()]; } + async deleteVolume(id: Hash) { this.volumes.delete(id); } async putShelf(s: Shelf) { this.shelves.set(s.shelfId, s); } async getShelf(id: Hash) { return this.shelves.get(id); } @@ -92,17 +93,17 @@ class FullMockMetadataStore implements MetadataStore { async getVolumesByBook() { return []; } async getShelvesByVolume() { return []; } - async putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]) { + async 
putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]) { this.metroidNeighbors.set(pageId, neighbors); } - async getMetroidNeighbors(pageId: Hash) { + async getSemanticNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } - async getInducedMetroidSubgraph(): Promise { return { nodes: [], edges: [] }; } + async getInducedNeighborSubgraph(): Promise { return { nodes: [], edges: [] }; } - async needsMetroidRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } - async flagVolumeForMetroidRecalc(id: Hash) { this.dirtyFlags.set(id, true); } - async clearMetroidRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } + async needsNeighborRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } + async flagVolumeForNeighborRecalc(id: Hash) { this.dirtyFlags.set(id, true); } + async clearNeighborRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } async putHotpathEntry(entry: HotpathEntry) { this.hotpath.set(entry.entityId, { ...entry }); } async getHotpathEntries(tier?: HotpathEntry["tier"]) { diff --git a/tests/daydreamer/PrototypeRecomputer.test.ts b/tests/daydreamer/PrototypeRecomputer.test.ts index ce92060..4daa781 100644 --- a/tests/daydreamer/PrototypeRecomputer.test.ts +++ b/tests/daydreamer/PrototypeRecomputer.test.ts @@ -13,8 +13,8 @@ import type { Hash, HotpathEntry, MetadataStore, - MetroidNeighbor, - MetroidSubgraph, + SemanticNeighbor, + SemanticNeighborSubgraph, Page, PageActivity, Shelf, @@ -72,7 +72,7 @@ class FullMockMetadataStore implements MetadataStore { private edgeMap = new Map(); private activities = new Map(); private hotpath = new Map(); - private metroidNeighbors = new Map(); + private metroidNeighbors = new Map(); private dirtyFlags = new Map(); async putPage(page: Page) { this.pages.set(page.pageId, page); } @@ -91,6 +91,7 @@ class FullMockMetadataStore implements MetadataStore { async putVolume(v: Volume) { this.volumes.set(v.volumeId, v); } async getVolume(id: Hash) { return 
this.volumes.get(id); } async getAllVolumes() { return [...this.volumes.values()]; } + async deleteVolume(id: Hash) { this.volumes.delete(id); } async putShelf(s: Shelf) { this.shelves.set(s.shelfId, s); } async getShelf(id: Hash) { return this.shelves.get(id); } @@ -109,15 +110,15 @@ class FullMockMetadataStore implements MetadataStore { async getVolumesByBook() { return []; } async getShelvesByVolume() { return []; } - async putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]) { + async putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]) { this.metroidNeighbors.set(pageId, neighbors); } - async getMetroidNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } - async getInducedMetroidSubgraph(): Promise { return { nodes: [], edges: [] }; } + async getSemanticNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } + async getInducedNeighborSubgraph(): Promise { return { nodes: [], edges: [] }; } - async needsMetroidRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } - async flagVolumeForMetroidRecalc(id: Hash) { this.dirtyFlags.set(id, true); } - async clearMetroidRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } + async needsNeighborRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } + async flagVolumeForNeighborRecalc(id: Hash) { this.dirtyFlags.set(id, true); } + async clearNeighborRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } async putHotpathEntry(entry: HotpathEntry) { this.hotpath.set(entry.entityId, { ...entry }); } async getHotpathEntries(tier?: HotpathEntry["tier"]) { diff --git a/tests/integration/Daydreamer.test.ts b/tests/integration/Daydreamer.test.ts index 935c402..d4c3d88 100644 --- a/tests/integration/Daydreamer.test.ts +++ b/tests/integration/Daydreamer.test.ts @@ -158,13 +158,13 @@ describe("Daydreamer integration", () => { prototypeDim: EMBEDDING_DIM, variance: 0, }); - await metadataStore.flagVolumeForMetroidRecalc(volumeId); + await 
metadataStore.flagVolumeForNeighborRecalc(volumeId); - expect(await metadataStore.needsMetroidRecalc(volumeId)).toBe(true); + expect(await metadataStore.needsNeighborRecalc(volumeId)).toBe(true); await runFullNeighborRecalc({ metadataStore, vectorStore, now }); - expect(await metadataStore.needsMetroidRecalc(volumeId)).toBe(false); + expect(await metadataStore.needsNeighborRecalc(volumeId)).toBe(false); }); it("prototypes are updated after recompute", async () => { diff --git a/tests/sharing/SubgraphExchange.test.ts b/tests/sharing/SubgraphExchange.test.ts index cdc0265..d8b945c 100644 --- a/tests/sharing/SubgraphExchange.test.ts +++ b/tests/sharing/SubgraphExchange.test.ts @@ -15,8 +15,8 @@ import type { Hash, HotpathEntry, MetadataStore, - MetroidNeighbor, - MetroidSubgraph, + SemanticNeighbor, + SemanticNeighborSubgraph, Page, PageActivity, Shelf, @@ -66,7 +66,7 @@ class FullMockMetadataStore implements MetadataStore { private edgeMap = new Map(); private activities = new Map(); private hotpath = new Map(); - private metroidNeighbors = new Map(); + private metroidNeighbors = new Map(); private dirtyFlags = new Map(); async putPage(page: Page) { this.pages.set(page.pageId, page); } @@ -79,6 +79,7 @@ class FullMockMetadataStore implements MetadataStore { async putVolume(v: Volume) { this.volumes.set(v.volumeId, v); } async getVolume(id: Hash) { return this.volumes.get(id); } async getAllVolumes() { return [...this.volumes.values()]; } + async deleteVolume(id: Hash) { this.volumes.delete(id); } async putShelf(s: Shelf) { this.shelves.set(s.shelfId, s); } async getShelf(id: Hash) { return this.shelves.get(id); } @@ -94,15 +95,15 @@ class FullMockMetadataStore implements MetadataStore { async getVolumesByBook() { return []; } async getShelvesByVolume() { return []; } - async putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]) { + async putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]) { this.metroidNeighbors.set(pageId, neighbors); } - 
async getMetroidNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } - async getInducedMetroidSubgraph(): Promise { return { nodes: [], edges: [] }; } + async getSemanticNeighbors(pageId: Hash) { return this.metroidNeighbors.get(pageId) ?? []; } + async getInducedNeighborSubgraph(): Promise { return { nodes: [], edges: [] }; } - async needsMetroidRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } - async flagVolumeForMetroidRecalc(id: Hash) { this.dirtyFlags.set(id, true); } - async clearMetroidRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } + async needsNeighborRecalc(id: Hash) { return this.dirtyFlags.get(id) === true; } + async flagVolumeForNeighborRecalc(id: Hash) { this.dirtyFlags.set(id, true); } + async clearNeighborRecalcFlag(id: Hash) { this.dirtyFlags.set(id, false); } async putHotpathEntry(entry: HotpathEntry) { this.hotpath.set(entry.entityId, { ...entry }); } async getHotpathEntries(tier?: HotpathEntry["tier"]) { From e0447fc523f6f1d8de0c7924e3c9acf52089bf00 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 02:11:37 +0000 Subject: [PATCH 3/5] P1 implementations: Williams-bound maxDegree, dynamic subgraph bounds, HierarchyBuilder quota enforcement, fix pre-existing lint errors Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com> --- core/HotpathPolicy.ts | 109 +++++++++++++++++++++++++++++ cortex/Query.ts | 20 ++++-- daydreamer/FullNeighborRecalc.ts | 7 +- daydreamer/PrototypeRecomputer.ts | 6 +- hippocampus/FastNeighborInsert.ts | 25 +++++-- hippocampus/HierarchyBuilder.ts | 102 ++++++++++++++++++++++++++- tests/HotpathPolicy.test.ts | 111 ++++++++++++++++++++++++++++++ 7 files changed, 359 insertions(+), 21 deletions(-) diff --git a/core/HotpathPolicy.ts b/core/HotpathPolicy.ts index 6362755..d8368f7 100644 --- a/core/HotpathPolicy.ts +++ b/core/HotpathPolicy.ts @@ -194,3 +194,112 @@ export function 
deriveCommunityQuotas( for (let i = 0; i < n; i++) quotas[i] += floors[i]; return quotas; } + +// --------------------------------------------------------------------------- +// Semantic neighbor degree limit — Williams-bound derived +// --------------------------------------------------------------------------- + +// Bootstrap floor for Williams-bound log formulas: ensures t_eff ≥ 2 so that +// log₂(t_eff) > 0 and log₂(log₂(1+t_eff)) is defined and positive. +const MIN_GRAPH_MASS_FOR_LOGS = 2; + +/** + * Compute the Williams-bound-derived maximum degree for the semantic neighbor + * graph given a corpus of `graphMass` total pages. + * + * The degree limit uses the same H(t) formula as the hotpath capacity but is + * bounded by a hard cap to keep the graph sparse. At small corpora the + * Williams formula naturally returns small values (e.g. 1–5 for t < 10); + * at large corpora the `hardCap` clamps growth to prevent the graph becoming + * too dense. + * + * @param graphMass Total number of pages in the corpus. + * @param c Williams Bound scaling constant (default from policy). + * @param hardCap Maximum degree regardless of formula result. Default: 32. + */ +export function computeNeighborMaxDegree( + graphMass: number, + c: number = DEFAULT_HOTPATH_POLICY.c, + hardCap = 32, +): number { + const derived = computeCapacity(graphMass, c); + return Math.min(hardCap, Math.max(1, derived)); +} + +// --------------------------------------------------------------------------- +// Dynamic subgraph expansion bounds — Williams-bound derived +// --------------------------------------------------------------------------- + +export interface SubgraphBounds { + /** Maximum number of nodes to include in the induced subgraph. */ + maxSubgraphSize: number; + /** Maximum BFS hops from seed nodes. */ + maxHops: number; + /** Maximum fanout per hop (branching factor). 
*/ + perHopBranching: number; +} + +/** + * Compute dynamic Williams-derived bounds for subgraph expansion (step 9 of + * the Cortex query path). + * + * Formulas from DESIGN.md "Dynamic Subgraph Expansion Bounds": + * + * t_eff = max(t, 2) + * maxSubgraphSize = min(30, ⌊√(t_eff · log₂(1+t_eff)) / log₂(t_eff)⌋) + * maxHops = max(1, ⌈log₂(log₂(1 + t_eff))⌉) + * perHopBranching = max(1, ⌊maxSubgraphSize ^ (1/maxHops)⌋) + * + * The bootstrap floor `t_eff = max(t, 2)` eliminates division-by-zero for + * t ≤ 1 and ensures a safe minimum of `maxSubgraphSize=1, maxHops=1`. + * + * @param graphMass Total number of pages in the corpus. + */ +export function computeSubgraphBounds(graphMass: number): SubgraphBounds { + const tEff = Math.max(graphMass, MIN_GRAPH_MASS_FOR_LOGS); + const log2tEff = Math.log2(tEff); + + const maxSubgraphSize = Math.min( + 30, + Math.floor(Math.sqrt(tEff * Math.log2(1 + tEff)) / log2tEff), + ); + + const maxHops = Math.max(1, Math.ceil(Math.log2(Math.log2(1 + tEff)))); + + const perHopBranching = Math.max( + 1, + Math.floor(Math.pow(maxSubgraphSize, 1 / maxHops)), + ); + + return { + maxSubgraphSize: Math.max(1, maxSubgraphSize), + maxHops, + perHopBranching, + }; +} + +// --------------------------------------------------------------------------- +// Williams-derived hierarchy fanout limit +// --------------------------------------------------------------------------- + +/** + * Compute the Williams-derived fanout limit for a hierarchy node that + * currently has `childCount` children. + * + * Per DESIGN.md "Sublinear Fanout Bounds": + * Max children = O(√(childCount · log childCount)) + * + * The formula is evaluated with a bootstrap floor of t_eff = max(t, 2) to + * avoid log(0) and returns at least 1 child. + * + * @param childCount Current number of children for the parent node. + * @param c Williams Bound scaling constant. 
+ */ +export function computeFanoutLimit( + childCount: number, + c: number = DEFAULT_HOTPATH_POLICY.c, +): number { + const tEff = Math.max(childCount, MIN_GRAPH_MASS_FOR_LOGS); + const raw = c * Math.sqrt(tEff * Math.log2(1 + tEff)); + return Math.max(1, Math.ceil(raw)); +} diff --git a/cortex/Query.ts b/cortex/Query.ts index 610a737..28283cf 100644 --- a/cortex/Query.ts +++ b/cortex/Query.ts @@ -2,6 +2,7 @@ import type { ModelProfile } from "../core/ModelProfile"; import type { Hash, MetadataStore, Page, VectorStore } from "../core/types"; import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner"; import { runPromotionSweep } from "../core/SalienceEngine"; +import { computeSubgraphBounds } from "../core/HotpathPolicy"; import type { QueryResult } from "./QueryResult"; import { rankPages, spillToWarm } from "./Ranking"; import { buildMetroid } from "./MetroidBuilder"; @@ -14,9 +15,13 @@ export interface QueryOptions { vectorStore: VectorStore; metadataStore: MetadataStore; topK?: number; - /** BFS depth for semantic neighbor subgraph expansion. 2 hops covers direct - * neighbors and their neighbors, which is the minimum needed to surface - * bridge nodes without exploding the graph size. */ + /** + * Maximum BFS depth for semantic neighbor subgraph expansion. + * + * When omitted, a dynamic Williams-derived value is computed from the + * corpus size via `computeSubgraphBounds(t)`. Providing an explicit value + * overrides the dynamic bound (useful for tests and controlled experiments). + */ maxHops?: number; } @@ -30,7 +35,6 @@ export async function query( vectorStore, metadataStore, topK = 10, - maxHops = 2, } = options; const nowIso = new Date().toISOString(); @@ -116,8 +120,14 @@ export async function query( ); // --- Subgraph expansion --- + // Use dynamic Williams-derived bounds unless the caller has pinned an + // explicit maxHops value. The bounds are derived from the current corpus + // size so expansion cost stays sublinear as the graph grows. 
const topPageIds = topPages.map((p) => p.pageId); - const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, maxHops); + const allPages = await metadataStore.getAllPages(); + const subgraphBounds = computeSubgraphBounds(allPages.length); + const effectiveMaxHops = options.maxHops ?? subgraphBounds.maxHops; + const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, effectiveMaxHops); // --- TSP coherence path --- const coherencePath = solveOpenTSP(subgraph); diff --git a/daydreamer/FullNeighborRecalc.ts b/daydreamer/FullNeighborRecalc.ts index 6c0e399..dfc6c0e 100644 --- a/daydreamer/FullNeighborRecalc.ts +++ b/daydreamer/FullNeighborRecalc.ts @@ -137,9 +137,10 @@ export async function runFullNeighborRecalc( // Compute pairwise similarities and build neighbor lists const pairsInVolume = volumePages.length * (volumePages.length - 1); const remainingBudget = pairBudget - totalPairsComputed; - if (pairsInVolume > remainingBudget && totalVolumesProcessed > 0) { - // Budget exhausted after processing at least one volume — defer the rest. - // We always process at least one volume per cycle to guarantee progress. + // Always process at least one volume per cycle to guarantee forward + // progress; budget exhaustion only defers additional volumes. 
+ const budgetExhausted = pairsInVolume > remainingBudget && totalVolumesProcessed > 0; + if (budgetExhausted) { break; } diff --git a/daydreamer/PrototypeRecomputer.ts b/daydreamer/PrototypeRecomputer.ts index 9770b69..5dac5b2 100644 --- a/daydreamer/PrototypeRecomputer.ts +++ b/daydreamer/PrototypeRecomputer.ts @@ -12,7 +12,7 @@ import type { Hash, HotpathEntry, MetadataStore, Shelf, Volume, VectorStore } from "../core/types"; import { DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy"; -import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine"; +import { runPromotionSweep } from "../core/SalienceEngine"; // --------------------------------------------------------------------------- // Helpers @@ -119,8 +119,6 @@ async function recomputeVolumePrototypes( const { metadataStore, vectorStore, - policy = DEFAULT_HOTPATH_POLICY, - now = Date.now(), } = options; const allVolumes = await metadataStore.getAllVolumes(); @@ -144,8 +142,6 @@ async function recomputeVolumePrototypes( if (pageEntries.length === 0) continue; const vectors = pageEntries.map((e) => e.vector); - const medoidIdx = selectMedoidIndex(vectors); - const medoidPageId = pageEntries[medoidIdx].pageId; const centroidVec = computeCentroid(vectors); // Append centroid to vector store diff --git a/hippocampus/FastNeighborInsert.ts b/hippocampus/FastNeighborInsert.ts index 6334faf..2678d1c 100644 --- a/hippocampus/FastNeighborInsert.ts +++ b/hippocampus/FastNeighborInsert.ts @@ -1,12 +1,16 @@ import type { Hash, MetadataStore, SemanticNeighbor, VectorStore } from "../core/types"; import type { ModelProfile } from "../core/ModelProfile"; import type { HotpathPolicy } from "../core/HotpathPolicy"; +import { computeNeighborMaxDegree } from "../core/HotpathPolicy"; import { runPromotionSweep } from "../core/SalienceEngine"; -// Policy constants, not model-derived. -// 16 neighbors keeps the graph sparse while giving enough connectivity for BFS. 
-// 0.5 cosine distance (≥0.5 similarity) filters noise without losing near-duplicates. -const DEFAULT_MAX_DEGREE = 16; +// Hard cap for the semantic neighbor degree: even if the Williams formula +// returns a higher value, we never allow a node to have more than this many +// semantic neighbors. Kept as a policy constant (not model-derived). +const NEIGHBOR_DEGREE_HARD_CAP = 32; + +// Default cosine-distance cutoff when no policy hint is available. +// Cosine distance 0.5 ≡ cosine similarity 0.5 (≥ 0.5 similarity passes). const DEFAULT_CUTOFF_DISTANCE = 0.5; export interface FastNeighborInsertOptions { @@ -81,10 +85,21 @@ export async function insertSemanticNeighbors( vectorStore, metadataStore, policy, - maxDegree = DEFAULT_MAX_DEGREE, cutoffDistance = DEFAULT_CUTOFF_DISTANCE, } = options; + // Derive maxDegree from the Williams bound if a policy is supplied and the + // caller has not pinned an explicit value. This keeps the semantic neighbor + // graph sparse in proportion to corpus size rather than hardcoding a constant. 
+ let maxDegree: number; + if (options.maxDegree !== undefined) { + maxDegree = options.maxDegree; + } else if (policy) { + maxDegree = computeNeighborMaxDegree(allPageIds.length, policy.c, NEIGHBOR_DEGREE_HARD_CAP); + } else { + maxDegree = NEIGHBOR_DEGREE_HARD_CAP; + } + if (newPageIds.length === 0) return; const dim = modelProfile.embeddingDimension; diff --git a/hippocampus/HierarchyBuilder.ts b/hippocampus/HierarchyBuilder.ts index 41969df..ff10689 100644 --- a/hippocampus/HierarchyBuilder.ts +++ b/hippocampus/HierarchyBuilder.ts @@ -1,13 +1,16 @@ import type { Book, Hash, MetadataStore, SemanticNeighbor, Shelf, Volume, VectorStore } from "../core/types"; import type { ModelProfile } from "../core/ModelProfile"; import type { HotpathPolicy } from "../core/HotpathPolicy"; +import { computeFanoutLimit, DEFAULT_HOTPATH_POLICY } from "../core/HotpathPolicy"; import { hashText } from "../core/crypto/hash"; import { runPromotionSweep } from "../core/SalienceEngine"; // Clustering fan-out targets — policy constants, not model-derived. -// 8 pages/book keeps books coarse enough for medoid selection to be meaningful -// without O(n²) pair-wise cost blowing up. 4 books/volume and 4 volumes/shelf -// mirror a balanced 4-ary hierarchy consistent with Williams Bound routing. +// These are chosen to be consistent with the Williams Bound fanout limit: +// computeFanoutLimit(N) ≈ ceil(0.5 * sqrt(N * log2(1+N))) +// At typical early-corpus sizes these constants (4-8) sit comfortably within +// the Williams-derived quota. ClusterStability handles splits at runtime if +// volumes grow beyond their quota after the initial hierarchy build. 
const PAGES_PER_BOOK = 8; const BOOKS_PER_VOLUME = 4; const VOLUMES_PER_SHELF = 4; @@ -261,5 +264,98 @@ export async function buildHierarchy( await runPromotionSweep(shelves.map((s) => s.shelfId), metadataStore, policy); + // ------------------------------------------------------------------------- + // Williams fanout quota enforcement + // ------------------------------------------------------------------------- + // Validate that no volume or shelf exceeds its Williams-derived fanout + // quota based on the TOTAL count of nodes at that tier. The static chunking + // above already enforces the budgets during initial build, so this guard + // should only trigger when this function is called with pre-existing data + // that has grown past quota (incremental ingestion paths). In those cases + // we re-chunk the oversized parent's children and persist the new structure. + const policyC = policy?.c ?? DEFAULT_HOTPATH_POLICY.c; + const totalBooks = books.length; + const totalVolumes = volumes.length; + + // Check volumes: each volume must have ≤ fanout limit based on total book count. + const volumeLimit = computeFanoutLimit(totalBooks, policyC); + for (const volume of [...volumes]) { + if (volume.bookIds.length > volumeLimit) { + const subChunks = chunkArray(volume.bookIds, volumeLimit); + const subVolumes: Volume[] = []; + + for (const sub of subChunks) { + const sortedSub = [...sub].sort(); + const subVolumeId = await hashText(`split-vol:${volume.volumeId}:${sortedSub.join("|")}`); + const subVolumeBooks = ( + await Promise.all(sub.map((id) => metadataStore.getBook(id))) + ).filter((b): b is Book => b !== undefined); + + const protoVecs = subVolumeBooks.map((b) => pageVectorMap.get(b.medoidPageId)).filter((v): v is Float32Array => v !== undefined); + const centroid = protoVecs.length > 0 ? 
computeCentroid(protoVecs) : new Float32Array(dim); + const protoOffset = await vectorStore.appendVector(centroid); + + const subVol: Volume = { + volumeId: subVolumeId, + bookIds: sub, + prototypeOffsets: [protoOffset], + prototypeDim: dim, + variance: volume.variance / 2, + }; + await metadataStore.putVolume(subVol); + subVolumes.push(subVol); + } + + // Replace the oversized volume in shelves + for (const shelf of shelves) { + const idx = shelf.volumeIds.indexOf(volume.volumeId); + if (idx === -1) continue; + const newVolumeIds = [ + ...shelf.volumeIds.slice(0, idx), + ...subVolumes.map((v) => v.volumeId), + ...shelf.volumeIds.slice(idx + 1), + ]; + const updated: Shelf = { ...shelf, volumeIds: newVolumeIds }; + await metadataStore.putShelf(updated); + Object.assign(shelf, updated); + } + + volumes.splice(volumes.indexOf(volume), 1, ...subVolumes); + } + } + + // Check shelves: each shelf must have ≤ fanout limit based on total volume count. + const shelfLimit = computeFanoutLimit(totalVolumes, policyC); + for (const shelf of [...shelves]) { + if (shelf.volumeIds.length > shelfLimit) { + const subChunks = chunkArray(shelf.volumeIds, shelfLimit); + const subShelves: Shelf[] = []; + + for (const sub of subChunks) { + const sortedSub = [...sub].sort(); + const subShelfId = await hashText(`split-shelf:${shelf.shelfId}:${sortedSub.join("|")}`); + const subShelfVols = ( + await Promise.all(sub.map((id) => metadataStore.getVolume(id))) + ).filter((v): v is Volume => v !== undefined); + + const protoVecs = subShelfVols.map((v) => vectorStore.readVector(v.prototypeOffsets[0], dim)); + const resolvedProtos = await Promise.all(protoVecs); + const centroid = resolvedProtos.length > 0 ? 
computeCentroid(resolvedProtos) : new Float32Array(dim); + const routingOffset = await vectorStore.appendVector(centroid); + + const subShelf: Shelf = { + shelfId: subShelfId, + volumeIds: sub, + routingPrototypeOffsets: [routingOffset], + routingDim: dim, + }; + await metadataStore.putShelf(subShelf); + subShelves.push(subShelf); + } + + shelves.splice(shelves.indexOf(shelf), 1, ...subShelves); + } + } + return { books, volumes, shelves }; } diff --git a/tests/HotpathPolicy.test.ts b/tests/HotpathPolicy.test.ts index 07af156..bacc18d 100644 --- a/tests/HotpathPolicy.test.ts +++ b/tests/HotpathPolicy.test.ts @@ -2,7 +2,10 @@ import { describe, expect, it } from "vitest"; import { computeCapacity, + computeFanoutLimit, + computeNeighborMaxDegree, computeSalience, + computeSubgraphBounds, deriveCommunityQuotas, deriveTierQuotas, DEFAULT_HOTPATH_POLICY, @@ -214,3 +217,111 @@ describe("DEFAULT_HOTPATH_POLICY", () => { expect(DEFAULT_HOTPATH_POLICY.tierQuotaRatios.page).toBe(0.50); }); }); + +// --------------------------------------------------------------------------- +// computeNeighborMaxDegree — P1-C: Williams-derived max degree for neighbor graph +// --------------------------------------------------------------------------- + +describe("computeNeighborMaxDegree", () => { + it("returns at least 1 for any corpus size", () => { + for (const t of [0, 1, 2, 10, 100, 1_000]) { + expect(computeNeighborMaxDegree(t)).toBeGreaterThanOrEqual(1); + } + }); + + it("never exceeds hardCap (32 by default)", () => { + for (const t of [10, 100, 1_000, 100_000]) { + expect(computeNeighborMaxDegree(t)).toBeLessThanOrEqual(32); + } + }); + + it("grows sublinearly (degree/t decreases as t increases)", () => { + const ratio10k = computeNeighborMaxDegree(10_000) / 10_000; + const ratio1k = computeNeighborMaxDegree(1_000) / 1_000; + expect(ratio10k).toBeLessThanOrEqual(ratio1k); + }); + + it("respects custom hardCap", () => { + expect(computeNeighborMaxDegree(10_000, 0.5, 
5)).toBeLessThanOrEqual(5); + }); + + it("returns a finite positive integer for t = 0", () => { + const r = computeNeighborMaxDegree(0); + expect(Number.isFinite(r)).toBe(true); + expect(Number.isInteger(r)).toBe(true); + expect(r).toBeGreaterThanOrEqual(1); + }); +}); + +// --------------------------------------------------------------------------- +// computeSubgraphBounds — P1-E: Dynamic Williams-derived expansion bounds +// --------------------------------------------------------------------------- + +describe("computeSubgraphBounds", () => { + it("returns maxHops >= 1 for any corpus size", () => { + for (const t of [0, 1, 2, 10, 100, 10_000]) { + expect(computeSubgraphBounds(t).maxHops).toBeGreaterThanOrEqual(1); + } + }); + + it("returns maxSubgraphSize >= 1 for any corpus size", () => { + for (const t of [0, 1, 2, 10, 100, 10_000]) { + expect(computeSubgraphBounds(t).maxSubgraphSize).toBeGreaterThanOrEqual(1); + } + }); + + it("maxSubgraphSize is bounded by 30", () => { + for (const t of [100, 10_000, 1_000_000]) { + expect(computeSubgraphBounds(t).maxSubgraphSize).toBeLessThanOrEqual(30); + } + }); + + it("perHopBranching >= 1 and <= maxSubgraphSize", () => { + const t = 1_000; + const bounds = computeSubgraphBounds(t); + expect(bounds.perHopBranching).toBeGreaterThanOrEqual(1); + expect(bounds.perHopBranching).toBeLessThanOrEqual(bounds.maxSubgraphSize); + }); + + it("all fields are finite positive integers", () => { + const bounds = computeSubgraphBounds(100); + for (const key of ["maxSubgraphSize", "maxHops", "perHopBranching"] as const) { + expect(Number.isFinite(bounds[key])).toBe(true); + expect(Number.isInteger(bounds[key])).toBe(true); + expect(bounds[key]).toBeGreaterThanOrEqual(1); + } + }); + + it("maxHops grows logarithmically with corpus size", () => { + const smallHops = computeSubgraphBounds(10).maxHops; + const largeHops = computeSubgraphBounds(1_000_000).maxHops; + expect(largeHops).toBeGreaterThanOrEqual(smallHops); + // Logarithmic 
growth: even 1 billion pages should give maxHops ≤ 10 + expect(computeSubgraphBounds(1_000_000_000).maxHops).toBeLessThanOrEqual(10); + }); +}); + +// --------------------------------------------------------------------------- +// computeFanoutLimit — P1-A: Williams-derived hierarchy fanout limit +// --------------------------------------------------------------------------- + +describe("computeFanoutLimit", () => { + it("returns at least 1 for any node count", () => { + for (const n of [0, 1, 2, 10, 100]) { + expect(computeFanoutLimit(n)).toBeGreaterThanOrEqual(1); + } + }); + + it("grows sublinearly: limit/n decreases as n increases", () => { + const ratio100 = computeFanoutLimit(100) / 100; + const ratio10 = computeFanoutLimit(10) / 10; + expect(ratio100).toBeLessThan(ratio10); + }); + + it("returns a finite positive integer", () => { + const r = computeFanoutLimit(50); + expect(Number.isFinite(r)).toBe(true); + expect(Number.isInteger(r)).toBe(true); + expect(r).toBeGreaterThanOrEqual(1); + }); +}); From 901d002870a901ce4071035e8e9038a6e312e9a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 02:59:32 +0000 Subject: [PATCH 4/5] Fix HierarchyBuilder split/delete, Query lazy page scan, FullNeighborRecalc budget floor, FastNeighborInsert default degree, PrototypeRecomputer docstring Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com> --- cortex/Query.ts | 15 ++- daydreamer/FullNeighborRecalc.ts | 21 ++-- daydreamer/PrototypeRecomputer.ts | 14 ++- hippocampus/FastNeighborInsert.ts | 21 ++-- hippocampus/HierarchyBuilder.ts | 173 +++++++++++++++++++----------- 5 files changed, 152 insertions(+), 92 deletions(-) diff --git a/cortex/Query.ts b/cortex/Query.ts index 28283cf..488a1ba 100644 --- a/cortex/Query.ts +++ b/cortex/Query.ts @@ -121,12 +121,17 @@ export async function query( // --- Subgraph expansion --- // Use dynamic Williams-derived bounds unless the caller has pinned 
an - // explicit maxHops value. The bounds are derived from the current corpus - // size so expansion cost stays sublinear as the graph grows. + // explicit maxHops value. Only load all pages when we actually need to + // compute bounds — skip the full-page scan on the hot path when maxHops is + // already known. const topPageIds = topPages.map((p) => p.pageId); - const allPages = await metadataStore.getAllPages(); - const subgraphBounds = computeSubgraphBounds(allPages.length); - const effectiveMaxHops = options.maxHops ?? subgraphBounds.maxHops; + let effectiveMaxHops: number; + if (options.maxHops !== undefined) { + effectiveMaxHops = options.maxHops; + } else { + const allPages = await metadataStore.getAllPages(); + effectiveMaxHops = computeSubgraphBounds(allPages.length).maxHops; + } const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, effectiveMaxHops); // --- TSP coherence path --- diff --git a/daydreamer/FullNeighborRecalc.ts b/daydreamer/FullNeighborRecalc.ts index dfc6c0e..acd0ecc 100644 --- a/daydreamer/FullNeighborRecalc.ts +++ b/daydreamer/FullNeighborRecalc.ts @@ -7,14 +7,22 @@ // bounded by the Williams-Bound-derived maintenance budget so the idle loop // is not starved. // -// Per idle cycle, the scheduler processes at most computeCapacity(graphMass) -// pairwise comparisons (O(sqrt(t * log(1+t))) growth). +// Per idle cycle, the scheduler processes at most max(MIN_RECALC_PAIR_BUDGET, +// computeCapacity(graphMass)) pairwise comparisons. The minimum floor ensures +// forward progress for small corpora where the Williams formula may return a +// value smaller than a single typical volume's pair count. 
// --------------------------------------------------------------------------- import type { Hash, MetadataStore, SemanticNeighbor, Page, VectorStore } from "../core/types"; import { computeCapacity, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy"; import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine"; +// Minimum pair budget per idle recalc cycle. +// Sized to cover the theoretical maximum for a single well-formed volume +// (BOOKS_PER_VOLUME=4 books × PAGES_PER_BOOK=8 pages = 32 pages, +// 32 × 31 = 992 pairs). Using 2048 gives a comfortable margin. +export const MIN_RECALC_PAIR_BUDGET = 2048; + // --------------------------------------------------------------------------- // Options // --------------------------------------------------------------------------- @@ -97,9 +105,10 @@ export async function runFullNeighborRecalc( return { volumesProcessed: 0, pagesProcessed: 0, pairsComputed: 0 }; } - // Compute per-cycle pair budget: O(sqrt(t * log(1+t))) + // Compute per-cycle pair budget: max of Williams-derived capacity and + // the minimum floor so even small corpora make forward progress. const totalGraphMass = (await metadataStore.getAllPages()).length; - const pairBudget = Math.max(1, computeCapacity(totalGraphMass, policy.c)); + const pairBudget = Math.max(MIN_RECALC_PAIR_BUDGET, computeCapacity(totalGraphMass, policy.c)); let totalVolumesProcessed = 0; let totalPagesProcessed = 0; @@ -137,9 +146,7 @@ export async function runFullNeighborRecalc( // Compute pairwise similarities and build neighbor lists const pairsInVolume = volumePages.length * (volumePages.length - 1); const remainingBudget = pairBudget - totalPairsComputed; - // Always process at least one volume per cycle to guarantee forward - // progress; budget exhaustion only defers additional volumes. 
- const budgetExhausted = pairsInVolume > remainingBudget && totalVolumesProcessed > 0; + const budgetExhausted = pairsInVolume > remainingBudget; if (budgetExhausted) { break; } diff --git a/daydreamer/PrototypeRecomputer.ts b/daydreamer/PrototypeRecomputer.ts index 5dac5b2..867fbf3 100644 --- a/daydreamer/PrototypeRecomputer.ts +++ b/daydreamer/PrototypeRecomputer.ts @@ -104,14 +104,18 @@ export interface RecomputeResult { // --------------------------------------------------------------------------- /** - * Recompute medoid and centroid prototypes for all volumes. + * Recompute centroid prototypes for all volumes. * * For each volume: * 1. Load all page embeddings for every book in the volume. - * 2. Select the medoid page (minimises average distance to all others). - * 3. Compute the centroid embedding across all pages. - * 4. Append updated vectors to VectorStore; update volume metadata. - * 5. Refresh salience and run promotion sweep for the volume tier. + * 2. Compute the centroid embedding across all pages. + * 3. Append updated centroid vector to VectorStore; update volume metadata. + * + * Note: Medoid selection and salience/promotion sweeps are intentionally + * omitted here. SalienceEngine methods currently assume page-tier entities; + * running them with volume IDs would produce incorrect tier assignments. + * Volume-tier salience should be wired up once SalienceEngine supports + * non-page tiers. 
*/ async function recomputeVolumePrototypes( options: PrototypeRecomputerOptions, diff --git a/hippocampus/FastNeighborInsert.ts b/hippocampus/FastNeighborInsert.ts index 2678d1c..5c27460 100644 --- a/hippocampus/FastNeighborInsert.ts +++ b/hippocampus/FastNeighborInsert.ts @@ -1,12 +1,12 @@ import type { Hash, MetadataStore, SemanticNeighbor, VectorStore } from "../core/types"; import type { ModelProfile } from "../core/ModelProfile"; import type { HotpathPolicy } from "../core/HotpathPolicy"; -import { computeNeighborMaxDegree } from "../core/HotpathPolicy"; +import { computeNeighborMaxDegree, DEFAULT_HOTPATH_POLICY } from "../core/HotpathPolicy"; import { runPromotionSweep } from "../core/SalienceEngine"; -// Hard cap for the semantic neighbor degree: even if the Williams formula -// returns a higher value, we never allow a node to have more than this many -// semantic neighbors. Kept as a policy constant (not model-derived). +// Absolute upper cap for the semantic neighbor degree. The Williams formula +// can produce larger values for very large corpora; this hard cap keeps the +// neighbor graph manageable even at scale. const NEIGHBOR_DEGREE_HARD_CAP = 32; // Default cosine-distance cutoff when no policy hint is available. @@ -88,16 +88,17 @@ export async function insertSemanticNeighbors( cutoffDistance = DEFAULT_CUTOFF_DISTANCE, } = options; - // Derive maxDegree from the Williams bound if a policy is supplied and the - // caller has not pinned an explicit value. This keeps the semantic neighbor - // graph sparse in proportion to corpus size rather than hardcoding a constant. + // Derive maxDegree from the Williams bound, scaled by corpus size. + // When a policy is provided, use its scaling constant; otherwise fall back + // to the default constant so that corpus-proportional scaling applies in + // all cases (not just when a policy object is explicitly threaded through). + // An explicit options.maxDegree always wins. 
let maxDegree: number; if (options.maxDegree !== undefined) { maxDegree = options.maxDegree; - } else if (policy) { - maxDegree = computeNeighborMaxDegree(allPageIds.length, policy.c, NEIGHBOR_DEGREE_HARD_CAP); } else { - maxDegree = NEIGHBOR_DEGREE_HARD_CAP; + const c = policy?.c ?? DEFAULT_HOTPATH_POLICY.c; + maxDegree = computeNeighborMaxDegree(allPageIds.length, c, NEIGHBOR_DEGREE_HARD_CAP); } if (newPageIds.length === 0) return; diff --git a/hippocampus/HierarchyBuilder.ts b/hippocampus/HierarchyBuilder.ts index ff10689..e63aba7 100644 --- a/hippocampus/HierarchyBuilder.ts +++ b/hippocampus/HierarchyBuilder.ts @@ -267,94 +267,137 @@ export async function buildHierarchy( // ------------------------------------------------------------------------- // Williams fanout quota enforcement // ------------------------------------------------------------------------- - // Validate that no volume or shelf exceeds its Williams-derived fanout - // quota based on the TOTAL count of nodes at that tier. The static chunking - // above already enforces the budgets during initial build, so this guard - // should only trigger when this function is called with pre-existing data - // that has grown past quota (incremental ingestion paths). In those cases - // we re-chunk the oversized parent's children and persist the new structure. + // Per DESIGN.md "Sublinear Fanout Bounds": when a node's child count exceeds + // its Williams-derived limit, HierarchyBuilder triggers a split. + // + // The split threshold is max(STATIC_CONSTANT, computeFanoutLimit(nodeSize)): + // - For freshly built nodes (size ≤ STATIC_CONSTANT), the Williams formula + // may return a smaller value, so we use the static constant as a floor to + // prevent splitting a structure that was just constructed correctly. + // - For nodes that have grown organically past the static constant, the + // Williams limit takes over and drives the split. const policyC = policy?.c ?? 
DEFAULT_HOTPATH_POLICY.c; - const totalBooks = books.length; - const totalVolumes = volumes.length; - // Check volumes: each volume must have ≤ fanout limit based on total book count. - const volumeLimit = computeFanoutLimit(totalBooks, policyC); + // ---- Volumes ---- for (const volume of [...volumes]) { - if (volume.bookIds.length > volumeLimit) { - const subChunks = chunkArray(volume.bookIds, volumeLimit); - const subVolumes: Volume[] = []; - - for (const sub of subChunks) { - const sortedSub = [...sub].sort(); - const subVolumeId = await hashText(`split-vol:${volume.volumeId}:${sortedSub.join("|")}`); - const subVolumeBooks = ( - await Promise.all(sub.map((id) => metadataStore.getBook(id))) - ).filter((b): b is Book => b !== undefined); - - const protoVecs = subVolumeBooks.map((b) => pageVectorMap.get(b.medoidPageId)).filter((v): v is Float32Array => v !== undefined); - const centroid = protoVecs.length > 0 ? computeCentroid(protoVecs) : new Float32Array(dim); - const protoOffset = await vectorStore.appendVector(centroid); - - const subVol: Volume = { - volumeId: subVolumeId, - bookIds: sub, - prototypeOffsets: [protoOffset], - prototypeDim: dim, - variance: volume.variance / 2, - }; - await metadataStore.putVolume(subVol); - subVolumes.push(subVol); - } - - // Replace the oversized volume in shelves - for (const shelf of shelves) { - const idx = shelf.volumeIds.indexOf(volume.volumeId); - if (idx === -1) continue; - const newVolumeIds = [ - ...shelf.volumeIds.slice(0, idx), - ...subVolumes.map((v) => v.volumeId), - ...shelf.volumeIds.slice(idx + 1), - ]; - const updated: Shelf = { ...shelf, volumeIds: newVolumeIds }; - await metadataStore.putShelf(updated); - Object.assign(shelf, updated); + const nodeLimit = Math.max(BOOKS_PER_VOLUME, computeFanoutLimit(volume.bookIds.length, policyC)); + if (volume.bookIds.length <= nodeLimit) continue; + + const subChunks = chunkArray(volume.bookIds, nodeLimit); + const subVolumes: Volume[] = []; + + for (const sub of 
subChunks) { + const sortedSub = [...sub].sort(); + const subVolumeId = await hashText(`split-vol:${volume.volumeId}:${sortedSub.join("|")}`); + + const subBooks = ( + await Promise.all(sub.map((id) => metadataStore.getBook(id))) + ).filter((b): b is Book => b !== undefined); + + // Compute sub-volume variance from actual medoid vectors. + const medoidVecs = subBooks + .map((b) => pageVectorMap.get(b.medoidPageId)) + .filter((v): v is Float32Array => v !== undefined); + const centroid = medoidVecs.length > 0 ? computeCentroid(medoidVecs) : new Float32Array(dim); + const protoOffset = await vectorStore.appendVector(centroid); + + let subVariance = 0; + for (const v of medoidVecs) { + const d = cosineDistance(v, centroid); + subVariance += d * d; } + if (medoidVecs.length > 0) subVariance /= medoidVecs.length; + + const subVol: Volume = { + volumeId: subVolumeId, + bookIds: sub, + prototypeOffsets: [protoOffset], + prototypeDim: dim, + variance: subVariance, + }; + await metadataStore.putVolume(subVol); + subVolumes.push(subVol); + } - volumes.splice(volumes.indexOf(volume), 1, ...subVolumes); + // Replace the oversized volume in every shelf that references it. + for (const shelf of shelves) { + const idx = shelf.volumeIds.indexOf(volume.volumeId); + if (idx === -1) continue; + const newVolumeIds = [ + ...shelf.volumeIds.slice(0, idx), + ...subVolumes.map((v) => v.volumeId), + ...shelf.volumeIds.slice(idx + 1), + ]; + const updated: Shelf = { ...shelf, volumeIds: newVolumeIds }; + await metadataStore.putShelf(updated); + Object.assign(shelf, updated); } + + // Delete the original oversized volume (and its reverse-index entries). + await metadataStore.deleteVolume(volume.volumeId); + volumes.splice(volumes.indexOf(volume), 1, ...subVolumes); } - // Check shelves: each shelf must have ≤ fanout limit based on total volume count. 
- const shelfLimit = computeFanoutLimit(totalVolumes, policyC); + // ---- Shelves ---- for (const shelf of [...shelves]) { - if (shelf.volumeIds.length > shelfLimit) { - const subChunks = chunkArray(shelf.volumeIds, shelfLimit); - const subShelves: Shelf[] = []; + const nodeLimit = Math.max(VOLUMES_PER_SHELF, computeFanoutLimit(shelf.volumeIds.length, policyC)); + if (shelf.volumeIds.length <= nodeLimit) continue; + + const subChunks = chunkArray(shelf.volumeIds, nodeLimit); + + // Update the existing shelf record to hold only the FIRST sub-chunk's + // volumes. All remaining sub-chunks get fresh shelf records. + // Note: volumes that move to new shelves keep a stale volumeToShelf entry + // pointing at this shelf's ID; that entry will be cleaned up by a future + // Daydreamer ClusterStability pass (deleteShelf is not yet on the interface). + const newShelves: Shelf[] = []; + for (let ci = 0; ci < subChunks.length; ci++) { + const sub = subChunks[ci]; + const sortedSub = [...sub].sort(); + + if (ci === 0) { + // Re-use the original shelfId for the first sub-chunk. + const subShelfVols = ( + await Promise.all(sub.map((id) => metadataStore.getVolume(id))) + ).filter((v): v is Volume => v !== undefined); + const protoVecs = await Promise.all( + subShelfVols.map((v) => vectorStore.readVector(v.prototypeOffsets[0], dim)), + ); + const centroid = protoVecs.length > 0 ? 
computeCentroid(protoVecs) : new Float32Array(dim); + const routingOffset = await vectorStore.appendVector(centroid); - for (const sub of subChunks) { - const sortedSub = [...sub].sort(); + const updated: Shelf = { + shelfId: shelf.shelfId, + volumeIds: sub, + routingPrototypeOffsets: [routingOffset], + routingDim: dim, + }; + await metadataStore.putShelf(updated); + Object.assign(shelf, updated); + newShelves.push(updated); + } else { const subShelfId = await hashText(`split-shelf:${shelf.shelfId}:${sortedSub.join("|")}`); const subShelfVols = ( await Promise.all(sub.map((id) => metadataStore.getVolume(id))) ).filter((v): v is Volume => v !== undefined); - - const protoVecs = subShelfVols.map((v) => vectorStore.readVector(v.prototypeOffsets[0], dim)); - const resolvedProtos = await Promise.all(protoVecs); - const centroid = resolvedProtos.length > 0 ? computeCentroid(resolvedProtos) : new Float32Array(dim); + const protoVecs = await Promise.all( + subShelfVols.map((v) => vectorStore.readVector(v.prototypeOffsets[0], dim)), + ); + const centroid = protoVecs.length > 0 ? 
computeCentroid(protoVecs) : new Float32Array(dim); const routingOffset = await vectorStore.appendVector(centroid); - const subShelf: Shelf = { + const newShelf: Shelf = { shelfId: subShelfId, volumeIds: sub, routingPrototypeOffsets: [routingOffset], routingDim: dim, }; - await metadataStore.putShelf(subShelf); - subShelves.push(subShelf); + await metadataStore.putShelf(newShelf); + newShelves.push(newShelf); } - - shelves.splice(shelves.indexOf(shelf), 1, ...subShelves); } + + shelves.splice(shelves.indexOf(shelf), 1, ...newShelves); } return { books, volumes, shelves }; From c81a90c692ec1472be8bd0ea302b07bff8206b26 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 03:04:21 +0000 Subject: [PATCH 5/5] docs: Add Williams bound / curse of dimensionality explanation to DESIGN.md and README.md Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com> --- DESIGN.md | 18 +++++++++++++++++- README.md | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index f11a910..d2e72aa 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -283,8 +283,24 @@ This mechanism enables **distributed learning without hallucination**: the syste ### Motivation +#### The Geometric Root: Curse of Dimensionality + +CORTEX operates on high-dimensional Matryoshka embeddings. In `n`-dimensional Euclidean space the volume of the unit ball is: + +``` +Vol(B²ᵐ) = πᵐ / m! (n = 2m, even dimension) +``` + +As `m` (half the embedding dimension) grows, this volume collapses toward zero super-exponentially fast (the factorial `m!` outgrows `πᵐ`). This is the geometric driver of the **curse of dimensionality**: pairwise distances concentrate (everything looks equally far away), interiors vanish (rejection sampling and kernel methods fail), and any linear or polynomial scaling law blows up. 
Naïve nearest-neighbor search, flat clustering, fixed-K neighbor graphs, and uniform fan-out become either useless or unboundedly expensive as the corpus scales. + +Every structural decision in CORTEX — protected Matryoshka layers, hierarchical medoids, the Metroid antithesis hunt, dimensional unwinding, Williams-derived index sizes — is a direct geometric counter-measure to this collapse. + +#### The Fix: Williams 2025 Sublinear Bound + CORTEX applies the Williams 2025 result — S = O(√(t log t)) — as a universal sublinear growth law everywhere the system trades space against time: the resident hotpath index, per-tier hierarchy quotas, per-community graph budgets, semantic neighbor degree limits, and Daydreamer maintenance batch sizing. This single principle ensures the system stays efficient as the memory graph scales from hundreds to millions of nodes. +Concretely: where a naïve system would grow capacity linearly (O(t)) or even quadratically (O(t²) for pairwise operations), CORTEX caps every space-or-time budget at O(√(t log t)). This is the mathematically precise bound that keeps the engine on-device forever, regardless of corpus size. + ### Graph Mass Definition ``` @@ -797,7 +813,7 @@ relative to frozen c. Planned module: `cortex/MetroidBuilder.ts`. **Hotpath**: The in-memory resident index of H(t) entries spanning all four hierarchy tiers. The hotpath is the first lookup target for every query; misses spill to WARM/COLD storage. HOT membership and salience are checkpointed to the `hotpath_index` IndexedDB store by Daydreamer each maintenance cycle, allowing the RAM index to be restored after a page reload or machine reboot without full corpus replay. -**Williams Bound**: The theoretical result S = O(√(t log t)) from Williams 2025, applied here as a universal sublinear growth law for all space-time tradeoff subsystems in CORTEX. 
+**Williams Bound**: The theoretical result S = O(√(t log t)) from Williams 2025, applied here as a universal sublinear growth law for all space-time tradeoff subsystems in CORTEX. The bound is the constructive answer to the curse of dimensionality: in `n`-dimensional space the unit-ball volume collapses as `πᵐ/m!` (n = 2m), making linear-scale data structures infeasible. The Williams sublinear bound keeps every budget — hotpath capacity, hierarchy fanout, neighbor degree, maintenance batch size — proportional to √(t log t) rather than t, ensuring on-device viability at any corpus scale. **Graph mass (t)**: t = |V| + |E| = total pages plus all edges (Hebbian + semantic neighbor). The canonical input to all capacity and bound formulas. diff --git a/README.md b/README.md index 99bf30a..1dbcc92 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ This is the "dreaming" phase that prevents catastrophic forgetting and forces ab ## Core Design Principles - **Biological Scarcity** — Only a fixed number of active prototypes live in memory. Everything else is gracefully demoted to disk. -- **Sublinear Growth (Williams Bound)** — The resident hotpath index is bounded to H(t) = ⌈c·√(t·log₂(1+t))⌉ where t = total graph mass (pages + edges). Memory scales sublinearly as the graph grows, trading time for space at a mathematically principled rate. See [`DESIGN.md`](DESIGN.md) for the full theorem mapping. +- **Sublinear Growth (Williams Bound)** — In `n`-dimensional embedding space the unit-ball volume collapses as `πᵐ/m!` (n = 2m). This geometric fact — the curse of dimensionality — makes linear-scale data structures infeasible as corpora grow. CORTEX counters it with the Williams 2025 result S = O(√(t log t)), used as a universal sublinear growth law: the resident hotpath index is bounded to H(t) = ⌈c·√(t·log₂(1+t))⌉, with the same formula driving hierarchy fanout limits, semantic-neighbor degree caps, and Daydreamer maintenance batch sizes. 
Every space-or-time budget scales sublinearly, keeping the engine on-device at any corpus size. See [`DESIGN.md`](DESIGN.md) for the full theorem mapping. - **Three-Zone Memory** — HOT (resident in-memory index, capacity H(t)), WARM (indexed in IndexedDB, reachable via nearest-neighbor search), COLD (metadata in IndexedDB + raw vectors in OPFS, but semantically isolated from the search path — no strong nearest neighbors in vector space at insertion time; only discoverable by a deliberate random walk). All data is retained locally forever; zones control lookup cost and discoverability, not data lifetime. - **Hierarchical & Sparse** — Progressive dimensionality reduction + medoid clustering keeps memory efficient at any scale, with Williams-derived fanout bounds preventing any single tier from monopolising the index. - **Hebbian & Dynamic** — Connections strengthen and weaken naturally. Node salience (σ = α·H_in + β·R + γ·Q) drives promotion into and eviction from the resident hotpath.