diff --git a/knowledge-graph-ontology-drift/README.md b/knowledge-graph-ontology-drift/README.md new file mode 100644 index 0000000..a024b11 --- /dev/null +++ b/knowledge-graph-ontology-drift/README.md @@ -0,0 +1,23 @@ +# Knowledge Graph Ontology Drift + +This module adds a focused Scientific Knowledge Graph Integration control for reviewing ontology release drift before graph data, recommendations, and linked entity pages are migrated. + +## What it checks + +- Deprecated ontology concepts still referenced by graph nodes or relationships +- Replacement concept validity +- Synonym collisions that can corrupt entity search or recommendation expansion +- Active recommendation paths that depend on deprecated concepts +- Relationship evidence freshness +- Migration actions for nodes, relationships, and recommendations +- Impact scoring and deterministic audit digest for graph review + +## Run locally + +```bash +npm run check +npm test +npm run demo +``` + +The sample data intentionally includes one deprecated concept still in use, a synonym collision, a stale recommendation path, and one relationship with stale evidence. diff --git a/knowledge-graph-ontology-drift/demo.js b/knowledge-graph-ontology-drift/demo.js new file mode 100644 index 0000000..f05da0b --- /dev/null +++ b/knowledge-graph-ontology-drift/demo.js @@ -0,0 +1,25 @@ +"use strict"; + +const sampleBundle = require("./sample-data.json"); +const { + analyzeOntologyDrift, +} = require("./src/knowledge-graph-ontology-drift"); + +const result = analyzeOntologyDrift(sampleBundle); + +console.log(`Ontology release: ${result.previousVersion} -> ${result.nextVersion}`); +console.log(`Decision: ${result.decision}`); +console.log(`Impact score: ${result.impactScore}`); +console.log(`Audit digest: ${result.auditDigest}`); +console.log(""); +console.log("Migration actions:"); +for (const action of result.migrationActions) { + console.log(`- ${action.type} ${action.targetId}`); +} +console.log(""); +console.log("Findings:"); +for (const finding of result.findings) { + console.log(`- [${finding.severity}] ${finding.id}: ${finding.title}`); + console.log(` detail: ${finding.detail}`); + console.log(` remediation: ${finding.remediation}`); +} diff --git a/knowledge-graph-ontology-drift/docs/ontology-drift-demo.mp4 b/knowledge-graph-ontology-drift/docs/ontology-drift-demo.mp4 new file mode 100644 index 0000000..d0fac28 Binary files /dev/null and b/knowledge-graph-ontology-drift/docs/ontology-drift-demo.mp4 differ diff --git a/knowledge-graph-ontology-drift/docs/requirement-map.md b/knowledge-graph-ontology-drift/docs/requirement-map.md new file mode 100644 index 0000000..83bc3f8 --- /dev/null +++ b/knowledge-graph-ontology-drift/docs/requirement-map.md @@ -0,0 +1,14 @@ +# Requirement Map + +| Scientific Knowledge Graph requirement | Implementation | +| --- | --- | +| Linked scientific entities | `graphNodes` model papers, datasets, titles, types, and ontology concept identifiers. | +| Relationships and evidence | `relationships` include source/target nodes, relationship type, concept tags, DOI evidence, and evidence dates. | +| Entity normalization | Deprecated concept mappings produce explicit migration actions from old identifiers to replacement concepts. | +| Graph search and recommendations | Synonym collisions and stale recommendation paths are flagged before search expansion or recommendation reuse. | +| Recommendation provenance | Active recommendations record concept paths and are invalidated when those paths depend on deprecated concepts. | +| Ontology-backed governance | `analyzeOntologyDrift` returns findings, migration actions, impact score, release versions, and an audit digest. | + +## Demo Video + +The PR includes `docs/ontology-drift-demo.mp4`, a real terminal walkthrough running the local check, test, and demo scripts. diff --git a/knowledge-graph-ontology-drift/package.json b/knowledge-graph-ontology-drift/package.json new file mode 100644 index 0000000..8aab3d3 --- /dev/null +++ b/knowledge-graph-ontology-drift/package.json @@ -0,0 +1,12 @@ +{ + "name": "knowledge-graph-ontology-drift", + "version": "1.0.0", + "description": "Ontology drift migration controls for scientific knowledge graphs.", + "main": "src/knowledge-graph-ontology-drift.js", + "scripts": { + "check": "node --check src/knowledge-graph-ontology-drift.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + }, + "license": "MIT" +} diff --git a/knowledge-graph-ontology-drift/sample-data.json b/knowledge-graph-ontology-drift/sample-data.json new file mode 100644 index 0000000..53e0d69 --- /dev/null +++ b/knowledge-graph-ontology-drift/sample-data.json @@ -0,0 +1,79 @@ +{ + "now": "2026-05-15T00:00:00.000Z", + "maxEvidenceAgeDays": 540, + "ontologyRelease": { + "previousVersion": "sci-kg-2026.04", + "nextVersion": "sci-kg-2026.06", + "concepts": [ + { + "id": "concept:single-cell-rna-seq", + "label": "single-cell RNA sequencing", + "synonyms": ["scRNA-seq", "single cell transcriptomics"] + }, + { + "id": "concept:spatial-transcriptomics", + "label": "spatial transcriptomics", + "synonyms": ["spatial RNA-seq", "spatial assay"] + }, + { + "id": "concept:spatial-rnaseq-panel", + "label": "spatial RNA sequencing panel", + "synonyms": ["spatial assay"] + }, + { + "id": "concept:quality-control-pipeline", + "label": "quality control pipeline", + "synonyms": ["QC workflow"] + } + ], + "deprecatedConcepts": [ + { + "oldId": "concept:single-cell-old", + "replacementId": "concept:single-cell-rna-seq", + "reason": "merged into normalized assay method concept" + } + ] + }, + "graphNodes": [ + { + "id": "paper:10.5555/cell-atlas.2024", + "type": "paper", + "title": "Cross-lab cell atlas replication", + "conceptIds": ["concept:single-cell-old", "concept:quality-control-pipeline"] + }, + { + "id": "dataset:atlas-v2", + "type": "dataset", + "title": "Atlas release v2", + "conceptIds": ["concept:single-cell-rna-seq", "concept:spatial-transcriptomics"] + } + ], + "relationships": [ + { + "id": "rel:paper-method-dataset", + "sourceId": "paper:10.5555/cell-atlas.2024", + "targetId": "dataset:atlas-v2", + "type": "uses-dataset", + "conceptIds": ["concept:single-cell-old"], + "evidenceDoi": "10.5555/cell-atlas.2024", + "evidenceDate": "2024-01-10T00:00:00.000Z" + }, + { + "id": "rel:dataset-qc", + "sourceId": "dataset:atlas-v2", + "targetId": "paper:10.5555/cell-atlas.2024", + "type": "validated-by", + "conceptIds": ["concept:quality-control-pipeline"], + "evidenceDoi": "10.5555/qc-pipeline.2026", + "evidenceDate": "2026-04-15T00:00:00.000Z" + } + ], + "recommendations": [ + { + "id": "rec:reuse-atlas-v2", + "status": "active", + "conceptPath": ["concept:single-cell-old", "concept:quality-control-pipeline"], + "reason": "dataset reuse candidate for single-cell analysis" + } + ] +} diff --git a/knowledge-graph-ontology-drift/src/knowledge-graph-ontology-drift.js b/knowledge-graph-ontology-drift/src/knowledge-graph-ontology-drift.js new file mode 100644 index 0000000..b0d7e50 --- /dev/null +++ b/knowledge-graph-ontology-drift/src/knowledge-graph-ontology-drift.js @@ -0,0 +1,284 @@ +"use strict"; + +const crypto = require("node:crypto"); + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`; + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + return JSON.stringify(value); +} + +function stableHash(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function requireFields(object, fields, label) { + const missing = fields.filter((field) => object[field] === undefined || object[field] === null); + if (missing.length > 0) throw new Error(`${label} is missing required field(s): ${missing.join(", ")}`); +} + +function finding(severity, id, title, detail, remediation, entityIds = []) { + return { severity, id, title, detail, remediation, entityIds }; +} + +function normalizeTerm(value) { + return value.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim(); +} + +function parseDate(value) { + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) throw new Error(`Invalid date: ${value}`); + return parsed; +} + +function assertBundle(bundle) { + requireFields(bundle, ["ontologyRelease", "graphNodes", "relationships", "recommendations"], "ontology drift bundle"); + requireFields(bundle.ontologyRelease, ["previousVersion", "nextVersion", "concepts", "deprecatedConcepts"], "ontology release"); +} + +function conceptById(concepts) { + return new Map(concepts.map((concept) => [concept.id, concept])); +} + +function deprecatedById(deprecatedConcepts) { + return new Map(deprecatedConcepts.map((concept) => [concept.oldId, concept])); +} + +function collectConceptUsage(graphNodes, relationships) { + const usage = new Map(); + + for (const node of graphNodes) { + for (const conceptId of node.conceptIds) { + if (!usage.has(conceptId)) usage.set(conceptId, { nodes: [], relationships: [] }); + usage.get(conceptId).nodes.push(node.id); + } + } + + for (const relationship of relationships) { + for (const conceptId of relationship.conceptIds) { + if (!usage.has(conceptId)) usage.set(conceptId, { nodes: [], relationships: [] }); + usage.get(conceptId).relationships.push(relationship.id); + } + } + + return usage; +} + +function evaluateDeprecations(ontologyRelease, graphNodes, relationships) { + const findings = []; + const usage = collectConceptUsage(graphNodes, relationships); + const nextConcepts = conceptById(ontologyRelease.concepts); + + for (const deprecated of ontologyRelease.deprecatedConcepts) { + const affected = usage.get(deprecated.oldId); + if (!affected) continue; + + if (!deprecated.replacementId || !nextConcepts.has(deprecated.replacementId)) { + findings.push(finding( + "blocker", + "deprecated-concept-without-valid-replacement", + "Deprecated concept is still in use without a valid replacement", + `${deprecated.oldId} affects ${affected.nodes.length} node(s) and ${affected.relationships.length} relationship(s).`, + "Add a replacement concept to the ontology release before migrating graph references.", + [...affected.nodes, ...affected.relationships], + )); + continue; + } + + findings.push(finding( + "blocker", + "deprecated-concept-in-use", + "Deprecated concept still appears in graph data", + `${deprecated.oldId} should migrate to ${deprecated.replacementId}.`, + "Apply a reviewed concept migration to affected nodes, relationships, and recommendations.", + [...affected.nodes, ...affected.relationships], + )); + } + + return findings; +} + +function evaluateSynonymCollisions(concepts) { + const findings = []; + const bySynonym = new Map(); + + for (const concept of concepts) { + const terms = [concept.label, ...(concept.synonyms || [])].map(normalizeTerm).filter(Boolean); + for (const term of new Set(terms)) { + if (!bySynonym.has(term)) bySynonym.set(term, []); + bySynonym.get(term).push(concept.id); + } + } + + for (const [term, conceptIds] of bySynonym) { + const uniqueIds = [...new Set(conceptIds)]; + if (uniqueIds.length > 1) { + findings.push(finding( + "warning", + "synonym-collision", + "Ontology synonym maps to multiple concepts", + `"${term}" maps to ${uniqueIds.join(", ")}.`, + "Add disambiguation rules or remove ambiguous synonyms before enabling search/recommendation expansion.", + uniqueIds, + )); + } + } + + return findings; +} + +function evaluateRecommendationPaths(recommendations, deprecatedConcepts) { + const findings = []; + const deprecatedIds = new Set(deprecatedConcepts.map((concept) => concept.oldId)); + + for (const recommendation of recommendations) { + const staleConcepts = recommendation.conceptPath.filter((conceptId) => deprecatedIds.has(conceptId)); + if (staleConcepts.length > 0 && recommendation.status === "active") { + findings.push(finding( + "blocker", + "stale-recommendation-path", + "Active recommendation depends on deprecated ontology concepts", + `${recommendation.id} includes deprecated concept(s): ${staleConcepts.join(", ")}.`, + "Invalidate or regenerate the recommendation after ontology migration.", + [recommendation.id], + )); + } + } + + return findings; +} + +function evaluateRelationshipEvidence(relationships, options) { + const findings = []; + const now = parseDate(options.now); + const maxEvidenceAgeDays = options.maxEvidenceAgeDays || 540; + + for (const relationship of relationships) { + const evidenceDate = parseDate(relationship.evidenceDate); + const ageDays = Math.floor((now.getTime() - evidenceDate.getTime()) / 86400000); + if (ageDays > maxEvidenceAgeDays) { + findings.push(finding( + "warning", + "relationship-evidence-stale", + "Relationship evidence is older than the freshness window", + `${relationship.id} evidence is ${ageDays} days old.`, + "Revalidate stale evidence before using it for graph recommendations.", + [relationship.id], + )); + } + } + + return findings; +} + +function buildMigrationActions(ontologyRelease, graphNodes, relationships, recommendations) { + const deprecated = deprecatedById(ontologyRelease.deprecatedConcepts); + const actions = []; + + for (const node of graphNodes) { + const replacements = node.conceptIds + .filter((conceptId) => deprecated.has(conceptId)) + .map((conceptId) => ({ from: conceptId, to: deprecated.get(conceptId).replacementId })); + if (replacements.length > 0) { + actions.push({ + type: "node-concept-migration", + targetId: node.id, + replacements, + }); + } + } + + for (const relationship of relationships) { + const replacements = relationship.conceptIds + .filter((conceptId) => deprecated.has(conceptId)) + .map((conceptId) => ({ from: conceptId, to: deprecated.get(conceptId).replacementId })); + if (replacements.length > 0) { + actions.push({ + type: "relationship-concept-migration", + targetId: relationship.id, + replacements, + }); + } + } + + for (const recommendation of recommendations) { + if (recommendation.conceptPath.some((conceptId) => deprecated.has(conceptId))) { + actions.push({ + type: "recommendation-regeneration", + targetId: recommendation.id, + reason: "recommendation path references deprecated concepts", + }); + } + } + + return actions; +} + +function calculateImpactScore(actions, graphNodes, relationships, recommendations) { + const denominator = graphNodes.length + relationships.length + recommendations.length; + if (denominator === 0) return 0; + return Number((actions.length / denominator).toFixed(3)); +} + +function decide(findings) { + if (findings.some((item) => item.severity === "blocker")) return "migration-required"; + if (findings.some((item) => item.severity === "warning")) return "manual-review"; + return "graph-current"; +} + +function analyzeOntologyDrift(bundle, options = {}) { + assertBundle(bundle); + const now = options.now || bundle.now || new Date().toISOString(); + const findings = [ + ...evaluateDeprecations(bundle.ontologyRelease, bundle.graphNodes, bundle.relationships), + ...evaluateSynonymCollisions(bundle.ontologyRelease.concepts), + ...evaluateRecommendationPaths(bundle.recommendations, bundle.ontologyRelease.deprecatedConcepts), + ...evaluateRelationshipEvidence(bundle.relationships, { + now, + maxEvidenceAgeDays: options.maxEvidenceAgeDays || bundle.maxEvidenceAgeDays, + }), + ]; + const migrationActions = buildMigrationActions( + bundle.ontologyRelease, + bundle.graphNodes, + bundle.relationships, + bundle.recommendations, + ); + const decision = decide(findings); + const impactScore = calculateImpactScore( + migrationActions, + bundle.graphNodes, + bundle.relationships, + bundle.recommendations, + ); + const auditDigest = stableHash({ + previousVersion: bundle.ontologyRelease.previousVersion, + nextVersion: bundle.ontologyRelease.nextVersion, + decision, + findings, + migrationActions, + impactScore, + }); + + return { + previousVersion: bundle.ontologyRelease.previousVersion, + nextVersion: bundle.ontologyRelease.nextVersion, + evaluatedAt: parseDate(now).toISOString(), + decision, + impactScore, + findings, + migrationActions, + auditDigest: `sha256:${auditDigest}`, + }; +} + +module.exports = { + analyzeOntologyDrift, + stableHash, + stableStringify, +}; diff --git a/knowledge-graph-ontology-drift/test.js b/knowledge-graph-ontology-drift/test.js new file mode 100644 index 0000000..0e84ed6 --- /dev/null +++ b/knowledge-graph-ontology-drift/test.js @@ -0,0 +1,46 @@ +"use strict"; + +const assert = require("node:assert/strict"); +const sampleBundle = require("./sample-data.json"); +const { + analyzeOntologyDrift, + stableHash, +} = require("./src/knowledge-graph-ontology-drift"); + +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + +function ids(result) { + return new Set(result.findings.map((finding) => finding.id)); +} + +const drift = analyzeOntologyDrift(sampleBundle); +const driftIds = ids(drift); + +assert.equal(drift.decision, "migration-required"); +assert.match(drift.auditDigest, /^sha256:[a-f0-9]{64}$/); +assert.equal(drift.impactScore, 0.6); +assert(driftIds.has("deprecated-concept-in-use")); +assert(driftIds.has("synonym-collision")); +assert(driftIds.has("stale-recommendation-path")); +assert(driftIds.has("relationship-evidence-stale")); +assert.equal(drift.migrationActions.length, 3); + +const currentBundle = clone(sampleBundle); +currentBundle.ontologyRelease.deprecatedConcepts = []; +currentBundle.ontologyRelease.concepts[2].synonyms = ["spatial panel assay"]; +currentBundle.graphNodes[0].conceptIds[0] = "concept:single-cell-rna-seq"; +currentBundle.relationships[0].conceptIds[0] = "concept:single-cell-rna-seq"; +currentBundle.relationships[0].evidenceDate = "2026-04-10T00:00:00.000Z"; +currentBundle.recommendations[0].conceptPath[0] = "concept:single-cell-rna-seq"; + +const current = analyzeOntologyDrift(currentBundle); +assert.equal(current.decision, "graph-current"); +assert.equal(current.findings.length, 0); +assert.equal(current.migrationActions.length, 0); +assert.equal(current.impactScore, 0); +assert.notEqual(drift.auditDigest, current.auditDigest); +assert.equal(stableHash({ z: 1, a: ["b"] }), stableHash({ a: ["b"], z: 1 })); + +console.log("knowledge graph ontology drift tests passed");