diff --git a/research-gap-replication-planner/README.md b/research-gap-replication-planner/README.md new file mode 100644 index 0000000..c9024f6 --- /dev/null +++ b/research-gap-replication-planner/README.md @@ -0,0 +1,70 @@ +# Research Gap Replication Planner + +Self-contained contribution for `SCIBASE-AI/SCIBASE.AI#16`, focused on the boundary between the AI Research Assistant Suite's peer-review, reproducibility, and research-gap capabilities. + +This slice is intentionally not another broad assistant demo. It builds a deterministic planner that turns corpus limitations, prior reproducibility attempts, manuscript evidence coverage, and lab capabilities into reviewer-ready replication priorities. + +## What It Does + +- Generates auto peer-review flags for claim/evidence/citation alignment, statistical risk, and domain-specific omissions. +- Scores project reproducibility readiness from raw data, clean pipelines, environment locks, tests, reported outputs, and previous run attempts. +- Finds under-replicated research intersections with unresolved findings, negative-result signals, and lab-fit evidence. +- Produces a prioritized replication plan with required artifacts, first actions, and a stable audit digest. +- Uses only local synthetic data and Node.js standard library APIs. + +## Requirement Map + +| Issue #16 requirement | Implementation evidence | +| --- | --- | +| Auto peer review reports | `reviewManuscript` emits structured review findings with severity, category, claim id, and domain-specific template checks. | +| Claims vs. evidence alignment | Claims without evidence, citations, or registered support are flagged before release. | +| Statistical or methodological red flags | Small-sample p-value edge cases, missing controls, and missing domain evidence are detected. | +| Reproducibility checker | `scoreReproducibility` checks required artifacts, prior attempts, environment integrity, and output consistency. 
| +| Reproducibility confidence score | Each project receives a numeric confidence score (0-100) plus a `ready` / `review-needed` / `hold` status and remediation actions. | +| Links to previous reproducibility attempts | Prior attempt ids and outcomes are included in each report and used in scoring. | +| Research gap finder | `rankResearchOpportunities` ranks under-replicated topic intersections using unresolved findings, negative signals, replication counts, and corpus activity. | +| User opportunity feed | Lab capabilities and researcher interests shape the replication-priority output. | + +See `docs/requirement-map.md` for the detailed checklist. + +## Demo + +```bash +cd research-gap-replication-planner +npm run demo +``` + +Expected summary: + +```text +Planner status: hold +Top opportunity: alzheimer + crispr + single-cell +Top replication decision: prioritize +Top peer-review flag: Claim has no supporting evidence artifact. +Weak reproducibility project: circadian-metabolomics-2026 +Audit hash: de1b1fa44644dc24... +``` + +Visual demo assets: + +- `docs/demo.svg` +- `docs/demo.mp4` + +## Verification + +```bash +cd research-gap-replication-planner +npm run check +``` + +Local verification covers: + +- opportunity ranking +- peer-review flag generation +- reproducibility scoring +- deterministic audit hash generation +- CLI demo output + +## AI-Assisted Disclosure + +This contribution was produced with AI assistance and reviewed before submission. It uses no credentials, no live user data, and no external services. 
diff --git a/research-gap-replication-planner/docs/demo.mp4 b/research-gap-replication-planner/docs/demo.mp4 new file mode 100644 index 0000000..2557917 Binary files /dev/null and b/research-gap-replication-planner/docs/demo.mp4 differ diff --git a/research-gap-replication-planner/docs/demo.svg b/research-gap-replication-planner/docs/demo.svg new file mode 100644 index 0000000..868f456 --- /dev/null +++ b/research-gap-replication-planner/docs/demo.svg @@ -0,0 +1,16 @@ + + + + Research Gap Replication Planner + Issue #16 AI Research Assistant Suite + + 1. Auto peer-review flags unsupported claims + 2. Reproducibility confidence scores project artifacts + 3. Gap finder ranks under-replicated topic intersections + 4. Replication plan lists required artifacts and first actions + + + Top: CRISPR + Alzheimer + single-cell + + Decision: prioritize + diff --git a/research-gap-replication-planner/docs/requirement-map.md b/research-gap-replication-planner/docs/requirement-map.md new file mode 100644 index 0000000..6e4e6a6 --- /dev/null +++ b/research-gap-replication-planner/docs/requirement-map.md @@ -0,0 +1,13 @@ +# Requirement Map + +| Requirement | Evidence in this module | Verification | +| --- | --- | --- | +| Natural-language review suggestions | `src/planner.js` creates reviewer action text for unsupported claims, missing citations, small samples, and domain-specific omissions. | `test/planner.test.js` checks the highest severity review finding. | +| Clarity and coherence checks | Review packets flag overbroad claims that lack an evidence artifact or citation trail. | `npm test` validates unsupported-claim detection. | +| Statistical or methodological red flags | Small sample sizes near significance thresholds and missing domain controls are flagged. | `npm test` checks statistical risk categories. | +| Claims vs. evidence alignment | Every claim is checked for evidence ids, citation ids, protocol linkage, and method support. 
| `npm test` checks claim alignment output. | +| Reproducibility confidence | Projects are scored from raw data, clean pipeline, environment lock, tests, reported output, and prior attempts. | `npm test` verifies low-confidence and high-confidence cases. | +| Previous reproducibility attempts | Attempt records are attached to project reports and affect the score. | `npm test` verifies failed attempts reduce confidence. | +| Research gap finder | Corpus findings are grouped by topic intersection and scored for unresolved status, replication count, corpus activity, lab fit, and feasibility. | `npm test` verifies the top ranked topic intersection. | +| Personalized opportunity feed | Lab capabilities and researcher interests are used to shape priority and first actions. | `npm run demo` prints the ranked plan and audit hash. | +| Reviewer-ready evidence | Output includes required artifacts, first actions, gap rationale, and audit digest. | `npm run check` runs tests plus demo. | diff --git a/research-gap-replication-planner/package.json b/research-gap-replication-planner/package.json new file mode 100644 index 0000000..a52f858 --- /dev/null +++ b/research-gap-replication-planner/package.json @@ -0,0 +1,12 @@ +{ + "name": "research-gap-replication-planner", + "version": "0.1.0", + "description": "Deterministic research gap and replication-priority planner for SCIBASE issue #16.", + "type": "module", + "scripts": { + "demo": "node scripts/demo.js", + "test": "node test/planner.test.js", + "check": "npm test && npm run demo" + }, + "license": "MIT" +} diff --git a/research-gap-replication-planner/sample/corpus.json b/research-gap-replication-planner/sample/corpus.json new file mode 100644 index 0000000..1ce8001 --- /dev/null +++ b/research-gap-replication-planner/sample/corpus.json @@ -0,0 +1,109 @@ +{ + "lab": { + "name": "Neurogenomics Replication Lab", + "interests": ["Alzheimer", "CRISPR", "single-cell", "metabolomics"], + "capabilities": ["single-cell", "RNA-seq", 
"CRISPR perturbation", "metabolomics"], + "methods": ["notebook rerun", "containerized workflow"] + }, + "manuscripts": [ + { + "id": "ms-neuro-042", + "title": "Perturbation response in single-cell Alzheimer models", + "domain": "molecular-biology", + "claims": [ + { + "id": "c1", + "text": "CRISPR perturbation improves cell viability across all donor lines.", + "sampleSize": 18, + "pValue": 0.051, + "evidenceIds": [], + "citationIds": ["doi:10.1000/example-a"] + }, + { + "id": "c2", + "text": "A subtype-specific response appears in microglia-like cells.", + "sampleSize": 64, + "pValue": 0.02, + "evidenceIds": ["notebook:microglia-response"], + "citationIds": [] + } + ] + } + ], + "projects": [ + { + "id": "circadian-metabolomics-2026", + "title": "Circadian metabolomics benchmark", + "dependencyIntegrity": "stale", + "outputConsistency": "drifted", + "artifacts": { + "rawData": true, + "cleanPipeline": true, + "environmentLock": false, + "testSet": false, + "reportedResults": true + } + }, + { + "id": "microglia-rnaseq-2026", + "title": "Microglia RNA-seq replication packet", + "dependencyIntegrity": "locked", + "outputConsistency": "consistent", + "artifacts": { + "rawData": true, + "cleanPipeline": true, + "environmentLock": true, + "testSet": true, + "reportedResults": true + } + } + ], + "reproducibilityAttempts": [ + { + "id": "run-031", + "projectId": "circadian-metabolomics-2026", + "topicId": "finding-crispr-ad", + "outcome": "failed", + "note": "Pipeline required an unavailable R package version." + }, + { + "id": "run-044", + "projectId": "microglia-rnaseq-2026", + "topicId": "finding-crispr-ad", + "outcome": "passed", + "note": "Notebook reran with matching summary metrics." 
+ } + ], + "corpusFindings": [ + { + "id": "finding-crispr-ad", + "title": "CRISPR perturbation in Alzheimer's single-cell models", + "topics": ["CRISPR", "Alzheimer", "single-cell"], + "methods": ["RNA-seq", "CRISPR perturbation"], + "citations": 126, + "activeProjects": 4, + "replicationCount": 1, + "limitation": "Unresolved donor variability and low replication across single-cell CRISPR Alzheimer studies." + }, + { + "id": "finding-sleep-metabolomics", + "title": "Sleep disruption metabolomics in aging cohorts", + "topics": ["sleep", "metabolomics", "aging"], + "methods": ["metabolomics"], + "citations": 38, + "activeProjects": 2, + "replicationCount": 3, + "limitation": "Negative results were rarely reported and methods varied across cohorts." + }, + { + "id": "finding-protein-folding", + "title": "Protein folding benchmark coverage", + "topics": ["protein folding", "benchmark"], + "methods": ["simulation"], + "citations": 14, + "activeProjects": 1, + "replicationCount": 5, + "limitation": "Benchmark appears stable with several successful replications." 
import { createHash } from "node:crypto"

/** Risk points a single peer-review finding adds to a manuscript, keyed by severity. */
const REVIEW_FINDING_WEIGHTS = Object.freeze({
  critical: 30,
  high: 22,
  medium: 12,
  low: 6,
})

/** Artifact flags checked on `project.artifacts`; each falsy one costs confidence. */
const REQUIRED_REPRO_ARTIFACTS = Object.freeze([
  "rawData",
  "cleanPipeline",
  "environmentLock",
  "testSet",
  "reportedResults",
])

/** Phrases in a finding's limitation text that count as an unresolved / negative-result signal. */
const UNRESOLVED_SIGNAL_TERMS = Object.freeze([
  "unresolved",
  "negative",
  "null",
  "underpowered",
  "inconsistent",
  "open question",
  "low replication",
])

/** Restricts `value` to the inclusive range [min, max]. */
function clamp(value, min, max) {
  return Math.max(min, Math.min(max, value))
}

/** Returns the distinct truthy entries of `values`, in first-seen order. */
function unique(values) {
  return [...new Set(values.filter(Boolean))]
}

/**
 * Lower-cases `value` and collapses every run of characters outside
 * `a-z 0-9 . + -` into a single space. Falsy input becomes "".
 */
function normalizeText(value) {
  return String(value || "").toLowerCase().replace(/[^a-z0-9.+-]+/g, " ").trim()
}

/** True when the normalized `text` contains any normalized term as a substring. */
function hasAny(text, terms) {
  const normalized = normalizeText(text)
  return terms.some((term) => normalized.includes(normalizeText(term)))
}

/** Counts distinct normalized entries of `left` that also occur (normalized) in `right`. */
function intersectionCount(left = [], right = []) {
  const rightSet = new Set(right.map((item) => normalizeText(item)))
  return unique(left.map((item) => normalizeText(item))).filter((item) => rightSet.has(item)).length
}

/**
 * Serializes `value` as JSON with object keys sorted, so structurally equal
 * values always produce the same string regardless of key insertion order.
 *
 * Values JSON cannot represent (undefined, functions, symbols) follow
 * `JSON.stringify` semantics: dropped from objects, written as `null` inside
 * arrays and at the top level. Previously they leaked the bare token
 * `undefined` into the output and made a top-level `undefined` non-hashable.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stableJson(value) {
  if (Array.isArray(value)) {
    // Array.from visits holes too, so sparse arrays serialize like JSON.stringify does.
    return `[${Array.from(value, (item) => stableJson(item)).join(",")}]`
  }
  if (value && typeof value === "object") {
    const members = Object.keys(value)
      .sort()
      .filter((key) => JSON.stringify(value[key]) !== undefined || (value[key] && typeof value[key] === "object"))
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`)
    return `{${members.join(",")}}`
  }
  return JSON.stringify(value) ?? "null"
}

/**
 * SHA-256 hex digest of the key-order-independent serialization of `value`.
 *
 * @param {unknown} value
 * @returns {string} 64 lowercase hex characters
 */
function auditHash(value) {
  return createHash("sha256").update(stableJson(value)).digest("hex")
}

/** Canonical label for a topic set: normalized, de-duplicated, sorted, joined with " + ". */
function topicKey(topics = []) {
  return unique(topics.map(normalizeText)).sort().join(" + ")
}

/**
 * Orders two strings by UTF-16 code units. Used instead of `localeCompare`
 * so tie-breaking (and therefore the audit digest) does not vary with the
 * host's default locale.
 */
function compareCodeUnits(a, b) {
  if (a < b) return -1
  return a > b ? 1 : 0
}

/**
 * Scores how reproducible a project looks from its artifacts and run history.
 *
 * Starts at 100 and subtracts 12 per missing required artifact, 14 per failed
 * attempt, 10 per nondeterministic attempt, 10 for stale dependencies and 16
 * for drifted outputs; each passed attempt adds 4. The result is clamped to
 * 0-100.
 *
 * @param {object} project - reads `id`, `title`, `artifacts`, `dependencyIntegrity`, `outputConsistency`
 * @param {object[]} [attempts] - attempt records; only those whose `projectId` equals `project.id` are used
 * @returns {object} report with `confidence`, `status` ("ready" at >= 75,
 *   "review-needed" at >= 55, otherwise "hold"), `missingArtifacts`,
 *   `priorAttempts` and `remediation`
 */
function scoreReproducibility(project, attempts = []) {
  const artifacts = project.artifacts || {}
  const missingArtifacts = REQUIRED_REPRO_ARTIFACTS.filter((name) => !artifacts[name])
  const projectAttempts = attempts.filter((attempt) => attempt.projectId === project.id)
  const countOutcome = (outcome) => projectAttempts.filter((attempt) => attempt.outcome === outcome).length

  const failedCount = countOutcome("failed")
  const passedCount = countOutcome("passed")
  const nondeterministicCount = countOutcome("nondeterministic")
  const isStale = project.dependencyIntegrity === "stale"
  const hasDrifted = project.outputConsistency === "drifted"

  const confidence = clamp(
    100 -
      missingArtifacts.length * 12 -
      failedCount * 14 -
      nondeterministicCount * 10 -
      (isStale ? 10 : 0) -
      (hasDrifted ? 16 : 0) +
      passedCount * 4,
    0,
    100,
  )

  const remediation = []
  if (missingArtifacts.length) remediation.push(`Add missing artifacts: ${missingArtifacts.join(", ")}`)
  if (failedCount) remediation.push("Attach failure logs and rerun after environment repair")
  if (nondeterministicCount) remediation.push("Stabilize random seeds and execution order")
  if (isStale) remediation.push("Refresh dependency lock and runtime metadata")
  if (hasDrifted) remediation.push("Reconcile reported results against rerun outputs")

  let status = "hold"
  if (confidence >= 75) status = "ready"
  else if (confidence >= 55) status = "review-needed"

  return {
    projectId: project.id,
    title: project.title,
    confidence,
    status,
    missingArtifacts,
    priorAttempts: projectAttempts.map((attempt) => ({
      id: attempt.id,
      outcome: attempt.outcome,
      note: attempt.note,
    })),
    remediation,
  }
}

/**
 * Produces peer-review findings for every claim of a manuscript.
 *
 * Checks, per claim: missing evidence ids (high), missing citation ids
 * (medium), a sample size under 30 with a p-value in (0.045, 0.055] (high),
 * and - for the "clinical-trials" domain only - a missing `protocolOutcomeId`
 * (medium).
 *
 * @param {object} manuscript - reads `id`, `title`, `domain`, `claims`
 * @returns {object} report with `riskScore` (sum of severity weights),
 *   `recommendation` ("hold" at >= 40, "revise" at >= 18, otherwise "ready")
 *   and `findings` ordered from heaviest to lightest severity
 */
function reviewManuscript(manuscript) {
  const findings = []
  for (const claim of manuscript.claims || []) {
    if (!claim.evidenceIds?.length) {
      findings.push({
        severity: "high",
        category: "claims-vs-evidence",
        claimId: claim.id,
        message: "Claim has no supporting evidence artifact.",
        action: "Link the claim to raw data, analysis output, or a reproducibility run before release.",
      })
    }
    if (!claim.citationIds?.length) {
      findings.push({
        severity: "medium",
        category: "citation-gap",
        claimId: claim.id,
        message: "Claim lacks a citation trail.",
        action: "Add a citation or mark the claim as a new result with evidence context.",
      })
    }
    // Truthiness guards skip claims that do not report a sample size or p-value.
    if (claim.sampleSize && claim.sampleSize < 30 && claim.pValue && claim.pValue > 0.045 && claim.pValue <= 0.055) {
      findings.push({
        severity: "high",
        category: "statistical-risk",
        claimId: claim.id,
        message: "Small-sample result is close to the significance threshold.",
        action: "Add a power analysis, confidence interval, or replication plan.",
      })
    }
    if (manuscript.domain === "clinical-trials" && !claim.protocolOutcomeId) {
      findings.push({
        severity: "medium",
        category: "domain-template",
        claimId: claim.id,
        message: "Clinical claim is not linked to a registered protocol outcome.",
        action: "Map the claim to a registered primary or secondary endpoint.",
      })
    }
  }

  const riskScore = findings.reduce((total, finding) => total + REVIEW_FINDING_WEIGHTS[finding.severity], 0)
  // Array.prototype.sort is stable, so equal-severity findings keep claim order.
  findings.sort((a, b) => REVIEW_FINDING_WEIGHTS[b.severity] - REVIEW_FINDING_WEIGHTS[a.severity])

  let recommendation = "ready"
  if (riskScore >= 40) recommendation = "hold"
  else if (riskScore >= 18) recommendation = "revise"

  return {
    manuscriptId: manuscript.id,
    title: manuscript.title,
    domain: manuscript.domain,
    recommendation,
    riskScore,
    findings,
  }
}

/**
 * Ranks corpus findings as replication opportunities for the given lab.
 *
 * Each finding's score is the rounded sum of five capped components:
 * unresolved signal in the limitation text (22 or 4), low replication count
 * (up to 45), corpus activity from citations and active projects (up to 22),
 * overlap between the finding's topics/methods and the lab's
 * interests/capabilities/methods (up to 24), and prior attempts on the
 * finding that did not pass (up to 24).
 *
 * @param {object} [input] - reads `lab`, `reproducibilityAttempts`, `corpusFindings`; a missing input yields []
 * @returns {object[]} opportunities sorted by descending score, ties broken by topic label
 */
function rankResearchOpportunities(input) {
  const source = input ?? {}
  const lab = source.lab || {}
  const interests = lab.interests || []
  const capabilities = [...(lab.capabilities || []), ...(lab.methods || [])]
  const attempts = source.reproducibilityAttempts || []

  return (source.corpusFindings || [])
    .map((finding) => {
      const topics = finding.topics || []
      const methods = finding.methods || []
      // Attempts are linked to findings through `topicId`; anything other than "passed" counts as friction.
      const unpassedAttempts = attempts.filter(
        (attempt) => attempt.topicId === finding.id && attempt.outcome !== "passed",
      )

      const unresolvedScore = hasAny(finding.limitation || "", UNRESOLVED_SIGNAL_TERMS) ? 22 : 4
      const replicationScore = clamp((5 - (finding.replicationCount || 0)) * 9, 0, 45)
      const activityScore = clamp((finding.citations || 0) / 8 + (finding.activeProjects || 0) * 3, 0, 22)
      const fitScore = clamp(intersectionCount([...topics, ...methods], [...interests, ...capabilities]) * 8, 0, 24)
      const failedAttemptScore = clamp(unpassedAttempts.length * 8, 0, 24)
      const total = Math.round(unresolvedScore + replicationScore + activityScore + fitScore + failedAttemptScore)

      let decision = "watch"
      if (total >= 80) decision = "prioritize"
      else if (total >= 55) decision = "review"

      const topic = topicKey(topics)
      return {
        id: finding.id,
        topic,
        title: finding.title,
        score: total,
        decision,
        rationale: [
          replicationScore >= 27 ? "low replication count" : null,
          unresolvedScore >= 22 ? "unresolved or negative-result signal" : null,
          activityScore >= 12 ? "active corpus momentum" : null,
          fitScore >= 8 ? "fits lab capabilities" : null,
          failedAttemptScore ? "previous reproducibility friction" : null,
        ].filter(Boolean),
        requiredArtifacts: ["raw data access", "protocol summary", "environment lock", "replication notebook"],
        firstActions: [
          `Confirm scope for ${topic}`,
          "Request raw data and analysis environment",
          "Run minimal replication on a single endpoint",
        ],
      }
    })
    .sort((a, b) => b.score - a.score || compareCodeUnits(a.topic, b.topic))
}

/**
 * Turns the five best-ranked opportunities into numbered replication plans,
 * each paired with up to three reproducibility reports whose status is not "ready".
 */
function buildReplicationPlans(opportunities, reproducibilityReports) {
  const weakestProjects = reproducibilityReports.filter((report) => report.status !== "ready").slice(0, 3)
  return opportunities.slice(0, 5).map((opportunity, index) => ({
    opportunityId: opportunity.id,
    topic: opportunity.topic,
    priority: index + 1,
    decision: opportunity.decision,
    requiredArtifacts: opportunity.requiredArtifacts,
    firstActions: opportunity.firstActions,
    pairedReproducibilityRisks: weakestProjects.map((project) => ({
      projectId: project.projectId,
      status: project.status,
      topRemediation: project.remediation[0] || "No immediate remediation",
    })),
  }))
}

/**
 * Runs peer review, reproducibility scoring and opportunity ranking over one
 * input corpus and bundles the results with a digest of the whole plan.
 *
 * @param {object} [input] - reads `manuscripts`, `projects`,
 *   `reproducibilityAttempts`, `lab`, `corpusFindings`; a missing input is treated as empty
 * @returns {object} plan with `status`, `peerReviewReports`,
 *   `reproducibilityReports`, `opportunityFeed`, `replicationPlans`,
 *   `requirementCoverage` and `auditHash` (digest of every other field)
 */
function createResearchAssistantPlan(input) {
  const source = input ?? {}
  const attempts = source.reproducibilityAttempts || []
  const peerReviewReports = (source.manuscripts || []).map(reviewManuscript)
  const reproducibilityReports = (source.projects || []).map((project) => scoreReproducibility(project, attempts))
  const opportunityFeed = rankResearchOpportunities(source)
  const replicationPlans = buildReplicationPlans(opportunityFeed, reproducibilityReports)

  // Any single "hold" blocks the plan. There is no "ready" outcome here: the
  // fallback is always "review-needed". The opportunity term cannot trigger a
  // hold with the decisions rankResearchOpportunities emits today
  // (prioritize / review / watch).
  const signals = [
    ...peerReviewReports.map((report) => report.recommendation),
    ...reproducibilityReports.map((report) => report.status),
    ...opportunityFeed.slice(0, 1).map((item) => item.decision),
  ]
  const status = signals.includes("hold") ? "hold" : "review-needed"

  const result = {
    status,
    peerReviewReports,
    reproducibilityReports,
    opportunityFeed,
    replicationPlans,
    requirementCoverage: {
      autoPeerReviewReports: peerReviewReports.length > 0,
      claimsVsEvidenceAlignment: peerReviewReports.some((report) =>
        report.findings.some((finding) => finding.category === "claims-vs-evidence"),
      ),
      reproducibilityConfidence: reproducibilityReports.every((report) => Number.isFinite(report.confidence)),
      previousAttemptLinks: reproducibilityReports.some((report) => report.priorAttempts.length > 0),
      researchGapFinder: opportunityFeed.length > 0,
      personalizedOpportunityFeed: replicationPlans.length > 0,
    },
  }

  return {
    ...result,
    auditHash: auditHash(result),
  }
}

export {
  auditHash,
  createResearchAssistantPlan,
  rankResearchOpportunities,
  reviewManuscript,
  scoreReproducibility,
}
createResearchAssistantPlan, + rankResearchOpportunities, + reviewManuscript, + scoreReproducibility, +} from "../src/planner.js" + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const sample = JSON.parse(fs.readFileSync(path.join(__dirname, "..", "sample", "corpus.json"), "utf8")) + +const opportunities = rankResearchOpportunities(sample) +assert.equal(opportunities[0].topic, "alzheimer + crispr + single-cell") +assert.equal(opportunities[0].decision, "prioritize") +assert.ok(opportunities[0].rationale.includes("low replication count")) +assert.ok(opportunities[0].rationale.includes("fits lab capabilities")) + +const review = reviewManuscript(sample.manuscripts[0]) +assert.equal(review.recommendation, "hold") +assert.equal(review.findings[0].category, "claims-vs-evidence") +assert.equal(review.findings[0].message, "Claim has no supporting evidence artifact.") +assert.ok(review.findings.some((finding) => finding.category === "statistical-risk")) + +const weakRepro = scoreReproducibility(sample.projects[0], sample.reproducibilityAttempts) +assert.equal(weakRepro.status, "hold") +assert.ok(weakRepro.missingArtifacts.includes("environmentLock")) +assert.ok(weakRepro.priorAttempts.some((attempt) => attempt.outcome === "failed")) + +const strongRepro = scoreReproducibility(sample.projects[1], sample.reproducibilityAttempts) +assert.equal(strongRepro.status, "ready") +assert.equal(strongRepro.missingArtifacts.length, 0) + +const plan = createResearchAssistantPlan(sample) +assert.equal(plan.status, "hold") +assert.equal(plan.requirementCoverage.autoPeerReviewReports, true) +assert.equal(plan.requirementCoverage.claimsVsEvidenceAlignment, true) +assert.equal(plan.requirementCoverage.reproducibilityConfidence, true) +assert.equal(plan.requirementCoverage.previousAttemptLinks, true) +assert.equal(plan.requirementCoverage.researchGapFinder, true) +assert.equal(plan.requirementCoverage.personalizedOpportunityFeed, true) +assert.match(plan.auditHash, 
/^[a-f0-9]{64}$/) +assert.equal(auditHash({ b: 2, a: 1 }), auditHash({ a: 1, b: 2 })) + +console.log("research-gap-replication-planner tests passed")