diff --git a/review-calibration-bench/README.md b/review-calibration-bench/README.md
new file mode 100644
index 0000000..95b0d70
--- /dev/null
+++ b/review-calibration-bench/README.md
@@ -0,0 +1,35 @@
+# Review Calibration Bench
+
+Dependency-free peer-review calibration and coaching signals for the Community & Reputation System bounty.
+
+This module focuses on the quality gate before peer-review activity increases a researcher's public reputation. It compares structured review rubric scores against consensus panels, identifies leniency/severity drift, flags reproducibility blind spots, and emits transparent trust-adjustment and coaching actions.
+
+## Run
+
+```bash
+npm run check
+npm test
+npm run demo
+```
+
+## Demo Output (final `Digest:` line omitted)
+
+```text
+Program: community-review-q2-2026
+Status: coaching-needed
+Reviewers calibrated: 3
+Coaching actions: 7
+Quarantined reviewers: 1
+Top reviewer: ada (trusted-reviewer)
+Top action: Review practice set: rigor scores are higher than consensus.
+```
+
+## Files
+
+- `src/review-calibration.js` builds calibration reports, leaderboard scores, coaching actions, trust adjustments, dashboard summary, audit trail, and stable digest.
+- `data/sample-calibration.json` contains synthetic structured reviews, consensus panels, reviewer modes, and contributor signals.
+- `test/review-calibration.test.js` verifies calibration scores, drift classification, blind-spot detection, quarantine behavior, leaderboard ordering, and digest stability.
+- `docs/requirement-map.md` maps this slice to issue #15.
+- `docs/demo.svg` and `docs/demo.mp4` provide a short visual artifact for review.
+
+No real private review content, identity secret, or external service credential is used.
diff --git a/review-calibration-bench/data/sample-calibration.json b/review-calibration-bench/data/sample-calibration.json
new file mode 100644
index 0000000..915de21
--- /dev/null
+++ b/review-calibration-bench/data/sample-calibration.json
@@ -0,0 +1,171 @@
+{
+  "programId": "community-review-q2-2026",
+  "asOf": "2026-05-15T00:00:00Z",
+  "rubric": {
+    "criteria": [
+      "clarity",
+      "rigor",
+      "novelty",
+      "reproducibility"
+    ],
+    "weights": {
+      "clarity": 0.2,
+      "rigor": 0.3,
+      "novelty": 0.2,
+      "reproducibility": 0.3
+    },
+    "calibrationThreshold": 0.7
+  },
+  "projects": [
+    {
+      "id": "project-organoid-benchmark",
+      "domain": "biology",
+      "consensus": {
+        "clarity": 4,
+        "rigor": 4,
+        "novelty": 3,
+        "reproducibility": 2
+      }
+    },
+    {
+      "id": "project-graph-protocol",
+      "domain": "computational-science",
+      "consensus": {
+        "clarity": 3,
+        "rigor": 5,
+        "novelty": 4,
+        "reproducibility": 4
+      }
+    },
+    {
+      "id": "project-open-dataset-release",
+      "domain": "data-science",
+      "consensus": {
+        "clarity": 5,
+        "rigor": 3,
+        "novelty": 2,
+        "reproducibility": 5
+      }
+    }
+  ],
+  "reviews": [
+    {
+      "id": "review-ada-1",
+      "reviewerId": "ada",
+      "projectId": "project-organoid-benchmark",
+      "mode": "public",
+      "scores": {
+        "clarity": 4,
+        "rigor": 4,
+        "novelty": 3,
+        "reproducibility": 3
+      },
+      "comments": [
+        "Strong protocol trace, but execution container still needs one replication note."
+      ]
+    },
+    {
+      "id": "review-ada-2",
+      "reviewerId": "ada",
+      "projectId": "project-graph-protocol",
+      "mode": "public",
+      "scores": {
+        "clarity": 3,
+        "rigor": 4,
+        "novelty": 4,
+        "reproducibility": 4
+      },
+      "comments": [
+        "Good evidence paths and adequate reproducibility metadata."
+      ]
+    },
+    {
+      "id": "review-bohr-1",
+      "reviewerId": "bohr",
+      "projectId": "project-organoid-benchmark",
+      "mode": "anonymous",
+      "scores": {
+        "clarity": 5,
+        "rigor": 5,
+        "novelty": 5,
+        "reproducibility": 5
+      },
+      "comments": [
+        "Excellent across all dimensions."
+      ]
+    },
+    {
+      "id": "review-bohr-2",
+      "reviewerId": "bohr",
+      "projectId": "project-open-dataset-release",
+      "mode": "anonymous",
+      "scores": {
+        "clarity": 5,
+        "rigor": 5,
+        "novelty": 4,
+        "reproducibility": 5
+      },
+      "comments": [
+        "Dataset appears ready for reuse."
+      ]
+    },
+    {
+      "id": "review-curie-1",
+      "reviewerId": "curie",
+      "projectId": "project-graph-protocol",
+      "mode": "double-blind",
+      "scores": {
+        "clarity": 2,
+        "rigor": 4,
+        "novelty": 3,
+        "reproducibility": 2
+      },
+      "comments": [
+        "Promising, but missing a complete notebook execution trail."
+      ]
+    },
+    {
+      "id": "review-curie-2",
+      "reviewerId": "curie",
+      "projectId": "project-open-dataset-release",
+      "mode": "double-blind",
+      "scores": {
+        "clarity": 4,
+        "rigor": 3,
+        "novelty": 2,
+        "reproducibility": 3
+      },
+      "comments": [
+        "Good data dictionary, but independent rerun evidence is incomplete."
+      ]
+    }
+  ],
+  "contributors": [
+    {
+      "reviewerId": "ada",
+      "roles": [
+        "review",
+        "validation"
+      ],
+      "completedBounties": 2,
+      "endorsements": 5
+    },
+    {
+      "reviewerId": "bohr",
+      "roles": [
+        "review"
+      ],
+      "completedBounties": 4,
+      "endorsements": 8
+    },
+    {
+      "reviewerId": "curie",
+      "roles": [
+        "review",
+        "reproducibility"
+      ],
+      "completedBounties": 1,
+      "endorsements": 3
+    }
+  ]
+}
diff --git a/review-calibration-bench/docs/demo.mp4 b/review-calibration-bench/docs/demo.mp4
new file mode 100644
index 0000000..e9f885b
Binary files /dev/null and b/review-calibration-bench/docs/demo.mp4 differ
diff --git a/review-calibration-bench/docs/demo.svg b/review-calibration-bench/docs/demo.svg
new file mode 100644
index 0000000..9ac20e1
--- /dev/null
+++ b/review-calibration-bench/docs/demo.svg
@@ -0,0 +1,29 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="1280" height="720" viewBox="0 0 1280 720" role="img" aria-labelledby="title desc">
+  <title id="title">Review calibration bench demo</title>
+  <desc id="desc">Dashboard-style summary of peer-review calibration, coaching queue, reviewer quarantine, and trusted reviewer tier.</desc>
+  <rect width="1280" height="720" fill="#101827"/>
+  <rect x="56" y="48" width="1168" height="624" rx="18" fill="#0f172a" stroke="#334155" stroke-width="2"/>
+  <text x="96" y="118" fill="#f8fafc" font-family="Arial, sans-serif" font-size="42" font-weight="700">Review Calibration Bench</text>
+  <text x="96" y="158" fill="#94a3b8" font-family="Arial, sans-serif" font-size="24">Structured peer-review quality signals before reputation credit</text>
+  <g transform="translate(96 220)">
+    <rect width="320" height="140" rx="12" fill="#111827" stroke="#1e293b"/>
+    <text x="24" y="48" fill="#38bdf8" font-family="Arial, sans-serif" font-size="22" font-weight="700">Reviewers</text>
+    <text x="24" y="98" fill="#f8fafc" font-family="Arial, sans-serif" font-size="40" font-weight="700">3 calibrated</text>
+  </g>
+  <g transform="translate(480 220)">
+    <rect width="320" height="140" rx="12" fill="#111827" stroke="#1e293b"/>
+    <text x="24" y="48" fill="#f59e0b" font-family="Arial, sans-serif" font-size="22" font-weight="700">Coaching</text>
+    <text x="24" y="98" fill="#f8fafc" font-family="Arial, sans-serif" font-size="40" font-weight="700">7 actions</text>
+  </g>
+  <g transform="translate(864 220)">
+    <rect width="320" height="140" rx="12" fill="#111827" stroke="#1e293b"/>
+    <text x="24" y="48" fill="#ef4444" font-family="Arial, sans-serif" font-size="22" font-weight="700">Quarantine</text>
+    <text x="24" y="98" fill="#f8fafc" font-family="Arial, sans-serif" font-size="40" font-weight="700">1 reviewer</text>
+  </g>
+  <g transform="translate(96 430)">
+    <rect width="1088" height="168" rx="12" fill="#111827" stroke="#1e293b"/>
+    <text x="28" y="48" fill="#e5e7eb" font-family="Arial, sans-serif" font-size="26" font-weight="700">Top action</text>
+    <text x="28" y="96" fill="#f8fafc" font-family="Arial, sans-serif" font-size="34" font-weight="700">Review practice set: rigor scores are higher than consensus.</text>
+    <text x="28" y="136" fill="#94a3b8" font-family="Arial, sans-serif" font-size="22">Transparent trust adjustment · reproducibility blind spot detected</text>
+  </g>
+</svg>
diff --git a/review-calibration-bench/docs/requirement-map.md b/review-calibration-bench/docs/requirement-map.md
new file mode 100644
index 0000000..1feda74
--- /dev/null
+++ b/review-calibration-bench/docs/requirement-map.md
@@ -0,0 +1,30 @@
+# Requirement Map
+
+This module contributes a focused structured-review quality layer for issue #15, "Community & Reputation System."
+
+| Issue area | Covered by this module |
+| --- | --- |
+| Peer reviews and comments | Scores structured reviews against discipline-neutral rubric criteria: clarity, rigor, novelty, reproducibility |
+| Optional scoring quality | Compares reviewer scores against consensus panels and identifies systematic leniency, severity, and inconsistency |
+| Review history on profiles | Builds reviewer calibration reports, modes used, review counts, and audit-trail events |
+| Contributor credits | Includes CRediT-style reviewer roles and completed bounty / endorsement contribution signals |
+| Reputation scoring | Emits transparent trust adjustments, tiers, and quarantine decisions when calibration is weak |
+| Incentive tiers | Produces trusted-reviewer, calibrated-reviewer, needs-light-coaching, and mentor-required tiers |
+
+## Distinctness
+
+Existing #15 submissions cover broad community reputation ledgers, CRediT graphs, badges, leaderboards, abuse detection, and appeals. This module focuses on the quality gate before peer-review activity increases reputation:
+
+- Does the reviewer score close to consensus?
+- Is a reviewer systematically too lenient or too severe?
+- Does a reviewer overlook reproducibility?
+- Should the review count toward reputation immediately, or enter a coaching queue first?
+
+## Verification
+
+```bash
+cd review-calibration-bench
+npm run check
+npm test
+npm run demo
+```
diff --git a/review-calibration-bench/package.json b/review-calibration-bench/package.json
new file mode 100644
index 0000000..286aa01
--- /dev/null
+++ b/review-calibration-bench/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "review-calibration-bench",
+  "version": "1.0.0",
+  "private": true,
+  "description": "Dependency-free peer-review calibration and coaching signals for scientific reputation systems.",
+  "scripts": {
+    "check": "node --check src/review-calibration.js && node --check scripts/demo.js && node --check test/review-calibration.test.js",
+    "demo": "node scripts/demo.js",
+    "test": "node test/review-calibration.test.js"
+  },
+  "keywords": [
+    "peer-review",
+    "reputation",
+    "calibration",
+    "review-quality"
+  ],
+  "license": "MIT"
+}
diff --git a/review-calibration-bench/scripts/demo.js b/review-calibration-bench/scripts/demo.js
new file mode 100644
index 0000000..30a9312
--- /dev/null
+++ b/review-calibration-bench/scripts/demo.js
@@ -0,0 +1,16 @@
+const fs = require("node:fs");
+const path = require("node:path");
+const { buildCalibrationBench } = require("../src/review-calibration");
+
+const samplePath = path.join(__dirname, "..", "data", "sample-calibration.json");
+const input = JSON.parse(fs.readFileSync(samplePath, "utf8"));
+const report = buildCalibrationBench(input);
+
+console.log(`Program: ${report.programId}`);
+console.log(`Status: ${report.dashboard.status}`);
+console.log(`Reviewers calibrated: ${report.dashboard.reviewerCount}`);
+console.log(`Coaching actions: ${report.dashboard.coachingActionCount}`);
+console.log(`Quarantined reviewers: ${report.dashboard.quarantinedReviewerCount}`);
+console.log(`Top reviewer: ${report.leaderboard[0].reviewerId} (${report.leaderboard[0].tier})`);
+console.log(`Top action: ${report.dashboard.topAction}`);
+console.log(`Digest: ${report.digest}`);
diff --git a/review-calibration-bench/src/review-calibration.js b/review-calibration-bench/src/review-calibration.js
new file mode 100644
index 0000000..d09a9d7
--- /dev/null
+++ b/review-calibration-bench/src/review-calibration.js
@@ -0,0 +1,274 @@
+const crypto = require("node:crypto");
+
+function buildCalibrationBench(input) {
+  const validation = validateCalibrationInput(input);
+  const projectIndex = new Map((input.projects || []).map((project) => [project.id, project]));
+  const reviewerGroups = groupReviewsByReviewer(input.reviews || []);
+  const reviewerReports = Array.from(reviewerGroups.entries()).map(([reviewerId, reviews]) =>
+    evaluateReviewer(reviewerId, reviews, projectIndex, input.rubric || {}, input.contributors || [])
+  );
+  const leaderboard = buildLeaderboard(reviewerReports);
+  const coachingQueue = buildCoachingQueue(reviewerReports);
+  const trustAdjustments = buildTrustAdjustments(reviewerReports, input.rubric || {});
+  const dashboard = buildDashboard(input, reviewerReports, coachingQueue, trustAdjustments);
+
+  const report = {
+    programId: input.programId,
+    asOf: input.asOf,
+    validation,
+    reviewerReports,
+    leaderboard,
+    coachingQueue,
+    trustAdjustments,
+    dashboard,
+    auditTrail: buildAuditTrail(input, reviewerReports, coachingQueue, trustAdjustments)
+  };
+
+  report.digest = stableDigest(report);
+  return report;
+}
+
+function validateCalibrationInput(input) {
+  const required = [
+    ["programId", input.programId],
+    ["rubric.criteria", input.rubric && (input.rubric.criteria || []).length],
+    ["projects", (input.projects || []).length],
+    ["reviews", (input.reviews || []).length]
+  ];
+  const missing = required.filter(([, value]) => !value).map(([field]) => field);
+  const reviewIssues = (input.reviews || []).flatMap((review) => {
+    const issues = [];
+    if (!review.id) issues.push("review.id");
+    if (!review.reviewerId) issues.push(      `${review.id || "unknown"}.reviewerId`
+    );
+    if (!review.projectId) issues.push(`${review.id || "unknown"}.projectId`);
+    if (!review.scores) issues.push(`${review.id || "unknown"}.scores`);
+    return issues;
+  });
+
+  return {
+    status: missing.length === 0 && reviewIssues.length === 0 ? "passed" : "incomplete",
+    score: Math.max(0, 100 - missing.length * 15 - reviewIssues.length * 5),
+    missing,
+    reviewIssues
+  };
+}
+
+function evaluateReviewer(reviewerId, reviews, projectIndex, rubric, contributors) {
+  const criteria = rubric.criteria || [];
+  const deltas = reviews.flatMap((review) => {
+    const project = projectIndex.get(review.projectId);
+    if (!project) return [];
+    return criteria.map((criterion) => ({
+      reviewId: review.id,
+      projectId: review.projectId,
+      criterion,
+      score: Number(review.scores[criterion] || 0),
+      consensus: Number(project.consensus[criterion] || 0),
+      delta: Number(review.scores[criterion] || 0) - Number(project.consensus[criterion] || 0)
+    }));
+  });
+  const byCriterion = criteria.map((criterion) => {
+    const criterionDeltas = deltas.filter((item) => item.criterion === criterion);
+    const averageDelta = average(criterionDeltas.map((item) => item.delta));
+    const meanAbsoluteError = average(criterionDeltas.map((item) => Math.abs(item.delta)));
+    return {
+      criterion,
+      averageDelta: round(averageDelta),
+      meanAbsoluteError: round(meanAbsoluteError),
+      drift: classifyDrift(averageDelta, meanAbsoluteError)
+    };
+  });
+  const weightedError = weightedMean(byCriterion, rubric.weights || {});
+  const calibrationScore = round(Math.max(0, 1 - weightedError / 4));
+  const reproducibilityBlindSpot = byCriterion.find((item) => item.criterion === "reproducibility" && item.averageDelta > 0.75);
+  const contributor = contributors.find((item) => item.reviewerId === reviewerId) || {};
+  const reputationSignal = buildReputationSignal(calibrationScore, contributor, reproducibilityBlindSpot);
+
+  return {
+    reviewerId,
+    reviewCount: reviews.length,
+    modes: Array.from(new Set(reviews.map((review) => review.mode))),
+    byCriterion,
+    weightedError: round(weightedError),
+    calibrationScore,
+    reproducibilityBlindSpot: Boolean(reproducibilityBlindSpot),
+    reputationSignal,
+    coachingActions: buildReviewerCoachingActions(reviewerId, byCriterion, calibrationScore, reproducibilityBlindSpot)
+  };
+}
+
+function buildReputationSignal(calibrationScore, contributor, reproducibilityBlindSpot) {
+  const contributionBonus = Math.min(0.12, (contributor.completedBounties || 0) * 0.02 + (contributor.endorsements || 0) * 0.005);
+  const blindSpotPenalty = reproducibilityBlindSpot ? 0.08 : 0;
+  const calibratedScore = round(Math.max(0, Math.min(1, calibrationScore + contributionBonus - blindSpotPenalty)));
+  return {
+    calibratedScore,
+    contributionBonus: round(contributionBonus),
+    blindSpotPenalty,
+    roles: contributor.roles || [],
+    tier: chooseTier(calibratedScore)
+  };
+}
+
+function buildReviewerCoachingActions(reviewerId, byCriterion, calibrationScore, reproducibilityBlindSpot) {
+  const actions = [];
+  for (const item of byCriterion) {
+    if (item.drift === "lenient") {
+      actions.push({
+        reviewerId,
+        type: "leniency-calibration",
+        criterion: item.criterion,
+        message: `Review practice set: ${item.criterion} scores are higher than consensus.`
+      });
+    }
+    if (item.drift === "severe") {
+      actions.push({
+        reviewerId,
+        type: "severity-calibration",
+        criterion: item.criterion,
+        message: `Review practice set: ${item.criterion} scores are lower than consensus.`
+      });
+    }
+  }
+  if (reproducibilityBlindSpot) {
+    actions.push({
+      reviewerId,
+      type: "reproducibility-blind-spot",
+      criterion: "reproducibility",
+      message: "Add reproducibility evidence checklist before assigning Trusted Reviewer status."
+    });
+  }
+  if (calibrationScore < 0.7) {
+    actions.push({
+      reviewerId,
+      type: "mentor-review",
+      criterion: "overall",
+      message: "Pair with a calibrated reviewer for the next structured peer review."
+    });
+  }
+  return actions;
+}
+
+function buildLeaderboard(reviewerReports) {
+  return reviewerReports
+    .map((report) => ({
+      reviewerId: report.reviewerId,
+      calibrationScore: report.calibrationScore,
+      trustScore: report.reputationSignal.calibratedScore,
+      tier: report.reputationSignal.tier,
+      reviewCount: report.reviewCount
+    }))
+    .sort((a, b) => b.trustScore - a.trustScore || b.calibrationScore - a.calibrationScore);
+}
+
+function buildCoachingQueue(reviewerReports) {
+  return reviewerReports.flatMap((report) => report.coachingActions);
+}
+
+function buildTrustAdjustments(reviewerReports, rubric) {
+  const threshold = Number(rubric.calibrationThreshold || 0.65);
+  return reviewerReports.map((report) => ({
+    reviewerId: report.reviewerId,
+    status: report.calibrationScore >= threshold ? "eligible" : "quarantine-until-coached",
+    calibrationScore: report.calibrationScore,
+    trustScore: report.reputationSignal.calibratedScore,
+    tier: report.reputationSignal.tier,
+    reason: report.calibrationScore >= threshold
+      ? "Structured review scores are close enough to consensus for reputation credit."
+      : "Review scores require calibration before they increase public reputation."
+  }));
+}
+
+function buildDashboard(input, reviewerReports, coachingQueue, trustAdjustments) {
+  const quarantined = trustAdjustments.filter((item) => item.status === "quarantine-until-coached");
+  return {
+    title: `Review calibration ${input.programId}`,
+    status: coachingQueue.length === 0 ? "ready-for-reputation-credit" : "coaching-needed",
+    reviewerCount: reviewerReports.length,
+    coachingActionCount: coachingQueue.length,
+    quarantinedReviewerCount: quarantined.length,
+    topAction: coachingQueue[0] ? coachingQueue[0].message : "Publish calibrated reputation scores."
+  };
+}
+
+function buildAuditTrail(input, reviewerReports, coachingQueue, trustAdjustments) {
+  return [
+    {
+      type: "calibration-built",
+      programId: input.programId,
+      reviewerCount: reviewerReports.length,
+      coachingActionCount: coachingQueue.length
+    },
+    ...reviewerReports.map((report) => ({
+      type: "reviewer-scored",
+      reviewerId: report.reviewerId,
+      calibrationScore: report.calibrationScore,
+      tier: report.reputationSignal.tier
+    })),
+    ...trustAdjustments.map((adjustment) => ({
+      type: "trust-adjustment",
+      reviewerId: adjustment.reviewerId,
+      status: adjustment.status,
+      trustScore: adjustment.trustScore
+    }))
+  ];
+}
+
+function groupReviewsByReviewer(reviews) {
+  const groups = new Map();
+  for (const review of reviews) {
+    const current = groups.get(review.reviewerId) || [];
+    current.push(review);
+    groups.set(review.reviewerId, current);
+  }
+  return groups;
+}
+
+function weightedMean(byCriterion, weights) {
+  const totalWeight = byCriterion.reduce((sum, item) => sum + Number(weights[item.criterion] || 1), 0);
+  return byCriterion.reduce((sum, item) => sum + item.meanAbsoluteError * Number(weights[item.criterion] || 1), 0) / totalWeight;
+}
+
+function classifyDrift(averageDelta, meanAbsoluteError) {
+  if (averageDelta >= 0.75 && meanAbsoluteError >= 0.75) return "lenient";
+  if (averageDelta <= -0.75 && meanAbsoluteError >= 0.75) return "severe";
+  if (meanAbsoluteError >= 1.25) return "inconsistent";
+  return "calibrated";
+}
+
+function chooseTier(score) {
+  if (score >= 0.88) return "trusted-reviewer";
+  if (score >= 0.75) return "calibrated-reviewer";
+  if (score >= 0.6) return "needs-light-coaching";
+  return "mentor-required";
+}
+
+function average(values) {
+  if (values.length === 0) return 0;
+  return values.reduce((sum, value) => sum + value, 0) / values.length;
+}
+
+function round(value) {
+  return Math.round((value + Number.EPSILON) * 1000) / 1000;
+}
+
+function stableDigest(value) {
+  return crypto.createHash("sha256").update(stableStringify(value)).digest("hex");
+}
+
+function stableStringify(value) {
+  if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`;
+  if (value && typeof value === "object") {
+    return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`).join(",")} }`.replace(' }', '}');
+  }
+  return JSON.stringify(value);
+}
+
+module.exports = { // public API; the remaining helpers in this file stay module-private
+  buildCalibrationBench,
+  validateCalibrationInput,
+  evaluateReviewer,
+  classifyDrift,
+  stableDigest
+};
diff --git a/review-calibration-bench/test/review-calibration.test.js b/review-calibration-bench/test/review-calibration.test.js
new file mode 100644
index 0000000..8523310
--- /dev/null
+++ b/review-calibration-bench/test/review-calibration.test.js
@@ -0,0 +1,49 @@
+const assert = require("node:assert/strict");
+const fs = require("node:fs");
+const path = require("node:path");
+const {
+  buildCalibrationBench,
+  classifyDrift,
+  validateCalibrationInput
+} = require("../src/review-calibration");
+
+const samplePath = path.join(__dirname, "..", "data", "sample-calibration.json");
+const input = JSON.parse(fs.readFileSync(samplePath, "utf8"));
+const report = buildCalibrationBench(input);
+// Program-level expectations for the bundled sample data.
+assert.equal(report.validation.status, "passed");
+assert.equal(report.reviewerReports.length, 3);
+assert.equal(report.dashboard.status, "coaching-needed");
+assert.equal(report.dashboard.reviewerCount, 3);
+assert.equal(report.dashboard.quarantinedReviewerCount, 1);
+assert.ok(report.coachingQueue.some((action) => action.type === "reproducibility-blind-spot"));
+// ada: expected to reach the trusted tier with no coaching actions.
+const ada = report.reviewerReports.find((item) => item.reviewerId === "ada");
+assert.equal(ada.calibrationScore, 0.925);
+assert.equal(ada.reputationSignal.tier, "trusted-reviewer");
+assert.equal(ada.coachingActions.length, 0);
+// bohr: expected leniency coaching plus a reproducibility blind spot.
+const bohr = report.reviewerReports.find((item) => item.reviewerId === "bohr");
+assert.equal(bohr.reproducibilityBlindSpot, true);
+assert.equal(bohr.reputationSignal.tier, "needs-light-coaching");
+assert.ok(bohr.coachingActions.some((action) => action.type === "leniency-calibration"));
+assert.ok(bohr.coachingActions.some((action) => action.type === "reproducibility-blind-spot"));
+// curie: expected severity coaching while remaining in the calibrated tier.
+const curie = report.reviewerReports.find((item) => item.reviewerId === "curie");
+assert.equal(curie.reputationSignal.tier, "calibrated-reviewer");
+assert.ok(curie.coachingActions.some((action) => action.type === "severity-calibration"));
+// Ranking, quarantine decision, and digest repeatability across two builds of the same input.
+assert.equal(report.leaderboard[0].reviewerId, "ada");
+assert.equal(report.trustAdjustments.find((item) => item.reviewerId === "bohr").status, "quarantine-until-coached");
+assert.equal(report.digest, buildCalibrationBench(input).digest);
+// Drift classification for each of the four labels.
+assert.equal(classifyDrift(1, 1), "lenient");
+assert.equal(classifyDrift(-1, 1), "severe");
+assert.equal(classifyDrift(0.2, 1.5), "inconsistent");
+assert.equal(classifyDrift(0.2, 0.3), "calibrated");
+// An input with only a programId is reported as incomplete and lists the missing rubric criteria.
+const incomplete = validateCalibrationInput({ programId: "draft" });
+assert.equal(incomplete.status, "incomplete");
+assert.ok(incomplete.missing.includes("rubric.criteria"));
+
+console.log("review-calibration-bench tests passed");