SCIBASE-AI · zhengjynicolas · May 15, 2026
diff --git a/artifact-quarantine-rerun-governance/README.md b/artifact-quarantine-rerun-governance/README.md
@@ -0,0 +1,27 @@
+# Artifact Quarantine Rerun Governance
+
+This module implements a focused slice of the Scientific/Engineering Data & Code Hosting requirement, covering artifact safety and reproducible-compute readiness. It does not touch live storage or execute containers; instead, it produces deterministic reviewer evidence from synthetic project data.
+
+## What it checks
+
+- Scientific artifact type and preview safety for datasets, notebooks, figures, models, and environment specs
+- Sensitive data quarantine for human-subjects data, raw identifiers, secrets, and missing de-identification evidence
+- Metadata completeness for DOI/DataCite-style discovery and FAIR reuse
+- License and access gates before public export
+- Pinned executable environment readiness for Docker, Conda, and notebook reruns
+- Compute trigger eligibility, including quarantined inputs, stale environments, schedule approval, and budget limits
+- Retention and export actions plus a deterministic audit digest
+
+## Run locally
+
+```bash
+npm run check
+npm test
+npm run demo
+```
+
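+The sample bundle intentionally blocks one patient-level dataset and one scheduled rerun, while allowing a public aggregate table and a locked Python analysis run. It also contains a `config/.env` artifact with detected secrets, which serves as an input to the blocked rerun and is expected to be held back by the secrets check.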
+
+## Demo video
+
+The reviewer demo is at `docs/artifact-quarantine-rerun-demo.mp4`. It shows a terminal-style walkthrough of `npm run check`, `npm test`, and `npm run demo` using the current module output.
diff --git a/artifact-quarantine-rerun-governance/demo.js b/artifact-quarantine-rerun-governance/demo.js
@@ -0,0 +1,23 @@
+"use strict";
+
+const sampleBundle = require("./sample-data.json");
+const { evaluateHostingGovernance } = require("./src/artifact-quarantine-rerun-governance");
+// Reviewer demo: evaluate the bundled sample data once, then print a condensed JSON summary via console.log.
+const report = evaluateHostingGovernance(sampleBundle);
+// Only headline fields are printed; artifact and trigger lists are reduced to IDs grouped by their decision.
+console.log(JSON.stringify({
+  projectId: report.projectId,
+  decision: report.decision,
+  fairScore: report.fairScore,
+  quarantinedArtifacts: report.artifactReports
+    .filter((artifact) => artifact.decision === "quarantine") // keep only artifacts whose decision is "quarantine"
+    .map((artifact) => artifact.artifactId),
+  readyComputeTriggers: report.computePlans
+    .filter((trigger) => trigger.decision === "ready") // compute plans whose decision is "ready"
+    .map((trigger) => trigger.triggerId),
+  blockedComputeTriggers: report.computePlans
+    .filter((trigger) => trigger.decision === "blocked") // compute plans whose decision is "blocked"
+    .map((trigger) => trigger.triggerId),
+  retentionActions: report.retentionActions, // passed through unchanged from the report
+  auditDigest: report.auditDigest,
+}, null, 2)); // pretty-print with a 2-space indent
diff --git a/artifact-quarantine-rerun-governance/docs/artifact-quarantine-rerun-demo.mp4 b/artifact-quarantine-rerun-governance/docs/artifact-quarantine-rerun-demo.mp4
diff --git a/artifact-quarantine-rerun-governance/docs/requirement-map.md b/artifact-quarantine-rerun-governance/docs/requirement-map.md
@@ -0,0 +1,15 @@
+# Requirement Map
+
+Issue: SCIBASE-AI/SCIBASE.AI#14 - Scientific/Engineering Data & Code Hosting
+
+| Requirement | Implementation |
+| --- | --- |
+| Support major scientific file types | `classifyArtifact` maps CSV, TSV, XLSX, JSON, Parquet, notebooks, code, figures, model files, and environment specs into stable hosting categories. |
+| Metadata-aware previews | `buildPreviewDescriptor` decides safe preview mode, disables previews for secrets and oversized files, and annotates sensitive previews. |
+| Upload versioning and diffing | Artifact inputs include `version`, `previousHash`, and `diffSummary`; the audit report surfaces version and retention decisions. |
+| JSON-LD, DataCite, schema.org metadata | `evaluateMetadata` checks DOI/identifier, title, creators, publisher, year, resource type, license, keywords, and JSON-LD/schema.org flags. |
+| FAIR compliance | `calculateFairScore` converts findings into a deterministic FAIR readiness score and decision. |
+| Access control and persistent links | `evaluateArtifact` blocks public export when licenses, access class, or sensitivity evidence are missing. |
+| Executable environments | `evaluateEnvironment` validates pinned Docker digests, Conda lock hashes, notebook kernels, network policy, and storage mounts. |
+| Sandboxed execution and compute triggers | `evaluateComputeTrigger` checks input quarantine, missing inputs, unpinned environments, budget, approval, and scheduled rerun policy. |
+| Reviewer-ready evidence | `evaluateHostingGovernance` returns quarantine decisions, preview descriptors, compute plans, retention/export actions, and a SHA-256 audit digest. |
diff --git a/artifact-quarantine-rerun-governance/package.json b/artifact-quarantine-rerun-governance/package.json
@@ -0,0 +1,12 @@
+{
+  "name": "artifact-quarantine-rerun-governance",
+  "version": "1.0.0",
+  "description": "Sensitive artifact quarantine and executable rerun governance for scientific data/code hosting.",
+  "main": "src/artifact-quarantine-rerun-governance.js",
+  "scripts": {
+    "check": "node --check src/artifact-quarantine-rerun-governance.js && node --check test.js && node --check demo.js",
+    "test": "node test.js",
+    "demo": "node demo.js"
+  },
+  "license": "MIT"
+}
diff --git a/artifact-quarantine-rerun-governance/sample-data.json b/artifact-quarantine-rerun-governance/sample-data.json
@@ -0,0 +1,154 @@
+{
+  "now": "2026-05-15T12:00:00.000Z",
+  "project": {
+    "id": "proj-neuro-metabolomics",
+    "title": "Neuro Metabolomics Replication Pack"
+  },
+  "policy": {
+    "maxArtifactBytes": 10737418240,
+    "maxInlinePreviewBytes": 10485760,
+    "maxRunCostCents": 5000
+  },
+  "artifacts": [
+    {
+      "id": "art-aggregate-csv",
+      "path": "datasets/aggregate-biomarkers.csv",
+      "hash": "sha256:7b7f5e4f55a1c2b9",
+      "previousHash": "sha256:18bbf31a",
+      "version": "v2",
+      "sizeBytes": 842190,
+      "access": "public",
+      "diffSummary": {
+        "rowsAdded": 18,
+        "rowsRemoved": 2,
+        "columnsChanged": ["batch_id"]
+      },
+      "metadata": {
+        "title": "Aggregate biomarker table",
+        "creators": ["Nguyen Lab"],
+        "publisher": "SCIBASE Demo",
+        "publicationYear": 2026,
+        "resourceType": "Dataset",
+        "license": "CC-BY-4.0",
+        "keywords": ["metabolomics", "replication"],
+        "doi": "10.5555/scibase.aggregate.2",
+        "jsonLd": true,
+        "schemaOrg": true
+      },
+      "scan": {
+        "secretsDetected": 0
+      }
+    },
+    {
+      "id": "art-patient-tsv",
+      "path": "restricted/raw-patient-measurements.tsv",
+      "hash": "sha256:8e9f951bb2df",
+      "version": "v1",
+      "sizeBytes": 2351440,
+      "access": "restricted",
+      "sensitivity": ["human-subjects", "phi"],
+      "deidentificationEvidence": "pending",
+      "metadata": {
+        "title": "Patient-level raw measurements",
+        "creators": ["Nguyen Lab"],
+        "publisher": "SCIBASE Demo",
+        "publicationYear": 2026,
+        "resourceType": "Dataset",
+        "license": "restricted-research-use",
+        "keywords": ["metabolomics", "clinical"],
+        "identifier": "uuid:patient-measurements-v1",
+        "jsonLd": true,
+        "schemaOrg": false
+      },
+      "scan": {
+        "secretsDetected": 0
+      }
+    },
+    {
+      "id": "art-analysis-notebook",
+      "path": "notebooks/reproduce-figures.ipynb",
+      "hash": "sha256:aa013ff77e",
+      "version": "v4",
+      "sizeBytes": 1181120,
+      "access": "project",
+      "metadata": {
+        "title": "Figure reproduction notebook",
+        "creators": ["Nguyen Lab"],
+        "publisher": "SCIBASE Demo",
+        "publicationYear": 2026,
+        "resourceType": "Software",
+        "license": "MIT",
+        "keywords": ["notebook", "figures"],
+        "identifier": "uuid:notebook-v4",
+        "jsonLd": true,
+        "schemaOrg": true
+      },
+      "scan": {
+        "secretsDetected": 0
+      }
+    },
+    {
+      "id": "art-env-secret",
+      "path": "config/.env",
+      "hash": "sha256:deadcafe",
+      "version": "v1",
+      "sizeBytes": 1200,
+      "access": "private",
+      "metadata": {
+        "title": "Local development environment variables",
+        "creators": ["Nguyen Lab"],
+        "publisher": "SCIBASE Demo",
+        "publicationYear": 2026,
+        "resourceType": "Configuration",
+        "license": "internal",
+        "keywords": ["configuration"],
+        "identifier": "uuid:env-secret",
+        "jsonLd": false,
+        "schemaOrg": false
+      },
+      "scan": {
+        "secretsDetected": 2
+      }
+    }
+  ],
+  "environments": [
+    {
+      "id": "env-python-locked",
+      "kind": "docker",
+      "name": "Python 3.12 metabolomics runtime",
+      "image": "ghcr.io/scibase/demo-python:3.12",
+      "imageDigest": "sha256:3bf96c7adf",
+      "networkAccess": false
+    },
+    {
+      "id": "env-r-unpinned",
+      "kind": "conda",
+      "name": "R notebook runtime",
+      "networkAccess": true,
+      "networkJustification": "pending"
+    }
+  ],
+  "computeTriggers": [
+    {
+      "id": "run-reproduce-figures",
+      "environmentId": "env-python-locked",
+      "inputArtifactIds": ["art-aggregate-csv", "art-analysis-notebook"],
+      "command": "python scripts/reproduce_figures.py",
+      "estimatedCostCents": 1400,
+      "schedule": {
+        "enabled": false
+      }
+    },
+    {
+      "id": "run-refresh-clinical-model",
+      "environmentId": "env-r-unpinned",
+      "inputArtifactIds": ["art-patient-tsv", "art-env-secret"],
+      "command": "Rscript models/train.R",
+      "estimatedCostCents": 7200,
+      "schedule": {
+        "enabled": true,
+        "approved": false
+      }
+    }
+  ]
+}