Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions artifact-quarantine-rerun-governance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Artifact Quarantine Rerun Governance

This module implements a focused slice of the Scientific/Engineering Data & Code Hosting requirement, covering artifact safety and reproducible-compute readiness. It does not touch live storage or execute containers; instead, it produces deterministic reviewer evidence from synthetic project data.

## What it checks

- Scientific artifact type and preview safety for datasets, notebooks, figures, models, and environment specs
- Sensitive data quarantine for human-subjects data, raw identifiers, secrets, and missing de-identification evidence
- Metadata completeness for DOI/DataCite-style discovery and FAIR reuse
- License and access gates before public export
- Pinned executable environment readiness for Docker, Conda, and notebook reruns
- Compute trigger eligibility, including quarantined inputs, stale environments, schedule approval, and budget limits
- Retention and export actions plus a deterministic audit digest

## Run locally

```bash
npm run check
npm test
npm run demo
```

The sample bundle intentionally blocks one patient-level dataset and one scheduled rerun, while allowing a public aggregate table and a locked Python analysis run.

## Demo video

The reviewer demo is at `docs/artifact-quarantine-rerun-demo.mp4`. It shows a terminal-style walkthrough of `npm run check`, `npm test`, and `npm run demo` using the current module output.
23 changes: 23 additions & 0 deletions artifact-quarantine-rerun-governance/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"use strict";

const sampleBundle = require("./sample-data.json");
const { evaluateHostingGovernance } = require("./src/artifact-quarantine-rerun-governance");

// Run the governance evaluation once over the bundled sample data; every
// field printed below is read from this single report object.
const report = evaluateHostingGovernance(sampleBundle);

/**
 * Selects entries by decision and returns one id per matching entry.
 *
 * @param {Array<object>} entries - Report entries that carry a `decision` field.
 * @param {string} wanted - Decision value an entry must equal (strictly) to be kept.
 * @param {string} idField - Name of the property to read from each kept entry.
 * @returns {Array<*>} The `idField` values of the kept entries, in input order.
 */
function idsByDecision(entries, wanted, idField) {
  const matching = entries.filter(({ decision }) => decision === wanted);
  return matching.map((entry) => entry[idField]);
}

// Key order is kept stable because it determines the order of the printed JSON.
const summary = {
  projectId: report.projectId,
  decision: report.decision,
  fairScore: report.fairScore,
  quarantinedArtifacts: idsByDecision(report.artifactReports, "quarantine", "artifactId"),
  readyComputeTriggers: idsByDecision(report.computePlans, "ready", "triggerId"),
  blockedComputeTriggers: idsByDecision(report.computePlans, "blocked", "triggerId"),
  retentionActions: report.retentionActions,
  auditDigest: report.auditDigest,
};

// Pretty-print with a two-space indent for terminal readability.
console.log(JSON.stringify(summary, null, 2));
Binary file not shown.
15 changes: 15 additions & 0 deletions artifact-quarantine-rerun-governance/docs/requirement-map.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Requirement Map

Issue: SCIBASE-AI/SCIBASE.AI#14 - Scientific/Engineering Data & Code Hosting

| Requirement | Implementation |
| --- | --- |
| Support major scientific file types | `classifyArtifact` maps CSV, TSV, XLSX, JSON, Parquet, notebooks, code, figures, model files, and environment specs into stable hosting categories. |
| Metadata-aware previews | `buildPreviewDescriptor` decides safe preview mode, disables previews for secrets and oversized files, and annotates sensitive previews. |
| Upload versioning and diffing | Artifact inputs include `version`, `previousHash`, and `diffSummary`; the audit report surfaces version and retention decisions. |
| JSON-LD, DataCite, schema.org metadata | `evaluateMetadata` checks DOI/identifier, title, creators, publisher, year, resource type, license, keywords, and JSON-LD/schema.org flags. |
| FAIR compliance | `calculateFairScore` converts findings into a deterministic FAIR readiness score and decision. |
| Access control and persistent links | `evaluateArtifact` blocks public export when licenses, access class, or sensitivity evidence are missing. |
| Executable environments | `evaluateEnvironment` validates pinned Docker digests, Conda lock hashes, notebook kernels, network policy, and storage mounts. |
| Sandboxed execution and compute triggers | `evaluateComputeTrigger` checks input quarantine, missing inputs, unpinned environments, budget, approval, and scheduled rerun policy. |
| Reviewer-ready evidence | `evaluateHostingGovernance` returns quarantine decisions, preview descriptors, compute plans, retention/export actions, and a SHA-256 audit digest. |
12 changes: 12 additions & 0 deletions artifact-quarantine-rerun-governance/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "artifact-quarantine-rerun-governance",
"version": "1.0.0",
"description": "Sensitive artifact quarantine and executable rerun governance for scientific data/code hosting.",
"main": "src/artifact-quarantine-rerun-governance.js",
"scripts": {
"check": "node --check src/artifact-quarantine-rerun-governance.js && node --check test.js && node --check demo.js",
"test": "node test.js",
"demo": "node demo.js"
},
"license": "MIT"
}
154 changes: 154 additions & 0 deletions artifact-quarantine-rerun-governance/sample-data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
{
"now": "2026-05-15T12:00:00.000Z",
"project": {
"id": "proj-neuro-metabolomics",
"title": "Neuro Metabolomics Replication Pack"
},
"policy": {
"maxArtifactBytes": 10737418240,
"maxInlinePreviewBytes": 10485760,
"maxRunCostCents": 5000
},
"artifacts": [
{
"id": "art-aggregate-csv",
"path": "datasets/aggregate-biomarkers.csv",
"hash": "sha256:7b7f5e4f55a1c2b9",
"previousHash": "sha256:18bbf31a",
"version": "v2",
"sizeBytes": 842190,
"access": "public",
"diffSummary": {
"rowsAdded": 18,
"rowsRemoved": 2,
"columnsChanged": ["batch_id"]
},
"metadata": {
"title": "Aggregate biomarker table",
"creators": ["Nguyen Lab"],
"publisher": "SCIBASE Demo",
"publicationYear": 2026,
"resourceType": "Dataset",
"license": "CC-BY-4.0",
"keywords": ["metabolomics", "replication"],
"doi": "10.5555/scibase.aggregate.2",
"jsonLd": true,
"schemaOrg": true
},
"scan": {
"secretsDetected": 0
}
},
{
"id": "art-patient-tsv",
"path": "restricted/raw-patient-measurements.tsv",
"hash": "sha256:8e9f951bb2df",
"version": "v1",
"sizeBytes": 2351440,
"access": "restricted",
"sensitivity": ["human-subjects", "phi"],
"deidentificationEvidence": "pending",
"metadata": {
"title": "Patient-level raw measurements",
"creators": ["Nguyen Lab"],
"publisher": "SCIBASE Demo",
"publicationYear": 2026,
"resourceType": "Dataset",
"license": "restricted-research-use",
"keywords": ["metabolomics", "clinical"],
"identifier": "uuid:patient-measurements-v1",
"jsonLd": true,
"schemaOrg": false
},
"scan": {
"secretsDetected": 0
}
},
{
"id": "art-analysis-notebook",
"path": "notebooks/reproduce-figures.ipynb",
"hash": "sha256:aa013ff77e",
"version": "v4",
"sizeBytes": 1181120,
"access": "project",
"metadata": {
"title": "Figure reproduction notebook",
"creators": ["Nguyen Lab"],
"publisher": "SCIBASE Demo",
"publicationYear": 2026,
"resourceType": "Software",
"license": "MIT",
"keywords": ["notebook", "figures"],
"identifier": "uuid:notebook-v4",
"jsonLd": true,
"schemaOrg": true
},
"scan": {
"secretsDetected": 0
}
},
{
"id": "art-env-secret",
"path": "config/.env",
"hash": "sha256:deadcafe",
"version": "v1",
"sizeBytes": 1200,
"access": "private",
"metadata": {
"title": "Local development environment variables",
"creators": ["Nguyen Lab"],
"publisher": "SCIBASE Demo",
"publicationYear": 2026,
"resourceType": "Configuration",
"license": "internal",
"keywords": ["configuration"],
"identifier": "uuid:env-secret",
"jsonLd": false,
"schemaOrg": false
},
"scan": {
"secretsDetected": 2
}
}
],
"environments": [
{
"id": "env-python-locked",
"kind": "docker",
"name": "Python 3.12 metabolomics runtime",
"image": "ghcr.io/scibase/demo-python:3.12",
"imageDigest": "sha256:3bf96c7adf",
"networkAccess": false
},
{
"id": "env-r-unpinned",
"kind": "conda",
"name": "R notebook runtime",
"networkAccess": true,
"networkJustification": "pending"
}
],
"computeTriggers": [
{
"id": "run-reproduce-figures",
"environmentId": "env-python-locked",
"inputArtifactIds": ["art-aggregate-csv", "art-analysis-notebook"],
"command": "python scripts/reproduce_figures.py",
"estimatedCostCents": 1400,
"schedule": {
"enabled": false
}
},
{
"id": "run-refresh-clinical-model",
"environmentId": "env-r-unpinned",
"inputArtifactIds": ["art-patient-tsv", "art-env-secret"],
"command": "Rscript models/train.R",
"estimatedCostCents": 7200,
"schedule": {
"enabled": true,
"approved": false
}
}
]
}
Loading