diff --git a/artifact-quarantine-rerun-governance/README.md b/artifact-quarantine-rerun-governance/README.md new file mode 100644 index 0000000..ad472a7 --- /dev/null +++ b/artifact-quarantine-rerun-governance/README.md @@ -0,0 +1,27 @@ +# Artifact Quarantine Rerun Governance + +This module adds a focused Scientific/Engineering Data & Code Hosting slice for artifact safety and reproducible compute readiness. It avoids live storage or container execution and instead produces deterministic reviewer evidence from synthetic project data. + +## What it checks + +- Scientific artifact type and preview safety for datasets, notebooks, figures, models, and environment specs +- Sensitive data quarantine for human-subjects data, raw identifiers, secrets, and missing de-identification evidence +- Metadata completeness for DOI/DataCite-style discovery and FAIR reuse +- License and access gates before public export +- Pinned executable environment readiness for Docker, Conda, and notebook reruns +- Compute trigger eligibility, including quarantined inputs, stale environments, schedule approval, and budget limits +- Retention and export actions plus a deterministic audit digest + +## Run locally + +```bash +npm run check +npm test +npm run demo +``` + +The sample bundle intentionally blocks one patient-level dataset and one scheduled rerun, while allowing a public aggregate table and a locked Python analysis run. + +## Demo video + +The reviewer demo is at `docs/artifact-quarantine-rerun-demo.mp4`. It shows a terminal-style walkthrough of `npm run check`, `npm test`, and `npm run demo` using the current module output. 
diff --git a/artifact-quarantine-rerun-governance/demo.js b/artifact-quarantine-rerun-governance/demo.js new file mode 100644 index 0000000..9beb659 --- /dev/null +++ b/artifact-quarantine-rerun-governance/demo.js @@ -0,0 +1,23 @@ +"use strict"; + +const sampleBundle = require("./sample-data.json"); +const { evaluateHostingGovernance } = require("./src/artifact-quarantine-rerun-governance"); + +const report = evaluateHostingGovernance(sampleBundle); + +console.log(JSON.stringify({ + projectId: report.projectId, + decision: report.decision, + fairScore: report.fairScore, + quarantinedArtifacts: report.artifactReports + .filter((artifact) => artifact.decision === "quarantine") + .map((artifact) => artifact.artifactId), + readyComputeTriggers: report.computePlans + .filter((trigger) => trigger.decision === "ready") + .map((trigger) => trigger.triggerId), + blockedComputeTriggers: report.computePlans + .filter((trigger) => trigger.decision === "blocked") + .map((trigger) => trigger.triggerId), + retentionActions: report.retentionActions, + auditDigest: report.auditDigest, +}, null, 2)); diff --git a/artifact-quarantine-rerun-governance/docs/artifact-quarantine-rerun-demo.mp4 b/artifact-quarantine-rerun-governance/docs/artifact-quarantine-rerun-demo.mp4 new file mode 100644 index 0000000..5ec51ed Binary files /dev/null and b/artifact-quarantine-rerun-governance/docs/artifact-quarantine-rerun-demo.mp4 differ diff --git a/artifact-quarantine-rerun-governance/docs/requirement-map.md b/artifact-quarantine-rerun-governance/docs/requirement-map.md new file mode 100644 index 0000000..274ae51 --- /dev/null +++ b/artifact-quarantine-rerun-governance/docs/requirement-map.md @@ -0,0 +1,15 @@ +# Requirement Map + +Issue: SCIBASE-AI/SCIBASE.AI#14 - Scientific/Engineering Data & Code Hosting + +| Requirement | Implementation | +| --- | --- | +| Support major scientific file types | `classifyArtifact` maps CSV, TSV, XLSX, JSON, Parquet, notebooks, code, figures, model files, and 
environment specs into stable hosting categories. | +| Metadata-aware previews | `buildPreviewDescriptor` decides safe preview mode, disables previews for secrets and oversized files, and annotates sensitive previews. | +| Upload versioning and diffing | Artifact inputs include `version`, `previousHash`, and `diffSummary`; the audit report surfaces version and retention decisions. | +| JSON-LD, DataCite, schema.org metadata | `evaluateMetadata` checks DOI/identifier, title, creators, publisher, year, resource type, license, keywords, and JSON-LD/schema.org flags. | +| FAIR compliance | `calculateFairScore` converts findings into a deterministic FAIR readiness score; the overall hosting decision is derived in `evaluateHostingGovernance`. | +| Access control and persistent links | `evaluateArtifact` blocks public export when licenses, access class, or sensitivity evidence are missing. | +| Executable environments | `evaluateEnvironment` validates pinned Docker digests, Conda lock hashes, notebook kernels, and network access policy. | +| Sandboxed execution and compute triggers | `evaluateComputeTrigger` checks input quarantine, missing inputs, unpinned environments, budget, approval, and scheduled rerun policy. | +| Reviewer-ready evidence | `evaluateHostingGovernance` returns quarantine decisions, preview descriptors, compute plans, retention/export actions, and a SHA-256 audit digest. 
| diff --git a/artifact-quarantine-rerun-governance/package.json b/artifact-quarantine-rerun-governance/package.json new file mode 100644 index 0000000..80a0c69 --- /dev/null +++ b/artifact-quarantine-rerun-governance/package.json @@ -0,0 +1,12 @@ +{ + "name": "artifact-quarantine-rerun-governance", + "version": "1.0.0", + "description": "Sensitive artifact quarantine and executable rerun governance for scientific data/code hosting.", + "main": "src/artifact-quarantine-rerun-governance.js", + "scripts": { + "check": "node --check src/artifact-quarantine-rerun-governance.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + }, + "license": "MIT" +} diff --git a/artifact-quarantine-rerun-governance/sample-data.json b/artifact-quarantine-rerun-governance/sample-data.json new file mode 100644 index 0000000..3123059 --- /dev/null +++ b/artifact-quarantine-rerun-governance/sample-data.json @@ -0,0 +1,154 @@ +{ + "now": "2026-05-15T12:00:00.000Z", + "project": { + "id": "proj-neuro-metabolomics", + "title": "Neuro Metabolomics Replication Pack" + }, + "policy": { + "maxArtifactBytes": 10737418240, + "maxInlinePreviewBytes": 10485760, + "maxRunCostCents": 5000 + }, + "artifacts": [ + { + "id": "art-aggregate-csv", + "path": "datasets/aggregate-biomarkers.csv", + "hash": "sha256:7b7f5e4f55a1c2b9", + "previousHash": "sha256:18bbf31a", + "version": "v2", + "sizeBytes": 842190, + "access": "public", + "diffSummary": { + "rowsAdded": 18, + "rowsRemoved": 2, + "columnsChanged": ["batch_id"] + }, + "metadata": { + "title": "Aggregate biomarker table", + "creators": ["Nguyen Lab"], + "publisher": "SCIBASE Demo", + "publicationYear": 2026, + "resourceType": "Dataset", + "license": "CC-BY-4.0", + "keywords": ["metabolomics", "replication"], + "doi": "10.5555/scibase.aggregate.2", + "jsonLd": true, + "schemaOrg": true + }, + "scan": { + "secretsDetected": 0 + } + }, + { + "id": "art-patient-tsv", + "path": 
"restricted/raw-patient-measurements.tsv", + "hash": "sha256:8e9f951bb2df", + "version": "v1", + "sizeBytes": 2351440, + "access": "restricted", + "sensitivity": ["human-subjects", "phi"], + "deidentificationEvidence": "pending", + "metadata": { + "title": "Patient-level raw measurements", + "creators": ["Nguyen Lab"], + "publisher": "SCIBASE Demo", + "publicationYear": 2026, + "resourceType": "Dataset", + "license": "restricted-research-use", + "keywords": ["metabolomics", "clinical"], + "identifier": "uuid:patient-measurements-v1", + "jsonLd": true, + "schemaOrg": false + }, + "scan": { + "secretsDetected": 0 + } + }, + { + "id": "art-analysis-notebook", + "path": "notebooks/reproduce-figures.ipynb", + "hash": "sha256:aa013ff77e", + "version": "v4", + "sizeBytes": 1181120, + "access": "project", + "metadata": { + "title": "Figure reproduction notebook", + "creators": ["Nguyen Lab"], + "publisher": "SCIBASE Demo", + "publicationYear": 2026, + "resourceType": "Software", + "license": "MIT", + "keywords": ["notebook", "figures"], + "identifier": "uuid:notebook-v4", + "jsonLd": true, + "schemaOrg": true + }, + "scan": { + "secretsDetected": 0 + } + }, + { + "id": "art-env-secret", + "path": "config/.env", + "hash": "sha256:deadcafe", + "version": "v1", + "sizeBytes": 1200, + "access": "private", + "metadata": { + "title": "Local development environment variables", + "creators": ["Nguyen Lab"], + "publisher": "SCIBASE Demo", + "publicationYear": 2026, + "resourceType": "Configuration", + "license": "internal", + "keywords": ["configuration"], + "identifier": "uuid:env-secret", + "jsonLd": false, + "schemaOrg": false + }, + "scan": { + "secretsDetected": 2 + } + } + ], + "environments": [ + { + "id": "env-python-locked", + "kind": "docker", + "name": "Python 3.12 metabolomics runtime", + "image": "ghcr.io/scibase/demo-python:3.12", + "imageDigest": "sha256:3bf96c7adf", + "networkAccess": false + }, + { + "id": "env-r-unpinned", + "kind": "conda", + "name": "R notebook 
runtime", + "networkAccess": true, + "networkJustification": "pending" + } + ], + "computeTriggers": [ + { + "id": "run-reproduce-figures", + "environmentId": "env-python-locked", + "inputArtifactIds": ["art-aggregate-csv", "art-analysis-notebook"], + "command": "python scripts/reproduce_figures.py", + "estimatedCostCents": 1400, + "schedule": { + "enabled": false + } + }, + { + "id": "run-refresh-clinical-model", + "environmentId": "env-r-unpinned", + "inputArtifactIds": ["art-patient-tsv", "art-env-secret"], + "command": "Rscript models/train.R", + "estimatedCostCents": 7200, + "schedule": { + "enabled": true, + "approved": false + } + } + ] +} diff --git a/artifact-quarantine-rerun-governance/src/artifact-quarantine-rerun-governance.js b/artifact-quarantine-rerun-governance/src/artifact-quarantine-rerun-governance.js new file mode 100644 index 0000000..2abdede --- /dev/null +++ b/artifact-quarantine-rerun-governance/src/artifact-quarantine-rerun-governance.js @@ -0,0 +1,470 @@ +"use strict"; + +const crypto = require("node:crypto"); +const path = require("node:path"); + +const PREVIEWABLE_TYPES = new Set(["dataset", "notebook", "figure", "json", "text"]); +const DATA_EXTENSIONS = new Set([".csv", ".tsv", ".xlsx", ".json", ".parquet"]); +const CODE_EXTENSIONS = new Set([".py", ".r", ".jl", ".ipynb"]); +const FIGURE_EXTENSIONS = new Set([".png", ".jpg", ".jpeg", ".svg", ".tif", ".tiff"]); +const MODEL_EXTENSIONS = new Set([".pt", ".pth", ".onnx", ".h5", ".pkl", ".joblib"]); +const SECRET_PATTERNS = [/\.env$/i, /\.pem$/i, /\.key$/i, /secret/i, /token/i, /credential/i]; + +function stableStringify(value) { + if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`; + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + return JSON.stringify(value); +} + +function stableHash(value) { + return 
crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function requireFields(object, fields, label) { + const missing = fields.filter((field) => object[field] === undefined || object[field] === null); + if (missing.length > 0) throw new Error(`${label} is missing required field(s): ${missing.join(", ")}`); +} + +function finding(severity, id, title, detail, remediation, targetIds = []) { + return { severity, id, title, detail, remediation, targetIds }; +} + +function classifyArtifact(artifact) { + const ext = path.extname(artifact.path || artifact.name || "").toLowerCase(); + if (DATA_EXTENSIONS.has(ext)) return ext === ".json" ? "json" : "dataset"; + if (CODE_EXTENSIONS.has(ext)) return ext === ".ipynb" ? "notebook" : "code"; + if (FIGURE_EXTENSIONS.has(ext)) return "figure"; + if (MODEL_EXTENSIONS.has(ext)) return "model"; + if ([".yml", ".yaml", ".toml", ".lock", ".dockerfile"].includes(ext) || artifact.path === "Dockerfile") { + return "environment"; + } + if ([".md", ".txt"].includes(ext)) return "text"; + return "supplement"; +} + +function hasSecretSignal(artifact) { + const target = `${artifact.path || ""} ${artifact.name || ""}`; + return SECRET_PATTERNS.some((pattern) => pattern.test(target)) || (artifact.scan && artifact.scan.secretsDetected > 0); +} + +function sensitivitySet(artifact) { + return new Set([...(artifact.sensitivity || []), ...(artifact.metadata && artifact.metadata.sensitivity || [])]); +} + +function evaluateMetadata(artifact) { + const findings = []; + const metadata = artifact.metadata || {}; + const requiredFields = ["title", "creators", "publisher", "publicationYear", "resourceType", "license", "keywords"]; + + for (const field of requiredFields) { + const value = metadata[field]; + if (value === undefined || value === null || (Array.isArray(value) && value.length === 0) || value === "") { + findings.push(finding( + "warning", + "metadata-field-missing", + "Artifact metadata is incomplete", + `${artifact.id} 
is missing DataCite-style metadata field: ${field}.`, + "Add complete title, creators, publisher, publication year, resource type, license, and keywords before export.", + [artifact.id, field], + )); + } + } + + if (!metadata.doi && !metadata.identifier) { + findings.push(finding( + "warning", + "persistent-identifier-missing", + "Artifact lacks persistent identifier", + `${artifact.id} has no DOI or stable identifier.`, + "Mint or reserve a DOI/UUID before making the artifact externally discoverable.", + [artifact.id], + )); + } + + if (metadata.jsonLd !== true) { + findings.push(finding( + "warning", + "jsonld-export-missing", + "JSON-LD export is not marked ready", + `${artifact.id} does not have JSON-LD metadata ready for machine discovery.`, + "Generate JSON-LD metadata from the artifact manifest.", + [artifact.id], + )); + } + + if (metadata.schemaOrg !== true) { + findings.push(finding( + "warning", + "schema-org-export-missing", + "schema.org markup is not marked ready", + `${artifact.id} does not have schema.org discovery metadata ready.`, + "Generate schema.org Dataset or SoftwareSourceCode markup before public export.", + [artifact.id], + )); + } + + return findings; +} + +function evaluateArtifact(artifact, policy) { + requireFields(artifact, ["id", "path", "hash", "sizeBytes", "access"], "artifact"); + const artifactType = classifyArtifact(artifact); + const findings = []; + const sensitivity = sensitivitySet(artifact); + + if (hasSecretSignal(artifact)) { + findings.push(finding( + "blocker", + "secret-signal-detected", + "Artifact may contain credentials or secrets", + `${artifact.id} matches a secret filename or scan signal.`, + "Quarantine the artifact and require credential rotation evidence before any preview or export.", + [artifact.id], + )); + } + + if (sensitivity.has("human-subjects") && !artifact.irbApprovalId) { + findings.push(finding( + "blocker", + "irb-approval-missing", + "Human-subjects artifact lacks IRB approval", + `${artifact.id} 
is marked human-subjects but has no IRB approval reference.`, + "Attach IRB approval or mark the artifact private and non-runnable.", + [artifact.id], + )); + } + + if ((sensitivity.has("pii") || sensitivity.has("phi")) && artifact.deidentificationEvidence !== "verified") { + findings.push(finding( + "blocker", + "deidentification-not-verified", + "Sensitive data lacks verified de-identification evidence", + `${artifact.id} contains PII/PHI indicators without verified de-identification.`, + "Require de-identification evidence before previews, public links, or compute triggers.", + [artifact.id], + )); + } + + if (artifact.access === "public" && !(artifact.metadata && artifact.metadata.license)) { + findings.push(finding( + "blocker", + "public-license-missing", + "Public artifact lacks reuse license", + `${artifact.id} is public but has no license metadata.`, + "Set a clear reuse license before creating a public persistent link.", + [artifact.id], + )); + } + + if (artifact.sizeBytes > policy.maxArtifactBytes) { + findings.push(finding( + "warning", + "artifact-exceeds-storage-policy", + "Artifact exceeds configured storage policy", + `${artifact.id} is ${artifact.sizeBytes} bytes, above ${policy.maxArtifactBytes}.`, + "Route the artifact to large-object storage and require checksum verification.", + [artifact.id], + )); + } + + findings.push(...evaluateMetadata(artifact)); + + const decision = findings.some((item) => item.severity === "blocker") + ? "quarantine" + : findings.some((item) => item.severity === "warning") + ? 
"manual-review" + : "approved"; + + return { + artifactId: artifact.id, + path: artifact.path, + type: artifactType, + version: artifact.version || "unversioned", + access: artifact.access, + decision, + findings, + }; +} + +function buildPreviewDescriptor(artifact, artifactDecision, policy) { + const type = classifyArtifact(artifact); + if (artifactDecision.decision === "quarantine") { + return { + artifactId: artifact.id, + mode: "disabled", + reason: "quarantined-artifact", + }; + } + + if (!PREVIEWABLE_TYPES.has(type)) { + return { + artifactId: artifact.id, + mode: "metadata-only", + reason: `no-inline-preview-for-${type}`, + }; + } + + if (artifact.sizeBytes > policy.maxInlinePreviewBytes) { + return { + artifactId: artifact.id, + mode: "metadata-only", + reason: "artifact-too-large-for-inline-preview", + }; + } + + return { + artifactId: artifact.id, + mode: type === "dataset" ? "tabular-sample" : `${type}-preview`, + redacted: sensitivitySet(artifact).size > 0, + reason: "preview-safe", + }; +} + +function evaluateEnvironment(environment) { + requireFields(environment, ["id", "kind", "name"], "environment"); + const findings = []; + + if (environment.kind === "docker" && !environment.imageDigest) { + findings.push(finding( + "blocker", + "docker-digest-missing", + "Docker environment is not pinned by digest", + `${environment.id} uses ${environment.image || "an image"} without an immutable digest.`, + "Pin the Docker image by digest before allowing reproducible reruns.", + [environment.id], + )); + } + + if (environment.kind === "conda" && !environment.lockfileHash) { + findings.push(finding( + "blocker", + "conda-lock-missing", + "Conda environment lacks lockfile hash", + `${environment.id} has no lockfile hash.`, + "Generate and store a lockfile hash before scheduled reruns.", + [environment.id], + )); + } + + if (environment.kind === "notebook" && !environment.kernel) { + findings.push(finding( + "warning", + "notebook-kernel-missing", + "Notebook 
environment does not declare a kernel", + `${environment.id} has no kernel metadata.`, + "Record notebook kernel and runtime package lock before reviewer execution.", + [environment.id], + )); + } + + if (environment.networkAccess === true && environment.networkJustification !== "approved") { + findings.push(finding( + "warning", + "network-access-unapproved", + "Executable environment requests unapproved network access", + `${environment.id} enables network access without approval.`, + "Disable network access or attach approval and egress rules.", + [environment.id], + )); + } + + return { + environmentId: environment.id, + decision: findings.some((item) => item.severity === "blocker") + ? "blocked" + : findings.length > 0 ? "manual-review" : "ready", + findings, + }; +} + +function evaluateComputeTrigger(trigger, context, policy) { + requireFields(trigger, ["id", "environmentId", "inputArtifactIds", "command"], "compute trigger"); + const findings = []; + const environmentDecision = context.environmentDecisions.get(trigger.environmentId); + + if (!environmentDecision) { + findings.push(finding( + "blocker", + "environment-not-found", + "Compute trigger references missing environment", + `${trigger.id} references ${trigger.environmentId}, which is not defined.`, + "Attach a valid executable environment before enabling reruns.", + [trigger.id, trigger.environmentId], + )); + } else if (environmentDecision.decision === "blocked") { + findings.push(finding( + "blocker", + "environment-blocked", + "Compute trigger uses blocked environment", + `${trigger.id} uses ${trigger.environmentId}, which is not reproducible yet.`, + "Resolve environment pinning problems before enabling the trigger.", + [trigger.id, trigger.environmentId], + )); + } else if (environmentDecision.decision === "manual-review") { + findings.push(finding( + "warning", + "environment-needs-review", + "Compute trigger environment needs manual review", + `${trigger.id} uses ${trigger.environmentId}, 
which has non-blocking review findings.`, + "Review environment warnings before scheduled reruns.", + [trigger.id, trigger.environmentId], + )); + } + + for (const artifactId of trigger.inputArtifactIds) { + const artifactDecision = context.artifactDecisions.get(artifactId); + if (!artifactDecision) { + findings.push(finding( + "blocker", + "input-artifact-not-found", + "Compute trigger references missing input artifact", + `${trigger.id} references missing input artifact ${artifactId}.`, + "Attach all input artifacts before enabling the trigger.", + [trigger.id, artifactId], + )); + } else if (artifactDecision.decision === "quarantine") { + findings.push(finding( + "blocker", + "input-artifact-quarantined", + "Compute trigger uses quarantined input", + `${trigger.id} depends on quarantined artifact ${artifactId}.`, + "Resolve quarantine findings before execution.", + [trigger.id, artifactId], + )); + } + } + + if (trigger.schedule && trigger.schedule.enabled && trigger.schedule.approved !== true) { + findings.push(finding( + "blocker", + "scheduled-rerun-unapproved", + "Scheduled rerun lacks approval", + `${trigger.id} has an enabled schedule without approval.`, + "Approve scheduled reruns or disable the schedule.", + [trigger.id], + )); + } + + if (trigger.estimatedCostCents > policy.maxRunCostCents) { + findings.push(finding( + "warning", + "compute-cost-review-required", + "Compute trigger exceeds cost review threshold", + `${trigger.id} estimates ${trigger.estimatedCostCents} cents, above ${policy.maxRunCostCents}.`, + "Require budget owner approval before execution.", + [trigger.id], + )); + } + + const decision = findings.some((item) => item.severity === "blocker") + ? "blocked" + : findings.length > 0 ? 
"manual-review" : "ready"; + + return { + triggerId: trigger.id, + environmentId: trigger.environmentId, + command: trigger.command, + decision, + findings, + }; +} + +function calculateFairScore(artifactReports) { + const total = artifactReports.length * 4; + if (total === 0) return 0; + const lost = artifactReports.reduce((score, report) => { + const ids = new Set(report.findings.map((item) => item.id)); + let penalty = 0; + if (ids.has("persistent-identifier-missing")) penalty += 1; + if (ids.has("public-license-missing") || ids.has("metadata-field-missing")) penalty += 1; + if (ids.has("jsonld-export-missing") || ids.has("schema-org-export-missing")) penalty += 1; + if (report.decision === "quarantine") penalty += 1; + return score + Math.min(4, penalty); + }, 0); + return Math.round(((total - lost) / total) * 100); +} + +function buildRetentionActions(artifactReports) { + return artifactReports.map((report) => { + if (report.decision === "quarantine") { + return { artifactId: report.artifactId, action: "retain-private-quarantine", exportable: false }; + } + if (report.decision === "manual-review") { + return { artifactId: report.artifactId, action: "hold-for-metadata-review", exportable: false }; + } + return { artifactId: report.artifactId, action: "publish-persistent-link", exportable: true }; + }); +} + +function evaluateHostingGovernance(bundle, options = {}) { + requireFields(bundle, ["project", "artifacts", "environments", "computeTriggers"], "hosting governance bundle"); + requireFields(bundle.project, ["id", "title"], "project"); + + const policy = { + maxArtifactBytes: 10 * 1024 * 1024 * 1024, + maxInlinePreviewBytes: 10 * 1024 * 1024, + maxRunCostCents: 5000, + ...(bundle.policy || {}), + ...(options.policy || {}), + }; + + const artifactReports = bundle.artifacts.map((artifact) => evaluateArtifact(artifact, policy)); + const artifactDecisions = new Map(artifactReports.map((report) => [report.artifactId, report])); + const previewDescriptors = 
bundle.artifacts.map((artifact) => { + return buildPreviewDescriptor(artifact, artifactDecisions.get(artifact.id), policy); + }); + + const environmentReports = bundle.environments.map(evaluateEnvironment); + const environmentDecisions = new Map(environmentReports.map((report) => [report.environmentId, report])); + const computePlans = bundle.computeTriggers.map((trigger) => { + return evaluateComputeTrigger(trigger, { artifactDecisions, environmentDecisions }, policy); + }); + + const allFindings = [ + ...artifactReports.flatMap((report) => report.findings), + ...environmentReports.flatMap((report) => report.findings), + ...computePlans.flatMap((report) => report.findings), + ]; + const decision = allFindings.some((item) => item.severity === "blocker") + ? "hosting-blocked" + : allFindings.length > 0 ? "manual-review" : "ready-for-publication"; + const retentionActions = buildRetentionActions(artifactReports); + const fairScore = calculateFairScore(artifactReports); + const evaluatedAt = options.now || bundle.now || new Date().toISOString(); + const auditDigest = stableHash({ + projectId: bundle.project.id, + decision, + fairScore, + artifactReports, + environmentReports, + computePlans, + retentionActions, + evaluatedAt, + }); + + return { + projectId: bundle.project.id, + title: bundle.project.title, + evaluatedAt, + decision, + fairScore, + artifactReports, + previewDescriptors, + environmentReports, + computePlans, + retentionActions, + auditDigest: `sha256:${auditDigest}`, + }; +} + +module.exports = { + classifyArtifact, + evaluateHostingGovernance, + stableHash, + stableStringify, +}; diff --git a/artifact-quarantine-rerun-governance/test.js b/artifact-quarantine-rerun-governance/test.js new file mode 100644 index 0000000..3a88108 --- /dev/null +++ b/artifact-quarantine-rerun-governance/test.js @@ -0,0 +1,59 @@ +"use strict"; + +const assert = require("node:assert/strict"); +const sampleBundle = require("./sample-data.json"); +const { + classifyArtifact, + 
evaluateHostingGovernance, + stableHash, +} = require("./src/artifact-quarantine-rerun-governance"); + +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + +const blocked = evaluateHostingGovernance(sampleBundle); + +assert.equal(classifyArtifact({ path: "results/table.parquet" }), "dataset"); +assert.equal(classifyArtifact({ path: "notebooks/main.ipynb" }), "notebook"); +assert.equal(blocked.decision, "hosting-blocked"); +assert.match(blocked.auditDigest, /^sha256:[a-f0-9]{64}$/); +assert.equal(blocked.fairScore < 100, true); + +const patientReport = blocked.artifactReports.find((report) => report.artifactId === "art-patient-tsv"); +assert.equal(patientReport.decision, "quarantine"); +assert(patientReport.findings.some((finding) => finding.id === "irb-approval-missing")); +assert(patientReport.findings.some((finding) => finding.id === "deidentification-not-verified")); + +const secretReport = blocked.artifactReports.find((report) => report.artifactId === "art-env-secret"); +assert.equal(secretReport.decision, "quarantine"); +assert(secretReport.findings.some((finding) => finding.id === "secret-signal-detected")); + +const safeRun = blocked.computePlans.find((plan) => plan.triggerId === "run-reproduce-figures"); +assert.equal(safeRun.decision, "ready"); + +const blockedRun = blocked.computePlans.find((plan) => plan.triggerId === "run-refresh-clinical-model"); +assert.equal(blockedRun.decision, "blocked"); +assert(blockedRun.findings.some((finding) => finding.id === "input-artifact-quarantined")); +assert(blockedRun.findings.some((finding) => finding.id === "scheduled-rerun-unapproved")); + +const readyBundle = clone(sampleBundle); +readyBundle.artifacts = readyBundle.artifacts.filter((artifact) => artifact.id !== "art-env-secret"); +readyBundle.artifacts[1].irbApprovalId = "IRB-2026-041"; +readyBundle.artifacts[1].deidentificationEvidence = "verified"; +readyBundle.artifacts[1].metadata.schemaOrg = true; 
+readyBundle.environments[1].lockfileHash = "sha256:conda-lock-2026"; +readyBundle.environments[1].networkJustification = "approved"; +readyBundle.computeTriggers[1].inputArtifactIds = ["art-patient-tsv"]; +readyBundle.computeTriggers[1].estimatedCostCents = 3200; +readyBundle.computeTriggers[1].schedule.approved = true; + +const ready = evaluateHostingGovernance(readyBundle); +assert.equal(ready.decision, "ready-for-publication"); +assert.equal(ready.artifactReports.every((report) => report.decision === "approved"), true); +assert.equal(ready.computePlans.every((plan) => plan.decision === "ready"), true); +assert.equal(ready.retentionActions.every((action) => action.exportable === true), true); +assert.notEqual(blocked.auditDigest, ready.auditDigest); +assert.equal(stableHash({ b: 2, a: 1 }), stableHash({ a: 1, b: 2 })); + +console.log("artifact quarantine rerun governance tests passed");