From aa64213e9bfd5f08a6c2c7efb4472dc474c3e18e Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:43:24 -0800 Subject: [PATCH 1/9] feat(m010): make scoring status-aware and emit challenge KPIs --- .../reference-impl/m010_kpi.js | 36 +++++++++++++++++-- .../reference-impl/m010_score.js | 4 +++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js b/mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js index c003fad..fc6287b 100644 --- a/mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js +++ b/mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js @@ -5,11 +5,17 @@ export function median(nums) { return s.length % 2 ? s[mid] : (s[mid-1] + s[mid]) / 2; } -export function computeM010KPI({ as_of, events }) { +function mean(nums) { + if (!nums.length) return null; + return nums.reduce((sum, n) => sum + n, 0) / nums.length; +} + +export function computeM010KPI({ as_of, events, challenges = [], scope = "v0_advisory" }) { const asOf = new Date(as_of); const evs = events ?? []; const signals_emitted = evs.length; const subjects_touched = new Set(evs.map(e => `${e.subject_type}:${e.subject_id}`)).size; + const chs = challenges ?? []; let bothEvidence = 0; const latencies = []; @@ -27,13 +33,37 @@ export function computeM010KPI({ as_of, events }) { const evidence_coverage_rate = signals_emitted ? bothEvidence / signals_emitted : 0.0; const median_event_latency_hours = median(latencies); - return { + const out = { mechanism_id: "m010", - scope: "v0_advisory", + scope, as_of, signals_emitted, subjects_touched, evidence_coverage_rate: Number(evidence_coverage_rate.toFixed(4)), median_event_latency_hours: median_event_latency_hours === null ? null : Number(median_event_latency_hours.toFixed(2)) }; + + if (chs.length) { + const resolvedValid = chs.filter((c) => c.status === "resolved_valid").length; + const resolvedInvalid = chs.filter((c) => c.status === "resolved_invalid").length; + const resolvedTotal = resolvedValid + resolvedInvalid; + const escalated = chs.filter((c) => c.status === "escalated").length; + + const resolutionHours = chs + .filter((c) => c.resolution?.resolved_at) + .map((c) => (new Date(c.resolution.resolved_at) - new Date(c.timestamp)) / (1000 * 60 * 60)) + .filter((n) => Number.isFinite(n) && n >= 0); + + const avgResolution = mean(resolutionHours); + + out.challenge_kpis = { + challenges_filed: chs.length, + challenge_rate: signals_emitted ? Number((chs.length / signals_emitted).toFixed(4)) : 0.0, + avg_resolution_time_hours: avgResolution === null ? null : Number(avgResolution.toFixed(2)), + challenge_success_rate: resolvedTotal ? Number((resolvedInvalid / resolvedTotal).toFixed(4)) : null, + admin_resolution_timeout_rate: Number((escalated / chs.length).toFixed(4)) + }; + } + + return out; } diff --git a/mechanisms/m010-reputation-signal/reference-impl/m010_score.js b/mechanisms/m010-reputation-signal/reference-impl/m010_score.js index 7507865..7d030bc 100644 --- a/mechanisms/m010-reputation-signal/reference-impl/m010_score.js +++ b/mechanisms/m010-reputation-signal/reference-impl/m010_score.js @@ -20,11 +20,15 @@ export function computeM010Score({ as_of, events, halfLifeHours = 336, useStakeW if (!evs.length) return { reputation_score_0_1: 0.0 }; const lambda = Math.log(2) / halfLifeHours; + const contributingStatuses = new Set(["active", "resolved_valid"]); let wSum = 0; let dSum = 0; for (const e of evs) { + const status = typeof e.status === "string" ? e.status.toLowerCase() : null; + if (status !== null && !contributingStatuses.has(status)) continue; + const ts = new Date(e.timestamp); const ageH = (asOf - ts) / (1000*60*60); const decay = Math.exp(-lambda * Math.max(0, ageH)); From 3592e3d8c29a61b5bbbdd948e81c9e3c2dead998 Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:43:29 -0800 Subject: [PATCH 2/9] feat(m010): align challenge schema and replay dataset semantics --- mechanisms/m010-reputation-signal/SPEC.md | 2 +- .../fixtures/v0_challenge_sample.json | 4 +- .../datasets/schema.json | 156 ++++++++++++++++++ .../schemas/m010_kpi.schema.json | 2 +- .../schemas/m010_signal.schema.json | 1 + 5 files changed, 161 insertions(+), 4 deletions(-) diff --git a/mechanisms/m010-reputation-signal/SPEC.md b/mechanisms/m010-reputation-signal/SPEC.md index 364872e..bccbd8a 100644 --- a/mechanisms/m010-reputation-signal/SPEC.md +++ b/mechanisms/m010-reputation-signal/SPEC.md @@ -305,7 +305,7 @@ The following metrics should be tracked and published in periodic digests: | KPI | Formula | Target | |-----|---------|--------| | `challenges_filed` | count of challenges per period | Informational | -| `challenge_rate` | challenges / total active signals | < 5% (healthy ecosystem) | +| `challenge_rate` | challenges / signals emitted in period | < 5% (healthy ecosystem) | | `avg_resolution_time_hours` | mean(resolution_timestamp - challenge_timestamp) | < 168h (7 days) | | `challenge_success_rate` | resolved_invalid / (resolved_valid + resolved_invalid) | Informational | | `admin_resolution_timeout_rate` | auto_escalated / total_challenges | < 5% | diff --git a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json index 7bfe3f7..3ac55c2 100644 --- a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json +++ b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json @@ -180,7 +180,7 @@ } ], "expected_outputs": { - "description": "Only signals with status 'active' or 'resolved_valid' contribute to score. Challenged, resolved_invalid, withdrawn, and invalidated signals are excluded.", + "description": "Only signals with status 'active' or 'resolved_valid' contribute to score. Challenge rate uses challenges_filed / signals_emitted in period.", "contributing_signals": [ "signal-1 (active, endorsement=4)", "signal-4 (resolved_valid, endorsement=4)", @@ -197,7 +197,7 @@ "challenges_filed": 3, "challenge_rate": 0.375, "challenge_success_rate": 0.5, - "avg_resolution_time_hours": 49.0 + "avg_resolution_time_hours": 49.5 } } } diff --git a/mechanisms/m010-reputation-signal/datasets/schema.json b/mechanisms/m010-reputation-signal/datasets/schema.json index 785d0b1..2bedee2 100644 --- a/mechanisms/m010-reputation-signal/datasets/schema.json +++ b/mechanisms/m010-reputation-signal/datasets/schema.json @@ -2,6 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "m010 replay dataset", "type": "object", + "additionalProperties": false, "required": [ "mechanism_id", "scope", @@ -20,10 +21,14 @@ "description": "ISO-8601 datetime", "format": "date-time" }, + "description": { + "type": "string" + }, "events": { "type": "array", "items": { "type": "object", + "additionalProperties": false, "required": [ "timestamp", "subject_type", @@ -34,11 +39,15 @@ "evidence" ], "properties": { + "signal_id": { + "type": "string" + }, "timestamp": { "type": "string", "format": "date-time" }, "subject_type": { + "type": "string", "enum": [ "CreditClass", "Project", @@ -61,8 +70,22 @@ "signaler_id": { "type": "string" }, + "status": { + "type": "string", + "enum": [ + "submitted", + "active", + "challenged", + "escalated", + "resolved_valid", + "resolved_invalid", + "withdrawn", + "invalidated" + ] + }, "evidence": { "type": "object", + "additionalProperties": false, "required": [ "koi_links", "ledger_refs" @@ -79,11 +102,144 @@ "items": { "type": "string" } + }, + "web_links": { + "type": "array", + "items": { + "type": "string" + } } } } } } + }, + "challenges": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "challenge_id", + "signal_id", + "challenger_id", + "category", + "rationale", + "evidence", + "timestamp", + "status" + ], + "properties": { + "challenge_id": { + "type": "string" + }, + "signal_id": { + "type": "string" + }, + "challenger_id": { + "type": "string" + }, + "category": { + "type": "string" + }, + "rationale": { + "type": "string", + "minLength": 50 + }, + "severity": { + "type": "string", + "enum": [ + "LOW", + "MEDIUM", + "HIGH", + "CRITICAL" + ] + }, + "requested_outcome": { + "type": "string", + "enum": [ + "INVALIDATE", + "DOWNGRADE", + "FLAG_FOR_REVIEW" + ] + }, + "evidence": { + "type": "object", + "additionalProperties": false, + "required": [ + "koi_links", + "ledger_refs" + ], + "properties": { + "koi_links": { + "type": "array", + "items": { + "type": "string" + } + }, + "ledger_refs": { + "type": "array", + "items": { + "type": "string" + } + }, + "web_links": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string", + "enum": [ + "pending", + "resolved_valid", + "resolved_invalid", + "escalated" + ] + }, + "resolution": { + "type": "object", + "additionalProperties": false, + "required": [ + "resolved_by", + "resolved_at", + "outcome", + "rationale" + ], + "properties": { + "resolved_by": { + "type": "string" + }, + "resolved_at": { + "type": "string", + "format": "date-time" + }, + "outcome": { + "type": "string", + "enum": [ + "VALID", + "INVALID" + ] + }, + "rationale": { + "type": "string", + "minLength": 20 + } + } + } + } + } + }, + "expected_outputs": { + "type": "object", + "additionalProperties": true } } } diff --git a/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json b/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json index bec867d..cad799f 100644 --- a/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json +++ b/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json @@ -78,7 +78,7 @@ "type": "number", "minimum": 0.0, "maximum": 1.0, - "description": "challenges_filed / total_active_signals" + "description": "challenges_filed / signals_emitted (events in the reporting period)" }, "avg_resolution_time_hours": { "type": ["number", "null"], diff --git a/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json b/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json index 05bbffa..410e961 100644 --- a/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json +++ b/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json @@ -41,6 +41,7 @@ "submitted", "active", "challenged", + "escalated", "resolved_valid", "resolved_invalid", "withdrawn", From f7f983faa0766884e0721bb9507cdd6def3be912 Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:43:34 -0800 Subject: [PATCH 3/9] test(m010): add deterministic reference vector verification --- .../vector_v0_challenge.expected.json | 21 +++++ package.json | 1 + scripts/verify-m010-reference-impl.mjs | 90 +++++++++++++++++++ scripts/verify.mjs | 17 ++++ 4 files changed, 129 insertions(+) create mode 100644 mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json create mode 100644 scripts/verify-m010-reference-impl.mjs diff --git a/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json new file mode 100644 index 0000000..b01e899 --- /dev/null +++ b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json @@ -0,0 +1,21 @@ +{ + "kpi": { + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_replay", + "as_of": "2026-02-10T12:00:00Z", + "signals_emitted": 8, + "subjects_touched": 5, + "evidence_coverage_rate": 0.625, + "median_event_latency_hours": 132, + "challenge_kpis": { + "challenges_filed": 3, + "challenge_rate": 0.375, + "avg_resolution_time_hours": 49.5, + "challenge_success_rate": 0.5, + "admin_resolution_timeout_rate": 0 + } + }, + "score": { + "reputation_score_0_1": 0.8422 + } +} diff --git a/package.json b/package.json index d174e09..84bf4c5 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,7 @@ "version": "0.0.0", "scripts": { "verify": "node scripts/verify.mjs", + "verify:m010": "node scripts/verify-m010-reference-impl.mjs", "build:index": "node scripts/build-mechanism-index.mjs", "check:index": "node scripts/build-mechanism-index.mjs --check" } diff --git a/scripts/verify-m010-reference-impl.mjs b/scripts/verify-m010-reference-impl.mjs new file mode 100644 index 0000000..aa86c56 --- /dev/null +++ b/scripts/verify-m010-reference-impl.mjs @@ -0,0 +1,90 @@ +#!/usr/bin/env node +import assert from "node:assert/strict"; +import fs from "node:fs"; +import path from "node:path"; + +const repoRoot = process.cwd(); + +function readJson(rel) { + const abs = path.join(repoRoot, rel); + return JSON.parse(fs.readFileSync(abs, "utf8")); +} + +async function loadModuleFromJs(rel) { + const abs = path.join(repoRoot, rel); + const src = fs.readFileSync(abs, "utf8"); + const dataUrl = `data:text/javascript;base64,${Buffer.from(src).toString("base64")}`; + return import(dataUrl); +} + +function assertSubset(actual, expected, label) { + for (const [key, value] of Object.entries(expected)) { + assert.deepStrictEqual( + actual[key], + value, + `${label}: expected '${key}' to equal ${JSON.stringify(value)}, got ${JSON.stringify(actual[key])}` + ); + } +} + +function computeOutputs(input, computeM010KPI, computeM010Score) { + return { + kpi: computeM010KPI({ + as_of: input.as_of, + events: input.events, + challenges: input.challenges, + scope: input.scope + }), + score: computeM010Score({ + as_of: input.as_of, + events: input.events + }) + }; +} + +async function main() { + const { computeM010KPI } = await loadModuleFromJs("mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js"); + const { computeM010Score } = await loadModuleFromJs("mechanisms/m010-reputation-signal/reference-impl/m010_score.js"); + + const vectors = [ + { + name: "v0_sample", + inputRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.input.json", + expectedRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.expected.json" + }, + { + name: "v0_challenge", + inputRel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json", + expectedRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json", + assertFixtureKpis: true + } + ]; + + for (const vector of vectors) { + const input = readJson(vector.inputRel); + const expected = readJson(vector.expectedRel); + const actual = computeOutputs(input, computeM010KPI, computeM010Score); + + try { + assert.deepStrictEqual(actual, expected); + } catch (err) { + console.error(`m010 vector mismatch for '${vector.name}'`); + console.error("Expected:"); + console.error(JSON.stringify(expected, null, 2)); + console.error("Actual:"); + console.error(JSON.stringify(actual, null, 2)); + throw err; + } + + if (vector.assertFixtureKpis && input.expected_outputs?.challenge_kpis) { + assertSubset(actual.kpi.challenge_kpis ?? {}, input.expected_outputs.challenge_kpis, `fixture expected_outputs.challenge_kpis (${vector.name})`); + } + } + + console.log("m010 reference-impl vectors: PASS"); +} + +main().catch((err) => { + console.error(err instanceof Error ? err.message : String(err)); + process.exit(1); +}); diff --git a/scripts/verify.mjs b/scripts/verify.mjs index 3253586..fcb9f9b 100644 --- a/scripts/verify.mjs +++ b/scripts/verify.mjs @@ -33,10 +33,20 @@ requireFile("mechanisms/m010-reputation-signal/SPEC.md"); requireFile("mechanisms/m010-reputation-signal/README.md"); requireFile("mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json"); requireFile("mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json"); +requireFile("mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/schema.json"); requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_score.js"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.input.json"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.expected.json"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json"); +requireFile("scripts/verify-m010-reference-impl.mjs"); // Mechanism index check run("node", ["scripts/build-mechanism-index.mjs", "--check"]); +run("node", ["scripts/verify-m010-reference-impl.mjs"]); // Basic schema sanity const kpiSchema = readJson("mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json"); @@ -45,4 +55,11 @@ if (!kpiSchema.required || !kpiSchema.required.includes("mechanism_id")) { process.exit(4); } +const signalSchema = readJson("mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json"); +const signalStatus = signalSchema.properties?.status?.enum ?? []; +if (!signalStatus.includes("escalated")) { + console.error("Signal schema missing escalated status."); + process.exit(5); +} + console.log("agentic-tokenomics verify: PASS"); From f8cb0595c7efc28cdc38666102c7c3fe8d38e32b Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:43:48 -0800 Subject: [PATCH 4/9] docs(m010): document challenge-aware outputs and consumers --- CHANGELOG.md | 6 ++++++ docs/MECHANISM_CONSUMERS.md | 2 ++ mechanisms/m010-reputation-signal/README.md | 4 ++++ .../m010-reputation-signal/datasets/README.md | 2 +- .../reference-impl/README.md | 17 +++++++++++++++++ .../m010-reputation-signal/schemas/README.md | 2 +- 6 files changed, 31 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ab218c..081c3e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ ## Unreleased ### Added - Units 11–20: canonical schemas, mechanism index generator, consumers mapping, WG bulk pack, repo templates, and verification scripts. +- m010 reference implementation vector verifier (`scripts/verify-m010-reference-impl.mjs`) with challenge replay coverage. + +### Changed +- m010 scoring now excludes non-contributing signal states when `status` is present (`active`/`resolved_valid` only contribute). +- m010 KPI computation now emits `challenge_kpis` when challenge data is provided. +- m010 replay dataset/schema alignment expanded to cover challenge fixtures and lifecycle statuses (including `escalated`). ### Notes - This repo is primarily specification content; changes are intended to be deterministic and offline-friendly. diff --git a/docs/MECHANISM_CONSUMERS.md b/docs/MECHANISM_CONSUMERS.md index abd7745..7a408ca 100644 --- a/docs/MECHANISM_CONSUMERS.md +++ b/docs/MECHANISM_CONSUMERS.md @@ -9,9 +9,11 @@ This document maps **mechanism IDs** to known **consumers** (agents, digests, sc **Outputs** - KPI JSON block schema: `mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json` - Signal item schema: `mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json` +- Challenge event schema: `mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json` **Datasets (deterministic)** - Replay fixtures: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json` +- Challenge replay fixture: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json` **Known consumers** - Heartbeat character: `signal-agent` (regen-heartbeat) diff --git a/mechanisms/m010-reputation-signal/README.md b/mechanisms/m010-reputation-signal/README.md index 0322220..022bff4 100644 --- a/mechanisms/m010-reputation-signal/README.md +++ b/mechanisms/m010-reputation-signal/README.md @@ -27,3 +27,7 @@ Canonical JSON schemas for m010 outputs live in `schemas/`. - `m010_signal.schema.json` — signal items with `status` lifecycle field - `m010_challenge.schema.json` — challenge events with evidence and resolution - `m010_kpi.schema.json` — KPI output including optional `challenge_kpis` + +## Reference implementation checks +- Deterministic vectors are validated by `scripts/verify-m010-reference-impl.mjs` +- `challenge_rate` is computed as `challenges_filed / signals_emitted` for replay-period KPI reporting diff --git a/mechanisms/m010-reputation-signal/datasets/README.md b/mechanisms/m010-reputation-signal/datasets/README.md index ff7ce3e..160a6ef 100644 --- a/mechanisms/m010-reputation-signal/datasets/README.md +++ b/mechanisms/m010-reputation-signal/datasets/README.md @@ -16,7 +16,7 @@ A replay runner (e.g., in `regen-heartbeat`) can read a fixture file and compute The challenge fixture (`v0_challenge_sample.json`) additionally exercises: - Status-aware scoring: only `active` and `resolved_valid` signals contribute to score -- Challenge KPIs: `challenges_filed`, `challenge_rate`, `challenge_success_rate`, `avg_resolution_time_hours` +- Challenge KPIs: `challenges_filed`, `challenge_rate` (`challenges_filed / signals_emitted`), `challenge_success_rate`, `avg_resolution_time_hours` - The `expected_outputs` field documents which signals should be included/excluded These datasets are **advisory-only** and do not imply enforcement or on-chain actions. diff --git a/mechanisms/m010-reputation-signal/reference-impl/README.md b/mechanisms/m010-reputation-signal/reference-impl/README.md index 1e4c663..0748d73 100644 --- a/mechanisms/m010-reputation-signal/reference-impl/README.md +++ b/mechanisms/m010-reputation-signal/reference-impl/README.md @@ -28,3 +28,20 @@ A normalized `reputation_score_0_1` computed as: 3) score = weighted average of decayed weights, normalized to [0,1] This is advisory and intended for digest/reporting only (no enforcement). + +### Status-aware scoring behavior +- If an event has no `status`, it is treated as contributing (legacy v0 fixtures). +- If `status` is present, only `active` and `resolved_valid` contribute to score. +- `submitted`, `challenged`, `escalated`, `resolved_invalid`, `withdrawn`, and `invalidated` are excluded from score contribution. + +### Challenge KPI behavior +When `challenges[]` are provided to `computeM010KPI`, output includes `challenge_kpis`: +- `challenges_filed` +- `challenge_rate` = `challenges_filed / signals_emitted` +- `avg_resolution_time_hours` +- `challenge_success_rate` +- `admin_resolution_timeout_rate` + +### Deterministic vectors +Reference vectors live in `test_vectors/` and are validated by: +- `node scripts/verify-m010-reference-impl.mjs` diff --git a/mechanisms/m010-reputation-signal/schemas/README.md b/mechanisms/m010-reputation-signal/schemas/README.md index ee2a53c..f4d09a8 100644 --- a/mechanisms/m010-reputation-signal/schemas/README.md +++ b/mechanisms/m010-reputation-signal/schemas/README.md @@ -10,5 +10,5 @@ These JSON Schemas define **canonical output shapes** for m010 (Reputation Signa ## Notes - These schemas are intended for **validation** and consistency across repos (Heartbeat, agent skills, etc.). - v0 is advisory-only: schemas describe outputs, not enforcement. -- The `status` field on signals tracks lifecycle state (submitted → active → challenged → resolved). See SPEC.md section 6.1. +- The `status` field on signals tracks lifecycle state (submitted → active/challenged/escalated → resolved/withdrawn/invalidated). See SPEC.md section 6.1. - Challenge events are separate from signals; they reference a `signal_id` and track their own resolution lifecycle. From bfd2f0252bff1f10f7838e28550060771e511c8b Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:17:13 -0800 Subject: [PATCH 5/9] test(m010): enforce challenge dataset integrity invariants --- CHANGELOG.md | 1 + .../m010-reputation-signal/datasets/README.md | 7 ++ package.json | 1 + scripts/verify-m010-datasets.mjs | 88 +++++++++++++++++++ scripts/verify.mjs | 2 + 5 files changed, 99 insertions(+) create mode 100644 scripts/verify-m010-datasets.mjs diff --git a/CHANGELOG.md b/CHANGELOG.md index 081c3e0..a9f3ba3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added - Units 11–20: canonical schemas, mechanism index generator, consumers mapping, WG bulk pack, repo templates, and verification scripts. - m010 reference implementation vector verifier (`scripts/verify-m010-reference-impl.mjs`) with challenge replay coverage. +- m010 dataset integrity verifier (`scripts/verify-m010-datasets.mjs`) covering challenge linkage, lifecycle consistency, and expected output coherence. ### Changed - m010 scoring now excludes non-contributing signal states when `status` is present (`active`/`resolved_valid` only contribute). diff --git a/mechanisms/m010-reputation-signal/datasets/README.md b/mechanisms/m010-reputation-signal/datasets/README.md index 160a6ef..26febb1 100644 --- a/mechanisms/m010-reputation-signal/datasets/README.md +++ b/mechanisms/m010-reputation-signal/datasets/README.md @@ -20,3 +20,10 @@ The challenge fixture (`v0_challenge_sample.json`) additionally exercises: - The `expected_outputs` field documents which signals should be included/excluded These datasets are **advisory-only** and do not imply enforcement or on-chain actions. + +## Integrity checks +Dataset integrity is validated by `scripts/verify-m010-datasets.mjs`, including: +- challenge-to-signal linkage (`challenge.signal_id` must reference an existing signal) +- category consistency between challenge and targeted signal +- resolution timestamp ordering and resolution presence rules by challenge status +- consistency of `expected_outputs.contributing_signals` / `excluded_signals` with status-based contribution rules diff --git a/package.json b/package.json index 84bf4c5..d9d0ed8 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "scripts": { "verify": "node scripts/verify.mjs", "verify:m010": "node scripts/verify-m010-reference-impl.mjs", + "verify:m010:datasets": "node scripts/verify-m010-datasets.mjs", "build:index": "node scripts/build-mechanism-index.mjs", "check:index": "node scripts/build-mechanism-index.mjs --check" } diff --git a/scripts/verify-m010-datasets.mjs b/scripts/verify-m010-datasets.mjs new file mode 100644 index 0000000..4c19a18 --- /dev/null +++ b/scripts/verify-m010-datasets.mjs @@ -0,0 +1,88 @@ +#!/usr/bin/env node +import assert from "node:assert/strict"; +import fs from "node:fs"; +import path from "node:path"; + +const repoRoot = process.cwd(); + +function readJson(rel) { + return JSON.parse(fs.readFileSync(path.join(repoRoot, rel), "utf8")); +} + +function assertIsoDate(value, label) { + const ts = Date.parse(value); + assert(Number.isFinite(ts), `${label}: invalid ISO date '${value}'`); +} + +function parseSignalIdToken(entry) { + const m = /^([^\s]+)\s+\(.+\)$/.exec(entry); + return m ? m[1] : null; +} + +function verifyChallengeFixture(rel) { + const fixture = readJson(rel); + const events = fixture.events ?? []; + const challenges = fixture.challenges ?? []; + + assert(events.length > 0, `${rel}: events must not be empty`); + + const signalIdToEvent = new Map(); + for (const e of events) { + assertIsoDate(e.timestamp, `${rel}: event ${e.signal_id ?? ""} timestamp`); + assert(typeof e.signal_id === "string" && e.signal_id.length > 0, `${rel}: all challenge replay events must include signal_id`); + assert(!signalIdToEvent.has(e.signal_id), `${rel}: duplicate signal_id '${e.signal_id}'`); + signalIdToEvent.set(e.signal_id, e); + } + + for (const c of challenges) { + assertIsoDate(c.timestamp, `${rel}: challenge ${c.challenge_id} timestamp`); + const target = signalIdToEvent.get(c.signal_id); + assert(target, `${rel}: challenge '${c.challenge_id}' references unknown signal_id '${c.signal_id}'`); + assert(c.category === target.category, `${rel}: challenge '${c.challenge_id}' category mismatch (challenge=${c.category}, signal=${target.category})`); + + const resolved = c.status === "resolved_valid" || c.status === "resolved_invalid"; + if (resolved) { + assert(c.resolution && typeof c.resolution === "object", `${rel}: resolved challenge '${c.challenge_id}' missing resolution`); + assertIsoDate(c.resolution.resolved_at, `${rel}: challenge ${c.challenge_id} resolution.resolved_at`); + const deltaHours = (Date.parse(c.resolution.resolved_at) - Date.parse(c.timestamp)) / (1000 * 60 * 60); + assert(deltaHours >= 0, `${rel}: challenge '${c.challenge_id}' resolved before it was filed`); + } else { + assert(!c.resolution, `${rel}: unresolved challenge '${c.challenge_id}' must not include resolution`); + } + } + + const contributingStatuses = new Set(["active", "resolved_valid"]); + const expectedContrib = new Set((fixture.expected_outputs?.contributing_signals ?? []).map(parseSignalIdToken).filter(Boolean)); + const expectedExcluded = new Set((fixture.expected_outputs?.excluded_signals ?? []).map(parseSignalIdToken).filter(Boolean)); + + const derivedContrib = new Set(events.filter((e) => contributingStatuses.has(e.status)).map((e) => e.signal_id)); + const derivedExcluded = new Set(events.filter((e) => !contributingStatuses.has(e.status)).map((e) => e.signal_id)); + + assert.deepStrictEqual(expectedContrib, derivedContrib, `${rel}: expected_outputs.contributing_signals do not match status-derived contributors`); + assert.deepStrictEqual(expectedExcluded, derivedExcluded, `${rel}: expected_outputs.excluded_signals do not match status-derived exclusions`); + + for (const id of expectedContrib) { + assert(!expectedExcluded.has(id), `${rel}: signal '${id}' appears in both contributing and excluded lists`); + } +} + +function main() { + const sampleRel = "mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json"; + const challengeRel = "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json"; + + const sample = readJson(sampleRel); + assert(sample.events?.length > 0, `${sampleRel}: events must not be empty`); + for (const [idx, e] of sample.events.entries()) { + assertIsoDate(e.timestamp, `${sampleRel}: event[${idx}] timestamp`); + } + + verifyChallengeFixture(challengeRel); + console.log("m010 dataset integrity: PASS"); +} + +try { + main(); +} catch (err) { + console.error(err instanceof Error ? err.message : String(err)); + process.exit(1); +} diff --git a/scripts/verify.mjs b/scripts/verify.mjs index fcb9f9b..ae4dff5 100644 --- a/scripts/verify.mjs +++ b/scripts/verify.mjs @@ -43,10 +43,12 @@ requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vecto requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.expected.json"); requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json"); requireFile("scripts/verify-m010-reference-impl.mjs"); +requireFile("scripts/verify-m010-datasets.mjs"); // Mechanism index check run("node", ["scripts/build-mechanism-index.mjs", "--check"]); run("node", ["scripts/verify-m010-reference-impl.mjs"]); +run("node", ["scripts/verify-m010-datasets.mjs"]); // Basic schema sanity const kpiSchema = readJson("mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json"); From 7f1fc34711f1951130d6504d531e3750ce69369e Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:03:16 -0800 Subject: [PATCH 6/9] feat(m010): tighten schema lifecycle and identifier constraints --- CHANGELOG.md | 1 + .../datasets/schema.json | 131 +++++++++++++++--- .../m010-reputation-signal/schemas/README.md | 1 + .../schemas/m010_challenge.schema.json | 84 ++++++++++- .../schemas/m010_kpi.schema.json | 10 +- .../schemas/m010_signal.schema.json | 27 +++- scripts/verify.mjs | 12 ++ 7 files changed, 239 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9f3ba3..c93841c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - m010 scoring now excludes non-contributing signal states when `status` is present (`active`/`resolved_valid` only contribute). - m010 KPI computation now emits `challenge_kpis` when challenge data is provided. - m010 replay dataset/schema alignment expanded to cover challenge fixtures and lifecycle statuses (including `escalated`). +- m010 schemas are stricter on IDs/category formatting, evidence arrays, and challenge lifecycle guards (`status` ↔ `resolution`/`outcome` consistency). ### Notes - This repo is primarily specification content; changes are intended to be deterministic and offline-friendly. diff --git a/mechanisms/m010-reputation-signal/datasets/schema.json b/mechanisms/m010-reputation-signal/datasets/schema.json index 2bedee2..8f79ec9 100644 --- a/mechanisms/m010-reputation-signal/datasets/schema.json +++ b/mechanisms/m010-reputation-signal/datasets/schema.json @@ -40,7 +40,8 @@ ], "properties": { "signal_id": { - "type": "string" + "type": "string", + "pattern": "^signal-[A-Za-z0-9._:-]+$" }, "timestamp": { "type": "string", @@ -57,10 +58,12 @@ ] }, "subject_id": { - "type": "string" + "type": "string", + "minLength": 1 }, "category": { - "type": "string" + "type": "string", + "pattern": "^[a-z][a-z0-9_]{1,63}$" }, "endorsement_level": { "type": "integer", @@ -68,7 +71,8 @@ "maximum": 5 }, "signaler_id": { - "type": "string" + "type": "string", + "minLength": 1 }, "status": { "type": "string", @@ -93,20 +97,26 @@ "properties": { "koi_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "ledger_refs": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "web_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } } } @@ -131,16 +141,20 @@ ], "properties": { "challenge_id": { - "type": "string" + "type": "string", + "pattern": "^ch-[A-Za-z0-9._:-]+$" }, "signal_id": { - "type": "string" + "type": "string", + "pattern": "^signal-[A-Za-z0-9._:-]+$" }, "challenger_id": { - "type": "string" + "type": "string", + "minLength": 1 }, "category": { - "type": "string" + "type": "string", + "pattern": "^[a-z][a-z0-9_]{1,63}$" }, "rationale": { "type": "string", @@ -173,20 +187,26 @@ "properties": { "koi_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "ledger_refs": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "web_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } } } @@ -215,7 +235,8 @@ ], "properties": { "resolved_by": { - "type": "string" + "type": "string", + "minLength": 1 }, "resolved_at": { "type": "string", @@ -234,7 +255,85 @@ } } } - } + }, + "allOf": [ + { + "if": { + "properties": { + "status": { + "enum": [ + "resolved_valid", + "resolved_invalid" + ] + } + } + }, + "then": { + "required": [ + "resolution" + ] + } + }, + { + "if": { + "properties": { + "status": { + "enum": [ + "pending", + "escalated" + ] + } + } + }, + "then": { + "not": { + "required": [ + "resolution" + ] + } + } + }, + { + "if": { + "properties": { + "status": { + "const": "resolved_valid" + } + } + }, + "then": { + "properties": { + "resolution": { + "properties": { + "outcome": { + "const": "VALID" + } + } + } + } + } + }, + { + "if": { + "properties": { + "status": { + "const": "resolved_invalid" + } + } + }, + "then": { + "properties": { + "resolution": { + "properties": { + "outcome": { + "const": "INVALID" + } + } + } + } + } + } + ] } }, "expected_outputs": { diff --git a/mechanisms/m010-reputation-signal/schemas/README.md b/mechanisms/m010-reputation-signal/schemas/README.md index f4d09a8..57b4d18 100644 --- a/mechanisms/m010-reputation-signal/schemas/README.md +++ b/mechanisms/m010-reputation-signal/schemas/README.md @@ -12,3 +12,4 @@ These JSON Schemas define **canonical output shapes** for m010 (Reputation Signa - v0 is advisory-only: schemas describe outputs, not enforcement. - The `status` field on signals tracks lifecycle state (submitted → active/challenged/escalated → resolved/withdrawn/invalidated). See SPEC.md section 6.1. - Challenge events are separate from signals; they reference a `signal_id` and track their own resolution lifecycle. +- Challenge schema includes lifecycle guards: resolved statuses require `resolution`, unresolved statuses forbid it, and resolution outcomes must match status (`VALID` vs `INVALID`). diff --git a/mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json b/mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json index fe49dfd..c8555af 100644 --- a/mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json +++ b/mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json @@ -17,18 +17,22 @@ "properties": { "challenge_id": { "type": "string", + "pattern": "^ch-[A-Za-z0-9._:-]+$", "description": "Unique identifier for this challenge" }, "signal_id": { "type": "string", + "pattern": "^signal-[A-Za-z0-9._:-]+$", "description": "ID of the signal being challenged" }, "challenger_id": { "type": "string", + "minLength": 1, "description": "Address of the challenger" }, "category": { "type": "string", + "pattern": "^[a-z][a-z0-9_]{1,63}$", "description": "Must match the challenged signal's category" }, "rationale": { @@ -53,17 +57,20 @@ "properties": { "koi_links": { "type": "array", - "items": { "type": "string" }, + "uniqueItems": true, + "items": { "type": "string", "minLength": 1 }, "description": "KOI knowledge links supporting the challenge" }, "ledger_refs": { "type": "array", - "items": { "type": "string" }, + "uniqueItems": true, + "items": { "type": "string", "minLength": 1 }, "description": "On-chain references supporting the challenge" }, "web_links": { "type": "array", - "items": { "type": "string" }, + "uniqueItems": true, + "items": { "type": "string", "minLength": 1 }, "description": "Optional web references" } } @@ -85,6 +92,7 @@ "properties": { "resolved_by": { "type": "string", + "minLength": 1, "description": "v0: admin address; v1: arbiter_dao ID" }, "resolved_at": { @@ -103,5 +111,73 @@ }, "required": ["resolved_by", "resolved_at", "outcome", "rationale"] } - } + }, + "allOf": [ + { + "if": { + "properties": { + "status": { + "enum": ["resolved_valid", "resolved_invalid"] + } + } + }, + "then": { + "required": ["resolution"] + } + }, + { + "if": { + "properties": { + "status": { + "enum": ["pending", "escalated"] + } + } + }, + "then": { + "not": { + "required": ["resolution"] + } + } + }, + { + "if": { + "properties": { + "status": { + "const": "resolved_valid" + } + } + }, + "then": { + "properties": { + "resolution": { + "properties": { + "outcome": { + "const": "VALID" + } + } + } + } + } + }, + { + "if": { + "properties": { + "status": { + "const": "resolved_invalid" + } + } + }, + "then": { + "properties": { + "resolution": { + "properties": { + "outcome": { + "const": "INVALID" + } + } + } + } + } + } + ] } diff --git a/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json b/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json index cad799f..a0149e2 100644 --- a/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json +++ b/mechanisms/m010-reputation-signal/schemas/m010_kpi.schema.json @@ -17,7 +17,8 @@ "const": "m010" }, "scope": { - "type": "string" + "type": "string", + "minLength": 1 }, "as_of": { "type": "string", @@ -68,6 +69,13 @@ "type": "object", "description": "Challenge workflow metrics (section 6.7 of SPEC.md). Optional in v0; required when challenge workflow is active.", "additionalProperties": false, + "required": [ + "challenges_filed", + "challenge_rate", + "avg_resolution_time_hours", + "challenge_success_rate", + "admin_resolution_timeout_rate" + ], "properties": { "challenges_filed": { "type": "integer", diff --git a/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json b/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json index 410e961..fe68460 100644 --- a/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json +++ b/mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json @@ -11,6 +11,11 @@ "timestamp" ], "properties": { + "signal_id": { + "type": "string", + "pattern": "^signal-[A-Za-z0-9._:-]+$", + "description": "Stable signal identifier for lifecycle and challenge linkage" + }, "subject_type": { "type": "string", "enum": [ @@ -22,10 +27,13 @@ ] }, "subject_id": { - "type": "string" + "type": "string", + "minLength": 1 }, "category": { - "type": "string" + "type": "string", + "pattern": "^[a-z][a-z0-9_]{1,63}$", + "description": "Machine-safe category key (snake_case)" }, "endorsement_level": { "type": "integer", @@ -33,7 +41,8 @@ "maximum": 5 }, "signaler_id": { - "type": "string" + "type": "string", + "minLength": 1 }, "status": { "type": "string", @@ -64,20 +73,26 @@ "properties": { "koi_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "ledger_refs": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } }, "web_links": { "type": "array", + "uniqueItems": true, "items": { - "type": "string" + "type": "string", + "minLength": 1 } } } diff --git a/scripts/verify.mjs b/scripts/verify.mjs index ae4dff5..dcb4f7a 100644 --- a/scripts/verify.mjs +++ b/scripts/verify.mjs @@ -56,6 +56,11 @@ if (!kpiSchema.required || !kpiSchema.required.includes("mechanism_id")) { console.error("KPI schema missing required fields."); process.exit(4); } +const challengeKpiRequired = kpiSchema.properties?.challenge_kpis?.required ?? []; +if (!challengeKpiRequired.includes("challenge_rate")) { + console.error("KPI schema missing required challenge KPI fields."); + process.exit(4); +} const signalSchema = readJson("mechanisms/m010-reputation-signal/schemas/m010_signal.schema.json"); const signalStatus = signalSchema.properties?.status?.enum ?? []; @@ -64,4 +69,11 @@ if (!signalStatus.includes("escalated")) { process.exit(5); } +const challengeSchema = readJson("mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.json"); +const challengeGuards = challengeSchema.allOf ?? []; +if (!Array.isArray(challengeGuards) || challengeGuards.length < 4) { + console.error("Challenge schema missing lifecycle guard clauses."); + process.exit(6); +} + console.log("agentic-tokenomics verify: PASS"); From 3cc1ffa98c21808c1c4a4ee459a7f8dbbfb7c1d2 Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:01:11 -0800 Subject: [PATCH 7/9] test(m010): expand challenge fixtures and vector coverage --- CHANGELOG.md | 1 + docs/MECHANISM_CONSUMERS.md | 2 + .../m010-reputation-signal/datasets/README.md | 4 + .../v0_challenge_edge_timing_sample.json | 144 +++++++++++++++ .../v0_challenge_escalated_sample.json | 174 ++++++++++++++++++ ...0_challenge_invalid_resolution_sample.json | 46 +++++ .../reference-impl/README.md | 5 + ...tor_v0_challenge_edge_timing.expected.json | 21 +++ ...ector_v0_challenge_escalated.expected.json | 21 +++ scripts/verify-m010-datasets.mjs | 31 +++- scripts/verify-m010-reference-impl.mjs | 12 ++ scripts/verify.mjs | 5 + 12 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json create mode 100644 mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json create mode 100644 mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json create mode 100644 mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json create mode 100644 mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json diff --git a/CHANGELOG.md b/CHANGELOG.md index c93841c..70aff08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Units 11–20: canonical schemas, mechanism index generator, consumers mapping, WG bulk pack, repo templates, and verification scripts. - m010 reference implementation vector verifier (`scripts/verify-m010-reference-impl.mjs`) with challenge replay coverage. - m010 dataset integrity verifier (`scripts/verify-m010-datasets.mjs`) covering challenge linkage, lifecycle consistency, and expected output coherence. +- Additional m010 challenge fixtures for escalated and edge-timing scenarios, plus an intentionally invalid fixture for negative verification coverage. ### Changed - m010 scoring now excludes non-contributing signal states when `status` is present (`active`/`resolved_valid` only contribute). diff --git a/docs/MECHANISM_CONSUMERS.md b/docs/MECHANISM_CONSUMERS.md index 7a408ca..e77d1bf 100644 --- a/docs/MECHANISM_CONSUMERS.md +++ b/docs/MECHANISM_CONSUMERS.md @@ -14,6 +14,8 @@ This document maps **mechanism IDs** to known **consumers** (agents, digests, sc **Datasets (deterministic)** - Replay fixtures: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json` - Challenge replay fixture: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json` +- Escalated challenge fixture: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json` +- Edge-timing challenge fixture: `mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json` **Known consumers** - Heartbeat character: `signal-agent` (regen-heartbeat) diff --git a/mechanisms/m010-reputation-signal/datasets/README.md b/mechanisms/m010-reputation-signal/datasets/README.md index 26febb1..9e2a5e0 100644 --- a/mechanisms/m010-reputation-signal/datasets/README.md +++ b/mechanisms/m010-reputation-signal/datasets/README.md @@ -6,6 +6,9 @@ These fixtures are **deterministic inputs** for generating non-zero m010 KPI out - `schema.json` — JSON schema for replay datasets - `fixtures/v0_sample.json` — sample events used by Heartbeat replay runner (all signals active) - `fixtures/v0_challenge_sample.json` — sample events exercising the challenge workflow (signals with varied statuses + challenge events with resolutions) +- `fixtures/v0_challenge_escalated_sample.json` — challenge replay including `escalated` status and timeout KPI behavior +- `fixtures/v0_challenge_edge_timing_sample.json` — challenge replay covering boundary timing (including zero-hour resolution) +- `fixtures/v0_challenge_invalid_resolution_sample.json` — intentionally invalid fixture for negative verification coverage ## How they are used A replay runner (e.g., in `regen-heartbeat`) can read a fixture file and compute: @@ -27,3 +30,4 @@ Dataset integrity is validated by `scripts/verify-m010-datasets.mjs`, including: - category consistency between challenge and targeted signal - resolution timestamp ordering and resolution presence rules by challenge status - consistency of `expected_outputs.contributing_signals` / `excluded_signals` with status-based contribution rules +- negative checks that intentionally invalid fixtures fail with the expected validation reason diff --git a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json new file mode 100644 index 0000000..ba0d074 --- /dev/null +++ b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json @@ -0,0 +1,144 @@ +{ + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_edge_timing_replay", + "as_of": "2026-02-15T12:00:00Z", + "description": "Fixture exercising zero/low resolution durations and fresh events at as_of boundary.", + "events": [ + { + "signal_id": "signal-edge-1", + "timestamp": "2026-02-15T11:00:00Z", + "subject_type": "Project", + "subject_id": "P-regen-200", + "category": "delivery_risk", + "endorsement_level": 3, + "signaler_id": "signaler_edge_a", + "status": "active", + "evidence": { + "koi_links": ["koi://note/edge-1"], + "ledger_refs": ["ledger://tx/4001"] + } + }, + { + "signal_id": "signal-edge-2", + "timestamp": "2026-02-15T12:00:00Z", + "subject_type": "CreditClass", + "subject_id": "C01-020", + "category": "registry_quality", + "endorsement_level": 4, + "signaler_id": "signaler_edge_b", + "status": "resolved_valid", + "evidence": { + "koi_links": ["koi://note/edge-2"], + "ledger_refs": ["ledger://tx/4002"] + } + }, + { + "signal_id": "signal-edge-3", + "timestamp": "2026-02-14T12:00:00Z", + "subject_type": "Verifier", + "subject_id": "V-EdgeMRV", + "category": "attestation_quality", + "endorsement_level": 5, + "signaler_id": "signaler_edge_c", + "status": "resolved_invalid", + "evidence": { + "koi_links": ["koi://note/edge-3"], + "ledger_refs": ["ledger://tx/4003"] + } + }, + { + "signal_id": "signal-edge-4", + "timestamp": "2026-02-13T12:00:00Z", + "subject_type": "Address", + "subject_id": "regen1edge...0004", + "category": "operator_trust", + "endorsement_level": 2, + "signaler_id": "signaler_edge_d", + "status": "withdrawn", + "evidence": { + "koi_links": ["koi://note/edge-4"], + "ledger_refs": ["ledger://tx/4004"] + } + } + ], + "challenges": [ + { + "challenge_id": "ch-edge-001", + "signal_id": "signal-edge-2", + "challenger_id": "challenger_edge_a", + "category": "registry_quality", + "rationale": "This challenge was immediately resolved at submission time to validate zero-hour resolution handling in deterministic KPI computation.", + "severity": "LOW", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-edge-1"], + "ledger_refs": ["ledger://tx/ch-edge-1"], + "web_links": [] + }, + "timestamp": "2026-02-15T12:00:00Z", + "status": "resolved_valid", + "resolution": { + "resolved_by": "admin_edge", + "resolved_at": "2026-02-15T12:00:00Z", + "outcome": "VALID", + "rationale": "Instant resolution for deterministic boundary test." + } + }, + { + "challenge_id": "ch-edge-002", + "signal_id": "signal-edge-3", + "challenger_id": "challenger_edge_b", + "category": "attestation_quality", + "rationale": "Evidence review completed within thirty minutes and determined the underlying verifier claim was invalid for this reporting period.", + "severity": "HIGH", + "requested_outcome": "INVALIDATE", + "evidence": { + "koi_links": ["koi://note/ch-edge-2"], + "ledger_refs": ["ledger://tx/ch-edge-2"], + "web_links": [] + }, + "timestamp": "2026-02-14T11:30:00Z", + "status": "resolved_invalid", + "resolution": { + "resolved_by": "admin_edge", + "resolved_at": "2026-02-14T12:00:00Z", + "outcome": "INVALID", + "rationale": "Short-window review confirmed evidence inconsistency." + } + }, + { + "challenge_id": "ch-edge-003", + "signal_id": "signal-edge-4", + "challenger_id": "challenger_edge_c", + "category": "operator_trust", + "rationale": "Pending challenge remains open for withdrawn signal to verify unresolved challenge handling and exclusion behavior across score and KPI outputs.", + "severity": "MEDIUM", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-edge-3"], + "ledger_refs": ["ledger://tx/ch-edge-3"], + "web_links": [] + }, + "timestamp": "2026-02-14T10:00:00Z", + "status": "pending" + } + ], + "expected_outputs": { + "description": "Edge timing fixture with zero-hour resolution and at-as_of signal timestamps.", + "contributing_signals": [ + "signal-edge-1 (active, endorsement=3)", + "signal-edge-2 (resolved_valid, endorsement=4)" + ], + "excluded_signals": [ + "signal-edge-3 (resolved_invalid)", + "signal-edge-4 (withdrawn)" + ], + "challenge_kpis": { + "challenges_filed": 3, + "challenge_rate": 0.75, + "challenge_success_rate": 0.5, + "avg_resolution_time_hours": 0.25, + "admin_resolution_timeout_rate": 0 + } + } +} diff --git a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json new file mode 100644 index 0000000..3b1f8c5 --- /dev/null +++ b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json @@ -0,0 +1,174 @@ +{ + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_escalated_replay", + "as_of": "2026-02-12T12:00:00Z", + "description": "Fixture exercising escalated challenges and mixed lifecycle statuses.", + "events": [ + { + "signal_id": "signal-esc-1", + "timestamp": "2026-02-10T12:00:00Z", + "subject_type": "Project", + "subject_id": "P-regen-100", + "category": "delivery_risk", + "endorsement_level": 5, + "signaler_id": "signaler_alpha", + "status": "active", + "evidence": { + "koi_links": ["koi://note/esc-1"], + "ledger_refs": ["ledger://tx/3001"] + } + }, + { + "signal_id": "signal-esc-2", + "timestamp": "2026-02-10T06:00:00Z", + "subject_type": "CreditClass", + "subject_id": "C01-010", + "category": "registry_quality", + "endorsement_level": 4, + "signaler_id": "signaler_beta", + "status": "escalated", + "evidence": { + "koi_links": ["koi://note/esc-2"], + "ledger_refs": ["ledger://tx/3002"] + } + }, + { + "signal_id": "signal-esc-3", + "timestamp": "2026-02-09T18:00:00Z", + "subject_type": "Verifier", + "subject_id": "V-GammaMRV", + "category": "attestation_quality", + "endorsement_level": 3, + "signaler_id": "signaler_gamma", + "status": "resolved_valid", + "evidence": { + "koi_links": ["koi://note/esc-3"], + "ledger_refs": ["ledger://tx/3003"] + } + }, + { + "signal_id": "signal-esc-4", + "timestamp": "2026-02-09T12:00:00Z", + "subject_type": "Methodology", + "subject_id": "METH-Biochar-v1", + "category": "method_rigor", + "endorsement_level": 2, + "signaler_id": "signaler_delta", + "status": "challenged", + "evidence": { + "koi_links": ["koi://note/esc-4"], + "ledger_refs": ["ledger://tx/3004"] + } + }, + { + "signal_id": "signal-esc-5", + "timestamp": "2026-02-09T00:00:00Z", + "subject_type": "Address", + "subject_id": "regen1esc...0005", + "category": "operator_trust", + "endorsement_level": 5, + "signaler_id": "signaler_epsilon", + "status": "invalidated", + "evidence": { + "koi_links": ["koi://note/esc-5"], + "ledger_refs": ["ledger://tx/3005"] + } + }, + { + "signal_id": "signal-esc-6", + "timestamp": "2026-02-08T12:00:00Z", + "subject_type": "Project", + "subject_id": "P-regen-101", + "category": "delivery_risk", + "endorsement_level": 1, + "signaler_id": "signaler_zeta", + "status": "active", + "evidence": { + "koi_links": ["koi://note/esc-6"], + "ledger_refs": ["ledger://tx/3006"] + } + } + ], + "challenges": [ + { + "challenge_id": "ch-esc-001", + "signal_id": "signal-esc-2", + "challenger_id": "challenger_alpha", + "category": "registry_quality", + "rationale": "Challenge escalated due to unresolved concerns on class quality scoring after missing the resolver deadline and requiring governance review.", + "severity": "MEDIUM", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-esc-1"], + "ledger_refs": ["ledger://tx/ch-esc-1"], + "web_links": [] + }, + "timestamp": "2026-02-10T08:00:00Z", + "status": "escalated" + }, + { + "challenge_id": "ch-esc-002", + "signal_id": "signal-esc-3", + "challenger_id": "challenger_beta", + "category": "attestation_quality", + "rationale": "Initial concern on verifier reliability was reviewed and rejected after additional field evidence confirmed robust attestation process.", + "severity": "LOW", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-esc-2"], + "ledger_refs": ["ledger://tx/ch-esc-2"], + "web_links": [] + }, + "timestamp": "2026-02-09T08:00:00Z", + "status": "resolved_valid", + "resolution": { + "resolved_by": "admin_2", + "resolved_at": "2026-02-11T08:00:00Z", + "outcome": "VALID", + "rationale": "Evidence quality was sufficient and no integrity violation was confirmed." + } + }, + { + "challenge_id": "ch-esc-003", + "signal_id": "signal-esc-4", + "challenger_id": "challenger_gamma", + "category": "method_rigor", + "rationale": "Challenge found missing baseline methodology controls and contradictory implementation references that invalidate the original endorsement quality claim.", + "severity": "HIGH", + "requested_outcome": "INVALIDATE", + "evidence": { + "koi_links": ["koi://note/ch-esc-3"], + "ledger_refs": ["ledger://tx/ch-esc-3"], + "web_links": [] + }, + "timestamp": "2026-02-09T10:00:00Z", + "status": "resolved_invalid", + "resolution": { + "resolved_by": "admin_2", + "resolved_at": "2026-02-10T10:00:00Z", + "outcome": "INVALID", + "rationale": "Review confirmed method controls were absent; endorsement removed." + } + } + ], + "expected_outputs": { + "description": "Only active/resolved_valid signals contribute; escalated and challenged are paused.", + "contributing_signals": [ + "signal-esc-1 (active, endorsement=5)", + "signal-esc-3 (resolved_valid, endorsement=3)", + "signal-esc-6 (active, endorsement=1)" + ], + "excluded_signals": [ + "signal-esc-2 (escalated)", + "signal-esc-4 (challenged)", + "signal-esc-5 (invalidated)" + ], + "challenge_kpis": { + "challenges_filed": 3, + "challenge_rate": 0.5, + "challenge_success_rate": 0.5, + "avg_resolution_time_hours": 36.0, + "admin_resolution_timeout_rate": 0.3333 + } + } +} diff --git a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json new file mode 100644 index 0000000..d6ae222 --- /dev/null +++ b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json @@ -0,0 +1,46 @@ +{ + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_invalid_resolution_replay", + "as_of": "2026-02-16T12:00:00Z", + "description": "Intentionally invalid fixture: pending challenge incorrectly includes a resolution object.", + "events": [ + { + "signal_id": "signal-invalid-1", + "timestamp": "2026-02-15T12:00:00Z", + "subject_type": "Project", + "subject_id": "P-regen-300", + "category": "delivery_risk", + "endorsement_level": 3, + "signaler_id": "signaler_invalid_a", + "status": "challenged", + "evidence": { + "koi_links": ["koi://note/invalid-1"], + "ledger_refs": ["ledger://tx/5001"] + } + } + ], + "challenges": [ + { + "challenge_id": "ch-invalid-001", + "signal_id": "signal-invalid-1", + "challenger_id": "challenger_invalid_a", + "category": "delivery_risk", + "rationale": "This intentionally invalid fixture keeps status pending while injecting a resolution to validate negative test coverage in verification scripts.", + "severity": "LOW", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-invalid-1"], + "ledger_refs": ["ledger://tx/ch-invalid-1"], + "web_links": [] + }, + "timestamp": "2026-02-15T13:00:00Z", + "status": "pending", + "resolution": { + "resolved_by": "admin_invalid", + "resolved_at": "2026-02-15T14:00:00Z", + "outcome": "VALID", + "rationale": "Resolution should not exist when challenge status is pending." + } + } + ] +} diff --git a/mechanisms/m010-reputation-signal/reference-impl/README.md b/mechanisms/m010-reputation-signal/reference-impl/README.md index 0748d73..4c7078e 100644 --- a/mechanisms/m010-reputation-signal/reference-impl/README.md +++ b/mechanisms/m010-reputation-signal/reference-impl/README.md @@ -45,3 +45,8 @@ When `challenges[]` are provided to `computeM010KPI`, output includes `challenge ### Deterministic vectors Reference vectors live in `test_vectors/` and are validated by: - `node scripts/verify-m010-reference-impl.mjs` +- Coverage includes: + - baseline replay (`v0_sample`) + - mixed challenge statuses (`v0_challenge_sample`) + - escalated challenge path (`v0_challenge_escalated_sample`) + - edge timing path (`v0_challenge_edge_timing_sample`) diff --git a/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json new file mode 100644 index 0000000..c0c47ca --- /dev/null +++ b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json @@ -0,0 +1,21 @@ +{ + "kpi": { + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_edge_timing_replay", + "as_of": "2026-02-15T12:00:00Z", + "signals_emitted": 4, + "subjects_touched": 4, + "evidence_coverage_rate": 1, + "median_event_latency_hours": 12.5, + "challenge_kpis": { + "challenges_filed": 3, + "challenge_rate": 0.75, + "avg_resolution_time_hours": 0.25, + "challenge_success_rate": 0.5, + "admin_resolution_timeout_rate": 0 + } + }, + "score": { + "reputation_score_0_1": 0.7001 + } +} diff --git a/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json new file mode 100644 index 0000000..b53ef52 --- /dev/null +++ b/mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json @@ -0,0 +1,21 @@ +{ + "kpi": { + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_escalated_replay", + "as_of": "2026-02-12T12:00:00Z", + "signals_emitted": 6, + "subjects_touched": 6, + "evidence_coverage_rate": 1, + "median_event_latency_hours": 69, + "challenge_kpis": { + "challenges_filed": 3, + "challenge_rate": 0.5, + "avg_resolution_time_hours": 36, + "challenge_success_rate": 0.5, + "admin_resolution_timeout_rate": 0.3333 + } + }, + "score": { + "reputation_score_0_1": 0.6131 + } +} diff --git a/scripts/verify-m010-datasets.mjs b/scripts/verify-m010-datasets.mjs index 4c19a18..99126d9 100644 --- a/scripts/verify-m010-datasets.mjs +++ b/scripts/verify-m010-datasets.mjs @@ -66,9 +66,31 @@ function verifyChallengeFixture(rel) { } } +function assertFails(fn, expectedPattern, label) { + let failed = false; + try { + fn(); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + assert(expectedPattern.test(message), `${label}: expected error matching ${expectedPattern}, got '${message}'`); + failed = true; + } + assert(failed, `${label}: expected verification to fail`); +} + function main() { const sampleRel = "mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json"; - const challengeRel = "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json"; + const validChallengeFixtures = [ + "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json", + "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json", + "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json" + ]; + const invalidChallengeFixtures = [ + { + rel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json", + pattern: /must not include resolution/ + } + ]; const sample = readJson(sampleRel); assert(sample.events?.length > 0, `${sampleRel}: events must not be empty`); @@ -76,7 +98,12 @@ function main() { assertIsoDate(e.timestamp, `${sampleRel}: event[${idx}] timestamp`); } - verifyChallengeFixture(challengeRel); + for (const rel of validChallengeFixtures) { + verifyChallengeFixture(rel); + } + for (const { rel, pattern } of invalidChallengeFixtures) { + assertFails(() => verifyChallengeFixture(rel), pattern, rel); + } console.log("m010 dataset integrity: PASS"); } diff --git a/scripts/verify-m010-reference-impl.mjs b/scripts/verify-m010-reference-impl.mjs index aa86c56..d5e83a7 100644 --- a/scripts/verify-m010-reference-impl.mjs +++ b/scripts/verify-m010-reference-impl.mjs @@ -57,6 +57,18 @@ async function main() { inputRel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json", expectedRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json", assertFixtureKpis: true + }, + { + name: "v0_challenge_escalated", + inputRel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json", + expectedRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json", + assertFixtureKpis: true + }, + { + name: "v0_challenge_edge_timing", + inputRel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json", + expectedRel: "mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json", + assertFixtureKpis: true } ]; diff --git a/scripts/verify.mjs b/scripts/verify.mjs index dcb4f7a..2a85337 100644 --- a/scripts/verify.mjs +++ b/scripts/verify.mjs @@ -37,11 +37,16 @@ requireFile("mechanisms/m010-reputation-signal/schemas/m010_challenge.schema.jso requireFile("mechanisms/m010-reputation-signal/datasets/schema.json"); requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_sample.json"); requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sample.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json"); requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js"); requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_score.js"); requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.input.json"); requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.expected.json"); requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge.expected.json"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_escalated.expected.json"); +requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_challenge_edge_timing.expected.json"); requireFile("scripts/verify-m010-reference-impl.mjs"); requireFile("scripts/verify-m010-datasets.mjs"); From 91607789d38da42b8effeaab1c0ea128bdebb849 Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:04:43 -0800 Subject: [PATCH 8/9] test(m010): harden verifier invariants and negative fixtures --- CHANGELOG.md | 1 + .../m010-reputation-signal/datasets/README.md | 1 + .../v0_challenge_invalid_outcome_sample.json | 46 ++++++++++ scripts/verify-m010-datasets.mjs | 75 ++++++++++++++- scripts/verify-m010-reference-impl.mjs | 92 +++++++++++++++++++ scripts/verify.mjs | 1 + 6 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 70aff08..fe1985d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - m010 reference implementation vector verifier (`scripts/verify-m010-reference-impl.mjs`) with challenge replay coverage. - m010 dataset integrity verifier (`scripts/verify-m010-datasets.mjs`) covering challenge linkage, lifecycle consistency, and expected output coherence. - Additional m010 challenge fixtures for escalated and edge-timing scenarios, plus an intentionally invalid fixture for negative verification coverage. +- Additional verifier invariants for m010 (status/outcome consistency, duplicate detection, evidence minimums, and derived KPI coherence checks). ### Changed - m010 scoring now excludes non-contributing signal states when `status` is present (`active`/`resolved_valid` only contribute). diff --git a/mechanisms/m010-reputation-signal/datasets/README.md b/mechanisms/m010-reputation-signal/datasets/README.md index 9e2a5e0..80470f9 100644 --- a/mechanisms/m010-reputation-signal/datasets/README.md +++ b/mechanisms/m010-reputation-signal/datasets/README.md @@ -9,6 +9,7 @@ These fixtures are **deterministic inputs** for generating non-zero m010 KPI out - `fixtures/v0_challenge_escalated_sample.json` — challenge replay including `escalated` status and timeout KPI behavior - `fixtures/v0_challenge_edge_timing_sample.json` — challenge replay covering boundary timing (including zero-hour resolution) - `fixtures/v0_challenge_invalid_resolution_sample.json` — intentionally invalid fixture for negative verification coverage +- `fixtures/v0_challenge_invalid_outcome_sample.json` — intentionally invalid fixture with status/outcome mismatch for negative verification coverage ## How they are used A replay runner (e.g., in `regen-heartbeat`) can read a fixture file and compute: diff --git a/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json new file mode 100644 index 0000000..edb5f4a --- /dev/null +++ b/mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json @@ -0,0 +1,46 @@ +{ + "mechanism_id": "m010", + "scope": "v0_advisory_challenge_invalid_outcome_replay", + "as_of": "2026-02-16T12:00:00Z", + "description": "Intentionally invalid fixture: resolved_valid challenge has INVALID outcome.", + "events": [ + { + "signal_id": "signal-invalid-outcome-1", + "timestamp": "2026-02-15T12:00:00Z", + "subject_type": "Project", + "subject_id": "P-regen-301", + "category": "delivery_risk", + "endorsement_level": 4, + "signaler_id": "signaler_invalid_b", + "status": "resolved_valid", + "evidence": { + "koi_links": ["koi://note/invalid-outcome-1"], + "ledger_refs": ["ledger://tx/5002"] + } + } + ], + "challenges": [ + { + "challenge_id": "ch-invalid-002", + "signal_id": "signal-invalid-outcome-1", + "challenger_id": "challenger_invalid_b", + "category": "delivery_risk", + "rationale": "This intentionally invalid fixture sets status resolved_valid while setting resolution outcome INVALID to enforce strict status outcome consistency checks.", + "severity": "LOW", + "requested_outcome": "FLAG_FOR_REVIEW", + "evidence": { + "koi_links": ["koi://note/ch-invalid-2"], + "ledger_refs": ["ledger://tx/ch-invalid-2"], + "web_links": [] + }, + "timestamp": "2026-02-15T13:00:00Z", + "status": "resolved_valid", + "resolution": { + "resolved_by": "admin_invalid", + "resolved_at": "2026-02-15T14:00:00Z", + "outcome": "INVALID", + "rationale": "Outcome intentionally mismatched for negative verifier coverage." + } + } + ] +} diff --git a/scripts/verify-m010-datasets.mjs b/scripts/verify-m010-datasets.mjs index 99126d9..841e528 100644 --- a/scripts/verify-m010-datasets.mjs +++ b/scripts/verify-m010-datasets.mjs @@ -19,12 +19,47 @@ function parseSignalIdToken(entry) { return m ? m[1] : null; } +function mean(nums) { + if (!nums.length) return null; + return nums.reduce((sum, n) => sum + n, 0) / nums.length; +} + +function hasDuplicates(values) { + return new Set(values).size !== values.length; +} + +function deriveChallengeKpis(fixture) { + const events = fixture.events ?? []; + const challenges = fixture.challenges ?? []; + + const resolvedValid = challenges.filter((c) => c.status === "resolved_valid").length; + const resolvedInvalid = challenges.filter((c) => c.status === "resolved_invalid").length; + const resolvedTotal = resolvedValid + resolvedInvalid; + const escalated = challenges.filter((c) => c.status === "escalated").length; + + const resolutionHours = challenges + .filter((c) => c.resolution?.resolved_at) + .map((c) => (Date.parse(c.resolution.resolved_at) - Date.parse(c.timestamp)) / (1000 * 60 * 60)) + .filter((n) => Number.isFinite(n) && n >= 0); + + const avgResolution = mean(resolutionHours); + + return { + challenges_filed: challenges.length, + challenge_rate: events.length ? Number((challenges.length / events.length).toFixed(4)) : 0.0, + challenge_success_rate: resolvedTotal ? Number((resolvedInvalid / resolvedTotal).toFixed(4)) : null, + avg_resolution_time_hours: avgResolution === null ? null : Number(avgResolution.toFixed(2)), + admin_resolution_timeout_rate: challenges.length ? Number((escalated / challenges.length).toFixed(4)) : null + }; +} + function verifyChallengeFixture(rel) { const fixture = readJson(rel); const events = fixture.events ?? []; const challenges = fixture.challenges ?? []; assert(events.length > 0, `${rel}: events must not be empty`); + assert(challenges.length > 0, `${rel}: challenge fixtures must include challenges`); const signalIdToEvent = new Map(); for (const e of events) { @@ -34,11 +69,17 @@ function verifyChallengeFixture(rel) { signalIdToEvent.set(e.signal_id, e); } + const challengeIds = challenges.map((c) => c.challenge_id); + assert(!hasDuplicates(challengeIds), `${rel}: duplicate challenge_id detected`); + for (const c of challenges) { + assert(typeof c.challenge_id === "string" && c.challenge_id.length > 0, `${rel}: challenge_id must be a non-empty string`); assertIsoDate(c.timestamp, `${rel}: challenge ${c.challenge_id} timestamp`); const target = signalIdToEvent.get(c.signal_id); assert(target, `${rel}: challenge '${c.challenge_id}' references unknown signal_id '${c.signal_id}'`); assert(c.category === target.category, `${rel}: challenge '${c.challenge_id}' category mismatch (challenge=${c.category}, signal=${target.category})`); + const minEvidence = (c.evidence?.koi_links?.length ?? 0) + (c.evidence?.ledger_refs?.length ?? 0); + assert(minEvidence > 0, `${rel}: challenge '${c.challenge_id}' must include at least one koi_links or ledger_refs evidence entry`); const resolved = c.status === "resolved_valid" || c.status === "resolved_invalid"; if (resolved) { @@ -46,14 +87,29 @@ function verifyChallengeFixture(rel) { assertIsoDate(c.resolution.resolved_at, `${rel}: challenge ${c.challenge_id} resolution.resolved_at`); const deltaHours = (Date.parse(c.resolution.resolved_at) - Date.parse(c.timestamp)) / (1000 * 60 * 60); assert(deltaHours >= 0, `${rel}: challenge '${c.challenge_id}' resolved before it was filed`); + const expectedOutcome = c.status === "resolved_valid" ? "VALID" : "INVALID"; + assert( + c.resolution.outcome === expectedOutcome, + `${rel}: challenge '${c.challenge_id}' status '${c.status}' requires resolution.outcome '${expectedOutcome}', got '${c.resolution.outcome}'` + ); } else { assert(!c.resolution, `${rel}: unresolved challenge '${c.challenge_id}' must not include resolution`); } } const contributingStatuses = new Set(["active", "resolved_valid"]); - const expectedContrib = new Set((fixture.expected_outputs?.contributing_signals ?? []).map(parseSignalIdToken).filter(Boolean)); - const expectedExcluded = new Set((fixture.expected_outputs?.excluded_signals ?? []).map(parseSignalIdToken).filter(Boolean)); + const rawExpectedContrib = fixture.expected_outputs?.contributing_signals ?? []; + const rawExpectedExcluded = fixture.expected_outputs?.excluded_signals ?? []; + + const contribTokens = rawExpectedContrib.map(parseSignalIdToken); + const excludedTokens = rawExpectedExcluded.map(parseSignalIdToken); + assert(!contribTokens.includes(null), `${rel}: expected_outputs.contributing_signals entries must be ' (...)' tokens`); + assert(!excludedTokens.includes(null), `${rel}: expected_outputs.excluded_signals entries must be ' (...)' tokens`); + assert(!hasDuplicates(contribTokens), `${rel}: expected_outputs.contributing_signals contains duplicates`); + assert(!hasDuplicates(excludedTokens), `${rel}: expected_outputs.excluded_signals contains duplicates`); + + const expectedContrib = new Set(contribTokens); + const expectedExcluded = new Set(excludedTokens); const derivedContrib = new Set(events.filter((e) => contributingStatuses.has(e.status)).map((e) => e.signal_id)); const derivedExcluded = new Set(events.filter((e) => !contributingStatuses.has(e.status)).map((e) => e.signal_id)); @@ -64,6 +120,17 @@ function verifyChallengeFixture(rel) { for (const id of expectedContrib) { assert(!expectedExcluded.has(id), `${rel}: signal '${id}' appears in both contributing and excluded lists`); } + + if (fixture.expected_outputs?.challenge_kpis) { + const derived = deriveChallengeKpis(fixture); + for (const [key, expectedValue] of Object.entries(fixture.expected_outputs.challenge_kpis)) { + assert.deepStrictEqual( + derived[key], + expectedValue, + `${rel}: expected_outputs.challenge_kpis.${key} mismatch (expected ${JSON.stringify(expectedValue)}, derived ${JSON.stringify(derived[key])})` + ); + } + } } function assertFails(fn, expectedPattern, label) { @@ -89,6 +156,10 @@ function main() { { rel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json", pattern: /must not include resolution/ + }, + { + rel: "mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json", + pattern: /requires resolution\.outcome/ } ]; diff --git a/scripts/verify-m010-reference-impl.mjs b/scripts/verify-m010-reference-impl.mjs index d5e83a7..0ec26bd 100644 --- a/scripts/verify-m010-reference-impl.mjs +++ b/scripts/verify-m010-reference-impl.mjs @@ -27,6 +27,97 @@ function assertSubset(actual, expected, label) { } } +function mean(nums) { + if (!nums.length) return null; + return nums.reduce((sum, n) => sum + n, 0) / nums.length; +} + +function assertFiniteNumberInRange(value, min, max, label) { + assert(typeof value === "number" && Number.isFinite(value), `${label}: expected finite number, got ${JSON.stringify(value)}`); + assert(value >= min && value <= max, `${label}: expected in range [${min}, ${max}], got ${value}`); +} + +function deriveChallengeKpis(input) { + const chs = input.challenges ?? []; + const evs = input.events ?? []; + + const resolvedValid = chs.filter((c) => c.status === "resolved_valid").length; + const resolvedInvalid = chs.filter((c) => c.status === "resolved_invalid").length; + const resolvedTotal = resolvedValid + resolvedInvalid; + const escalated = chs.filter((c) => c.status === "escalated").length; + + const resolutionHours = chs + .filter((c) => c.resolution?.resolved_at) + .map((c) => (Date.parse(c.resolution.resolved_at) - Date.parse(c.timestamp)) / (1000 * 60 * 60)) + .filter((n) => Number.isFinite(n) && n >= 0); + + const avgResolution = mean(resolutionHours); + + return { + challenges_filed: chs.length, + challenge_rate: evs.length ? Number((chs.length / evs.length).toFixed(4)) : 0.0, + avg_resolution_time_hours: avgResolution === null ? null : Number(avgResolution.toFixed(2)), + challenge_success_rate: resolvedTotal ? Number((resolvedInvalid / resolvedTotal).toFixed(4)) : null, + admin_resolution_timeout_rate: Number((escalated / chs.length).toFixed(4)) + }; +} + +function assertVectorInvariants(vectorName, input, actual) { + const evs = input.events ?? []; + const chs = input.challenges ?? []; + const kpi = actual.kpi ?? {}; + const score = actual.score ?? {}; + + assert.strictEqual(kpi.mechanism_id, "m010", `${vectorName}: KPI mechanism_id must be 'm010'`); + assert.strictEqual(kpi.as_of, input.as_of, `${vectorName}: KPI as_of must match input`); + assert.strictEqual(kpi.signals_emitted, evs.length, `${vectorName}: KPI signals_emitted must equal input event count`); + + const uniqueSubjects = new Set(evs.map((e) => `${e.subject_type}:${e.subject_id}`)).size; + assert.strictEqual(kpi.subjects_touched, uniqueSubjects, `${vectorName}: KPI subjects_touched must equal unique subjects in input`); + assertFiniteNumberInRange(kpi.evidence_coverage_rate, 0, 1, `${vectorName}: KPI evidence_coverage_rate`); + + if (kpi.median_event_latency_hours !== null) { + assertFiniteNumberInRange(kpi.median_event_latency_hours, 0, Number.MAX_SAFE_INTEGER, `${vectorName}: KPI median_event_latency_hours`); + } + + assertFiniteNumberInRange(score.reputation_score_0_1, 0, 1, `${vectorName}: score.reputation_score_0_1`); + + if (chs.length > 0) { + assert(kpi.challenge_kpis && typeof kpi.challenge_kpis === "object", `${vectorName}: challenge_kpis must exist when challenges are provided`); + + const requiredKeys = [ + "challenges_filed", + "challenge_rate", + "avg_resolution_time_hours", + "challenge_success_rate", + "admin_resolution_timeout_rate" + ]; + for (const key of requiredKeys) { + assert(Object.prototype.hasOwnProperty.call(kpi.challenge_kpis, key), `${vectorName}: missing challenge_kpis.${key}`); + } + + assert.strictEqual(kpi.challenge_kpis.challenges_filed, chs.length, `${vectorName}: challenge_kpis.challenges_filed must equal challenge count`); + assertFiniteNumberInRange(kpi.challenge_kpis.challenge_rate, 0, 1, `${vectorName}: challenge_kpis.challenge_rate`); + assertFiniteNumberInRange(kpi.challenge_kpis.admin_resolution_timeout_rate, 0, 1, `${vectorName}: challenge_kpis.admin_resolution_timeout_rate`); + + if (kpi.challenge_kpis.challenge_success_rate !== null) { + assertFiniteNumberInRange(kpi.challenge_kpis.challenge_success_rate, 0, 1, `${vectorName}: challenge_kpis.challenge_success_rate`); + } + if (kpi.challenge_kpis.avg_resolution_time_hours !== null) { + assertFiniteNumberInRange(kpi.challenge_kpis.avg_resolution_time_hours, 0, Number.MAX_SAFE_INTEGER, `${vectorName}: challenge_kpis.avg_resolution_time_hours`); + } + + const derived = deriveChallengeKpis(input); + assert.deepStrictEqual( + kpi.challenge_kpis, + derived, + `${vectorName}: challenge_kpis mismatch with values derived from input fixture` + ); + } else { + assert(!Object.prototype.hasOwnProperty.call(kpi, "challenge_kpis"), `${vectorName}: challenge_kpis should be omitted when no challenges are provided`); + } +} + function computeOutputs(input, computeM010KPI, computeM010Score) { return { kpi: computeM010KPI({ @@ -76,6 +167,7 @@ async function main() { const input = readJson(vector.inputRel); const expected = readJson(vector.expectedRel); const actual = computeOutputs(input, computeM010KPI, computeM010Score); + assertVectorInvariants(vector.name, input, actual); try { assert.deepStrictEqual(actual, expected); diff --git a/scripts/verify.mjs b/scripts/verify.mjs index 2a85337..7e22473 100644 --- a/scripts/verify.mjs +++ b/scripts/verify.mjs @@ -40,6 +40,7 @@ requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_sa requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_escalated_sample.json"); requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_edge_timing_sample.json"); requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_resolution_sample.json"); +requireFile("mechanisms/m010-reputation-signal/datasets/fixtures/v0_challenge_invalid_outcome_sample.json"); requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_kpi.js"); requireFile("mechanisms/m010-reputation-signal/reference-impl/m010_score.js"); requireFile("mechanisms/m010-reputation-signal/reference-impl/test_vectors/vector_v0_sample.input.json"); From cf3aaf7b224a9666b19d8d10d5354837035a5bb0 Mon Sep 17 00:00:00 2001 From: brawlaphant <35781613+brawlaphant@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:48:39 -0800 Subject: [PATCH 9/9] docs(m010): define consumer contract and breaking-change policy --- docs/DEPENDENCIES.md | 8 ++++++++ docs/MECHANISM_CONSUMERS.md | 17 +++++++++++++++++ mechanisms/m010-reputation-signal/README.md | 14 +++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/docs/DEPENDENCIES.md b/docs/DEPENDENCIES.md index 176dcca..e591032 100644 --- a/docs/DEPENDENCIES.md +++ b/docs/DEPENDENCIES.md @@ -22,3 +22,11 @@ This repo is a coordination + specification registry for “agentic tokenomics - High-level dependency notes: `docs/DEPENDENCIES.md` (this file) - Mechanism-specific dependency notes: `mechanisms//README.md` → “External inputs” + +## Downstream coordination for schema/output changes + +When changes may affect consumers in other repos (for example `regen-heartbeat`), include in the PR: +- exact output key contract (added/changed/removed keys) +- whether change is `non-breaking` or `breaking` +- at least one deterministic fixture/vector update proving the new behavior +- rollback note (what to revert if downstream parsing fails) diff --git a/docs/MECHANISM_CONSUMERS.md b/docs/MECHANISM_CONSUMERS.md index e77d1bf..e1ecb36 100644 --- a/docs/MECHANISM_CONSUMERS.md +++ b/docs/MECHANISM_CONSUMERS.md @@ -22,3 +22,20 @@ This document maps **mechanism IDs** to known **consumers** (agents, digests, sc - Heartbeat replay runner: `scripts/replay-m010.mjs` (regen-heartbeat) - Heartbeat stub runner: `scripts/stub-run-signal-agent.mjs` (regen-heartbeat) - Heartbeat validator: `scripts/validate-signal-agent.mjs` (regen-heartbeat) + +**Consumer contract (current)** +- Score output key: `score.reputation_score_0_1` (normalized `0..1` in v0 advisory). +- Contributing signal statuses: `active`, `resolved_valid`. +- Excluded signal statuses: `submitted`, `challenged`, `escalated`, `resolved_invalid`, `withdrawn`, `invalidated`. +- KPI denominator convention: `challenge_rate = challenges_filed / signals_emitted`. + +**Compatibility policy** +- Non-breaking: + - adding optional fields + - adding new deterministic fixtures or vectors + - tightening documentation without changing output keys +- Potentially breaking (coordinate downstream first): + - renaming/removing output keys + - changing score range/key semantics + - changing KPI denominator semantics + - changing lifecycle contribution rules diff --git a/mechanisms/m010-reputation-signal/README.md b/mechanisms/m010-reputation-signal/README.md index 022bff4..4f6afa7 100644 --- a/mechanisms/m010-reputation-signal/README.md +++ b/mechanisms/m010-reputation-signal/README.md @@ -3,7 +3,7 @@ m010 defines a reputation / legitimacy signal for Regen ecosystem subjects (e.g., credit classes, projects, verifiers, methodologies, addresses) based on **stake-weighted endorsements** with **time decay**. ## What it outputs -- A normalized **reputation score** (0–1000) per `(subject_type, subject_id, category)`. +- A normalized **reputation score** (`reputation_score_0_1`, range `0–1`) per `(subject_type, subject_id, category)` in v0 advisory mode. - A queryable history of submitted signals (endorsements), including state transitions (submitted, active, challenged, resolved_valid, resolved_invalid, withdrawn, invalidated). - Challenge workflow events with evidence, resolution, and rationale. @@ -31,3 +31,15 @@ Canonical JSON schemas for m010 outputs live in `schemas/`. ## Reference implementation checks - Deterministic vectors are validated by `scripts/verify-m010-reference-impl.mjs` - `challenge_rate` is computed as `challenges_filed / signals_emitted` for replay-period KPI reporting + +## Consumer compatibility notes +- v0 score contract: consumers should read `score.reputation_score_0_1` (`0..1`). +- Status contribution contract: only `active` and `resolved_valid` contribute to score. +- Challenge KPI contract: when `challenges[]` are present, `challenge_kpis` includes: + - `challenges_filed` + - `challenge_rate` + - `avg_resolution_time_hours` + - `challenge_success_rate` + - `admin_resolution_timeout_rate` + +Breaking changes for downstream consumers include renaming/removing these keys, changing denominator semantics, or changing lifecycle contribution rules.