From 575f2277d8699c3ad38522d5268ad07735c999a2 Mon Sep 17 00:00:00 2001
From: Rick Crawford <rick.crawford@gmail.com>
Date: Wed, 17 Jun 2026 22:05:33 -0700
Subject: [PATCH] policy: `mcptest policy simulate` OSS governance gate
 (WOR-1421)

A deterministic, offline policy simulator: it reads a small declarative policy
file plus saved mcptest artifacts (run report, judge certification, conformance
report, security report, model-compat diff, evidence artifact), extracts named
facts from each, evaluates the policy rules against them, applies expiring
waivers, and prints a pass/fail/warn verdict with a deterministic exit code. No
network, no live run, so a team can gate a release locally before adopting
heavier enterprise policy infrastructure.

- mcptest-core policy.rs (pure): PolicyFile/PolicyRule (one comparator each:
  max/min/equals/one_of; severity fail|warn) + PolicyWaiver (rule/owner/reason/
  expiry/issue), six facts_from_* extractors over serde_json::Value, and
  evaluate() producing a PolicyOutcome. A failing rule with an active waiver is
  Waived; an expired waiver fails closed; a missing fact is Unevaluated and
  fails closed (a missing input never silently passes).
- mcptest policy simulate (cli/args/policy.rs + handlers/policy.rs): reads the
  policy YAML and whichever --artifact files are supplied, evaluates, renders
  pretty/json. Dry-run always exits 0; --gate exits 1 on fail.
- examples/policy/policy.yml worked example (skipped by the examples gate, not a
  run suite), docs/policy-simulator.md with the full fact catalog, cli-reference
  + command-groups + help template, llms regen.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 crates/mcptest-core/src/lib.rs              |   4 +
 crates/mcptest-core/src/policy.rs           | 827 ++++++++++++++++++++
 crates/mcptest/src/cli/args/mod.rs          |   2 +
 crates/mcptest/src/cli/args/policy.rs       |  62 ++
 crates/mcptest/src/cli/handlers/mod.rs      |   2 +
 crates/mcptest/src/cli/handlers/policy.rs   | 133 ++++
 crates/mcptest/src/cli/mod.rs               |   4 +
 crates/mcptest/tests/cli_policy_simulate.rs | 179 +++++
 docs-site/llms-full.sha256                  |   2 +-
 docs/SUMMARY.md                             |   1 +
 docs/cli-reference.md                       |  50 +-
 docs/llms-full.txt                          |  50 +-
 docs/policy-simulator.md                    | 193 +++++
 examples/policy/policy.yml                  |  30 +
 scripts/check-examples.sh                   |   2 +
 15 files changed, 1538 insertions(+), 3 deletions(-)
 create mode 100644 crates/mcptest-core/src/policy.rs
 create mode 100644 crates/mcptest/src/cli/args/policy.rs
 create mode 100644 crates/mcptest/src/cli/handlers/policy.rs
 create mode 100644 crates/mcptest/tests/cli_policy_simulate.rs
 create mode 100644 docs/policy-simulator.md
 create mode 100644 examples/policy/policy.yml

diff --git a/crates/mcptest-core/src/lib.rs b/crates/mcptest-core/src/lib.rs
index 7bb50d14..36c2bcaf 100644
--- a/crates/mcptest-core/src/lib.rs
+++ b/crates/mcptest-core/src/lib.rs
@@ -85,6 +85,9 @@
 //! - [`plugins`]: subprocess plugin protocol. Spawns a polyglot
 //!   plugin binary, exchanges newline-delimited JSON over stdin/stdout,
 //!   and caches the process for the run lifetime.
+//! - [`policy`]: offline governance policy simulator. Turns saved artifacts
+//!   into a flat fact map and evaluates declarative rules plus expiring
+//!   waivers into a pass / warn / fail verdict, with no network access.
 //! - [`subprocess`]: one-shot JSON subprocess helper shared by
 //!   the transform step and the context-aware hooks. Spawns a command,
 //!   writes one JSON value to stdin, and parses one JSON value from stdout.
@@ -125,6 +128,7 @@ pub mod migration;
 pub mod model_compat;
 pub mod network;
 pub mod plugins;
+pub mod policy;
 pub mod profiles;
 pub mod propose;
 pub mod protocol;
diff --git a/crates/mcptest-core/src/policy.rs b/crates/mcptest-core/src/policy.rs
new file mode 100644
index 00000000..5c6a95fb
--- /dev/null
+++ b/crates/mcptest-core/src/policy.rs
@@ -0,0 +1,827 @@
+//! OSS governance policy simulator.
+//!
+//! This module turns saved mcptest artifacts (a run report, a judge
+//! certification, a conformance score, a security scan, and so on) into a flat
+//! map of named "facts", then evaluates a small declarative policy of rules and
+//! expiring waivers against those facts. It exists so a team can gate a release
+//! on the artifacts it already produces without standing up an external policy
+//! service. It is deliberately tiny: comparators are `max`/`min`/`equals`/
+//! `one_of` rather than a general expression DSL, because a governance gate
+//! that nobody can read is a governance gate nobody trusts.
+
+use std::collections::BTreeMap;
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+/// A fact value extracted from an artifact: a number, a boolean, or text.
+///
+/// Facts are the single currency the evaluator understands. Keeping the type
+/// closed (three variants, no nesting) is what lets the comparators stay
+/// trivial and the report stay readable.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+#[serde(untagged)]
+pub enum FactValue {
+    /// A numeric fact, for example a count of failed tests.
+    Num(f64),
+    /// A boolean fact, for example whether a judge is certified.
+    Bool(bool),
+    /// A textual fact, for example a conformance badge such as `T1`.
+    Text(String),
+}
+
+impl FactValue {
+    /// The fact as a number, when numeric. Used by `max`/`min` and numeric `equals`.
+    pub fn as_num(&self) -> Option<f64> {
+        match self {
+            FactValue::Num(n) => Some(*n),
+            _ => None,
+        }
+    }
+
+    /// The fact as a boolean, when it is one. Used by `equals: true|false`.
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            FactValue::Bool(b) => Some(*b),
+            _ => None,
+        }
+    }
+
+    /// The fact as text, when it is textual. Used by `one_of` and string
+    /// `equals`.
+    pub fn as_text(&self) -> Option<&str> {
+        match self {
+            FactValue::Text(s) => Some(s.as_str()),
+            _ => None,
+        }
+    }
+
+    /// Render the fact for a report line. Integral numbers below 9e15 (exact in
+    /// `i64`) print as integers, e.g. `2`; larger ones avoid a saturating cast.
+    fn render(&self) -> String {
+        match self {
+            FactValue::Num(n) => {
+                if n.fract() == 0.0 && n.abs() < 9.0e15 {
+                    format!("{}", *n as i64)
+                } else {
+                    format!("{n}")
+                }
+            }
+            FactValue::Bool(b) => b.to_string(),
+            FactValue::Text(s) => s.clone(),
+        }
+    }
+}
+
+/// The flat fact map an evaluation runs against. Keyed by dotted fact name
+/// (for example `run.failed`) so rules cite a stable identifier.
+pub type Facts = BTreeMap<String, FactValue>;
+
+/// Severity of a policy rule: an unwaived comparator failure fails the gate
+/// under `fail` and only warns under `warn`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum RuleSeverity {
+    /// An unwaived comparator failure fails the overall verdict (the default).
+    #[default]
+    Fail,
+    /// An unwaived failure only warns; a missing fact or expired waiver still fails.
+    Warn,
+}
+
+/// One declarative policy rule: name a fact and one comparator. Exactly one of
+/// `max`/`min`/`equals`/`one_of` must be set.
+#[derive(Debug, Clone, Deserialize)]
+pub struct PolicyRule {
+    /// Stable rule identifier, cited in the report and matched by waivers.
+    pub id: String,
+    /// Optional human description shown when a rule is authored verbosely.
+    #[serde(default)]
+    pub description: Option<String>,
+    /// The fact name this rule constrains, for example `run.failed`.
+    pub fact: String,
+    /// Upper bound: the fact (a number) must be `<= max`.
+    #[serde(default)]
+    pub max: Option<f64>,
+    /// Lower bound: the fact (a number) must be `>= min`.
+    #[serde(default)]
+    pub min: Option<f64>,
+    /// Exact match against a JSON literal (bool, number, or string).
+    #[serde(default)]
+    pub equals: Option<Value>,
+    /// Membership: the fact (rendered to text) must be one of these.
+    #[serde(default)]
+    pub one_of: Option<Vec<String>>,
+    /// Whether a failure fails the gate or only warns.
+    #[serde(default)]
+    pub severity: RuleSeverity,
+}
+
+/// A waiver suppressing one rule's failure until it expires. An expired waiver
+/// does not suppress (fail closed) and is itself reported.
+#[derive(Debug, Clone, Deserialize)]
+pub struct PolicyWaiver {
+    /// The rule id this waiver suppresses.
+    pub rule: String,
+    /// Who owns the waiver, so reviewers know whom to ask.
+    pub owner: String,
+    /// Why the failure is tolerated, captured for the audit trail.
+    pub reason: String,
+    /// RFC 3339 UTC expiry (for example `2026-12-31T00:00:00Z`).
+    pub expiry: String,
+    /// Optional tracking reference (for example a GitHub issue id).
+    #[serde(default)]
+    pub issue: Option<String>,
+}
+
+/// The parsed policy file.
+#[derive(Debug, Clone, Deserialize)]
+pub struct PolicyFile {
+    /// Schema version of the policy document, reserved for future migrations.
+    pub version: String,
+    /// The rules evaluated against the facts.
+    pub rules: Vec<PolicyRule>,
+    /// Waivers that may suppress specific rule failures until they expire.
+    #[serde(default)]
+    pub waivers: Vec<PolicyWaiver>,
+}
+
+/// Per-rule status after evaluation; serialized in kebab-case (`expired-waiver`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "kebab-case")]
+pub enum RuleStatus {
+    /// The rule's comparator was satisfied.
+    Pass,
+    /// The rule failed and no waiver applied (fail severity).
+    Fail,
+    /// The rule failed and no waiver applied (warn severity).
+    Warn,
+    /// Failed its comparator (at either severity) but an active waiver suppressed it.
+    Waived,
+    /// Failed and its waiver has expired or has an unparseable expiry (fail closed).
+    ExpiredWaiver,
+    /// The rule could not be checked (fact missing, wrong fact type, or not exactly
+    /// one comparator set). Treated as a failure so it never silently passes.
+    Unevaluated,
+}
+
+/// One rule's evaluated outcome, the cited evidence row.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct RuleOutcome {
+    /// The rule id this row reports on.
+    pub id: String,
+    /// The fact the rule referenced.
+    pub fact: String,
+    /// The resolved status for this rule.
+    pub status: RuleStatus,
+    /// The observed fact value rendered for the report, when available.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub observed: Option<FactValue>,
+    /// One-line human explanation.
+    pub detail: String,
+}
+
+/// Overall verdict.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "lowercase")]
+pub enum Verdict {
+    /// Every rule passed (or was waived).
+    Pass,
+    /// No fail-severity rule failed, but at least one warn rule did.
+    Warn,
+    /// At least one fail-severity rule failed, expired its waiver, or could
+    /// not be evaluated.
+    Fail,
+}
+
+/// The full simulation result.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct PolicyOutcome {
+    /// The overall verdict the gate keys off.
+    pub verdict: Verdict,
+    /// Per-rule outcomes, in policy order, as cited evidence.
+    pub rules: Vec<RuleOutcome>,
+}
+
+impl PolicyOutcome {
+    /// True when the overall verdict is `Fail` (any fail-severity rule failed,
+    /// hit an expired waiver, or could not be evaluated).
+    pub fn failed(&self) -> bool {
+        self.verdict == Verdict::Fail
+    }
+}
+
+/// Evaluate a policy against a fact map at a given wall-clock instant.
+///
+/// `now_epoch` is Unix epoch seconds, supplied by the caller so the evaluation
+/// stays pure and tests can pin time. Each rule is resolved independently, then
+/// the overall verdict is the worst per-rule status: any fail / expired-waiver
+/// / unevaluated rule makes the verdict `Fail`, otherwise any warn makes it
+/// `Warn`, otherwise `Pass`. Waived and passing rules never fail the gate.
+pub fn evaluate(policy: &PolicyFile, facts: &Facts, now_epoch: i64) -> PolicyOutcome {
+    let mut rules = Vec::with_capacity(policy.rules.len());
+    for rule in &policy.rules {
+        rules.push(evaluate_rule(rule, facts, &policy.waivers, now_epoch));
+    }
+
+    let verdict = if rules.iter().any(|r| {
+        matches!(
+            r.status,
+            RuleStatus::Fail | RuleStatus::ExpiredWaiver | RuleStatus::Unevaluated
+        )
+    }) {
+        Verdict::Fail
+    } else if rules.iter().any(|r| r.status == RuleStatus::Warn) {
+        Verdict::Warn
+    } else {
+        Verdict::Pass
+    };
+
+    PolicyOutcome { verdict, rules }
+}
+
+/// Resolve a single rule against the facts and waivers. Split out of
+/// [`evaluate`] to keep that function short and to keep all the per-rule
+/// branching in one place.
+fn evaluate_rule(
+    rule: &PolicyRule,
+    facts: &Facts,
+    waivers: &[PolicyWaiver],
+    now_epoch: i64,
+) -> RuleOutcome {
+    let comparator_count = rule.max.is_some() as u8
+        + rule.min.is_some() as u8
+        + rule.equals.is_some() as u8
+        + rule.one_of.is_some() as u8;
+    if comparator_count != 1 {
+        return RuleOutcome {
+            id: rule.id.clone(),
+            fact: rule.fact.clone(),
+            status: RuleStatus::Unevaluated,
+            observed: None,
+            detail: format!("rule {} must set exactly one comparator", rule.id),
+        };
+    }
+
+    let Some(fact) = facts.get(&rule.fact) else {
+        return RuleOutcome {
+            id: rule.id.clone(),
+            fact: rule.fact.clone(),
+            status: RuleStatus::Unevaluated,
+            observed: None,
+            detail: format!(
+                "fact `{}` not available (artifact not provided?)",
+                rule.fact
+            ),
+        };
+    };
+
+    let check = check_comparator(rule, fact);
+    match check {
+        ComparatorResult::Unevaluated(detail) => RuleOutcome {
+            id: rule.id.clone(),
+            fact: rule.fact.clone(),
+            status: RuleStatus::Unevaluated,
+            observed: Some(fact.clone()),
+            detail,
+        },
+        ComparatorResult::Passed(detail) => RuleOutcome {
+            id: rule.id.clone(),
+            fact: rule.fact.clone(),
+            status: RuleStatus::Pass,
+            observed: Some(fact.clone()),
+            detail,
+        },
+        ComparatorResult::Failed(detail) => resolve_failure(rule, fact, waivers, now_epoch, detail),
+    }
+}
+
+/// Decide the status of a rule that failed its comparator: an active waiver
+/// downgrades it to `Waived`, an expired or unparseable waiver fails closed as
+/// `ExpiredWaiver`, and no waiver yields the rule's own severity. Every waiver
+/// naming the rule is considered, so a renewal listed after a lapsed waiver is
+/// honoured; when none is active, the first lapsed waiver is the one reported.
+fn resolve_failure(
+    rule: &PolicyRule,
+    fact: &FactValue,
+    waivers: &[PolicyWaiver],
+    now_epoch: i64,
+    fail_detail: String,
+) -> RuleOutcome {
+    // Every outcome shares the rule id, fact name, and observed value.
+    let outcome = |status: RuleStatus, detail: String| RuleOutcome {
+        id: rule.id.clone(),
+        fact: rule.fact.clone(),
+        status,
+        observed: Some(fact.clone()),
+        detail,
+    };
+    // Report only the date part of an expiry; shorter text is shown as-is.
+    let date_of = |expiry: &str| expiry.get(0..10).unwrap_or(expiry).to_string();
+    let mut lapsed: Option<&PolicyWaiver> = None;
+    for waiver in waivers.iter().filter(|w| w.rule == rule.id) {
+        let active = rfc3339_to_epoch(&waiver.expiry)
+            .map(|expiry| now_epoch <= expiry)
+            .unwrap_or(false);
+        if active {
+            let issue = waiver
+                .issue
+                .as_ref()
+                .map(|i| format!(" ({i})"))
+                .unwrap_or_default();
+            let until = date_of(waiver.expiry.as_str());
+            let detail = format!("waived by {} until {until}{issue}", waiver.owner);
+            return outcome(RuleStatus::Waived, detail);
+        }
+        // Keep the first lapsed (or unparseable) waiver for the report line.
+        lapsed = lapsed.or(Some(waiver));
+    }
+    if let Some(waiver) = lapsed {
+        let until = date_of(waiver.expiry.as_str());
+        let detail = format!("waiver for {} expired {until}", rule.id);
+        return outcome(RuleStatus::ExpiredWaiver, detail);
+    }
+    let status = match rule.severity {
+        RuleSeverity::Fail => RuleStatus::Fail,
+        RuleSeverity::Warn => RuleStatus::Warn,
+    };
+    outcome(status, fail_detail)
+}
+
+/// The raw verdict of applying a rule's single comparator to a fact, before
+/// waivers or severity are considered.
+enum ComparatorResult {
+    /// The comparator held; detail is a human description of why.
+    Passed(String),
+    /// The comparator did not hold; detail explains the breach.
+    Failed(String),
+    /// The fact's type did not match the comparator (for example a `max` rule
+    /// over a textual fact); detail explains the mismatch.
+    Unevaluated(String),
+}
+
+/// Apply the one set comparator on `rule` to `fact`. The caller has already
+/// verified exactly one comparator is set.
+fn check_comparator(rule: &PolicyRule, fact: &FactValue) -> ComparatorResult {
+    if let Some(max) = rule.max {
+        return match fact.as_num() {
+            Some(n) if n <= max => {
+                ComparatorResult::Passed(format!("{} = {} within max {max}", rule.fact, n))
+            }
+            Some(n) => ComparatorResult::Failed(format!("{} = {} exceeds max {max}", rule.fact, n)),
+            None => ComparatorResult::Unevaluated(format!(
+                "{} is not numeric, cannot apply max",
+                rule.fact
+            )),
+        };
+    }
+    if let Some(min) = rule.min {
+        return match fact.as_num() {
+            Some(n) if n >= min => {
+                ComparatorResult::Passed(format!("{} = {} at least min {min}", rule.fact, n))
+            }
+            Some(n) => ComparatorResult::Failed(format!("{} = {} below min {min}", rule.fact, n)),
+            None => ComparatorResult::Unevaluated(format!(
+                "{} is not numeric, cannot apply min",
+                rule.fact
+            )),
+        };
+    }
+    if let Some(expected) = &rule.equals {
+        return check_equals(rule, fact, expected);
+    }
+    if let Some(allowed) = &rule.one_of {
+        let rendered = fact.render();
+        return if allowed.iter().any(|v| v == &rendered) {
+            ComparatorResult::Passed(format!("{} = {rendered} is allowed", rule.fact))
+        } else {
+            ComparatorResult::Failed(format!(
+                "{} = {rendered} not one of [{}]",
+                rule.fact,
+                allowed.join(", ")
+            ))
+        };
+    }
+    // Unreachable: the caller guarantees one comparator is set.
+    ComparatorResult::Unevaluated(format!("rule {} has no comparator", rule.id))
+}
+
+/// Compare a fact against an `equals` JSON literal, choosing the comparison by
+/// the literal's JSON type so `equals: true` and `equals: "T1"` both work.
+fn check_equals(rule: &PolicyRule, fact: &FactValue, expected: &Value) -> ComparatorResult {
+    match expected {
+        Value::Bool(b) => match fact.as_bool() {
+            Some(actual) if actual == *b => {
+                ComparatorResult::Passed(format!("{} = {actual}, as expected", rule.fact))
+            }
+            Some(actual) => {
+                ComparatorResult::Failed(format!("{} = {actual}, expected {b}", rule.fact))
+            }
+            None => ComparatorResult::Unevaluated(format!(
+                "{} is not boolean, cannot compare to {b}",
+                rule.fact
+            )),
+        },
+        Value::Number(num) => {
+            let want = num.as_f64();
+            match (fact.as_num(), want) {
+                (Some(actual), Some(want)) if actual == want => {
+                    ComparatorResult::Passed(format!("{} = {actual}, as expected", rule.fact))
+                }
+                (Some(actual), Some(want)) => {
+                    ComparatorResult::Failed(format!("{} = {actual}, expected {want}", rule.fact))
+                }
+                _ => ComparatorResult::Unevaluated(format!(
+                    "{} is not numeric, cannot compare",
+                    rule.fact
+                )),
+            }
+        }
+        Value::String(s) => match fact.as_text() {
+            Some(actual) if actual == s => {
+                ComparatorResult::Passed(format!("{} = {actual}, as expected", rule.fact))
+            }
+            Some(actual) => {
+                ComparatorResult::Failed(format!("{} = {actual}, expected {s}", rule.fact))
+            }
+            None => ComparatorResult::Unevaluated(format!(
+                "{} is not text, cannot compare to {s}",
+                rule.fact
+            )),
+        },
+        _ => ComparatorResult::Unevaluated(format!(
+            "rule {} equals must be a bool, number, or string",
+            rule.id
+        )),
+    }
+}
+
+/// Parse the `YYYY-MM-DDTHH:MM:SS` prefix of an RFC 3339 timestamp as UTC into
+/// Unix epoch seconds, with no date crate. Fractional seconds and timezone
+/// offsets are ignored. Returns `None` unless the prefix has exactly that shape
+/// and names a real calendar date and time, so a mistyped waiver expiry fails
+/// closed. `pub` so the CLI handler and tests reuse the evaluator's own parse.
+pub fn rfc3339_to_epoch(ts: &str) -> Option<i64> {
+    let digit_mask = |b: u8| if b.is_ascii_digit() { b'0' } else { b };
+    let shape: Vec<u8> = ts.bytes().take(19).map(digit_mask).collect();
+    if shape.len() < 19 || &shape[..10] != b"0000-00-00" || &shape[11..] != b"00:00:00" {
+        return None;
+    }
+    let field = |at: usize, len: usize| ts[at..at + len].parse::<i64>().ok();
+    let (year, month, day) = (field(0, 4)?, field(5, 2)?, field(8, 2)?);
+    let (hour, min, sec) = (field(11, 2)?, field(14, 2)?, field(17, 2)?);
+    let leap = i64::from(year % 4 == 0 && (year % 100 != 0 || year % 400 == 0));
+    let month_len = [31, 28 + leap, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
+    let date_ok = (1..=12).contains(&month) && day >= 1 && day <= month_len[month as usize - 1];
+    if !date_ok || hour > 23 || min > 59 || sec > 60 {
+        return None;
+    }
+    // days_from_civil (Howard Hinnant): the year is counted from March.
+    let (y, m) = (year - i64::from(month <= 2), (month + 9) % 12);
+    let (era, yoe) = (y.div_euclid(400), y.rem_euclid(400));
+    let doe = yoe * 365 + yoe / 4 - yoe / 100 + (153 * m + 2) / 5 + day - 1;
+    Some((era * 146_097 + doe - 719_468) * 86_400 + hour * 3_600 + min * 60 + sec)
+}
+
+/// Read defensively into the facts map: insert a numeric fact from a JSON
+/// `u64` field only when present, never panicking on a missing or wrong-typed
+/// field.
+fn insert_u64(facts: &mut Facts, key: &str, field: Option<u64>) {
+    if let Some(n) = field {
+        facts.insert(key.to_string(), FactValue::Num(n as f64));
+    }
+}
+
+/// Extract facts from a `mcptest run --reporter json` report.
+///
+/// Reads the `summary` object for the run tallies. Each field is optional so a
+/// trimmed report (for example one without `inconclusive`) simply yields fewer
+/// facts rather than an error.
+pub fn facts_from_run_report(v: &Value) -> Facts {
+    let mut facts = Facts::new();
+    let summary = &v["summary"];
+    insert_u64(&mut facts, "run.total", summary["total"].as_u64());
+    insert_u64(&mut facts, "run.passed", summary["passed"].as_u64());
+    insert_u64(&mut facts, "run.failed", summary["failed"].as_u64());
+    insert_u64(&mut facts, "run.skipped", summary["skipped"].as_u64());
+    insert_u64(
+        &mut facts,
+        "run.inconclusive",
+        summary["inconclusive"].as_u64(),
+    );
+    facts
+}
+
+/// Extract facts from a `mcptest judge certify` certification record.
+///
+/// `now_epoch` lets the extractor compute `judge.expired` from the
+/// certification's validity window so a stale certification can gate without
+/// the policy author hardcoding a date. The expiry fact is only emitted when
+/// the record carries a validity window.
+pub fn facts_from_certification(v: &Value, now_epoch: i64) -> Facts {
+    let mut facts = Facts::new();
+    if let Some(certified) = v["certified"].as_bool() {
+        facts.insert("judge.certified".to_string(), FactValue::Bool(certified));
+    }
+    if let Some(ece) = v["metrics"]["ece"].as_f64() {
+        facts.insert("judge.ece".to_string(), FactValue::Num(ece));
+    }
+    if let Some(brier) = v["metrics"]["brier"].as_f64() {
+        facts.insert("judge.brier".to_string(), FactValue::Num(brier));
+    }
+    if let Some(valid_until) = v["validity_window"]["valid_until"].as_str() {
+        if let Some(epoch) = rfc3339_to_epoch(valid_until) {
+            facts.insert(
+                "judge.expired".to_string(),
+                FactValue::Bool(now_epoch > epoch),
+            );
+        }
+    }
+    facts
+}
+
+/// Extract facts from a `mcptest conformance run` report.
+///
+/// The badge, tier, and MUST/SHOULD tallies let a policy gate on, say, "tier 1
+/// or 2" or a minimum MUST count. Each fact is emitted only when present; the
+/// tier may be a bare string at `tier` or a string nested at `tier.tier`.
+pub fn facts_from_conformance(v: &Value) -> Facts {
+    let mut facts = Facts::new();
+    if let Some(badge) = v["badge"].as_str() {
+        facts.insert(
+            "conformance.badge".to_string(),
+            FactValue::Text(badge.to_string()),
+        );
+    }
+    insert_u64(
+        &mut facts,
+        "conformance.must_passed",
+        v["must"]["passed"].as_u64(),
+    );
+    insert_u64(
+        &mut facts,
+        "conformance.must_total",
+        v["must"]["total"].as_u64(),
+    );
+    insert_u64(
+        &mut facts,
+        "conformance.should_passed",
+        v["should"]["passed"].as_u64(),
+    );
+    insert_u64(
+        &mut facts,
+        "conformance.should_total",
+        v["should"]["total"].as_u64(),
+    );
+    let tier = v["tier"]
+        .as_str()
+        .or_else(|| v["tier"]["tier"].as_str())
+        .map(|s| s.to_string());
+    if let Some(tier) = tier {
+        facts.insert("conformance.tier".to_string(), FactValue::Text(tier));
+    }
+    facts
+}
+
+/// Extract facts from a `mcptest security` report.
+///
+/// Tallies findings by severity so a policy can gate on, for example, "no
+/// critical findings". Findings are read from `findings` or a top-level array.
+/// All five severity counts and the total are always emitted (zero when absent)
+/// so a `max: 0` rule never lands as `Unevaluated` on a clean scan.
+pub fn facts_from_security(v: &Value) -> Facts {
+    let mut facts = Facts::new();
+    let findings = v["findings"]
+        .as_array()
+        .or_else(|| v.as_array())
+        .map(Vec::as_slice)
+        .unwrap_or(&[]);
+    let mut critical = 0u64;
+    let mut high = 0u64;
+    let mut medium = 0u64;
+    let mut low = 0u64;
+    let mut info = 0u64;
+    for finding in findings {
+        match finding["severity"].as_str() {
+            Some("critical") => critical += 1,
+            Some("high") => high += 1,
+            Some("medium") => medium += 1,
+            Some("low") => low += 1,
+            Some("info") => info += 1,
+            _ => {} // unrecognised severity: counted in the total only
+        }
+    }
+    facts.insert(
+        "security.critical_count".to_string(),
+        FactValue::Num(critical as f64),
+    );
+    facts.insert(
+        "security.high_count".to_string(),
+        FactValue::Num(high as f64),
+    );
+    facts.insert(
+        "security.medium_count".to_string(),
+        FactValue::Num(medium as f64),
+    );
+    facts.insert("security.low_count".to_string(), FactValue::Num(low as f64));
+    facts.insert(
+        "security.info_count".to_string(),
+        FactValue::Num(info as f64),
+    );
+    facts.insert(
+        "security.total_findings".to_string(),
+        FactValue::Num(findings.len() as f64),
+    );
+    facts
+}
+
+/// Extract facts from a `mcptest model-compat diff` report.
+///
+/// The per-bucket tallies let a policy gate on model drift, for example
+/// "no model regressed to fail".
+pub fn facts_from_model_compat(v: &Value) -> Facts {
+    let mut facts = Facts::new();
+    let summary = &v["summary"];
+    insert_u64(&mut facts, "model_compat.total", summary["total"].as_u64());
+    insert_u64(&mut facts, "model_compat.pass", summary["pass"].as_u64());
+    insert_u64(&mut facts, "model_compat.drift", summary["drift"].as_u64());
+    insert_u64(&mut facts, "model_compat.fail", summary["fail"].as_u64());
+    facts
+}
+
+/// Extract facts from an evidence artifact.
+///
+/// Surfaces the reproducibility and origin flags so a policy can require a
+/// reproducible, verifiable-origin run before a release.
+pub fn facts_from_evidence(v: &Value) -> Facts {
+    let mut facts = Facts::new();
+    if let Some(reproducible) = v["reproducible"].as_bool() {
+        facts.insert(
+            "evidence.reproducible".to_string(),
+            FactValue::Bool(reproducible),
+        );
+    }
+    if let Some(unverifiable) = v["unverifiable_origin"].as_bool() {
+        facts.insert(
+            "evidence.unverifiable_origin".to_string(),
+            FactValue::Bool(unverifiable),
+        );
+    }
+    facts
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    fn policy_from(yaml: &str) -> PolicyFile {
+        serde_yaml::from_str(yaml).expect("policy fixture parses")
+    }
+
+    #[test]
+    fn a_satisfied_max_rule_passes() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: no-fail\n    fact: run.failed\n    max: 0\n",
+        );
+        let facts = facts_from_run_report(&json!({"summary": {"failed": 0}}));
+        let outcome = evaluate(&policy, &facts, 0);
+        assert_eq!(outcome.verdict, Verdict::Pass);
+        assert_eq!(outcome.rules[0].status, RuleStatus::Pass);
+    }
+
+    #[test]
+    fn an_exceeded_max_rule_fails_the_verdict() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: no-fail\n    fact: run.failed\n    max: 0\n",
+        );
+        let facts = facts_from_run_report(&json!({"summary": {"failed": 2}}));
+        let outcome = evaluate(&policy, &facts, 0);
+        assert_eq!(outcome.verdict, Verdict::Fail);
+        assert_eq!(outcome.rules[0].status, RuleStatus::Fail);
+        assert!(outcome.rules[0].detail.contains("exceeds max 0"));
+    }
+
+    #[test]
+    fn equals_true_passes_and_false_fails() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: certified\n    fact: judge.certified\n    equals: true\n",
+        );
+        let pass = facts_from_certification(&json!({"certified": true}), 0);
+        assert_eq!(evaluate(&policy, &pass, 0).verdict, Verdict::Pass);
+        let fail = facts_from_certification(&json!({"certified": false}), 0);
+        let outcome = evaluate(&policy, &fail, 0);
+        assert_eq!(outcome.verdict, Verdict::Fail);
+        assert!(outcome.rules[0].detail.contains("expected true"));
+    }
+
+    #[test]
+    fn judge_expired_is_computed_from_validity_window() {
+        let cert = json!({
+            "certified": true,
+            "validity_window": {"valid_until": "2023-11-14T22:13:20Z"}
+        });
+        // now after expiry -> expired true.
+        let after = facts_from_certification(&cert, 1_700_000_001);
+        assert_eq!(after.get("judge.expired"), Some(&FactValue::Bool(true)));
+        // now before expiry -> expired false.
+        let before = facts_from_certification(&cert, 1_600_000_000);
+        assert_eq!(before.get("judge.expired"), Some(&FactValue::Bool(false)));
+    }
+
+    #[test]
+    fn an_active_waiver_suppresses_a_failure() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: no-fail\n    fact: run.failed\n    max: 0\nwaivers:\n  - rule: no-fail\n    owner: alice\n    reason: known\n    expiry: \"2099-01-01T00:00:00Z\"\n    issue: GH-123\n",
+        );
+        let facts = facts_from_run_report(&json!({"summary": {"failed": 2}}));
+        let outcome = evaluate(&policy, &facts, 1_700_000_000);
+        assert_eq!(outcome.rules[0].status, RuleStatus::Waived);
+        assert_eq!(outcome.verdict, Verdict::Pass);
+        assert!(outcome.rules[0].detail.contains("alice"));
+        assert!(outcome.rules[0].detail.contains("GH-123"));
+    }
+
+    #[test]
+    fn an_expired_waiver_does_not_suppress_and_fails_closed() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: no-fail\n    fact: run.failed\n    max: 0\nwaivers:\n  - rule: no-fail\n    owner: alice\n    reason: known\n    expiry: \"2000-01-01T00:00:00Z\"\n",
+        );
+        let facts = facts_from_run_report(&json!({"summary": {"failed": 2}}));
+        let outcome = evaluate(&policy, &facts, 1_700_000_000);
+        assert_eq!(outcome.rules[0].status, RuleStatus::ExpiredWaiver);
+        assert_eq!(outcome.verdict, Verdict::Fail);
+        assert!(outcome.rules[0].detail.contains("expired"));
+    }
+
+    #[test]
+    fn a_missing_fact_is_unevaluated_and_fails() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: no-fail\n    fact: run.failed\n    max: 0\n",
+        );
+        let facts = Facts::new();
+        let outcome = evaluate(&policy, &facts, 0);
+        assert_eq!(outcome.rules[0].status, RuleStatus::Unevaluated);
+        assert_eq!(outcome.verdict, Verdict::Fail);
+        assert!(outcome.rules[0].detail.contains("not available"));
+    }
+
+    #[test]
+    fn a_warn_severity_failure_warns_but_does_not_fail() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: tier\n    fact: conformance.badge\n    one_of: [T1, T2]\n    severity: warn\n",
+        );
+        let facts = facts_from_conformance(&json!({"badge": "T3"}));
+        let outcome = evaluate(&policy, &facts, 0);
+        assert_eq!(outcome.rules[0].status, RuleStatus::Warn);
+        assert_eq!(outcome.verdict, Verdict::Warn);
+    }
+
+    #[test]
+    fn one_of_on_conformance_badge_matches_membership() {
+        let policy = policy_from(
+            "version: \"1.0\"\nrules:\n  - id: tier\n    fact: conformance.badge\n    one_of: [T1, T2]\n",
+        );
+        let pass = facts_from_conformance(&json!({"badge": "T1"}));
+        assert_eq!(evaluate(&policy, &pass, 0).verdict, Verdict::Pass);
+        let fail = facts_from_conformance(&json!({"badge": "F"}));
+        assert_eq!(evaluate(&policy, &fail, 0).verdict, Verdict::Fail);
+    }
+
+    #[test]
+    fn security_counts_tally_from_a_findings_array() {
+        let report = json!({
+            "findings": [
+                {"severity": "critical"},
+                {"severity": "high"},
+                {"severity": "high"},
+                {"severity": "low"}
+            ]
+        });
+        let facts = facts_from_security(&report);
+        assert_eq!(
+            facts.get("security.critical_count"),
+            Some(&FactValue::Num(1.0))
+        );
+        assert_eq!(facts.get("security.high_count"), Some(&FactValue::Num(2.0)));
+        assert_eq!(
+            facts.get("security.total_findings"),
+            Some(&FactValue::Num(4.0))
+        );
+        // A clean severity still emits a zero count.
+        assert_eq!(
+            facts.get("security.medium_count"),
+            Some(&FactValue::Num(0.0))
+        );
+    }
+
+    #[test]
+    fn rfc3339_parses_known_epochs() {
+        assert_eq!(rfc3339_to_epoch("1970-01-01T00:00:00Z"), Some(0));
+        assert_eq!(
+            rfc3339_to_epoch("2023-11-14T22:13:20Z"),
+            Some(1_700_000_000)
+        );
+        assert_eq!(rfc3339_to_epoch("short"), None);
+    }
+}
diff --git a/crates/mcptest/src/cli/args/mod.rs b/crates/mcptest/src/cli/args/mod.rs
index 7f0ab06a..bbd6d970 100644
--- a/crates/mcptest/src/cli/args/mod.rs
+++ b/crates/mcptest/src/cli/args/mod.rs
@@ -32,6 +32,7 @@ pub mod migrate;
 pub mod mock;
 pub mod model_compat;
 pub mod pipe;
+pub mod policy;
 pub mod prompt;
 pub mod propose;
 pub mod record;
@@ -86,6 +87,7 @@ pub use model_compat::{
     ModelCompatDiffFormatCli, ModelCompatRunArgs,
 };
 pub use pipe::{OnBudgetExceededCli, PipeArgs, PipeFormat};
+pub use policy::{PolicyArgs, PolicyCommand, PolicyFormat, PolicySimulateArgs};
 pub use prompt::PromptArgs;
 pub use propose::ProposeArgs;
 pub use record::RecordArgs;
diff --git a/crates/mcptest/src/cli/args/policy.rs b/crates/mcptest/src/cli/args/policy.rs
new file mode 100644
index 00000000..2c4165f4
--- /dev/null
+++ b/crates/mcptest/src/cli/args/policy.rs
@@ -0,0 +1,62 @@
+//! Arguments for `mcptest policy`.
+use std::path::PathBuf;
+
+use clap::{Args, Subcommand, ValueEnum};
+
+/// Arguments for `mcptest policy`.
+#[derive(Debug, Args)]
+pub struct PolicyArgs {
+    /// The `policy` subcommand to run.
+    #[command(subcommand)]
+    pub command: PolicyCommand, // not an `Option`, so clap requires a verb after `policy`
+}
+
+/// Subcommands for `mcptest policy`.
+#[derive(Debug, Subcommand)]
+pub enum PolicyCommand {
+    /// Evaluate a policy against saved mcptest artifacts.
+    Simulate(PolicySimulateArgs), // spelled `simulate` on the command line
+}
+
+/// Arguments for `mcptest policy simulate`.
+#[derive(Debug, Args)]
+pub struct PolicySimulateArgs {
+    /// Policy file (declarative YAML rules + waivers).
+    #[arg(long, value_name = "FILE")]
+    pub policy: PathBuf, // the only required input; every artifact below is optional
+    /// Run report JSON from `mcptest run --reporter json`.
+    #[arg(long = "run-report", value_name = "FILE")]
+    pub run_report: Option<PathBuf>,
+    /// Judge certification record from `mcptest judge certify`.
+    #[arg(long = "judge-cert", value_name = "FILE")]
+    pub judge_cert: Option<PathBuf>,
+    /// Conformance report JSON from `mcptest conformance run`.
+    #[arg(long = "conformance-report", value_name = "FILE")]
+    pub conformance_report: Option<PathBuf>,
+    /// Security report JSON from `mcptest security`.
+    #[arg(long, value_name = "FILE")]
+    pub security: Option<PathBuf>,
+    /// Model-compat diff JSON from `mcptest model-compat diff`.
+    #[arg(long = "model-compat", value_name = "FILE")]
+    pub model_compat: Option<PathBuf>,
+    /// Evidence artifact JSON from `mcptest evidence`.
+    #[arg(long, value_name = "FILE")]
+    pub evidence: Option<PathBuf>,
+    /// Exit non-zero when the policy fails (CI gate). Off by default (dry-run
+    /// always exits 0 and just prints the verdict).
+    #[arg(long)]
+    pub gate: bool, // false unless `--gate` is passed
+    /// Output format.
+    #[arg(long, value_name = "FORMAT", default_value = "pretty")]
+    pub format: PolicyFormat,
+}
+
+/// Output format for `policy simulate`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
+#[value(rename_all = "lowercase")]
+pub enum PolicyFormat {
+    /// Human-readable verdict and per-rule lines.
+    Pretty, // CLI value `pretty`
+    /// The full [`mcptest_core::policy::PolicyOutcome`] as pretty JSON.
+    Json, // CLI value `json`
+}
diff --git a/crates/mcptest/src/cli/handlers/mod.rs b/crates/mcptest/src/cli/handlers/mod.rs
index 3f9edf6a..135f76b0 100644
--- a/crates/mcptest/src/cli/handlers/mod.rs
+++ b/crates/mcptest/src/cli/handlers/mod.rs
@@ -39,6 +39,7 @@ pub(crate) mod migrate;
 pub(crate) mod mock;
 pub(crate) mod model_compat;
 pub(crate) mod pipe;
+pub(crate) mod policy;
 pub(crate) mod prompt;
 pub(crate) mod propose;
 pub(crate) mod readiness;
@@ -103,6 +104,7 @@ pub(crate) use migrate::migrate_command;
 pub(crate) use mock::mock_command;
 pub(crate) use model_compat::model_compat_command;
 pub(crate) use pipe::pipe_command;
+pub(crate) use policy::policy_command;
 pub(crate) use prompt::prompt_command;
 pub(crate) use propose::propose_command;
 pub(crate) use record::record_command;
diff --git a/crates/mcptest/src/cli/handlers/policy.rs b/crates/mcptest/src/cli/handlers/policy.rs
new file mode 100644
index 00000000..2e51f0a6
--- /dev/null
+++ b/crates/mcptest/src/cli/handlers/policy.rs
@@ -0,0 +1,133 @@
+//! Handler for `mcptest policy <subcommand>`.
+use std::path::Path;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use anyhow::{Context, Result};
+use mcptest_core::policy::{
+    evaluate, facts_from_certification, facts_from_conformance, facts_from_evidence,
+    facts_from_model_compat, facts_from_run_report, facts_from_security, Facts, PolicyFile,
+    PolicyOutcome, RuleStatus, Verdict,
+};
+
+use crate::cli::args::{PolicyArgs, PolicyCommand, PolicyFormat, PolicySimulateArgs};
+
+/// Entry point for `mcptest policy`: hand the parsed verb to its handler.
+///
+/// `simulate` is the only verb today. The router stays this thin so that a new
+/// `PolicyCommand` variant needs nothing more than one extra match arm here.
+pub(crate) fn policy_command(args: PolicyArgs) -> Result<i32> {
+    match args.command {
+        PolicyCommand::Simulate(simulate_args) => simulate_command(simulate_args),
+    }
+}
+
+/// Run `mcptest policy simulate`.
+///
+/// Parses the policy YAML, gathers facts from whichever artifacts were passed,
+/// evaluates them at the current wall-clock second, and prints the outcome.
+/// Returns the exit code: 1 only when `--gate` is set and the outcome failed.
+/// Errors on an unreadable or unparsable input, and on a clock set before the
+/// Unix epoch: a guessed `now` of 0 could leave expired waivers looking active.
+fn simulate_command(args: PolicySimulateArgs) -> Result<i32> {
+    let body = std::fs::read_to_string(&args.policy)
+        .with_context(|| format!("failed to read policy {}", args.policy.display()))?;
+    let policy: PolicyFile = serde_yaml::from_str(&body)
+        .with_context(|| format!("failed to parse policy {}", args.policy.display()))?;
+
+    let since_epoch = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .context("system clock is before the Unix epoch; cannot evaluate expiries")?;
+    let now = since_epoch.as_secs() as i64;
+
+    let facts = collect_facts(&args, now)?;
+    let outcome = evaluate(&policy, &facts, now);
+
+    match args.format {
+        PolicyFormat::Json => println!("{}", serde_json::to_string_pretty(&outcome)?),
+        PolicyFormat::Pretty => print_pretty(&outcome),
+    }
+
+    if args.gate && outcome.failed() {
+        Ok(1)
+    } else {
+        Ok(0)
+    }
+}
+
+/// Build the fact pool from every artifact path the caller supplied.
+///
+/// Absent flags contribute nothing, and `now` is only used by the certification
+/// extractor. The first artifact that cannot be read or parsed aborts the run.
+fn collect_facts(args: &PolicySimulateArgs, now: i64) -> Result<Facts> {
+    let mut pool = Facts::new();
+    if let Some(file) = args.run_report.as_deref() {
+        pool.extend(facts_from_run_report(&read_json(file)?));
+    }
+    if let Some(file) = args.judge_cert.as_deref() {
+        pool.extend(facts_from_certification(&read_json(file)?, now));
+    }
+    if let Some(file) = args.conformance_report.as_deref() {
+        pool.extend(facts_from_conformance(&read_json(file)?));
+    }
+    if let Some(file) = args.security.as_deref() {
+        pool.extend(facts_from_security(&read_json(file)?));
+    }
+    if let Some(file) = args.model_compat.as_deref() {
+        pool.extend(facts_from_model_compat(&read_json(file)?));
+    }
+    if let Some(file) = args.evidence.as_deref() {
+        pool.extend(facts_from_evidence(&read_json(file)?));
+    }
+    Ok(pool)
+}
+
+/// Print a `policy: <verdict>` header followed by one indented line per rule.
+fn print_pretty(outcome: &PolicyOutcome) {
+    let label = match outcome.verdict {
+        Verdict::Pass => "pass",
+        Verdict::Warn => "warn",
+        Verdict::Fail => "fail",
+    };
+    println!("policy: {label}");
+    for entry in &outcome.rules {
+        println!(
+            "  [{}] {} ({}): {}",
+            tag(entry.status),
+            entry.id,
+            entry.fact,
+            entry.detail
+        );
+    }
+}
+
+/// Map a rule status to its upper-case display tag (tags are not padded to one width).
+fn tag(status: RuleStatus) -> &'static str {
+    match status {
+        RuleStatus::Pass => "PASS",
+        RuleStatus::Fail => "FAIL",
+        RuleStatus::Warn => "WARN",
+        RuleStatus::Waived => "WAIVED",
+        RuleStatus::ExpiredWaiver => "EXPIRED-WAIVER",
+        RuleStatus::Unevaluated => "UNEVALUATED",
+    }
+}
+
+/// Load the artifact at `path` and parse it as JSON. Read and parse failures
+/// are each wrapped with the path so the offending input is named in the error.
+fn read_json(path: &Path) -> Result<serde_json::Value> {
+    let contents = std::fs::read_to_string(path)
+        .with_context(|| format!("failed to read artifact {}", path.display()))?;
+    serde_json::from_str(&contents)
+        .with_context(|| format!("failed to parse artifact {}", path.display()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn read_json_reports_the_path_on_a_missing_file() {
+        let err = read_json(Path::new("/nope/does-not-exist.json")).unwrap_err();
+        assert!(err.to_string().contains("does-not-exist.json")); // outermost context names the path
+    }
+}
diff --git a/crates/mcptest/src/cli/mod.rs b/crates/mcptest/src/cli/mod.rs
index c7410e29..52d43f34 100644
--- a/crates/mcptest/src/cli/mod.rs
+++ b/crates/mcptest/src/cli/mod.rs
@@ -77,6 +77,7 @@ Commands:
     evidence       Aggregate a run into a portable, signable evidence artifact
     sbom           Print the SBOM baked into the binary at build time
     matchers       List every matcher the YAML schema accepts
+    policy         Evaluate a policy against saved artifacts
     schema         Emit the JSON Schema for the YAML config
 
   Plumbing:
@@ -233,6 +234,8 @@ pub enum Command {
     Cache(CacheArgs),
     /// Run a declarative multi-step tool-call pipeline.
     Pipe(PipeArgs),
+    /// Evaluate a policy against saved artifacts (a local governance gate).
+    Policy(PolicyArgs),
     /// Scan a tools/list snapshot, or drive the live red-team lane with
     /// `security redteam`.
     Security(SecurityArgs),
@@ -313,6 +316,7 @@ where
         Command::Propose(args) => handlers::propose_command(args),
         Command::Cache(args) => handlers::cache_command(args),
         Command::Pipe(args) => handlers::pipe_command(args),
+        Command::Policy(args) => handlers::policy_command(args),
         Command::Security(args) => handlers::security_command(args, &cli.global),
         Command::WebBotAuth(args) => handlers::web_bot_auth_command(args),
         Command::Sbom(args) => handlers::sbom_command(args),
diff --git a/crates/mcptest/tests/cli_policy_simulate.rs b/crates/mcptest/tests/cli_policy_simulate.rs
new file mode 100644
index 00000000..61c94028
--- /dev/null
+++ b/crates/mcptest/tests/cli_policy_simulate.rs
@@ -0,0 +1,179 @@
+//! Integration tests for `mcptest policy simulate`.
+//!
+//! Drives the binary over a saved run report and a small policy, asserting the
+//! verdict text and the exit code (the governance signal): a dry-run always
+//! exits 0, `--gate` exits 1 on a failing verdict, an active waiver suppresses
+//! a failure, and an expired waiver fails closed.
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::tempdir;
+
+fn mcptest() -> Command {
+    Command::cargo_bin("mcptest").expect("binary builds") // runs this crate's compiled binary
+}
+
+fn write(path: &std::path::Path, body: &str) {
+    std::fs::write(path, body).unwrap(); // a fixture that cannot be written aborts the test
+}
+
+/// Run report fixture in which all three tests passed.
+fn clean_run_report() -> &'static str {
+    r#"{"summary":{"total":3,"passed":3,"failed":0,"skipped":0,"duration_ms":1}}"#
+}
+
+/// Run report fixture in which one of the three tests failed.
+fn failing_run_report() -> &'static str {
+    r#"{"summary":{"total":3,"passed":2,"failed":1,"skipped":0,"duration_ms":1}}"#
+}
+
+/// A one-rule policy: `no-failed-tests` caps the `run.failed` fact at `max: 0`.
+fn no_fail_policy() -> &'static str {
+    "version: \"1.0\"\nrules:\n  - id: no-failed-tests\n    fact: run.failed\n    max: 0\n"
+}
+
+#[test]
+fn a_passing_policy_over_a_run_report_succeeds() {
+    let dir = tempdir().unwrap();
+    let report = dir.path().join("run.json");
+    let policy = dir.path().join("policy.yml");
+    write(&report, clean_run_report());
+    write(&policy, no_fail_policy());
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy.to_str().unwrap(),
+            "--run-report",
+            report.to_str().unwrap(),
+        ])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("policy: pass")); // the verdict header, not any "pass"
+}
+
+#[test]
+fn a_failing_policy_with_gate_exits_one() {
+    let scratch = tempdir().unwrap();
+    let run_json = scratch.path().join("run.json");
+    let policy_yml = scratch.path().join("policy.yml");
+    write(&run_json, failing_run_report());
+    write(&policy_yml, no_fail_policy());
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy_yml.to_str().unwrap(),
+            "--run-report",
+            run_json.to_str().unwrap(),
+            "--gate",
+        ])
+        .assert()
+        .code(1)
+        .stdout(predicate::str::contains("no-failed-tests"))
+        .stdout(predicate::str::contains("FAIL"));
+}
+
+#[test]
+fn a_failing_policy_without_gate_still_exits_zero() {
+    let dir = tempdir().unwrap();
+    let report = dir.path().join("run.json");
+    let policy = dir.path().join("policy.yml");
+    write(&report, failing_run_report());
+    write(&policy, no_fail_policy());
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy.to_str().unwrap(),
+            "--run-report",
+            report.to_str().unwrap(),
+        ])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("policy: fail")) // bare "fail" also matches the rule id
+        .stdout(predicate::str::contains("FAIL"));
+}
+
+#[test]
+fn an_active_waiver_keeps_the_gate_green() {
+    let scratch = tempdir().unwrap();
+    let run_json = scratch.path().join("run.json");
+    let policy_yml = scratch.path().join("policy.yml");
+    write(&run_json, failing_run_report());
+    write(
+        &policy_yml,
+        "version: \"1.0\"\nrules:\n  - id: no-failed-tests\n    fact: run.failed\n    max: 0\nwaivers:\n  - rule: no-failed-tests\n    owner: alice\n    reason: known flake\n    expiry: \"2099-01-01T00:00:00Z\"\n",
+    );
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy_yml.to_str().unwrap(),
+            "--run-report",
+            run_json.to_str().unwrap(),
+            "--gate",
+        ])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("WAIVED"));
+}
+
+#[test]
+fn an_expired_waiver_fails_closed_under_gate() {
+    let scratch = tempdir().unwrap();
+    let run_json = scratch.path().join("run.json");
+    let policy_yml = scratch.path().join("policy.yml");
+    write(&run_json, failing_run_report());
+    write(
+        &policy_yml,
+        "version: \"1.0\"\nrules:\n  - id: no-failed-tests\n    fact: run.failed\n    max: 0\nwaivers:\n  - rule: no-failed-tests\n    owner: alice\n    reason: stale\n    expiry: \"2000-01-01T00:00:00Z\"\n",
+    );
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy_yml.to_str().unwrap(),
+            "--run-report",
+            run_json.to_str().unwrap(),
+            "--gate",
+        ])
+        .assert()
+        .code(1)
+        .stdout(predicate::str::contains("EXPIRED-WAIVER"))
+        .stdout(predicate::str::contains("expired"));
+}
+
+#[test]
+fn json_format_emits_a_verdict_field() {
+    let scratch = tempdir().unwrap();
+    let run_json = scratch.path().join("run.json");
+    let policy_yml = scratch.path().join("policy.yml");
+    write(&run_json, clean_run_report());
+    write(&policy_yml, no_fail_policy());
+
+    mcptest()
+        .args([
+            "policy",
+            "simulate",
+            "--policy",
+            policy_yml.to_str().unwrap(),
+            "--run-report",
+            run_json.to_str().unwrap(),
+            "--format",
+            "json",
+        ])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("\"verdict\""));
+}
diff --git a/docs-site/llms-full.sha256 b/docs-site/llms-full.sha256
index 6c4b1a9c..1a89d0dd 100644
--- a/docs-site/llms-full.sha256
+++ b/docs-site/llms-full.sha256
@@ -1 +1 @@
-cc16f70b1ae3d29472c79d8d0d226155dd092de48d11ec9e3275d382f18797c1
+88562f76c5d5964f98034d6063f192117bd81a97a73771063d719ff7461dde8d
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 3f8c6d9c..7f872568 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -110,6 +110,7 @@
 - [Software Bill of Materials (mcptest sbom)](./sbom.md)
 - [Portable run evidence (mcptest evidence)](./evidence.md)
 - [Session ledger (mcptest ledger)](./session-ledger.md)
+- [Policy simulator (mcptest policy simulate)](./policy-simulator.md)
 
 # Advanced and deep dives
 
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 4be8194d..90d0a805 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -40,7 +40,7 @@ further down.
 | Agent integration | [`mcp-server`](#mcp-server), [`skill`](#skill), [`prompt`](#prompt), [`propose`](#propose) |
 | Explore a server | [`inspect`](#inspect), [`tools`](#tools-resources-prompts-capabilities), [`resources`](#tools-resources-prompts-capabilities), [`prompts`](#tools-resources-prompts-capabilities), [`capabilities`](#tools-resources-prompts-capabilities), [`discover`](#discover), [`generate`](#generate), [`mock`](#mock) |
 | Quality and security | [`compliance`](#compliance), [`conformance`](#conformance), [`security`](#security), [`coverage`](#coverage), [`eval`](#eval), [`judge`](#judge), [`fuzz`](#fuzz), [`lint`](#lint), [`schema-lint`](#schema-lint), [`model-compat`](#model-compat) |
-| Reports and artifacts | [`report`](#report), [`diff`](#diff), [`baseline`](#baseline), [`ledger`](#ledger), [`evidence`](#evidence), [`sbom`](#sbom), [`matchers`](#matchers), [`schema`](#schema) |
+| Reports and artifacts | [`report`](#report), [`diff`](#diff), [`baseline`](#baseline), [`ledger`](#ledger), [`evidence`](#evidence), [`policy`](#policy), [`sbom`](#sbom), [`matchers`](#matchers), [`schema`](#schema) |
 | Plumbing | [`completions`](#completions), [`cache`](#cache), [`login`](#login), [`migrate`](#migrate), [`record`](#record), [`distill`](#distill), [`pipe`](#pipe), [`web-bot-auth`](#web-bot-auth) |
 
 One command is deliberately absent from `--help`: [`exec`](#exec) is the
@@ -829,6 +829,54 @@ is a separate step.
 
 **Status.** Working (`certify`).
 
+### `policy`
+
+Evaluate a declarative governance policy against saved mcptest artifacts: a
+local OSS gate that answers "would this release pass our quality, security, and
+judge-certification requirements" without any external service. v1 ships one
+subcommand:
+
+```sh
+mcptest policy simulate --policy policy.yml
+                        [--run-report run.json] [--judge-cert cert.json]
+                        [--conformance-report conf.json] [--security sec.json]
+                        [--model-compat diff.json] [--evidence ev.json]
+                        [--gate] [--format pretty|json]
+```
+
+**Description.** The policy file declares `rules` (each names a `fact` and one
+comparator: `max`, `min`, `equals`, or `one_of`, with `severity: fail` or
+`warn`) and optional `waivers` (each references a rule and carries an `owner`,
+`reason`, RFC 3339 `expiry`, and `issue`). `simulate` reads whichever artifact
+files are supplied, extracts named facts from each (for example `run.failed`,
+`judge.certified`, `judge.expired`, `conformance.badge`,
+`security.critical_count`, `model_compat.fail`, `evidence.reproducible`),
+evaluates every rule, and prints a per-rule verdict with the observed value.
+Waivers suppress a failing rule until they expire; an expired waiver fails
+closed, and a rule whose fact is missing is reported `unevaluated` and fails
+closed too. See [policy-simulator.md](policy-simulator.md) for the full fact
+catalog.
+
+**Arguments.**
+
+| Argument | Type | Description |
+| -------- | ---- | ----------- |
+| `--policy <FILE>` | path | The declarative policy YAML. Required. |
+| `--run-report <FILE>` | path | Run report JSON from `mcptest run --reporter json`. |
+| `--judge-cert <FILE>` | path | Judge certification record from `mcptest judge certify`. |
+| `--conformance-report <FILE>` | path | Conformance report JSON from `mcptest conformance run`. |
+| `--security <FILE>` | path | Security report JSON from `mcptest security`. |
+| `--model-compat <FILE>` | path | Model-compat diff JSON from `mcptest model-compat diff`. |
+| `--evidence <FILE>` | path | Evidence artifact JSON from `mcptest evidence`. |
+| `--gate` | flag | Exit non-zero when the policy fails. Off by default (dry-run prints the verdict and always exits 0). |
+| `--format <FORMAT>` | enum | `pretty` (default) or `json`. |
+
+**Exit codes.** Dry-run always exits `0`. With `--gate`: `0` when the verdict is
+pass or warn, `1` when any fail-severity rule fails, hits an expired waiver, or
+references a fact that was not supplied.
+
+**Status.** Working (`simulate`).
+
 ### `conformance`
 
 Score a running MCP server against the vendored SEP corpus,
diff --git a/docs/llms-full.txt b/docs/llms-full.txt
index aef66395..b6511ddb 100644
--- a/docs/llms-full.txt
+++ b/docs/llms-full.txt
@@ -5264,7 +5264,7 @@ further down.
 | Agent integration | [`mcp-server`](#mcp-server), [`skill`](#skill), [`prompt`](#prompt), [`propose`](#propose) |
 | Explore a server | [`inspect`](#inspect), [`tools`](#tools-resources-prompts-capabilities), [`resources`](#tools-resources-prompts-capabilities), [`prompts`](#tools-resources-prompts-capabilities), [`capabilities`](#tools-resources-prompts-capabilities), [`discover`](#discover), [`generate`](#generate), [`mock`](#mock) |
 | Quality and security | [`compliance`](#compliance), [`conformance`](#conformance), [`security`](#security), [`coverage`](#coverage), [`eval`](#eval), [`judge`](#judge), [`fuzz`](#fuzz), [`lint`](#lint), [`schema-lint`](#schema-lint), [`model-compat`](#model-compat) |
-| Reports and artifacts | [`report`](#report), [`diff`](#diff), [`baseline`](#baseline), [`ledger`](#ledger), [`evidence`](#evidence), [`sbom`](#sbom), [`matchers`](#matchers), [`schema`](#schema) |
+| Reports and artifacts | [`report`](#report), [`diff`](#diff), [`baseline`](#baseline), [`ledger`](#ledger), [`evidence`](#evidence), [`policy`](#policy), [`sbom`](#sbom), [`matchers`](#matchers), [`schema`](#schema) |
 | Plumbing | [`completions`](#completions), [`cache`](#cache), [`login`](#login), [`migrate`](#migrate), [`record`](#record), [`distill`](#distill), [`pipe`](#pipe), [`web-bot-auth`](#web-bot-auth) |
 
 One command is deliberately absent from `--help`: [`exec`](#exec) is the
@@ -6053,6 +6053,54 @@ is a separate step.
 
 **Status.** Working (`certify`).
 
+### `policy`
+
+Evaluate a declarative governance policy against saved mcptest artifacts: a
+local OSS gate that answers "would this release pass our quality, security, and
+judge-certification requirements" without any external service. v1 ships one
+subcommand:
+
+```sh
+mcptest policy simulate --policy policy.yml
+                        [--run-report run.json] [--judge-cert cert.json]
+                        [--conformance-report conf.json] [--security sec.json]
+                        [--model-compat diff.json] [--evidence ev.json]
+                        [--gate] [--format pretty|json]
+```
+
+**Description.** The policy file declares `rules` (each names a `fact` and one
+comparator: `max`, `min`, `equals`, or `one_of`, with `severity: fail` or
+`warn`) and optional `waivers` (each references a rule and carries an `owner`,
+`reason`, RFC 3339 `expiry`, and `issue`). `simulate` reads whichever artifact
+files are supplied, extracts named facts from each (for example `run.failed`,
+`judge.certified`, `judge.expired`, `conformance.badge`,
+`security.critical_count`, `model_compat.fail`, `evidence.reproducible`),
+evaluates every rule, and prints a per-rule verdict with the observed value.
+Waivers suppress a failing rule until they expire; an expired waiver fails
+closed, and a rule whose fact is missing is reported `unevaluated` and fails
+closed too. See [policy-simulator.md](policy-simulator.md) for the full fact
+catalog.
+
+**Arguments.**
+
+| Argument | Type | Description |
+| -------- | ---- | ----------- |
+| `--policy <FILE>` | path | The declarative policy YAML. Required. |
+| `--run-report <FILE>` | path | Run report JSON from `mcptest run --reporter json`. |
+| `--judge-cert <FILE>` | path | Judge certification record from `mcptest judge certify`. |
+| `--conformance-report <FILE>` | path | Conformance report JSON from `mcptest conformance run`. |
+| `--security <FILE>` | path | Security report JSON from `mcptest security`. |
+| `--model-compat <FILE>` | path | Model-compat diff JSON from `mcptest model-compat diff`. |
+| `--evidence <FILE>` | path | Evidence artifact JSON from `mcptest evidence`. |
+| `--gate` | flag | Exit non-zero when the policy fails. Off by default (dry-run prints the verdict and always exits 0). |
+| `--format <FORMAT>` | enum | `pretty` (default) or `json`. |
+
+**Exit codes.** Dry-run always exits `0`. With `--gate`: `0` when the verdict is
+pass or warn, `1` when any fail-severity rule fails, hits an expired waiver, or
+references a fact that was not supplied.
+
+**Status.** Working (`simulate`).
+
 ### `conformance`
 
 Score a running MCP server against the vendored SEP corpus,
diff --git a/docs/policy-simulator.md b/docs/policy-simulator.md
new file mode 100644
index 00000000..df03478e
--- /dev/null
+++ b/docs/policy-simulator.md
@@ -0,0 +1,193 @@
+# Policy simulator
+
+`mcptest policy simulate` is a local governance gate. It reads a small
+declarative policy file plus whichever mcptest artifacts you already have on
+disk, extracts named facts from each artifact, evaluates the policy rules
+against those facts, applies any waivers, and prints a pass / warn / fail
+verdict with a cited line per rule. It runs entirely offline: no network, no
+hosted collector, no live server. The same artifacts your pipeline already
+produces become the inputs to a release gate you can read in one sitting.
+
+This is the open-source, single-developer half of governance. It does not store
+results, manage approvals across a team, or enforce anything centrally; it
+evaluates a policy you commit next to your tests and returns an exit code your
+CI can block on.
+
+## The command
+
+```sh
+mcptest policy simulate --policy policy.yml \
+  --run-report run.json \
+  --judge-cert cert.json \
+  --conformance-report conformance.json \
+  --security security.json \
+  --model-compat model-compat.json \
+  --evidence evidence.json
+```
+
+Only `--policy` is required. Pass whichever artifact flags you have; each one
+adds its facts to the pool the rules evaluate against. A rule whose fact has no
+backing artifact is reported as `unevaluated` and fails the gate, so a missing
+input never silently passes (more on that below).
+
+| Flag | Artifact | Produced by |
+| ---- | -------- | ----------- |
+| `--policy` | The policy file itself (YAML) | You author it. See `examples/policy/policy.yml`. |
+| `--run-report` | Run report (JSON) | `mcptest run --reporter json --output run.json` |
+| `--judge-cert` | Judge certification record (JSON) | `mcptest judge certify --output cert.json` |
+| `--conformance-report` | Conformance score (JSON) | `mcptest conformance run --format json` |
+| `--security` | Security scan (JSON) | `mcptest security tools-list.json --format json` |
+| `--model-compat` | Model-compatibility diff (JSON) | `mcptest model-compat diff --format json` |
+| `--evidence` | Evidence artifact (JSON) | `mcptest evidence run.json --out evidence.json` |
+| `--gate` | (no artifact) | Turns a failing verdict into a non-zero exit code. |
+| `--format` | (no artifact) | `pretty` (default) or `json`. |
+
+## The policy file
+
+A policy is a tiny YAML document: a version string, a list of rules, and an
+optional list of waivers. There is no expression language on purpose. Each rule
+names exactly one fact and one comparator, so anyone reading the file can see at
+a glance what gates the release.
+
+```yaml
+version: "1.0"
+rules:
+  - id: no-failed-tests
+    description: Every test in the run must pass.
+    fact: run.failed
+    max: 0
+  - id: judge-certified
+    description: The grading judge must be certified.
+    fact: judge.certified
+    equals: true
+  - id: conformance-tier
+    description: The server must reach conformance tier 1 or 2.
+    fact: conformance.badge
+    one_of: [T1, T2]
+    severity: warn
+waivers:
+  - rule: conformance-tier
+    owner: platform-team
+    reason: A known SHOULD gap tracked upstream.
+    expiry: "2099-01-01T00:00:00Z"
+    issue: GH-1234
+```
+
+The full worked example lives at `examples/policy/policy.yml`.
+
+### Rules
+
+Each rule has:
+
+- `id`: a stable identifier, cited in the report and matched by waivers.
+- `description` (optional): a human-readable note explaining the rule's intent.
+- `fact`: the fact name the rule constrains (see the catalog below).
+- exactly one comparator:
+  - `max`: the fact (a number) must be less than or equal to this value.
+  - `min`: the fact (a number) must be greater than or equal to this value.
+  - `equals`: the fact must equal this literal. The comparison follows the
+    literal's type: `equals: true` compares a boolean fact, `equals: 0` a
+    numeric fact, `equals: "T1"` a textual fact.
+  - `one_of`: the fact (rendered to text) must be one of the listed values.
+- `severity` (optional, defaults to `fail`): `fail` means a failing rule fails
+  the gate; `warn` means a failing rule only warns and never fails the gate.
+
+Setting zero or more than one comparator on a rule makes it `unevaluated`, which
+fails the gate, so a malformed rule is loud rather than silent.
+
+### Waivers
+
+A waiver suppresses one rule's failure until it expires:
+
+- `rule`: the rule id it suppresses.
+- `owner`: who owns the waiver, so reviewers know whom to ask.
+- `reason`: why the failure is tolerated, captured for the audit trail.
+- `expiry`: an RFC 3339 UTC timestamp, for example `2026-12-31T00:00:00Z`.
+- `issue` (optional): a tracking reference such as a GitHub issue id.
+
+Waivers fail closed. While a waiver is active, a failing rule is reported as
+`waived` and does not fail the gate. Once the expiry passes (or if the expiry
+does not parse), the waiver no longer suppresses anything: the rule is reported
+as `expired-waiver` and the gate fails. A waiver is a dated promise to fix
+something, not a permanent exception, and the simulator enforces the date.
+
+## The fact catalog
+
+Each artifact flag contributes a fixed set of facts. A fact that an artifact
+does not carry is simply absent (and any rule referencing it is `unevaluated`),
+except the five security severity counts, which are always emitted (zero when a
+clean scan had no findings of that severity) so a `max: 0` rule on them works
+even against a clean scan.
+
+| Fact | Type | Source artifact |
+| ---- | ---- | --------------- |
+| `run.total` | number | `--run-report` |
+| `run.passed` | number | `--run-report` |
+| `run.failed` | number | `--run-report` |
+| `run.skipped` | number | `--run-report` |
+| `run.inconclusive` | number | `--run-report` (when present) |
+| `judge.certified` | boolean | `--judge-cert` |
+| `judge.ece` | number | `--judge-cert` (expected calibration error) |
+| `judge.brier` | number | `--judge-cert` (Brier score) |
+| `judge.expired` | boolean | `--judge-cert` (computed from the certification's validity window versus the current time) |
+| `conformance.badge` | text | `--conformance-report` (`T1` / `T2` / `T3` / `F`) |
+| `conformance.must_passed` | number | `--conformance-report` (MUST checks passed) |
+| `conformance.must_total` | number | `--conformance-report` (MUST checks total) |
+| `conformance.should_passed` | number | `--conformance-report` (SHOULD checks passed) |
+| `conformance.should_total` | number | `--conformance-report` (SHOULD checks total) |
+| `conformance.tier` | text | `--conformance-report` |
+| `security.critical_count` | number | `--security` |
+| `security.high_count` | number | `--security` |
+| `security.medium_count` | number | `--security` |
+| `security.low_count` | number | `--security` |
+| `security.info_count` | number | `--security` |
+| `security.total_findings` | number | `--security` |
+| `model_compat.total` | number | `--model-compat` |
+| `model_compat.pass` | number | `--model-compat` |
+| `model_compat.drift` | number | `--model-compat` |
+| `model_compat.fail` | number | `--model-compat` |
+| `evidence.reproducible` | boolean | `--evidence` |
+| `evidence.unverifiable_origin` | boolean | `--evidence` |
+
+## Verdict and exit codes
+
+Every rule resolves to one of: `pass`, `fail`, `warn`, `waived`,
+`expired-waiver`, or `unevaluated`. The overall verdict is the worst outcome
+across all rules:
+
+- `fail` if any rule is `fail`, `expired-waiver`, or `unevaluated`.
+- otherwise `warn` if any rule is `warn`.
+- otherwise `pass`.
+
+`waived` and `pass` rules never fail the gate.
+
+Exit codes follow the verdict only when you ask for a gate:
+
+- Without `--gate` the command is a dry run: it always exits 0 and just prints
+  the verdict. This is useful for showing the report without blocking.
+- With `--gate` the command exits 1 when the verdict is `fail`, and 0
+  otherwise. This is what you wire into CI.
+
+Two behaviors are worth restating because they keep the gate honest:
+
+- An expired waiver fails closed. A stale promise to fix something stops
+  suppressing the failure the moment it expires.
+- A missing fact is `unevaluated` and fails. If a rule references a fact and you
+  did not pass the artifact that provides it, the gate fails rather than
+  pretending the check passed.
+
+## How it relates to evidence and judge certification
+
+The simulator sits one layer above the artifacts. The evidence artifact
+(`mcptest evidence`) bundles a run's metadata into one portable, signable file;
+the policy simulator can read that file and gate on its `reproducible` and
+`unverifiable_origin` flags alongside everything else. Judge certification
+(`mcptest judge certify`) proves a grading judge is calibrated before its
+verdict may gate; the simulator turns that proof into a gate condition with the
+`judge.certified` and `judge.expired` facts, so a release can require that the
+judge behind its evals was certified and that the certification has not gone
+stale. In short, the other commands produce trustworthy artifacts; the policy
+simulator decides, locally and reproducibly, whether those artifacts clear the
+bar you set.
+
+See `examples/policy/policy.yml` for a starting point you can copy and trim.
diff --git a/examples/policy/policy.yml b/examples/policy/policy.yml
new file mode 100644
index 00000000..f955fc99
--- /dev/null
+++ b/examples/policy/policy.yml
@@ -0,0 +1,30 @@
+# A starter governance policy for `mcptest policy simulate`.
+version: "1.0"
+rules:
+  - id: no-failed-tests
+    description: Every test in the run must pass.
+    fact: run.failed
+    max: 0
+  - id: judge-certified
+    description: The grading judge must be certified.
+    fact: judge.certified
+    equals: true
+  - id: judge-not-expired
+    description: The judge certification must not have expired.
+    fact: judge.expired
+    equals: false
+  - id: conformance-tier
+    description: The server must reach conformance tier 1 or 2.
+    fact: conformance.badge
+    one_of: [T1, T2]
+    severity: warn
+  - id: no-critical-security
+    description: No critical security findings.
+    fact: security.critical_count
+    max: 0
+waivers:
+  - rule: conformance-tier
+    owner: platform-team
+    reason: A known SHOULD gap tracked upstream.
+    expiry: "2099-01-01T00:00:00Z"
+    issue: GH-1234
diff --git a/scripts/check-examples.sh b/scripts/check-examples.sh
index de46ee6c..51663139 100755
--- a/scripts/check-examples.sh
+++ b/scripts/check-examples.sh
@@ -48,6 +48,8 @@ skip_reason() {
       echo "expected-failures baseline used by the coverage suite, not a run suite" ;;
     "examples/pipe-search-then-update.yml")
       echo "pipe plan (schemas/pipe/v0.json), not a run suite" ;;
+    "examples/policy/policy.yml")
+      echo "governance policy file for 'mcptest policy simulate', not a run suite" ;;
     *) echo "" ;;
   esac
 }