From e25973d6263cb179aea4e6d1fe37bdfd768b2b94 Mon Sep 17 00:00:00 2001
From: luohui1 <3053763193@qq.com>
Date: Sat, 13 Jun 2026 22:55:17 +0800
Subject: [PATCH] docs: clarify fail-on-category thresholds

---
 CHANGELOG.md                   |  4 ++++
 docs/platform/cli.md           | 12 +++++++++---
 docs/reporting/scoring-spec.md |  6 ++++--
 src/mcts/cli/main.py           |  3 ++-
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c81d031..8ad0b0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+
+- Clarified inclusive `--fail-on-category` thresholds in CLI help and docs, including the `permissions:0` pitfall and `permissions:1` zero-risk pattern.
+
 ## [0.1.4] - 2026-06-12
 
 ### Security
diff --git a/docs/platform/cli.md b/docs/platform/cli.md
index 407af2e..42e98a9 100644
--- a/docs/platform/cli.md
+++ b/docs/platform/cli.md
@@ -87,7 +87,7 @@ When `-o` is set, format determines serialization. SARIF uses `reporting/sarif.p
 | `--fail-on-critical` | false | Exit **1** if any critical finding |
 | `--min-score` | — | Exit **1** if legacy `score.overall` < N (0–100) |
 | `--max-critical` | — | Exit **1** if critical count > N |
-| `--fail-on-category` | — | Repeatable. Format: `category:limit`. Exit **1** when **legacy** category score ≥ limit |
+| `--fail-on-category` | — | Repeatable. Format: `category:limit`. Exit **1** when **legacy** category score >= limit (inclusive) |
 | `--scoring` | `both` | `legacy`, `v2`, or `both` — enable multi-factor scoring |
 | `--min-security-score` | — | Exit **1** if v2 benchmark security score < N (requires `--scoring v2` or `both`) |
 | `--max-absolute-risk` | — | Exit **1** if v2 `absolute_risk` > N (requires `--scoring v2` or `both`) |
@@ -99,6 +99,10 @@ When `-o` is set, format determines serialization. SARIF uses `reporting/sarif.p
 
 Valid **legacy** category keys: `permissions`, `injection`, `execution`, `data_leakage`, `attack_chains`, `shadowing`, `jailbreak`. Category gates apply to v1 tiles only — not `category_scores_v2`. See [Scoring developer guide](../reporting/scoring-guide.md).
 
+`--fail-on-category` limits are inclusive: the scan fails when the category score is greater than or equal to the limit. This makes `permissions:0` stricter than "zero findings": a clean permissions tile has score `0`, and because `0 >= 0` is true, the gate still fails. Use `permissions:1` when you want to allow zero permissions risk points and fail on any positive permissions risk.
+
+For multi-server MCP repository checks such as G02 policies, prefer `permissions:1` for "no permissions risk" gates. If a category tile displays `Passed`, read that as `0` category risk points, not as a CI gate result; the gate failure line is authoritative.
+
 ### Terminal UI flags
 
 | Flag | Default | Description |
@@ -217,9 +221,9 @@ mcts scan . --config ~/.cursor/mcp.json --server my-server \
 mcts scan ./server.py -o report.sarif --format sarif \
   --min-score 70 --max-critical 0 --fail-on-critical
 
-# Category gates
+# Category gates (inclusive: score >= limit fails)
 mcts scan ./repo/ \
-  --fail-on-category permissions:10 \
+  --fail-on-category permissions:1 \
   --fail-on-category injection:15
 
 # Fuzz telemetry replay
@@ -457,6 +461,8 @@ See [Protocol Fuzzing](../scanning/fuzzing.md).
 
 Gate failures (`scan` only): `--fail-on-critical`, `--min-score`, `--max-critical`, `--fail-on-category` (legacy); `--min-security-score`, `--max-absolute-risk`, `--max-risk-level`, `--min-category-score-v2` (v2, require `--scoring v2` or `both`).
 
+`--fail-on-category` uses the same inclusive `score >= limit` rule in CI and local scans. For example, `permissions:0` fails even when the permissions score is `0`; use `permissions:1` to fail on any positive permissions risk while allowing a clean tile.
+
 ---
 
 ## Environment variables
diff --git a/docs/reporting/scoring-spec.md b/docs/reporting/scoring-spec.md
index e7c3560..07312ab 100644
--- a/docs/reporting/scoring-spec.md
+++ b/docs/reporting/scoring-spec.md
@@ -210,9 +210,11 @@ Exit code **1** when a gate fails; **2** for usage/consent errors.
 | `--fail-on-critical` | `summary.critical > 0` (scorable findings) |
 | `--min-score N` | `score.overall < N` |
 | `--max-critical N` | `summary.critical > N` |
-| `--fail-on-category KEY:LIMIT` | Legacy category score ≥ LIMIT |
+| `--fail-on-category KEY:LIMIT` | Legacy category score >= LIMIT |
 
-Category gates are **inclusive** at the limit: `--fail-on-category permissions:10` fails when permissions category score is **10 or higher**.
+Category gates are **inclusive** at the limit: `--fail-on-category permissions:10` fails when the permissions category score is **10 or higher**. For zero-risk policies, use `permissions:1` to allow a clean `0` and fail on any positive permissions risk. `permissions:0` fails even when the permissions score is `0` because `0 >= 0`.
+
+If a category tile displays `Passed`, that means the tile has zero category risk points; it is not a CI gate result. Gate failure messages include the inclusive comparison so CI output stays authoritative.
 
 ### v2 gates (shipped)
 
diff --git a/src/mcts/cli/main.py b/src/mcts/cli/main.py
index 8c92d7c..cd7b88d 100644
--- a/src/mcts/cli/main.py
+++ b/src/mcts/cli/main.py
@@ -318,7 +318,8 @@ def scan(
             help=(
                 "Exit 1 when legacy category risk score meets or exceeds threshold (inclusive). "
                 "Legacy v1 tiles only — not category_scores_v2. "
-                "e.g. permissions:0 fails when score is 0 or more. Repeatable."
+                "Use permissions:1 to allow 0 risk points but fail on any positive risk; "
+                "permissions:0 also fails when score is 0. Repeatable."
             ),
         ),
     ] = None,