From e25973d6263cb179aea4e6d1fe37bdfd768b2b94 Mon Sep 17 00:00:00 2001 From: luohui1 <3053763193@qq.com> Date: Sat, 13 Jun 2026 22:55:17 +0800 Subject: [PATCH] docs: clarify fail-on-category thresholds --- CHANGELOG.md | 4 ++++ docs/platform/cli.md | 12 +++++++++--- docs/reporting/scoring-spec.md | 6 ++++-- src/mcts/cli/main.py | 3 ++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c81d031..8ad0b0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Clarified inclusive `--fail-on-category` thresholds in CLI help and docs, including the `permissions:0` pitfall and `permissions:1` zero-risk pattern. + ## [0.1.4] - 2026-06-12 ### Security diff --git a/docs/platform/cli.md b/docs/platform/cli.md index 407af2e..42e98a9 100644 --- a/docs/platform/cli.md +++ b/docs/platform/cli.md @@ -87,7 +87,7 @@ When `-o` is set, format determines serialization. SARIF uses `reporting/sarif.p | `--fail-on-critical` | false | Exit **1** if any critical finding | | `--min-score` | — | Exit **1** if legacy `score.overall` < N (0–100) | | `--max-critical` | — | Exit **1** if critical count > N | -| `--fail-on-category` | — | Repeatable. Format: `category:limit`. Exit **1** when **legacy** category score ≥ limit | +| `--fail-on-category` | — | Repeatable. Format: `category:limit`. Exit **1** when **legacy** category score >= limit (inclusive) | | `--scoring` | `both` | `legacy`, `v2`, or `both` — enable multi-factor scoring | | `--min-security-score` | — | Exit **1** if v2 benchmark security score < N (requires `--scoring v2` or `both`) | | `--max-absolute-risk` | — | Exit **1** if v2 `absolute_risk` > N (requires `--scoring v2` or `both`) | @@ -99,6 +99,10 @@ When `-o` is set, format determines serialization. 
SARIF uses `reporting/sarif.p Valid **legacy** category keys: `permissions`, `injection`, `execution`, `data_leakage`, `attack_chains`, `shadowing`, `jailbreak`. Category gates apply to v1 tiles only — not `category_scores_v2`. See [Scoring developer guide](../reporting/scoring-guide.md). +`--fail-on-category` limits are inclusive: the scan fails when the category score is greater than or equal to the limit. This means `permissions:0` is not a "zero findings" gate: a clean permissions tile has score `0`, so `0 >= 0` fails even a clean scan. Use `permissions:1` when you want to allow zero permissions risk points and fail on any positive permissions risk. + +For multi-server MCP repository checks such as G02 policies, prefer `permissions:1` for "no permissions risk" gates. If a category tile displays `Passed`, read that as `0` category risk points, not as a CI gate result; the gate failure line is authoritative. + ### Terminal UI flags | Flag | Default | Description | @@ -217,9 +221,9 @@ mcts scan . --config ~/.cursor/mcp.json --server my-server \ mcts scan ./server.py -o report.sarif --format sarif \ --min-score 70 --max-critical 0 --fail-on-critical -# Category gates +# Category gates (inclusive: score >= limit fails) mcts scan ./repo/ \ - --fail-on-category permissions:10 \ + --fail-on-category permissions:1 \ --fail-on-category injection:15 # Fuzz telemetry replay @@ -457,6 +461,8 @@ See [Protocol Fuzzing](../scanning/fuzzing.md). Gate failures (`scan` only): `--fail-on-critical`, `--min-score`, `--max-critical`, `--fail-on-category` (legacy); `--min-security-score`, `--max-absolute-risk`, `--max-risk-level`, `--min-category-score-v2` (v2, require `--scoring v2` or `both`). +`--fail-on-category` uses the same inclusive `score >= limit` rule in CI and local scans. For example, `permissions:0` fails even when the permissions score is `0`; use `permissions:1` to fail on any positive permissions risk while allowing a clean tile. 
+ --- ## Environment variables diff --git a/docs/reporting/scoring-spec.md b/docs/reporting/scoring-spec.md index e7c3560..07312ab 100644 --- a/docs/reporting/scoring-spec.md +++ b/docs/reporting/scoring-spec.md @@ -210,9 +210,11 @@ Exit code **1** when a gate fails; **2** for usage/consent errors. | `--fail-on-critical` | `summary.critical > 0` (scorable findings) | | `--min-score N` | `score.overall < N` | | `--max-critical N` | `summary.critical > N` | -| `--fail-on-category KEY:LIMIT` | Legacy category score ≥ LIMIT | +| `--fail-on-category KEY:LIMIT` | Legacy category score >= LIMIT | -Category gates are **inclusive** at the limit: `--fail-on-category permissions:10` fails when permissions category score is **10 or higher**. +Category gates are **inclusive** at the limit: `--fail-on-category permissions:10` fails when permissions category score is **10 or higher**. For zero-risk policies, use `permissions:1` to allow a clean `0` and fail on any positive permissions risk. `permissions:0` fails even when the permissions score is `0` because `0 >= 0`. + +If a category tile displays `Passed`, that means the tile has zero category risk points; it is not a CI gate result. Gate failure messages include the inclusive comparison so CI output stays authoritative. ### v2 gates (shipped) diff --git a/src/mcts/cli/main.py b/src/mcts/cli/main.py index 8c92d7c..cd7b88d 100644 --- a/src/mcts/cli/main.py +++ b/src/mcts/cli/main.py @@ -318,7 +318,8 @@ def scan( help=( "Exit 1 when legacy category risk score meets or exceeds threshold (inclusive). " "Legacy v1 tiles only — not category_scores_v2. " - "e.g. permissions:0 fails when score is 0 or more. Repeatable." + "Use permissions:1 to allow 0 risk points but fail on any positive risk; " + "permissions:0 also fails when score is 0. Repeatable." ), ), ] = None,