From a2f51c9548c9d3721dac078b78a111b305f10f71 Mon Sep 17 00:00:00 2001 From: N1ghthill <115030983+N1ghthill@users.noreply.github.com> Date: Mon, 23 Mar 2026 23:16:00 -0300 Subject: [PATCH] Harden MCP runtime with operator policy and integration coverage --- .github/workflows/ci.yml | 13 +- CHANGELOG.md | 2 +- README.md | 7 +- docs/README.md | 48 +- docs/architecture.md | 4 +- docs/beta-readiness-gate.md | 3 +- docs/call-for-testers.md | 4 +- docs/community-host-validation.md | 2 +- docs/core-interfaces-refactor-plan.md | 14 +- docs/history/README.md | 16 + docs/{ => history}/alpha-release-notes.md | 8 +- docs/{ => history}/beta-resume-plan.md | 8 +- docs/{ => history}/mvp-closeout-backlog.md | 8 +- docs/{ => history}/mvp-evolution-plan.md | 12 +- docs/{ => history}/mvp-plan.md | 14 +- docs/{ => history}/post-mvp-evolution-plan.md | 14 +- .../release-candidate-0.1.0a2.md | 8 +- docs/operator-workflows.md | 2 +- docs/policy.md | 95 ++++ docs/release-checklist.md | 160 +++--- docs/roadmap.md | 163 +++--- docs/runtime-integration-testing.md | 71 +++ docs/runtime-mcp-maturation-plan.md | 488 ++++++++++++++++++ docs/status.md | 126 ++--- docs/vps-validation-runbook.md | 2 +- src/master_control/agent/session_summary.py | 151 +++++- src/master_control/agent/turn_planning.py | 150 +++++- src/master_control/agent/turn_rendering.py | 140 ++++- src/master_control/app.py | 45 +- src/master_control/config.py | 6 + src/master_control/config_manager.py | 14 +- src/master_control/core/runtime.py | 280 +++++++++- .../interfaces/agent/session_summary.py | 151 +----- .../interfaces/agent/turn_planning.py | 150 +----- .../interfaces/agent/turn_rendering.py | 146 +----- .../interfaces/cli/entrypoint.py | 4 +- src/master_control/interfaces/mcp/server.py | 67 ++- src/master_control/policy/config.py | 282 ++++++++++ src/master_control/policy/engine.py | 104 +++- src/master_control/store/session_store.py | 357 +++++++++++++ src/master_control/tools/registry.py | 15 +- tests/test_app.py | 2 
+- tests/test_config_tools.py | 6 + tests/test_mcp_server.py | 190 +++++-- tests/test_mcp_stdio_integration.py | 147 ++++++ tests/test_policy.py | 12 +- tests/test_runtime_policy_integration.py | 157 ++++++ tests/test_session_store.py | 66 +++ 48 files changed, 3052 insertions(+), 882 deletions(-) create mode 100644 docs/history/README.md rename docs/{ => history}/alpha-release-notes.md (90%) rename docs/{ => history}/beta-resume-plan.md (97%) rename docs/{ => history}/mvp-closeout-backlog.md (92%) rename docs/{ => history}/mvp-evolution-plan.md (94%) rename docs/{ => history}/mvp-plan.md (85%) rename docs/{ => history}/post-mvp-evolution-plan.md (97%) rename docs/{ => history}/release-candidate-0.1.0a2.md (92%) create mode 100644 docs/policy.md create mode 100644 docs/runtime-integration-testing.md create mode 100644 docs/runtime-mcp-maturation-plan.md create mode 100644 src/master_control/policy/config.py create mode 100644 tests/test_mcp_stdio_integration.py create mode 100644 tests/test_runtime_policy_integration.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af2739d..b764892 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,8 +45,17 @@ jobs: - name: Run unit tests run: PYTHONPATH=src python -m unittest discover -s tests - - name: Run pytest suite - run: PYTHONPATH=src python -m pytest -q + - name: Run pytest unit suite + run: >- + PYTHONPATH=src python -m pytest -q tests + --ignore tests/test_runtime_policy_integration.py + --ignore tests/test_mcp_stdio_integration.py + + - name: Run pytest runtime integration suite + run: >- + PYTHONPATH=src python -m pytest -q + tests/test_runtime_policy_integration.py + tests/test_mcp_stdio_integration.py - name: Compile source tree run: python -m compileall src diff --git a/CHANGELOG.md b/CHANGELOG.md index 3deb2d9..a079361 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,7 +76,7 @@ ### Notes - the repository is still pre-release -- the current unreleased target is 
the `0.1.0a2` release candidate documented in `docs/release-candidate-0.1.0a2.md` +- the current unreleased target is the `0.1.0a2` release candidate documented in `docs/history/release-candidate-0.1.0a2.md` - the narrow local CLI MVP is closed for the current alpha baseline - the project is not yet a production-ready Linux operations platform diff --git a/README.md b/README.md index e375076..acd2084 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ MC is built around three constraints: - single-host and local-first by design - install path: source checkout plus `install.sh` - validated on the maintainer workstation and on a dedicated Debian 13 VPS lab -- main integration interface: experimental read-only MCP stdio +- main integration interface: experimental MCP stdio with approval-mediated write flow - local administration interface: CLI - optional interface: chat/provider path - not positioned as a production-ready Linux administration platform, security auditor, or package manager @@ -56,6 +56,7 @@ If `install.sh` reports that `ensurepip` is unavailable on Debian or Ubuntu, ins - host, disk, memory, process, service, and journal inspection - process-to-`systemd` correlation and failed-service triage - managed config read, write, backup, and restore inside a constrained policy boundary +- operator-configurable policy through a versioned TOML file with safe defaults and fail-closed load errors - recommendation workflow with explicit approval before risky execution - repeatable host-profile validation through `mc validate-host-profile` - optional heuristic, OpenAI, and Ollama-backed planning on top of the same runtime @@ -64,9 +65,13 @@ If `install.sh` reports that `ensurepip` is unavailable on Debian or Ubuntu, ins - [Documentation map](docs/README.md) - [Current status](docs/status.md) +- [Roadmap](docs/roadmap.md) +- [Runtime + MCP maturation plan](docs/runtime-mcp-maturation-plan.md) - [Architecture](docs/architecture.md) - [Security 
model](docs/security-model.md) +- [Policy guide](docs/policy.md) - [Operator workflows](docs/operator-workflows.md) +- [Runtime integration testing](docs/runtime-integration-testing.md) - [Provider setup](docs/providers.md) - [Host-profile validation guide](docs/host-profile-validation.md) - [Validation evidence](docs/alpha-validation-report.md) diff --git a/docs/README.md b/docs/README.md index 4c41ba6..fd7b5d8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,42 +8,46 @@ Use this file to find the right working document, validation record, or planning ## Start Here - `README.md`: GitHub-facing overview, current posture, and quick-start path -- `docs/status.md`: authoritative snapshot of maturity, implemented scope, and validation baseline -- `docs/release-candidate-0.1.0a2.md`: current release-candidate record and cut state +- `docs/status.md`: authoritative snapshot of current maturity, scope, and validation baseline +- `docs/roadmap.md`: concise phase-level roadmap for the current direction +- `docs/runtime-mcp-maturation-plan.md`: canonical execution plan for the MCP-first runtime maturation track ## Product And Operator Guides -- `docs/providers.md`: provider selection, setup, and behavior -- `docs/operator-workflows.md`: supported operator workflows and follow-up paths +- `docs/architecture.md`: system structure, scope boundaries, and major flows +- `docs/security-model.md`: safety model, approval boundaries, and execution constraints +- `docs/policy.md`: operator-configurable policy file guide +- `docs/operator-workflows.md`: bounded operator workflows and evidence chains - `docs/host-profile-validation.md`: maintainer/operator host validation harness guide - `docs/community-host-validation.md`: public submission flow for external host validation +- `docs/providers.md`: provider setup and behavior for the optional planner layer - `docs/release-checklist.md`: release execution checklist ## Validation And Evidence -- `docs/alpha-validation-report.md`: 
main alpha-track validation summary -- `docs/vps-validation-report.md`: dedicated Debian 13 VPS lab validation evidence +- `docs/alpha-validation-report.md`: main validation summary for the current pre-1.0 baseline +- `docs/runtime-integration-testing.md`: runtime and MCP contract validation guide +- `docs/vps-validation-report.md`: dedicated Debian VPS validation evidence - `docs/vps-validation-runbook.md`: repeatable runbook for the maintainer-controlled VPS lab -- `docs/call-for-testers.md`: maintainer-facing outreach copy for collecting more validation evidence +- `docs/call-for-testers.md`: outreach copy for collecting more host-validation evidence -## Architecture And Security +## Supporting Engineering Docs -- `docs/architecture.md`: system structure, scope boundaries, and major flows -- `docs/security-model.md`: safety model, approval boundaries, and execution constraints +- `docs/core-interfaces-refactor-plan.md`: supporting engineering brief for the remaining code-ownership cleanup +- `docs/beta-readiness-gate.md`: beta gate criteria and release blockers - `docs/adrs/`: architectural decision records - `docs/diagrams/README.md`: diagram index and rendering notes -## Plans And Release Management +## Historical Records -- `docs/core-interfaces-refactor-plan.md`: canonical working brief for the runtime-first refactor -- `docs/roadmap.md`: high-level sequence of work and milestone framing -- `docs/beta-readiness-gate.md`: beta gate criteria and current release blockers -- `docs/beta-resume-plan.md`: short-horizon maintainer execution record -- `docs/mvp-plan.md`: original MVP definition -- `docs/mvp-evolution-plan.md`: MVP evolution and closeout record -- `docs/mvp-closeout-backlog.md`: deferred backlog after MVP closeout -- `docs/post-mvp-evolution-plan.md`: longer-horizon post-MVP planning -- `docs/alpha-release-notes.md`: current alpha-facing release notes +- `docs/history/README.md`: index of historical planning and release records +- 
`docs/history/release-candidate-0.1.0a2.md`: historical cut record for the current public pre-release +- `docs/history/alpha-release-notes.md`: historical alpha release notes +- `docs/history/beta-resume-plan.md`: historical beta-prep execution record +- `docs/history/mvp-plan.md`: original MVP framing kept for traceability +- `docs/history/mvp-evolution-plan.md`: MVP closeout sequencing record +- `docs/history/mvp-closeout-backlog.md`: MVP closeout completion record +- `docs/history/post-mvp-evolution-plan.md`: previous planning record kept for context ## Contribution And Repository Docs @@ -55,6 +59,8 @@ Use this file to find the right working document, validation record, or planning - `README.md` stays short and GitHub-facing - `docs/README.md` is the canonical document map - `docs/status.md` records the current reality; it is not the marketing page +- `docs/roadmap.md` and `docs/runtime-mcp-maturation-plan.md` are the current planning references +- historical documents stay available for traceability, but they are not authoritative for current direction - validation reports record evidence and should avoid private host coordinates or internal-only access details -- planning documents keep maintainer context; stable operator instructions belong in focused guides +- stable operator instructions belong in focused guides - when you add, rename, or repurpose a document, update this map diff --git a/docs/architecture.md b/docs/architecture.md index 2a6fed1..5e22416 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -23,7 +23,7 @@ The authoritative direction is now runtime-first: the conversational path is an - treat MCP as the main integration interface on top of the runtime - keep the CLI as the local operator and administration interface - preserve the existing chat/provider path as an optional interface -- keep an experimental read-only MCP bridge on top of the same runtime +- keep an experimental MCP bridge with approval-mediated write flow on top of 
the same runtime - persist local state and audit data in SQLite - keep the codebase modular, but inside one deployable process @@ -205,7 +205,7 @@ Its planners, providers, summaries, and rendering helpers are interface logic, n ### MCP -An experimental read-only MCP stdio bridge now exists. +An experimental MCP stdio bridge now exists with approval-mediated write operations. It is the main integration interface for exposing runtime capabilities to external AI clients without duplicating policy, audit, or execution logic. Broader capability exposure remains intentionally deferred until the runtime boundary is easier to own. diff --git a/docs/beta-readiness-gate.md b/docs/beta-readiness-gate.md index f8324de..c5966cc 100644 --- a/docs/beta-readiness-gate.md +++ b/docs/beta-readiness-gate.md @@ -9,7 +9,8 @@ This document defines the minimum bar for moving Master Control from the current It is not a feature wishlist. It is a release gate. -Use `docs/beta-resume-plan.md` for short-horizon execution order. +Use `docs/roadmap.md` and `docs/runtime-mcp-maturation-plan.md` for current planning. +`docs/history/beta-resume-plan.md` remains available as a historical execution record. Use this document only to decide whether beta claims and tagging are actually justified. ## Gate Summary diff --git a/docs/call-for-testers.md b/docs/call-for-testers.md index 97861fe..3dadebc 100644 --- a/docs/call-for-testers.md +++ b/docs/call-for-testers.md @@ -58,7 +58,7 @@ Master Control `0.1.0a2` is available as a preview build for real Linux host tes The project already has: -- a bounded CLI-first operator workflow +- a bounded local-first runtime for host operations - typed diagnostics and approval-gated mutations - a repeatable host-validation harness - a redacted bundle flow for submitting validation reports @@ -79,7 +79,7 @@ Full guide: docs/community-host-validation.md ```text I am looking for Linux testers for Master Control. 
-It is a CLI-first host admin agent with typed diagnostics, approval-gated actions, and a built-in validation flow. +It is a local-first runtime for controlled Linux host operations, with typed diagnostics, approval-gated actions, and a built-in validation flow. If you can run a quick validation on your own Linux host and send the generated redacted report, that would help broaden the real-host evidence base beyond the first validated profiles. diff --git a/docs/community-host-validation.md b/docs/community-host-validation.md index f771fe7..5bc55ba 100644 --- a/docs/community-host-validation.md +++ b/docs/community-host-validation.md @@ -118,4 +118,4 @@ See also: - `docs/host-profile-validation.md` - `docs/beta-readiness-gate.md` -- `docs/beta-resume-plan.md` +- `docs/roadmap.md` diff --git a/docs/core-interfaces-refactor-plan.md b/docs/core-interfaces-refactor-plan.md index dbcc69a..cd32d52 100644 --- a/docs/core-interfaces-refactor-plan.md +++ b/docs/core-interfaces-refactor-plan.md @@ -297,14 +297,14 @@ These documents contain useful evidence or historical execution context and shou - `docs/alpha-validation-report.md` - `docs/vps-validation-report.md` - `docs/vps-validation-runbook.md` -- `docs/release-candidate-0.1.0a2.md` -- `docs/alpha-release-notes.md` +- `docs/history/release-candidate-0.1.0a2.md` +- `docs/history/alpha-release-notes.md` - `docs/beta-readiness-gate.md` -- `docs/beta-resume-plan.md` -- `docs/mvp-plan.md` -- `docs/mvp-evolution-plan.md` -- `docs/mvp-closeout-backlog.md` -- `docs/post-mvp-evolution-plan.md` +- `docs/history/beta-resume-plan.md` +- `docs/history/mvp-plan.md` +- `docs/history/mvp-evolution-plan.md` +- `docs/history/mvp-closeout-backlog.md` +- `docs/history/post-mvp-evolution-plan.md` - `docs/community-host-validation.md` - `docs/host-profile-validation.md` diff --git a/docs/history/README.md b/docs/history/README.md new file mode 100644 index 0000000..12002db --- /dev/null +++ b/docs/history/README.md @@ -0,0 +1,16 @@ +# 
Historical Docs + +This directory contains documents kept for traceability. + +These files are not the current product brief or roadmap. +Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + +## Contents + +- `alpha-release-notes.md`: alpha release notes record +- `beta-resume-plan.md`: beta-prep execution record +- `mvp-plan.md`: original MVP framing +- `mvp-evolution-plan.md`: MVP closeout sequencing record +- `mvp-closeout-backlog.md`: MVP closeout completion record +- `post-mvp-evolution-plan.md`: previous planning record +- `release-candidate-0.1.0a2.md`: pre-release cut record diff --git a/docs/alpha-release-notes.md b/docs/history/alpha-release-notes.md similarity index 90% rename from docs/alpha-release-notes.md rename to docs/history/alpha-release-notes.md index 0c85c6e..bb178cb 100644 --- a/docs/alpha-release-notes.md +++ b/docs/history/alpha-release-notes.md @@ -1,5 +1,11 @@ # Alpha Release Notes +> Historical document +> +> This file is kept for traceability of the alpha release line. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + Version target: `0.1.0a2` local CLI alpha release candidate Snapshot date: 2026-03-18 @@ -95,4 +101,4 @@ mc tool service_status --arg name=ollama-local.service --arg scope=user ## Validation reference See `docs/alpha-validation-report.md` and `docs/vps-validation-report.md` for the current real-host validation snapshots behind this alpha baseline. -See `docs/release-candidate-0.1.0a2.md` for the current cut status and remaining release work. +See `docs/history/release-candidate-0.1.0a2.md` for the current cut status and remaining release work. 
diff --git a/docs/beta-resume-plan.md b/docs/history/beta-resume-plan.md similarity index 97% rename from docs/beta-resume-plan.md rename to docs/history/beta-resume-plan.md index f6b0b86..a4bf579 100644 --- a/docs/beta-resume-plan.md +++ b/docs/history/beta-resume-plan.md @@ -1,5 +1,11 @@ # Beta Resume Plan +> Historical document +> +> This file is kept for traceability of earlier beta-prep execution work. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + Snapshot date: 2026-03-20 ## Purpose @@ -140,7 +146,7 @@ Status: Near-term tasks: - summarize the dedicated VPS report in `docs/alpha-validation-report.md` -- update `docs/beta-readiness-gate.md`, `docs/status.md`, `docs/roadmap.md`, and `docs/release-candidate-0.1.0a2.md` +- update `docs/beta-readiness-gate.md`, `docs/status.md`, `docs/roadmap.md`, and `docs/history/release-candidate-0.1.0a2.md` - decide whether to tag now or keep the build in private-preview language a little longer - keep the community submission path simple enough that external testers can still contribute broader host diversity reports diff --git a/docs/mvp-closeout-backlog.md b/docs/history/mvp-closeout-backlog.md similarity index 92% rename from docs/mvp-closeout-backlog.md rename to docs/history/mvp-closeout-backlog.md index ec62ff9..67bf876 100644 --- a/docs/mvp-closeout-backlog.md +++ b/docs/history/mvp-closeout-backlog.md @@ -1,5 +1,11 @@ # MVP Closeout Backlog +> Historical document +> +> This file is kept as the MVP closeout completion record. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. 
+ Snapshot date: 2026-03-18 ## Purpose @@ -24,7 +30,7 @@ Completed milestones: - Milestone 3: operator utility and approval UX, completed on 2026-03-18 - Milestone 4: alpha hardening and release baseline, completed on 2026-03-18 -The higher-level completion record remains in `docs/mvp-evolution-plan.md`. +The higher-level completion record remains in `docs/history/mvp-evolution-plan.md`. ## Closed scope diff --git a/docs/mvp-evolution-plan.md b/docs/history/mvp-evolution-plan.md similarity index 94% rename from docs/mvp-evolution-plan.md rename to docs/history/mvp-evolution-plan.md index c28aeb0..ef2ae3b 100644 --- a/docs/mvp-evolution-plan.md +++ b/docs/history/mvp-evolution-plan.md @@ -1,5 +1,11 @@ # MVP Evolution Plan +> Historical document +> +> This file is kept as the MVP closeout sequencing record. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + Snapshot date: 2026-03-18 ## Purpose @@ -8,9 +14,9 @@ This document now serves as the completion record for the delivery plan that clo It remains the canonical reference for how the repository's closeout documents relate to each other: -- `docs/mvp-plan.md`: stable MVP contract and exit criteria -- `docs/mvp-evolution-plan.md`: milestone sequencing and completion record -- `docs/mvp-closeout-backlog.md`: closed execution backlog record +- `docs/history/mvp-plan.md`: stable MVP contract and exit criteria +- `docs/history/mvp-evolution-plan.md`: milestone sequencing and completion record +- `docs/history/mvp-closeout-backlog.md`: closed execution backlog record - `docs/status.md`: current implementation snapshot - `docs/roadmap.md`: phase-level roadmap beyond the closed MVP diff --git a/docs/mvp-plan.md b/docs/history/mvp-plan.md similarity index 85% rename from docs/mvp-plan.md rename to docs/history/mvp-plan.md index 1d771db..8ce792d 100644 --- a/docs/mvp-plan.md +++ b/docs/history/mvp-plan.md @@ -1,5 
+1,11 @@ # MVP Plan +> Historical document +> +> This file preserves the original MVP framing for traceability. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + ## Target MVP definition Master Control MVP means: @@ -19,9 +25,9 @@ This MVP does not require web UI, daemon mode, plugins, or remote multi-user dep To avoid legacy planning drift, use the documents this way: -- `docs/mvp-plan.md`: stable MVP contract and exit criteria -- `docs/mvp-evolution-plan.md`: milestone sequencing and closeout completion record -- `docs/mvp-closeout-backlog.md`: closed execution backlog record for the MVP closeout +- `docs/history/mvp-plan.md`: stable MVP contract and exit criteria +- `docs/history/mvp-evolution-plan.md`: milestone sequencing and closeout completion record +- `docs/history/mvp-closeout-backlog.md`: closed execution backlog record for the MVP closeout - `docs/status.md`: current implementation snapshot - `docs/roadmap.md`: phase-level view of the post-closeout roadmap @@ -39,7 +45,7 @@ The MVP should only be called complete when all of these are true: ## Current delta to close -The narrow local CLI MVP closeout is complete. `docs/mvp-evolution-plan.md` now serves as the completion record for that delivery plan. +The narrow local CLI MVP closeout is complete. `docs/history/mvp-evolution-plan.md` now serves as the completion record for that delivery plan. ### Workstream 1: Correctness and context hardening diff --git a/docs/post-mvp-evolution-plan.md b/docs/history/post-mvp-evolution-plan.md similarity index 97% rename from docs/post-mvp-evolution-plan.md rename to docs/history/post-mvp-evolution-plan.md index 550c83a..f52a791 100644 --- a/docs/post-mvp-evolution-plan.md +++ b/docs/history/post-mvp-evolution-plan.md @@ -1,5 +1,11 @@ # Post-MVP Evolution Plan +> Historical document +> +> This file is kept as a previous planning record for traceability. 
+> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + Snapshot date: 2026-03-18 ## Purpose @@ -8,9 +14,9 @@ This document starts the next planning track after the narrow local CLI MVP clos It does not replace the closed MVP records: -- `docs/mvp-plan.md`: stable contract for the narrow local CLI MVP -- `docs/mvp-evolution-plan.md`: closed sequencing record for the MVP closeout -- `docs/mvp-closeout-backlog.md`: closed backlog record for the MVP closeout +- `docs/history/mvp-plan.md`: stable contract for the narrow local CLI MVP +- `docs/history/mvp-evolution-plan.md`: closed sequencing record for the MVP closeout +- `docs/history/mvp-closeout-backlog.md`: closed backlog record for the MVP closeout Instead, this file defines the next professional engineering track for turning Master Control from a validated late-alpha baseline into a more functional, operator-useful product. @@ -445,7 +451,7 @@ Use the repository docs as operational artifacts with these roles: - `docs/status.md`: current implemented state only - `docs/roadmap.md`: phase-level direction and sequencing only -- `docs/post-mvp-evolution-plan.md`: active planning record for the current post-MVP track +- `docs/history/post-mvp-evolution-plan.md`: active planning record for the current post-MVP track - `docs/operator-workflows.md`: bounded operator journeys, smoke commands, and workflow safety notes - `docs/beta-readiness-gate.md`: release gate for moving from late alpha to beta-oriented scope - `docs/alpha-validation-report.md` or future validation docs: validation evidence only diff --git a/docs/release-candidate-0.1.0a2.md b/docs/history/release-candidate-0.1.0a2.md similarity index 92% rename from docs/release-candidate-0.1.0a2.md rename to docs/history/release-candidate-0.1.0a2.md index 50f1ef9..9fb1276 100644 --- a/docs/release-candidate-0.1.0a2.md +++ b/docs/history/release-candidate-0.1.0a2.md 
@@ -1,5 +1,11 @@ # Release Candidate 0.1.0a2 +> Historical document +> +> This file is kept for traceability of the `0.1.0a2` release cut. +> It is not the current product brief or roadmap. +> Use `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` for current guidance. + Snapshot date: 2026-03-20 ## Purpose @@ -97,5 +103,5 @@ Related GitHub issues: 1. attach or summarize the VPS report in `docs/alpha-validation-report.md` 2. update `docs/beta-readiness-gate.md`, `docs/status.md`, and `docs/roadmap.md` to reflect that multi-host evidence now exists 3. rerun the local baseline one final time -4. confirm `CHANGELOG.md`, `docs/alpha-release-notes.md`, and the release wording are still aligned with the actual scope +4. confirm `CHANGELOG.md`, `docs/history/alpha-release-notes.md`, and the release wording are still aligned with the actual scope 5. decide whether to tag `0.1.0a2` immediately or keep the build in private-preview language a little longer diff --git a/docs/operator-workflows.md b/docs/operator-workflows.md index 03aae62..be7ab87 100644 --- a/docs/operator-workflows.md +++ b/docs/operator-workflows.md @@ -7,7 +7,7 @@ Snapshot date: 2026-03-22 This document records the bounded operator workflows that currently define the post-MVP useful baseline for Master Control. Each workflow below is intentionally small, typed, and auditable. -They describe runtime-supported operator paths; the same runtime may be reached through direct CLI commands, the optional chat interface, or the current experimental read-only MCP bridge where appropriate. +They describe runtime-supported operator paths; the same runtime may be reached through direct CLI commands, the optional chat interface, or the current experimental MCP bridge with approval-mediated write flow where appropriate. 
## Workflow 1: Slow Host Diagnosis diff --git a/docs/policy.md b/docs/policy.md new file mode 100644 index 0000000..b9e508e --- /dev/null +++ b/docs/policy.md @@ -0,0 +1,95 @@ +# Policy Guide + +Snapshot date: 2026-03-23 + +## Purpose + +This guide documents the first operator-configurable policy slice for Master Control. + +The policy file lets an operator narrow tool access and managed config targets without editing Python. +It does not introduce RBAC or multi-user authorization. + +## Policy File Location + +Default path: + +- `MC_STATE_DIR/policy.toml` + +Override: + +- `MC_POLICY_PATH=/path/to/policy.toml` + +Runtime behavior: + +- missing policy file: fall back to the default safe policy +- invalid policy file: fail closed for tool execution +- policy load state is visible through `mc doctor` + +## Supported Domains + +Current version: + +- `version = 1` + +Supported tool rule fields under `[tools.<tool_name>]`: + +- `enabled = true|false` +- `require_confirmation = true|false` +- `allowed_scopes = ["system", "user"]` +- `service_patterns = ["nginx.service", "*.service"]` + +Supported managed target fields under `[[config_targets]]`: + +- `name` +- `description` +- `roots` +- `file_globs` +- `validator` +- `validator_command` only when `validator = "command"` + +Current validators: + +- `ini_parse` +- `json_parse` +- `command` + +## Example + +```toml +version = 1 + +[tools.system_info] +require_confirmation = true + +[tools.restart_service] +allowed_scopes = ["system"] +service_patterns = ["nginx.service", "sshd.service"] + +[[config_targets]] +name = "managed_ini" +description = "Operator-managed INI files under state." +roots = ["$STATE_DIR/managed-configs"] +file_globs = ["*.ini", "*.cfg"] +validator = "ini_parse" + +[[config_targets]] +name = "systemd_unit" +description = "Systemd units under /etc/systemd/system." 
+roots = ["/etc/systemd/system"] +file_globs = ["*.service", "*.timer"] +validator = "command" +validator_command = ["systemd-analyze", "verify", "{path}"] +``` + +## Path Rules + +- `$STATE_DIR` expands to the resolved MC state directory +- relative `roots` are resolved from the policy file directory +- confirmation rules may add confirmation to safer tools, but they do not weaken built-in confirmation for risky tools + +## Operational Notes + +- use `mc doctor` after every policy change +- invalid policy blocks execution until fixed +- keep target roots narrow and validators explicit +- broader package, network, and user-management policy domains are later work diff --git a/docs/release-checklist.md b/docs/release-checklist.md index 868b720..a2bf928 100644 --- a/docs/release-checklist.md +++ b/docs/release-checklist.md @@ -1,108 +1,132 @@ # Release Checklist -Current organization rule for the beta-prep track: +## Purpose -- use `docs/beta-resume-plan.md` as the short-horizon execution record -- use `docs/beta-readiness-gate.md` as the release gate, not as the day-to-day work queue +This is the maintainer checklist for pre-1.0 Master Control releases. -## Alpha baseline +Use `docs/status.md` for the current reality. +Use `docs/roadmap.md` and `docs/runtime-mcp-maturation-plan.md` for the current direction. +Use `docs/beta-readiness-gate.md` only when deciding whether beta language is justified. -Run this checklist only after the closeout milestones in `docs/mvp-evolution-plan.md` are satisfied for correctness, context handling, and operator flow quality. +## Release Preconditions -Before calling the narrow local CLI MVP ready for an alpha tag: +Before cutting a release or public preview build: -1. run the automated baseline -2. build a wheel or equivalent packaging artifact -3. validate provider resolution on the target host -4. run a real-host smoke test for service actions -5. run a real-host smoke test for managed config editing -6. 
run a real-host smoke test for `reconcile-timer install|remove` in `scope=user` -7. confirm documentation matches the operator-visible commands -8. confirm `README.md`, `docs/status.md`, `docs/roadmap.md`, `docs/beta-resume-plan.md`, `docs/beta-readiness-gate.md`, `docs/mvp-plan.md`, and `docs/mvp-evolution-plan.md`, and `docs/mvp-closeout-backlog.md` are aligned -9. confirm GitHub Actions CI is green on `main` -10. capture release notes in `CHANGELOG.md` +1. confirm the current direction is still described correctly in `README.md`, `docs/README.md`, `docs/status.md`, `docs/roadmap.md`, and `docs/runtime-mcp-maturation-plan.md` +2. confirm architectural or safety changes are reflected in `docs/architecture.md` and `docs/security-model.md` +3. confirm operator-visible workflows are still described correctly in `docs/operator-workflows.md` +4. confirm `CHANGELOG.md` reflects the user-visible scope -## Automated baseline +## Automated Baseline + +Run: ```bash python3 -m ruff check . python3 -m mypy src PYTHONPATH=src python3 -m unittest discover -s tests +PYTHONPATH=src python3 -m pytest -q python3 -m compileall src PYTHONPATH=src python3 -m master_control --json doctor python3 -m pip wheel . 
--no-deps -w /tmp/mc-dist ``` -## Clean-environment install +Required result: + +- all commands pass + +## Operator Bootstrap Validation -- prefer `./install.sh` for the operator-facing bootstrap path -- prefer the full operator lifecycle check `install -> doctor -> validate-host-profile -> uninstall` -- for a repeatable repo-side rerun, prefer `python3 scripts/validate_operator_bootstrap.py --output-dir ` -- GitHub CI now runs that same bootstrap harness as a lightweight heuristic-backed smoke; keep it green, but do not treat it as a substitute for an additional host report -- confirm the generated wrapper can run `mc doctor` -- if validating the developer path directly, prefer `python3 -m venv` when the host provides stdlib `venv` -- if the host lacks `ensurepip/python3-venv`, use `python3 -m virtualenv` as the fallback -- validate `pip install .` or `pip install -e .` in that isolated environment -- run `mc doctor` with isolated `MC_STATE_DIR` and `MC_DB_PATH` -- run `mc validate-host-profile --output-dir ` -- validate `./uninstall.sh` and decide whether `--purge-state` is appropriate for the target host +Preferred operator-path validation: -## Real-host smoke tests +```bash +./install.sh --provider heuristic +~/.local/bin/mc doctor +~/.local/bin/mc validate-host-profile --output-dir ./artifacts/host-validation +./uninstall.sh --purge-state +``` -Preferred execution path for each additional host profile: +Also rerun the repo-side harness: + +```bash +python3 scripts/validate_operator_bootstrap.py --output-dir ./artifacts/bootstrap-validation +``` + +Required result: + +- install, doctor, host validation, and uninstall complete cleanly +- bootstrap harness reports success + +## Real-Host Validation + +For each host profile used as release evidence: ```bash mc validate-host-profile --output-dir ./artifacts/host-validation ``` -Use the generated JSON report as the release evidence artifact for that host. 
+Required result: + +- generated report records `overall_ok: true` +- host caveats are written down explicitly +- evidence is kept separate by host profile + +## Interface Validation + +Validate the interfaces that are part of the release scope. + +### CLI + +- `mc doctor` +- `mc tools` +- one read-only tool execution +- one approval-gated mutation flow on a safe target + +### MCP + +- `mc mcp-serve` starts cleanly +- documented MCP behavior matches the currently supported scope +- if the release includes only read-only MCP, confirm mutating tools are still blocked there +- if the release includes write-capable MCP in the future, confirm approval-mediated mutation flow from a real MCP client -### Provider resolution +### Optional planner layer -- if using `MC_PROVIDER=auto`, run `mc doctor` and confirm the selected backend matches the host setup -- if using Ollama locally, confirm `ollama serve` is available and `ollama pull ` has already been run -- confirm `mc doctor` reports the configured Ollama model as installed before running chat smokes -- if Ollama is listening on a non-default port, set `MC_OLLAMA_BASE_URL` before running `mc doctor` -- if using OpenAI, confirm `OPENAI_API_KEY` is present and `mc doctor` reports the provider as available +- if provider behavior changed, validate heuristic, OpenAI, and/or Ollama paths that are included in the release scope +- confirm provider health reporting in `mc doctor` +- do not treat provider validation as a substitute for runtime validation -### Service actions +## Workflow Validation -- inspect a known service with `mc chat --once "status do servico "` -- trigger a pending restart request through chat -- confirm a restart or reload only on a safe non-critical target -- verify the post-action state returned by the tool -- for workstation-safe validation without root, prefer `scope=user` against a non-critical `systemd --user` unit +At minimum, validate the currently supported bounded workflows: -### Managed config 
editing +1. slow-host diagnosis +2. failed-service triage +3. managed-config read, write, validation, and rollback -- create a test file under `/managed-configs/` -- read it with `read_config_file` -- write a valid replacement with `write_config_file --confirm` -- confirm backup creation under `/config-backups/` -- restore the prior version with `restore_config_backup --confirm` +Use: -### Reconcile timer automation +- `docs/operator-workflows.md` +- `docs/host-profile-validation.md` +- `docs/alpha-validation-report.md` +- `docs/vps-validation-report.md` -- render the units first with `mc reconcile-timer render` -- install the user-scoped timer with `mc reconcile-timer install --scope user` -- confirm it appears in `systemctl --user list-timers master-control-reconcile.timer --all` -- remove it again with `mc reconcile-timer remove --scope user` -- confirm the timer no longer appears in the user timer list +## Documentation Check -### Operator-utility diagnostics +Before release: -- run `mc tool process_to_unit --arg name=` -- run `mc tool failed_services --arg scope= --arg limit=` -- confirm both tools return structured output without requiring confirmation +1. remove or down-rank stale release language +2. confirm historical documents are not presented as current planning docs +3. confirm version references are correct +4. 
confirm operator-facing commands in docs still match the implementation -## Release notes minimum +## Release Notes Minimum -The alpha notes should mention: +Release notes should mention: - supported interfaces -- supported providers -- auto provider resolution order -- support for `systemd --user` service operations through `scope=user` -- managed config targets -- service actions currently available -- what is still intentionally out of scope +- current MCP scope +- supported providers in the optional planner layer +- approval-gated mutation model +- managed config boundaries +- currently available service actions +- current out-of-scope boundaries diff --git a/docs/roadmap.md b/docs/roadmap.md index 755556a..5732fe4 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,149 +1,104 @@ # Roadmap -Snapshot date: 2026-03-22 +Snapshot date: 2026-03-23 -## Current stage +## Current Direction -- late alpha -- public pre-release `v0.1.0a2` is out -- the alpha baseline is validated on the maintainer host and on a dedicated Debian 13 VPS lab -- the runtime already supports bounded inspection, controlled service/config actions, auditability, and validation workflows -- the main roadmap change is not "add more AI"; it is to make the product center match the value already present in the runtime -- the active track is now the runtime-first refactor described in `docs/core-interfaces-refactor-plan.md` +- late alpha, pre-1.0 +- validated alpha baseline published as `v0.1.0a2` +- runtime-first and MCP-first product direction +- runtime already supports bounded inspection, controlled service/config actions, auditability, and validation workflows +- a first operator-configurable policy slice now exists through versioned TOML +- current priority is to mature the runtime and MCP path into a trustworthy operational interface -## Phase 0: Foundation +## Phase 1: Controlled MCP Write Path Status: -- Completed +- Current focus -Deliverables: - -- repository structure -- architecture 
and security documents -- ADRs for major early decisions -- Python package bootstrap -- local SQLite initialization -- policy engine and initial tool registry - -## Phase 1: Bounded runtime capabilities - -Status: +Goal: -- Completed for the current alpha slice +- make MCP the primary controlled interface for both inspection and bounded mutation Deliverables: -- typed host inspection tools -- structured and testable tool outputs -- audit persistence -- local state persistence -- safe operator-facing CLI entry points +- runtime integration coverage for the main read and write flows +- MCP read-write contract with explicit approval lifecycle +- documented and validated operator-configurable policy model +- thinner runtime ownership boundaries between core and interfaces -Result: +Exit criteria: -- exit criteria met for the current alpha scope +- MCP can execute bounded mutations through the same runtime path as CLI +- approval is explicit, auditable, and machine-tractable for MCP clients +- policy is configurable without code changes for managed boundaries +- runtime integration evidence exists for the main mutation paths -## Phase 2: Controlled mutations and operator trust +## Phase 2: Trusted Daily Host Operations Status: -- Completed for the current alpha slice - -Deliverables: +- Not complete -- confirmation flow for mutating tools -- config write helpers with backup and validation -- service restart and reload tools -- clearer operator approval prompts - -Result: - -- the current service and config mutation boundary is implemented and evidence-backed - -## Phase 3: Alpha baseline and validation - -Status: +Goal: -- Completed +- make the runtime trustworthy enough for daily single-host operational work Deliverables: -- narrow local CLI alpha baseline -- repeatable bootstrap validation -- release-facing docs and evidence -- second real-host validation evidence +- broader safe tool surface for daily host administration +- explicit concurrency and state-integrity 
model +- semantic versioning and tool-schema compatibility rules +- stronger operational validation and diagnostics -Result: +Exit criteria: -- the current alpha baseline is validated and publicly present as `v0.1.0a2` +- the runtime can be trusted for routine single-host operational work +- concurrent calls do not corrupt state or bypass approval +- tool schemas have defined compatibility rules +- new tool domains follow the same typed, policy-gated, auditable contract -## Phase 4: Runtime-first repositioning +## Phase 3: Optional Planning And Secondary Interfaces Status: -- In progress, with the first code-boundary slices already landed +- Later phase Goal: -- reposition MC around its runtime value instead of its conversational framing - -Deliverables: - -- rewritten canonical docs around the runtime-first contract -- explicit `core` versus `interfaces` ownership in the codebase -- reduced architectural centrality of the current agent/provider path -- preserved alpha baseline while boundaries are clarified - -Exit criteria: - -- the canonical docs describe MC as a runtime with interfaces -- core ownership is clearer in code than it is today -- the repository is easier to explain and maintain than the current chat-centric shape - -## Phase 5: MCP interface - -Status: - -- In progress for the first experimental read-only slice +- restore planner and conversational UX as optional layers over a stable MCP-first runtime Deliverables: -- first experimental MCP interface on top of the existing runtime -- same policy and audit path as the CLI -- local-first activation and administration guidance +- refreshed heuristic, OpenAI, and Ollama support on top of the stabilized runtime +- chat as a secondary interface over the same approval and audit model +- documentation and validation that keep MCP as the primary external interface Exit criteria: -- MCP does not duplicate business logic already owned by the runtime -- MCP remains an interface, not a second product +- planner 
and chat layers remain optional +- they improve convenience without owning safety semantics +- MCP remains the product center in architecture, docs, and validation -## Phase 6: Service mode and broader interfaces - -Status: - -- Not started - -Deliverables: - -- optional local service mode where justified by MCP or other interfaces -- any further interface additions that still reuse the same runtime boundary -- richer observability - -Exit criteria: +## Near-Term Execution Order -- interface layer remains separate from execution core -- all external interfaces reuse the same policy and audit paths +1. add runtime integration coverage and harnesses +2. validate the MCP contract through real-client and stdio transcript flows +3. harden concurrency and schema governance +4. continue core/interface cleanup +5. expand tool domains only after the previous items are stable -## Next roadmap focus +## Out Of Scope For This Track -The immediate roadmap track is: +- unrestricted shell access +- multi-user auth and remote control-plane work +- SaaS orchestration +- large UI work +- service-mode expansion before the single-process runtime model is solid -1. finish the canonical documentation rewrite around the runtime-first contract -2. introduce clearer `core` and `interfaces` ownership in the codebase -3. keep the current operator bootstrap and validation path stable while code moves -4. harden the current experimental MCP bridge before expanding it -5. postpone broader service mode and additional interfaces until the runtime contract is easier to own +## Historical Records -Historical milestone sequencing remains recorded in `docs/mvp-evolution-plan.md`, `docs/mvp-closeout-backlog.md`, `docs/post-mvp-evolution-plan.md`, and `docs/beta-resume-plan.md`. -The current authoritative refactor brief is `docs/core-interfaces-refactor-plan.md`. +Earlier MVP, alpha, and beta-prep planning documents remain in the repository for traceability. +They are not the current roadmap. 
+Use `docs/runtime-mcp-maturation-plan.md` for the detailed execution plan behind this roadmap. diff --git a/docs/runtime-integration-testing.md b/docs/runtime-integration-testing.md new file mode 100644 index 0000000..6041027 --- /dev/null +++ b/docs/runtime-integration-testing.md @@ -0,0 +1,71 @@ +# Runtime Integration Testing + +Snapshot date: 2026-03-23 + +## Purpose + +This document records the current runtime-contract validation layers beyond pure unit coverage. + +The goal is to validate the real runtime boundaries: + +- policy loading +- approval lifecycle +- managed config mutation +- MCP stdio contract behavior + +## Current Layers + +1. Unit and module-level regression coverage across `tests/` +2. Runtime policy integration coverage in `tests/test_runtime_policy_integration.py` +3. MCP stdio subprocess contract coverage in `tests/test_mcp_stdio_integration.py` +4. Operator bootstrap validation via `python3 scripts/validate_operator_bootstrap.py` +5. Host-profile validation via `mc validate-host-profile` + +## Local Commands + +Run the fast engineering baseline: + +```bash +python3 -m ruff check . 
+python3 -m mypy src +PYTHONPATH=src python3 -m unittest discover -s tests +python3 -m compileall src +``` + +Run the main pytest suite without the runtime/MCP integration slice: + +```bash +PYTHONPATH=src python3 -m pytest -q tests \ + --ignore tests/test_runtime_policy_integration.py \ + --ignore tests/test_mcp_stdio_integration.py +``` + +Run the runtime/MCP integration slice explicitly: + +```bash +PYTHONPATH=src python3 -m pytest -q \ + tests/test_runtime_policy_integration.py \ + tests/test_mcp_stdio_integration.py +``` + +Run the install/bootstrap path: + +```bash +python3 scripts/validate_operator_bootstrap.py \ + --output-dir /tmp/mc-bootstrap-validation \ + --provider heuristic \ + --python python3 +``` + +## What These Tests Prove Today + +- operator policy can disable tools, require confirmation, constrain service targets, and redefine managed config targets +- invalid policy fails closed and is surfaced through `mc doctor` +- `mc mcp-serve` works as a real stdio subprocess for `initialize`, `tools/list`, `tools/call`, and `approvals/*` +- approval-mediated config mutation works through the real MCP server process, not just through in-process unit helpers + +## Known Gaps + +- no container-backed integration harness yet for repeatable `systemd` service scenarios +- no external desktop MCP client transcript is checked in yet +- real-host smoke validation remains necessary for host-specific paths that containers do not model well diff --git a/docs/runtime-mcp-maturation-plan.md b/docs/runtime-mcp-maturation-plan.md new file mode 100644 index 0000000..86825b9 --- /dev/null +++ b/docs/runtime-mcp-maturation-plan.md @@ -0,0 +1,488 @@ +# Runtime + MCP Maturation Plan + +Snapshot date: 2026-03-23 + +## Purpose + +This document is the canonical working plan for the next Master Control maturation track. 
+ +It translates the current runtime-first architecture into a practical execution order aimed at the next durable product outcome: + +Master Control should become a safe, auditable MCP runtime for Linux host inspection and controlled mutation, with optional planning layers on top. + +This plan does not replace `docs/status.md`. +It does not replace the historical alpha and MVP records. +It defines the next ordered path so the project does not drift between runtime work, MCP work, and optional agent work. + +## Current Baseline + +As of this snapshot, MC already has: + +- a real runtime with typed tools, policy checks, confirmation gates, audit events, SQLite state, and bounded execution +- operator-facing CLI flows for inspection, recommendation handling, managed config changes, and host validation +- an experimental MCP bridge with approval-mediated write flow over the same runtime +- repeatable local validation through lint, typecheck, tests, compile checks, bootstrap validation, and host-profile validation +- optional heuristic, OpenAI, and Ollama planning paths layered on top of the runtime + +The current codebase also still has clear limits: + +- the MCP write path now exists through persisted approvals, but real-client validation and broader contract hardening are still pending +- the runtime still carries chat-oriented orchestration inside `core.runtime` +- a first operator-configurable policy slice now exists through versioned TOML, but broader validation and operator guidance are still pending +- the tool contract does not yet have explicit schema-version governance +- concurrency and multi-call behavior are only partially addressed through SQLite WAL and bounded subprocesses, not through a complete runtime concurrency model +- real-host validation exists, but deeper integration coverage for runtime mutation paths and MCP write flows is not yet the main center of the test strategy + +## Product Goal + +The target product shape is: + +1. 
MCP is the primary external interface +2. the runtime remains the owner of execution, policy, approval, audit, and state +3. mutating host operations remain bounded, typed, and explicitly approved +4. operator policy can be configured without code edits +5. chat and planner providers remain optional interfaces over the same runtime + +In plain terms: + +- MC should not expose a general shell +- MC should expose a safe operational capability layer +- MCP clients should be able to inspect the host and apply controlled changes through that layer +- every risky step should remain attributable, reviewable, and reversible where possible + +## Execution Order + +The maturation order for this track is: + +1. make the runtime and MCP write path safe, testable, and operator-configurable +2. make the runtime broader and trustworthy enough for daily host administration on a single host +3. reactivate planning and chat as optional accelerators on top of a stable MCP-first core + +This order is intentional. +If MC expands tools or agent UX before the MCP write path, policy model, and runtime validation are hardened, complexity will grow faster than trust. 
+ +## Strategic Invariants + +The following constraints remain fixed through this plan: + +- typed tools before generic shell execution +- explicit approval for mutating or privileged actions +- one runtime, reused by CLI, MCP, and chat +- single-host, local-first operation before any multi-host or remote scope +- SQLite remains acceptable until concurrency or scale proves otherwise +- every external interface must reuse the same policy and audit paths +- optional provider work must not weaken runtime guarantees + +## Phase 1: Controlled MCP Write Path + +Status: + +- Not complete + +Goal: + +- harden the experimental MCP bridge into a controlled read-write MCP interface that still preserves policy, approval, and audit guarantees + +Why this phase is first: + +- this is the heart of the product value +- until MCP can mutate safely through the runtime, the current architecture is only partially realized + +### Workstream 1.1: Runtime-centered integration validation + +Required outcomes: + +- define a first-class integration test matrix for runtime operations +- cover both read and write paths against a real host or controlled containerized target where appropriate +- keep unit coverage, but stop treating it as enough evidence for runtime trust + +Required coverage slices: + +- read-only host inspection: `system_info`, `disk_usage`, `memory_usage`, `top_processes`, `process_to_unit`, `service_status`, `failed_services`, `read_journal` +- managed file reads and writes within policy-managed targets +- backup and restore paths for managed config +- approval-gated service reload and restart +- audit trail persistence for allowed, denied, pending-confirmation, and executed actions +- failure-mode validation for invalid arguments, denied paths, missing confirmation, validator failure, and command timeout paths + +Execution model: + +- keep fast unit tests for pure logic +- add integration suites for runtime contracts +- use a layered validation strategy: + - local fast integration tests with temporary managed
targets + - container-backed integration tests for repeatable service/config scenarios + - real-host smoke validation for `systemd` and host-specific paths that containers cannot model cleanly + +Artifacts to add: + +- runtime integration test guide +- container or fixture harness for integration scenarios +- explicit CI split between unit coverage and runtime integration coverage + +### Workstream 1.2: MCP read-write contract with approval + +Current progress: + +- persisted tool approvals are now part of the runtime state model +- the experimental MCP bridge now exposes controlled write requests plus `approvals/list|get|approve|reject` +- approval lifecycle coverage exists at unit and runtime-contract level + +Required outcomes: + +- extend the MCP surface from read-only to controlled read-write +- preserve the same runtime `run_tool` policy and confirmation flow +- make approval explicit and machine-tractable for MCP clients + +Required MCP behavior: + +- read-only tools may execute immediately +- mutating and privileged tools must not execute on the first unsafe request +- MCP responses must return a structured pending-approval payload instead of silently failing or silently executing +- approval must be attributable to a pending action, not just to a repeated raw request + +Recommended approval contract: + +- add explicit pending approval state in MCP responses +- add a small approval workflow surface such as: + - `approvals/list` + - `approvals/get` + - `approvals/approve` + - `approvals/reject` +- bind each pending action to an approval id plus a normalized action envelope +- include operator-facing evidence in the approval payload: + - requested tool + - normalized arguments + - risk class + - policy decision + - execution summary preview + - rollback or follow-up hints when available + +Implementation rule: + +- approval is a runtime concept exposed through MCP +- MCP must not invent its own policy logic + +Interoperability target: + +- `mc mcp-serve` can 
be used from a standard MCP client +- a client such as Claude Desktop can inspect the host and complete an approval-mediated mutation flow without unrestricted shell access + +### Workstream 1.3: Operator-configurable policy model + +Current progress: + +- versioned TOML policy loading now exists +- a missing policy file falls back to the default safe policy +- invalid policy fails closed and is surfaced through `mc doctor` + +Required outcomes: + +- move policy configuration out of code for operator-owned boundaries +- document the policy model clearly enough that an operator can change allowed targets and approval rules without editing Python + +Recommended configuration shape: + +- versioned TOML policy files under the MC state or config directory +- separate policy domains for: + - tool enablement + - risk overrides only where explicitly allowed + - managed file targets and validators + - service scopes and allowlisted units where needed + - package/network/user-management domains when those tools are added later + +Minimum requirements: + +- a missing policy file should fall back to a safe default +- invalid policy should fail closed +- policy load errors should be visible through `mc doctor` +- policy changes should be auditable +- docs must explain how to add or narrow managed targets safely + +Non-goal for the first slice: + +- do not introduce a full RBAC or multi-user authorization system yet + +### Workstream 1.4: Core/interface cleanup required for MCP-first ownership + +Required outcomes: + +- reduce chat ownership inside `core.runtime` +- keep MCP and CLI thin over reusable runtime methods +- continue shrinking `MasterControlApp` into compatibility-only or retirement-ready status + +Concrete direction: + +- move chat-specific planning and rendering seams out of `core.runtime` +- expose reusable runtime services for: + - tool execution + - approval lifecycle + - recommendation reconciliation + - audit queries + - policy diagnostics + +### Phase 1 exit 
criteria + +Phase 1 is complete when all of the following are true: + +- runtime integration coverage exists for the main read and write flows +- MCP supports controlled write operations through the same runtime path as CLI +- mutating MCP calls produce explicit approval flows instead of direct execution +- operator policy for managed targets is configurable without code changes +- `mc mcp-serve` can be exercised from a real MCP client for diagnostic and controlled mutation flows +- the audit trail captures pending approval, approval decision, execution result, and failure paths + +## Phase 2: Trusted Daily Host Operations + +Status: + +- Not complete + +Goal: + +- make the runtime reliable enough for daily single-host administration with a broader safe tool surface + +Why this phase is second: + +- tool breadth without trust will create a large unsafe surface +- once Phase 1 exists, new tools can follow a stable contract instead of inventing new behavior + +### Workstream 2.1: Broaden tool coverage by operational domain + +Priority order for new tools: + +1. package management +2. network inspection and bounded network changes +3. user and group inspection before user mutation +4. 
filesystem and process maintenance tasks that fit the same safety model + +Admission rule for every new tool: + +- typed arguments and typed output +- explicit risk classification +- policy and approval integration +- audit integration +- real integration coverage +- operator documentation +- rollback or bounded blast radius where applicable + +Examples of likely additions: + +- package manager read tools before install/remove tools +- network interface and listening-port inspection before config mutation +- user inspection before account or group changes + +Non-goal: + +- do not add “raw command execution” as a shortcut for breadth + +### Workstream 2.2: Concurrency and state integrity + +Required outcomes: + +- define the runtime concurrency model explicitly before claiming broader MCP service reliability +- prevent concurrent calls from corrupting state or trampling the same managed target + +Current baseline: + +- SQLite uses WAL and a busy timeout +- subprocesses are bounded +- this is directionally good, but not yet a complete concurrency model + +Required runtime guarantees: + +- concurrent reads must remain safe +- concurrent writes to the same managed file must serialize +- concurrent service actions on the same unit must serialize +- approval consumption must be atomic +- recommendation reconciliation and audit writes must not corrupt state under concurrent calls + +Recommended implementation direction: + +- per-target locks for managed files +- per-service locks for service actions +- transaction boundaries around approval state transitions +- explicit idempotency handling for repeated approval or repeated MCP calls +- concurrency tests that intentionally race write and approval paths + +Longer-term note: + +- if MCP evolves from stdio to a long-running local service, queueing and worker isolation may become necessary +- that should be added only after the current single-process concurrency model is explicit and tested + +### Workstream 2.3: Semantic 
versioning and tool-schema governance + +Required outcomes: + +- define what constitutes a breaking change for the runtime and MCP tool surface +- version tool schemas intentionally instead of allowing accidental drift + +Required governance: + +- stable tool names as long-lived contracts +- explicit schema version metadata for each tool contract +- compatibility rules for adding optional fields, deprecating fields, and changing semantics +- documented release policy connecting runtime versions to tool-schema compatibility + +Recommended release rules: + +- patch: fixes, no schema break +- minor: backward-compatible tool additions or field additions +- major: breaking schema or behavior changes + +Required artifacts: + +- schema compatibility policy document +- contract tests for the exposed MCP tool descriptors +- release checklist updates to verify tool compatibility + +### Workstream 2.4: Production-oriented validation and observability + +Required outcomes: + +- improve confidence that MC can be trusted for daily single-host work +- expand evidence from “passes on the maintainer host” to a more deliberate operational validation matrix + +Required additions: + +- broader host-profile validation scenarios +- failure-injection tests for policy denial, validation failure, lock contention, and restart errors +- operator diagnostics for: + - policy load state + - pending approvals + - lock contention or busy-state reporting + - version and schema compatibility + +### Phase 2 exit criteria + +Phase 2 is complete when all of the following are true: + +- the MCP tool surface covers the main daily host-administration journeys within the project scope +- concurrent calls do not corrupt runtime state or bypass approval guarantees +- tool schemas are versioned and governed by documented compatibility rules +- the runtime has evidence-backed trust for routine single-host operations +- the operator can rely on MC for daily bounded administration without treating it as an 
experimental shell proxy + +## Phase 3: Optional Planning And Secondary Interfaces + +Status: + +- Partially present, but not the primary focus + +Goal: + +- restore planning and conversational UX as optional value-add layers over a stable MCP-first runtime + +Why this phase is third: + +- the runtime must already be trustworthy on its own +- optional planning should accelerate safe workflows, not define the product core + +### Workstream 3.1: Re-center providers as optional planners + +Required outcomes: + +- keep heuristic, OpenAI, and Ollama support behind the same runtime contract +- ensure provider output can never bypass policy, approval, or audit +- refresh provider docs and tests after MCP-first core work lands + +Required rules: + +- planner output remains structured +- planner proposes, runtime decides +- planner quality may affect convenience, not safety semantics + +### Workstream 3.2: Maintain and improve chat as a secondary interface + +Required outcomes: + +- keep chat usable for operators who want guided flows +- keep chat thin over runtime and planning seams +- avoid reintroducing chat as the architectural center + +Required contract: + +- chat should call into the same approval and audit lifecycle as MCP +- approval guidance should remain explicit +- chat-specific rendering should stay out of the runtime core + +### Phase 3 exit criteria + +Phase 3 is complete when all of the following are true: + +- heuristic, OpenAI, and Ollama paths work as optional planners over the same runtime +- chat remains useful without reclaiming core ownership +- MCP remains the primary external interface in docs, validation, and architecture + +## Cross-Cutting Validation Strategy + +Every phase in this plan should be validated through the same layered evidence model: + +1. unit tests for pure logic and formatting +2. integration tests for runtime contracts +3. MCP interoperability tests against the running server +4. 
real-host or controlled-environment workflow validation +5. release-facing evidence updates + +The minimum recurring baseline should continue to include: + +- `python3 -m ruff check .` +- `python3 -m mypy src` +- `PYTHONPATH=src python3 -m unittest discover -s tests` +- `PYTHONPATH=src python3 -m pytest -q` +- `python3 -m compileall src` +- `PYTHONPATH=src python3 -m master_control --json doctor` + +Additional validation that should become mandatory for this track: + +- runtime integration suite +- MCP interoperability suite +- concurrency suite for approval and mutation paths +- policy configuration load and failure tests + +## Recommended Immediate Package Queue + +The next clean execution slices are: + +1. MCP write-path design package + define approval lifecycle, MCP method shape, and runtime service seams before broad implementation +2. operator-configurable policy package + externalize managed target and tool policy into a documented versioned policy file +3. runtime integration harness package + add container or host-backed integration tests for read/write runtime flows +4. MCP read-write implementation package + implement the approval-mediated MCP mutation flow +5. concurrency hardening package + add locking, atomic approval state transitions, and contention tests +6. tool-schema governance package + define semantic versioning and contract-compatibility rules +7. safe tool-breadth package + add new admin domains only after the previous packages are stable +8. optional planner reactivation package + re-harden heuristic, OpenAI, Ollama, and chat as secondary layers + +## Sequencing Rules + +To avoid losing the plot, follow these rules: + +1. do not expand tool breadth before approval-mediated MCP writes exist +2. do not claim production trust before concurrency and policy configuration are explicit +3. do not let chat/provider work block MCP-first core work +4. land every new tool with policy, tests, docs, and audit support in the same change family +5. 
treat schema compatibility as part of the product contract, not as release polish + +## Deferred Until Proven Necessary + +The following items remain intentionally deferred during this track: + +- unrestricted shell access +- multi-user auth and remote control-plane design +- SaaS orchestration +- broad daemon/service-mode expansion before the current MCP and concurrency model is solid +- large UI work + +## Success Definition + +This maturation track is successful when MC can truthfully be described this way: + +Master Control is a production-trustworthy local runtime for bounded Linux host administration on a single host, exposed primarily through MCP, with explicit approval for risky actions, operator-configurable policy, audited execution, stable tool contracts, and optional planner-backed interfaces layered on top. diff --git a/docs/status.md b/docs/status.md index 92458c1..024122c 100644 --- a/docs/status.md +++ b/docs/status.md @@ -1,6 +1,6 @@ # Project Status -Snapshot date: 2026-03-22 +Snapshot date: 2026-03-23 ## Purpose @@ -9,23 +9,21 @@ This document is the authoritative snapshot of project maturity, implemented sco It is not the GitHub landing page. It is not the long-horizon roadmap. 
-## Maturity +## Current Position -- Stage: late alpha -- Public release posture: GitHub pre-release `v0.1.0a2` is published -- Product posture: MC is being repositioned from an AI-first conversational agent to a runtime-first capability layer with interfaces -- Interface posture: MCP is the main integration interface; the CLI remains the local administration interface; the chat/provider path remains optional -- Install posture: source checkout plus `install.sh`; no `.deb` package yet +- Stage: late alpha, pre-1.0 +- Release posture: public pre-release `v0.1.0a2` is published +- Product posture: runtime-first and MCP-first +- Interface posture: MCP is the main external integration interface; CLI is the local administration interface; chat/providers are optional +- Install posture: source checkout plus `install.sh` - Scope posture: single-host and local-first -- Refactor posture: the runtime-first documentation reset and the first code-boundary slices have landed without resetting the validated alpha baseline -- Historical planning records remain available in `docs/mvp-evolution-plan.md`, `docs/mvp-closeout-backlog.md`, `docs/post-mvp-evolution-plan.md`, and `docs/beta-resume-plan.md` +- Packaging posture: no `.deb` package and no service-mode requirement yet -## Current product statement +## Current Product Statement Master Control is a local-first runtime for controlled Linux host operations, with typed capabilities, approval boundaries, and auditability. -The core value today is not generic AI autonomy. -The core value is the bounded runtime: +The core value today is the bounded runtime: - typed tools - policy and confirmation gates @@ -33,27 +31,28 @@ The core value is the bounded runtime: - config safety - repeatable validation -The MCP interface is the main integration path for that runtime. -The CLI remains the local administration surface. 
-The conversational and provider-backed path still exists, but it is now understood as an optional interface layered on top of the same runtime. +MCP is the main integration path for that runtime. +CLI remains the local administration surface. +Chat and planner providers remain optional layers on top of the same runtime. -## What is already implemented +## Implemented Today ### Runtime foundation - modular Python monolith with `src/` layout - SQLite bootstrap and local state directory - architecture, security, roadmap, and ADR documentation -- audit trail for plans, executions, provider errors, and recommendation status updates +- audit trail for plans, executions, provider errors, and recommendation status changes - operator bootstrap scripts for install and removal -- repeatable repo-side bootstrap validation harness with per-step logs and cleanup checks -- GitHub CI bootstrap smoke for the non-editable operator path via `scripts/validate_operator_bootstrap.py` -- redacted host-validation bundle generation plus a dedicated intake path for community-submitted reports +- repeatable bootstrap validation harness with per-step logs and cleanup checks +- GitHub CI bootstrap smoke for the non-editable operator path +- host-validation bundle generation and community intake path ### Runtime capabilities - typed inspection and controlled-action tools - policy evaluation before every tool execution +- versioned operator policy loading with safe defaults, fail-closed errors, and doctor diagnostics - explicit confirmation gates for mutating and privileged paths - bounded subprocess execution with `shell=False`, timeouts, and output truncation - managed config read, write, validation, backup, and restore for bounded targets @@ -76,14 +75,14 @@ The conversational and provider-backed path still exists, but it is now understo - `reload_service` - `restart_service` -### Runtime interfaces +### Interfaces -- experimental read-only MCP stdio bridge on top of the runtime +- 
experimental MCP stdio bridge with approval-mediated write flow on top of the runtime - CLI commands for doctor, tools, audit, sessions, observations, recommendations, direct tool execution, and chat - CLI-integrated `validate-host-profile` command backed by reusable host-validation code - optional `systemd` timer installation for bounded recommendation reconciliation -### Optional agent interface +### Optional planner layer - provider abstraction - heuristic planner for offline development @@ -91,43 +90,39 @@ The conversational and provider-backed path still exists, but it is now understo - Ollama chat adapter for local structured planning - local-first auto provider resolution: `ollama -> openai -> heuristic` - structured execution plans instead of free-form tool calls -- iterative per-turn planning loop that can continue a diagnosis using fresh tool outputs -- provider health reporting in `mc doctor`, including local Ollama endpoint and model availability -- deterministic turn guidance, structured session context, and recommendation rendering helpers - -## Product interpretation of the current baseline +- provider health reporting in `mc doctor` -The validated alpha baseline should now be interpreted as follows: +## What Is True Right Now - MC is already useful as a bounded runtime for Linux inspection and controlled actions -- MCP is the clearest external integration surface for the current product direction -- the CLI is the local operator and administration surface -- the current chat/provider path is an optional interface, not the explanation of the product -- the current codebase still carries more conversational complexity than the runtime-centered product story requires -- the current refactor is meant to correct that mismatch without throwing away validated behavior +- MCP is the main external interface direction, and the current experimental slice already supports approval-mediated write operations +- CLI is still the most complete operator surface 
today +- chat/provider paths are optional and should not define the product center +- a first operator-configurable policy slice has landed through versioned TOML, but broader validation and operator evidence are still ahead +- concurrency and tool-schema governance work are still ahead of the current baseline -## Current refactor focus +## Active Focus -The active refactor is described in `docs/core-interfaces-refactor-plan.md`. +The current execution focus is defined by `docs/runtime-mcp-maturation-plan.md`. -The near-term objective is: +The next maturity steps are: -1. finish aligning the remaining supporting docs with the runtime-first contract -2. continue reducing legacy compatibility surface around the old app-centric entry points -3. harden the current experimental MCP bridge without expanding its capability surface prematurely -4. perform broader cleanup only after the replacement structure is stable +1. stronger runtime integration coverage for read and write flows +2. real-client MCP validation and contract hardening on top of the new approval flow +3. concurrency hardening and state-integrity guarantees +4. tool-schema compatibility rules and release policy +5. 
broader tool expansion only after the runtime contract is stable -## What is intentionally out of scope right now +## Intentionally Out Of Scope Right Now -- general package management -- full host security auditing or compliance scanning +- unrestricted shell access - web UI - voice interface - Slack or Discord integrations - multi-user auth and remote deployment - SaaS-style remote control infrastructure -## Validation baseline +## Validation Baseline At this snapshot, the project is validated by: @@ -135,29 +130,44 @@ At this snapshot, the project is validated by: - `python3 -m mypy src` - `PYTHONPATH=src python3 -m unittest discover -s tests` - `PYTHONPATH=src python3 -m pytest -q` +- explicit runtime/MCP integration coverage in `tests/test_runtime_policy_integration.py` and `tests/test_mcp_stdio_integration.py` - `python3 -m compileall src` - manual CLI smoke checks for chat, recommendations, recommendation-triggered actions, and `reconcile-timer render|install|remove` - manual CLI smoke checks for managed config write with validation and backup - manual CLI smoke checks for `process_to_unit` and `failed_services` -- automated coverage for observation freshness and stale-context refresh behavior -- real-host validation of `service_status`, `reload_service`, and `restart_service` on `systemd --user` -- real-host validation of `service_status`, `reload_service`, and `restart_service` on system-scoped units -- real-host validation of managed config read/write/restore on a file under `/managed-configs/` -- repeatable host-profile validation harness available through `mc validate-host-profile` -- repeatable operator bootstrap validation harness available through `python3 scripts/validate_operator_bootstrap.py` +- repeatable host-profile validation through `mc validate-host-profile` +- repeatable operator bootstrap validation through `python3 scripts/validate_operator_bootstrap.py` - GitHub CI bootstrap smoke for the non-editable operator path - clean-environment 
operator bootstrap validation via `./install.sh`, `mc doctor`, `mc validate-host-profile`, and `./uninstall.sh --purge-state` - packaging sanity check via `python3 -m pip wheel . --no-deps -w /tmp/mc-dist` -- dedicated VPS operator-path validation on 2026-03-20 after installing `python3.13-venv` -- dedicated VPS bootstrap harness rerun on 2026-03-20 with `overall_ok: true` +- dedicated Debian VPS operator-path validation on 2026-03-20 + +## Current Canonical Docs -## Evidence records +- `docs/status.md`: reality snapshot +- `docs/roadmap.md`: concise roadmap +- `docs/runtime-mcp-maturation-plan.md`: canonical execution plan +- `docs/architecture.md`: system structure and boundaries +- `docs/security-model.md`: safety and approval model +- `docs/policy.md`: operator policy guide +- `docs/operator-workflows.md`: bounded operator journeys +- `docs/runtime-integration-testing.md`: runtime and MCP validation guide +- `docs/host-profile-validation.md`: validation harness guide -Primary evidence and release records remain: +## Evidence Records - `docs/alpha-validation-report.md` - `docs/vps-validation-report.md` -- `docs/alpha-release-notes.md` -- `docs/release-candidate-0.1.0a2.md` -- `docs/operator-workflows.md` - `docs/beta-readiness-gate.md` + +## Historical Records + +The following documents remain useful for traceability, but they are not the current product brief or roadmap: + +- `docs/history/alpha-release-notes.md` +- `docs/history/release-candidate-0.1.0a2.md` +- `docs/history/beta-resume-plan.md` +- `docs/history/mvp-plan.md` +- `docs/history/mvp-evolution-plan.md` +- `docs/history/mvp-closeout-backlog.md` +- `docs/history/post-mvp-evolution-plan.md` diff --git a/docs/vps-validation-runbook.md b/docs/vps-validation-runbook.md index 45030fd..d976d66 100644 --- a/docs/vps-validation-runbook.md +++ b/docs/vps-validation-runbook.md @@ -140,7 +140,7 @@ After a successful VPS pass, the repository should be able to update: - `docs/alpha-validation-report.md` - 
`docs/beta-readiness-gate.md` -- `docs/release-candidate-0.1.0a2.md` +- `docs/history/release-candidate-0.1.0a2.md` - `docs/status.md` ## Release Interpretation diff --git a/src/master_control/agent/session_summary.py b/src/master_control/agent/session_summary.py index d504a7a..364c2f4 100644 --- a/src/master_control/agent/session_summary.py +++ b/src/master_control/agent/session_summary.py @@ -1,3 +1,150 @@ -"""Compatibility re-export for the moved agent session summary helpers.""" +from __future__ import annotations -from master_control.interfaces.agent.session_summary import * # noqa: F401,F403 +from collections import OrderedDict + +from master_control.agent.tool_result_views import build_tool_result_view +from master_control.shared.planning import ExecutionPlan +from master_control.shared.session_summary import parse_session_summary + +SUMMARY_ORDER = ( + "current_focus", + "tracked_unit", + "tracked_scope", + "tracked_path", + "last_intent", + "last_user_request", + "host", + "memory", + "disk", + "service", + "config", + "config_target", + "config_validation", + "last_backup_path", + "logs", + "processes", + "last_assistant_reply", +) +MAX_SUMMARY_LINES = 13 +MAX_VALUE_CHARS = 180 + + +def update_session_summary( + existing_summary: str | None, + *, + user_input: str, + plan: ExecutionPlan | None, + executions: list[dict[str, object]], + assistant_message: str, +) -> str: + summary = parse_session_summary(existing_summary) + + summary["last_user_request"] = _truncate(user_input) + summary["last_assistant_reply"] = _truncate(_first_paragraph(assistant_message)) + + if plan is not None: + summary["last_intent"] = _truncate(plan.intent) + if plan.steps: + first_step = plan.steps[0] + summary["current_focus"] = _truncate(first_step.rationale) + tracked_unit = _extract_tracked_unit(first_step.tool_name, first_step.arguments) + if tracked_unit: + summary["tracked_unit"] = _truncate(tracked_unit) + tracked_scope = _extract_tracked_scope(first_step.tool_name, 
first_step.arguments) + if tracked_scope: + summary["tracked_scope"] = tracked_scope + tracked_path = _extract_tracked_path(first_step.arguments) + if tracked_path: + summary["tracked_path"] = _truncate(tracked_path) + + for execution in executions: + _apply_execution_summary(summary, execution) + + return _render_summary(summary) + + +def _render_summary(summary: OrderedDict[str, str]) -> str: + ordered_items: list[tuple[str, str]] = [] + seen_keys = set() + + for key in SUMMARY_ORDER: + value = summary.get(key) + if value: + ordered_items.append((key, value)) + seen_keys.add(key) + + for key, value in summary.items(): + if key in seen_keys or not value: + continue + ordered_items.append((key, value)) + + lines = [f"{key}: {value}" for key, value in ordered_items[:MAX_SUMMARY_LINES]] + return "\n".join(lines) + + +def _apply_execution_summary( + summary: OrderedDict[str, str], + execution: dict[str, object], +) -> None: + if not execution.get("ok"): + return + + tool_name = execution.get("tool") + arguments = execution.get("arguments") + result = execution.get("result") + if not isinstance(tool_name, str) or not isinstance(result, dict): + return + resolved_arguments = arguments if isinstance(arguments, dict) else {} + view = build_tool_result_view(tool_name, resolved_arguments, result) + for key, value in view.summary_updates.items(): + if isinstance(value, str) and value: + summary[key] = _truncate(value) + + +def _extract_tracked_unit(tool_name: str, arguments: dict[str, object]) -> str | None: + candidate_keys = ["unit"] + if tool_name in { + "service_status", + "restart_service", + "reload_service", + "failed_services", + }: + candidate_keys.append("name") + for key in candidate_keys: + value = arguments.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _extract_tracked_path(arguments: dict[str, object]) -> str | None: + value = arguments.get("path") + if isinstance(value, str) and value.strip(): + return 
value.strip() + return None + + +def _extract_tracked_scope(tool_name: str, arguments: dict[str, object]) -> str | None: + if tool_name not in { + "service_status", + "restart_service", + "reload_service", + "failed_services", + }: + return None + value = arguments.get("scope") + if isinstance(value, str) and value in {"system", "user"}: + return value + return None + + +def _first_paragraph(text: str) -> str: + lines = [line.strip() for line in text.splitlines() if line.strip()] + return lines[0] if lines else "" + + +def _truncate(value: str) -> str: + normalized = " ".join(value.split()) + if len(normalized) <= MAX_VALUE_CHARS: + return normalized + return normalized[: MAX_VALUE_CHARS - 3].rstrip() + "..." diff --git a/src/master_control/agent/turn_planning.py b/src/master_control/agent/turn_planning.py index ee5f906..ebe7f11 100644 --- a/src/master_control/agent/turn_planning.py +++ b/src/master_control/agent/turn_planning.py @@ -1,3 +1,149 @@ -"""Compatibility re-export for the moved agent turn-planning helpers.""" +from __future__ import annotations -from master_control.interfaces.agent.turn_planning import * # noqa: F401,F403 +import json + +from master_control.agent.tool_result_views import build_tool_result_view +from master_control.core.observations import ObservationFreshness, observation_key_for_tool +from master_control.providers.base import ProviderError, ProviderResponse +from master_control.shared.planning import ExecutionPlan, PlanningDecision + + +def build_turn_planning_prompt( + *, + user_input: str, + iteration: int, + executions: list[dict[str, object]], +) -> str | None: + if not executions: + if iteration == 0: + return "\n".join( + [ + "Current-turn planning guardrails:", + f"- original_user_request: {user_input}", + "- For live host inspection requests, do not answer from memory alone.", + "- If the user asks about current memory, disk, processes, service state, logs, or host metadata, return decision.state=needs_tools and call the 
matching read-only tool first.", + "- Only return decision.state=complete on the first planning pass when the request is non-operational, already fully answered by the provided context, or safely unsupported.", + ] + ) + return ( + "This is a continuation of the same user request. " + "If enough information is already available, return decision.state=complete with no steps." + ) + + observation_lines = [summarize_execution_for_planner(execution) for execution in executions] + rendered_observations = "\n".join(f"- {line}" for line in observation_lines) + return "\n".join( + [ + "Current-turn planning context:", + f"- original_user_request: {user_input}", + f"- planning_iteration: {iteration + 1}", + "- Return an explicit planner decision: needs_tools, complete, or blocked.", + "- Do not repeat tool calls that already ran in this same turn unless the user explicitly asked to rerun them.", + "- If the observations below are already enough, return decision.state=complete, no steps, and summarize the findings.", + "- If the request cannot continue safely with the available tools, return decision.state=blocked.", + "- If a prior step failed or requires confirmation, do not propose dependent steps.", + "Execution observations:", + rendered_observations, + ] + ) + + +def summarize_execution_for_planner(execution: dict[str, object]) -> str: + tool_name = str(execution.get("tool", "unknown")) + arguments = execution.get("arguments", {}) + argument_text = json.dumps(arguments, sort_keys=True) + if execution.get("pending_confirmation"): + return f"{tool_name}({argument_text}) -> pending_confirmation" + if not execution.get("ok"): + return f"{tool_name}({argument_text}) -> error: {execution.get('error', 'unknown')}" + + result = execution.get("result") + if not isinstance(result, dict): + return f"{tool_name}({argument_text}) -> ok" + return build_tool_result_view(tool_name, _coerce_mapping(arguments), result).planner_summary + + +def _coerce_mapping(value: object) -> dict[str, 
object]: + if isinstance(value, dict): + return value + return {} + + +def validate_provider_response_for_loop(provider_response: ProviderResponse) -> PlanningDecision: + decision = provider_response.resolved_decision() + has_steps = bool(provider_response.plan and provider_response.plan.steps) + if decision.state == "needs_tools" and not has_steps: + raise ProviderError("Provider declared needs_tools without returning executable steps.") + if decision.state != "needs_tools" and has_steps: + raise ProviderError( + "Provider returned executable steps for a non-needs_tools decision." + ) + return decision + + +def should_continue_planning( + plan: ExecutionPlan | None, + *, + multi_step_intents: set[str], +) -> bool: + if plan is None: + return False + return plan.intent in multi_step_intents + + +def classify_turn_decision( + provider_response: ProviderResponse, + executions: list[dict[str, object]], + *, + multi_step_intents: set[str], +) -> PlanningDecision: + for execution in executions: + if execution.get("pending_confirmation"): + return PlanningDecision( + state="blocked", + kind="awaiting_confirmation", + reason="The next action is waiting for explicit confirmation before it can run.", + ) + for execution in executions: + if not execution.get("ok"): + return PlanningDecision( + state="blocked", + kind="execution_failed", + reason="A tool execution failed before the request could complete safely.", + ) + + plan_decision = provider_response.resolved_decision() + if executions and not should_continue_planning( + provider_response.plan, + multi_step_intents=multi_step_intents, + ): + return PlanningDecision( + state="complete", + kind="evidence_sufficient", + reason="Current-turn evidence is sufficient for the final response.", + ) + if plan_decision.state == "needs_tools" and collect_planned_refresh_keys(provider_response.plan): + return PlanningDecision( + state="needs_tools", + kind="refresh_required", + reason="Fresh host observations are required before the 
diagnosis can continue.", + ) + return plan_decision + + +def collect_planned_refresh_keys(plan: ExecutionPlan | None) -> list[str]: + if plan is None: + return [] + keys: list[str] = [] + for step in plan.steps: + observation_key = observation_key_for_tool(step.tool_name) + if observation_key is None or observation_key in keys: + continue + keys.append(observation_key) + return keys + + +def collect_stale_observation_keys( + observation_freshness: tuple[ObservationFreshness, ...], +) -> list[str]: + return sorted({item.key for item in observation_freshness if item.stale}) diff --git a/src/master_control/agent/turn_rendering.py b/src/master_control/agent/turn_rendering.py index 33448d7..c8878f1 100644 --- a/src/master_control/agent/turn_rendering.py +++ b/src/master_control/agent/turn_rendering.py @@ -1,3 +1,139 @@ -"""Compatibility re-export for the moved agent turn-rendering helpers.""" +from __future__ import annotations -from master_control.interfaces.agent.turn_rendering import * # noqa: F401,F403 +from typing import cast + +from master_control.agent.tool_result_views import build_tool_result_view +from master_control.core.recommendation_sync import RecommendationSyncResult +from master_control.providers.base import ProviderResponse +from master_control.shared.planning import PlanningDecision + + +def render_chat_response( + provider_response: ProviderResponse, + executions: list[dict[str, object]], +) -> str: + sections = [provider_response.message] + rendered_results = collect_rendered_execution_summaries(executions) + if rendered_results: + sections.extend(rendered_results) + return "\n\n".join(sections) + + +def apply_turn_decision_guidance( + message: str, + executions: list[dict[str, object]], + turn_decision: PlanningDecision, +) -> str: + if turn_decision.state == "blocked" and turn_decision.kind == "awaiting_confirmation": + pending_execution = next( + (execution for execution in executions if execution.get("pending_confirmation")), + None, + ) + if 
isinstance(pending_execution, dict): + approval = pending_execution.get("approval") + if isinstance(approval, dict): + cli_command = approval.get("cli_command") + chat_command = approval.get("chat_command") + summary = approval.get("summary") + if ( + isinstance(cli_command, str) and cli_command and cli_command in message + ) or ( + isinstance(chat_command, str) and chat_command and chat_command in message + ): + if isinstance(summary, str) and summary.strip(): + return ( + f"{message}\n\nAção pendente de confirmação explícita. " + f"{summary.strip()}" + ) + return f"{message}\n\nAção pendente de confirmação explícita." + command_parts: list[str] = [] + if isinstance(cli_command, str) and cli_command.strip(): + command_parts.append(f"CLI: `{cli_command}`") + if isinstance(chat_command, str) and chat_command.strip(): + command_parts.append(f"Chat: `{chat_command}`") + if command_parts: + prefix = "Ação pendente de confirmação explícita." + if isinstance(summary, str) and summary.strip(): + prefix = f"{prefix} {summary.strip()}" + return f"{message}\n\n{prefix} " + " ".join(command_parts) + return f"{message}\n\nAção pendente de confirmação explícita." + + if turn_decision.state == "blocked" and turn_decision.kind == "missing_safe_tool": + return ( + f"{message}\n\nEste runtime não expõe a tool segura necessária para esse pedido. " + "Use `mc tools` para conferir as capabilities disponíveis." + ) + + if turn_decision.state == "blocked" and turn_decision.kind == "execution_failed": + return ( + f"{message}\n\nO turno foi interrompido porque uma execução falhou antes da conclusão." + ) + + if turn_decision.state == "needs_tools" and turn_decision.kind == "refresh_required": + return f"{message}\n\nO agente ainda precisa atualizar sinais do host antes de concluir." 
+ + return message + + +def collect_rendered_execution_summaries( + executions: list[dict[str, object]], +) -> list[str]: + rendered_results = [render_execution_summary(execution) for execution in executions] + return [item for item in rendered_results if item] + + +def append_recommendations_to_message( + message: str, + sync: RecommendationSyncResult, +) -> str: + highlighted = [*sync.new, *sync.reopened] + if not highlighted: + return message + + lines: list[str] = [] + for item in highlighted[:2]: + line = f"- [#{item['id']} {item['status']}] {item['message']}" + evidence = item.get("evidence_summary") + if isinstance(evidence, str) and evidence.strip(): + line += f" Evidência: {evidence.strip()}." + action_summary = item.get("action_summary") + if isinstance(action_summary, str) and action_summary.strip(): + line += f" Ação sugerida: {action_summary.strip()}" + next_step = item.get("next_step") + if isinstance(next_step, dict): + cli_command = next_step.get("cli_command") + if isinstance(cli_command, str) and cli_command.strip(): + line += f" Próximo passo: `{cli_command.strip()}`" + lines.append(line) + rendered = "\n".join(lines) + return f"{message}\n\nRecomendações da sessão:\n{rendered}" + + +def render_execution_summary(execution: dict[str, object]) -> str: + arguments = _coerce_mapping(execution.get("arguments")) + if not execution.get("ok"): + if execution.get("pending_confirmation"): + approval = execution.get("approval") + if isinstance(approval, dict): + cli_command = approval.get("cli_command") + chat_command = approval.get("chat_command") + summary = approval.get("summary") + prefix = ( + summary.strip() + if isinstance(summary, str) and summary.strip() + else f"A execução de `{execution['tool']}` exige confirmação explícita antes de prosseguir." + ) + return f"{prefix} " f"CLI: `{cli_command}`. Chat: `{chat_command}`." + return f"A execução de `{execution['tool']}` exige confirmação explícita antes de prosseguir." 
+ return f"Falha em `{execution['tool']}`: {execution.get('error', 'erro desconhecido')}." + + tool_name = str(execution["tool"]) + result = execution["result"] + assert isinstance(result, dict) + return build_tool_result_view(tool_name, arguments, result).rendered_summary + + +def _coerce_mapping(value: object) -> dict[str, object]: + if isinstance(value, dict): + return cast(dict[str, object], value) + return {} diff --git a/src/master_control/app.py b/src/master_control/app.py index d4c5d36..d781319 100644 --- a/src/master_control/app.py +++ b/src/master_control/app.py @@ -2,13 +2,10 @@ from typing import Any -from master_control.bootstrap_prereqs import collect_bootstrap_python_diagnostics from master_control.config import Settings from master_control.core.runtime import MasterControlRuntime from master_control.interfaces.agent.chat import MasterControlChatInterface -from master_control.providers.availability import collect_provider_checks from master_control.providers.base import ProviderClient -from master_control.systemd_timer import collect_reconcile_timer_diagnostics class MasterControlApp: @@ -30,47 +27,7 @@ def __getattr__(self, name: str) -> Any: return getattr(self.runtime, name) def doctor(self) -> dict[str, object]: - self.runtime.bootstrap() - provider_checks = collect_provider_checks(self.runtime.settings) - store_diagnostics = self.runtime.store.diagnostics() - timer_diagnostics = collect_reconcile_timer_diagnostics() - bootstrap_python_diagnostics = collect_bootstrap_python_diagnostics("python3") - active_provider_check = dict( - provider_checks.get( - self.runtime.provider.name, - { - "name": self.runtime.provider.name, - "available": True, - "summary": "active provider has no dedicated health probe", - }, - ) - ) - doctor_ok = bool(active_provider_check.get("available", False)) and bool( - store_diagnostics.get("ok", False) - ) - llm_provider_available = any( - bool(provider_checks[name].get("available", False)) for name in ("ollama", "openai") 
- ) - return { - "ok": doctor_ok, - "state_dir": str(self.runtime.settings.state_dir), - "db_path": str(self.runtime.settings.db_path), - "provider": self.runtime.settings.provider, - "provider_backend": self.runtime.provider.name, - "planner_mode": ( - "llm" if self.runtime.provider.name in {"openai", "ollama"} else "heuristic" - ), - "llm_provider_available": llm_provider_available, - "active_provider_check": active_provider_check, - "provider_checks": provider_checks, - "provider_diagnostics": self.runtime.provider.diagnostics(), - "store_diagnostics": store_diagnostics, - "bootstrap_python_diagnostics": bootstrap_python_diagnostics, - "reconcile_timer_diagnostics": timer_diagnostics, - "audit_event_count": self.runtime.store.count_audit_events(), - "session_count": len(self.runtime.store.list_sessions(limit=10_000)), - "tools": [spec.name for spec in self.runtime.list_tools()], - } + return self.runtime.doctor() def start_chat_session( self, diff --git a/src/master_control/config.py b/src/master_control/config.py index c339180..a5e2cd7 100644 --- a/src/master_control/config.py +++ b/src/master_control/config.py @@ -22,6 +22,7 @@ class Settings: provider: str state_dir: Path db_path: Path + policy_path: Path | None = None openai_api_key: str | None = None openai_base_url: str = DEFAULT_OPENAI_BASE_URL openai_model: str = DEFAULT_OPENAI_MODEL @@ -41,12 +42,14 @@ class Settings: def from_env(cls) -> "Settings": state_dir = Path(os.getenv("MC_STATE_DIR", DEFAULT_STATE_DIR)) db_path = Path(os.getenv("MC_DB_PATH", state_dir / "mc.sqlite3")) + policy_path = Path(os.getenv("MC_POLICY_PATH", state_dir / "policy.toml")) return cls( app_name="master-control", log_level=os.getenv("MC_LOG_LEVEL", "INFO"), provider=os.getenv("MC_PROVIDER", "auto"), state_dir=state_dir, db_path=db_path, + policy_path=policy_path, openai_api_key=os.getenv("OPENAI_API_KEY"), openai_base_url=os.getenv("OPENAI_BASE_URL", DEFAULT_OPENAI_BASE_URL), openai_model=os.getenv("MC_OPENAI_MODEL", 
DEFAULT_OPENAI_MODEL), @@ -72,6 +75,9 @@ def from_env(cls) -> "Settings": def ensure_directories(self) -> None: self.state_dir.mkdir(parents=True, exist_ok=True) + def resolved_policy_path(self) -> Path: + return self.policy_path or self.state_dir / "policy.toml" + def _parse_bool_env(name: str, default: bool) -> bool: raw_value = os.getenv(name) diff --git a/src/master_control/config_manager.py b/src/master_control/config_manager.py index 363e527..5e039db 100644 --- a/src/master_control/config_manager.py +++ b/src/master_control/config_manager.py @@ -5,6 +5,7 @@ import json import os import tempfile +from collections.abc import Callable from dataclasses import dataclass from datetime import UTC, datetime from pathlib import Path @@ -53,10 +54,12 @@ def __init__( runner: CommandRunner, *, targets: tuple[ConfigTarget, ...] | None = None, + target_loader: Callable[[], tuple[ConfigTarget, ...]] | None = None, ) -> None: self.state_dir = state_dir self.runner = runner - self.targets = targets or build_default_config_targets(state_dir) + self.targets = targets + self.target_loader = target_loader self.backup_root = state_dir / "config-backups" def read_text(self, path_text: str) -> dict[str, Any]: @@ -143,7 +146,7 @@ def resolve_target(self, path_text: str) -> ConfigResolution: candidate = Path(path_text).expanduser() resolved_path = candidate.resolve(strict=False) - for target in self.targets: + for target in self._current_targets(): if target.matches(resolved_path): return ConfigResolution(path=resolved_path, target=target) @@ -266,6 +269,13 @@ def _is_within(self, candidate: Path, root: Path) -> bool: except ValueError: return False + def _current_targets(self) -> tuple[ConfigTarget, ...]: + if self.target_loader is not None: + return self.target_loader() + if self.targets is not None: + return self.targets + return build_default_config_targets(self.state_dir) + def build_default_config_targets(state_dir: Path) -> tuple[ConfigTarget, ...]: managed_root = (state_dir / 
"managed-configs").resolve(strict=False) diff --git a/src/master_control/core/runtime.py b/src/master_control/core/runtime.py index 8b59b0a..319e301 100644 --- a/src/master_control/core/runtime.py +++ b/src/master_control/core/runtime.py @@ -6,6 +6,22 @@ from pathlib import Path from typing import Any, cast +from master_control.agent.session_summary import update_session_summary +from master_control.agent.turn_planning import ( + build_turn_planning_prompt, + classify_turn_decision, + collect_planned_refresh_keys, + collect_stale_observation_keys, + should_continue_planning, + summarize_execution_for_planner, + validate_provider_response_for_loop, +) +from master_control.agent.turn_rendering import ( + append_recommendations_to_message, + apply_turn_decision_guidance, + collect_rendered_execution_summaries, + render_chat_response, +) from master_control.bootstrap_prereqs import collect_bootstrap_python_diagnostics from master_control.config import Settings from master_control.core.observations import ( @@ -28,22 +44,6 @@ build_recommendation_candidates, sort_recommendations, ) -from master_control.interfaces.agent.session_summary import update_session_summary -from master_control.interfaces.agent.turn_planning import ( - build_turn_planning_prompt, - classify_turn_decision, - collect_planned_refresh_keys, - collect_stale_observation_keys, - should_continue_planning, - summarize_execution_for_planner, - validate_provider_response_for_loop, -) -from master_control.interfaces.agent.turn_rendering import ( - append_recommendations_to_message, - apply_turn_decision_guidance, - collect_rendered_execution_summaries, - render_chat_response, -) from master_control.policy.engine import PolicyEngine from master_control.providers.availability import collect_provider_checks from master_control.providers.base import ( @@ -136,6 +136,11 @@ def _coerce_mapping(value: object) -> dict[str, object]: return {} +def _has_explicit_approval_id(context_payload: dict[str, object]) -> bool: 
+ value = context_payload.get("approval_id") + return isinstance(value, int) and not isinstance(value, bool) + + class MasterControlRuntime: def __init__( self, @@ -145,9 +150,15 @@ def __init__( ) -> None: self.settings = settings self.store = SessionStore(settings.db_path) - self.policy = PolicyEngine() + self.policy = PolicyEngine( + state_dir=settings.state_dir, + policy_path=settings.resolved_policy_path(), + ) self.provider = provider_override or build_provider(settings) - self.registry: ToolRegistry = build_default_registry(settings.state_dir) + self.registry: ToolRegistry = build_default_registry( + settings.state_dir, + config_target_loader=self.policy.config_targets, + ) self.chat_session_id: int | None = None self.previous_provider_response_id: str | None = None @@ -159,6 +170,7 @@ def doctor(self) -> dict[str, object]: self.bootstrap() provider_checks = collect_provider_checks(self.settings) store_diagnostics = self.store.diagnostics() + policy_diagnostics = self.policy.diagnostics() timer_diagnostics = collect_reconcile_timer_diagnostics() bootstrap_python_diagnostics = collect_bootstrap_python_diagnostics("python3") active_provider_check = dict( @@ -171,8 +183,10 @@ def doctor(self) -> dict[str, object]: }, ) ) - doctor_ok = bool(active_provider_check.get("available", False)) and bool( - store_diagnostics.get("ok", False) + doctor_ok = ( + bool(active_provider_check.get("available", False)) + and bool(store_diagnostics.get("ok", False)) + and bool(policy_diagnostics.get("ok", False)) ) llm_provider_available = any( bool(provider_checks[name].get("available", False)) for name in ("ollama", "openai") @@ -189,6 +203,7 @@ def doctor(self) -> dict[str, object]: "provider_checks": provider_checks, "provider_diagnostics": self.provider.diagnostics(), "store_diagnostics": store_diagnostics, + "policy_diagnostics": policy_diagnostics, "bootstrap_python_diagnostics": bootstrap_python_diagnostics, "reconcile_timer_diagnostics": timer_diagnostics, 
"audit_event_count": self.store.count_audit_events(), @@ -203,6 +218,126 @@ def list_audit_events(self, limit: int = 20) -> list[dict[str, object]]: self.bootstrap() return self.store.list_audit_events(limit=limit) + def list_tool_approvals( + self, + *, + status: str | None = None, + limit: int = 100, + ) -> dict[str, object]: + self.bootstrap() + return { + "status_filter": status, + "approvals": self.store.list_tool_approvals(status=status, limit=limit), + } + + def get_tool_approval(self, approval_id: int) -> dict[str, object]: + self.bootstrap() + approval = self.store.get_tool_approval(approval_id) + if approval is None: + raise ValueError(f"Unknown approval_id: {approval_id}") + return approval + + def reject_tool_approval(self, approval_id: int) -> dict[str, object]: + self.bootstrap() + approval = self.store.reject_tool_approval(approval_id) + if approval is None: + existing = self.store.get_tool_approval(approval_id) + if existing is None: + raise ValueError(f"Unknown approval_id: {approval_id}") + raise ValueError( + f"Approval {approval_id} is in status '{existing['status']}' and cannot be rejected." + ) + self.store.record_audit_event( + "tool_approval_rejected", + { + "approval_id": approval_id, + "tool": approval["tool"], + "risk": approval["risk"], + "audit_context": approval["audit_context"], + }, + ) + return approval + + def approve_tool_approval(self, approval_id: int) -> dict[str, object]: + self.bootstrap() + approval = self.store.claim_tool_approval(approval_id) + if approval is None: + existing = self.store.get_tool_approval(approval_id) + if existing is None: + raise ValueError(f"Unknown approval_id: {approval_id}") + raise ValueError( + f"Approval {approval_id} is in status '{existing['status']}' and cannot run." 
+ ) + + tool_name = approval.get("tool") + arguments = approval.get("arguments") + audit_context = approval.get("audit_context") + if not isinstance(tool_name, str) or not tool_name: + raise ValueError(f"Approval {approval_id} is missing a valid tool name.") + if not isinstance(arguments, dict): + arguments = {} + if not isinstance(audit_context, dict): + audit_context = {} + + execution_context = dict(audit_context) + execution_context["approval_id"] = approval_id + + try: + execution = self.run_tool( + tool_name, + dict(arguments), + confirmed=True, + audit_context=execution_context, + ) + except Exception as exc: + failure_payload = { + "ok": False, + "tool": tool_name, + "arguments": dict(arguments), + **execution_context, + "error": str(exc), + } + finalized = self.store.finish_tool_approval( + approval_id, + status="failed", + execution_payload=failure_payload, + ) + self.store.record_audit_event( + "tool_approval_execution", + { + "approval_id": approval_id, + "tool": tool_name, + "ok": False, + "error": str(exc), + }, + ) + if finalized is None: + raise RuntimeError( + f"Tool approval {approval_id} could not be finalized after failure." 
+ ) from exc + raise + + finalized = self.store.finish_tool_approval( + approval_id, + status="completed" if bool(execution.get("ok", False)) else "failed", + execution_payload=execution, + ) + if finalized is None: + raise RuntimeError(f"Tool approval {approval_id} could not be finalized.") + self.store.record_audit_event( + "tool_approval_execution", + { + "approval_id": approval_id, + "tool": tool_name, + "ok": execution.get("ok", False), + "pending_confirmation": execution.get("pending_confirmation", False), + }, + ) + return { + "approval": finalized, + "execution": execution, + } + def list_sessions(self, limit: int = 20) -> list[dict[str, object]]: self.bootstrap() sessions = self.store.list_sessions(limit=limit) @@ -540,7 +675,17 @@ def run_tool( self.store.record_audit_event("tool_execution", payload) return payload - decision = self.policy.evaluate(tool.spec) + claimed_approval: dict[str, object] | None = None + if confirmed and not _has_explicit_approval_id(context_payload): + claimed_approval = self.store.claim_latest_matching_tool_approval( + tool_name=tool.spec.name, + arguments=argument_payload, + audit_context=context_payload, + ) + if claimed_approval is not None: + context_payload["approval_id"] = claimed_approval["id"] + + decision = self.policy.evaluate(tool.spec, argument_payload) audit_base = { "tool": tool.spec.name, "risk": tool.spec.risk.value, @@ -553,21 +698,28 @@ def run_tool( payload = { "ok": False, **audit_base, - "error": "Policy denied tool execution.", + "error": decision.reason, } + self._finalize_claimed_tool_approval( + claimed_approval, + status="failed", + execution_payload=payload, + ) self.store.record_audit_event("tool_execution", payload) return payload if decision.needs_confirmation and not confirmed: + approval = self._create_tool_approval( + tool_name=tool.spec.name, + risk=tool.spec.risk.value, + arguments=argument_payload, + context_payload=context_payload, + ) payload = { "ok": False, **audit_base, 
"pending_confirmation": True, - "approval": self._build_approval_payload( - tool.spec.name, - argument_payload, - context_payload, - ), + "approval": approval, "error": "Tool requires explicit confirmation before execution.", } self.store.record_audit_event("tool_execution", payload) @@ -581,6 +733,11 @@ def run_tool( **audit_base, "error": str(exc), } + self._finalize_claimed_tool_approval( + claimed_approval, + status="failed", + execution_payload=payload, + ) self.store.record_audit_event("tool_execution", payload) return payload @@ -600,6 +757,11 @@ def run_tool( arguments=argument_payload, result=result, ) + self._finalize_claimed_tool_approval( + claimed_approval, + status="completed", + execution_payload=payload, + ) self.store.record_audit_event( "tool_execution", { @@ -658,6 +820,70 @@ def _build_approval_payload( "summary": f"Confirme a execução de {_describe_tool_target(tool_name, arguments)}.", } + def _create_tool_approval( + self, + *, + tool_name: str, + risk: str, + arguments: dict[str, object], + context_payload: dict[str, object], + ) -> dict[str, object]: + rendered = self._build_approval_payload(tool_name, arguments, context_payload) + approval = self.store.create_tool_approval( + tool_name=tool_name, + risk=risk, + arguments=arguments, + audit_context=context_payload, + summary=str(rendered["summary"]), + cli_command=str(rendered["cli_command"]), + chat_command=str(rendered["chat_command"]), + ) + return self._format_tool_approval(approval, required=True) + + def _format_tool_approval( + self, + approval: dict[str, object], + *, + required: bool = False, + ) -> dict[str, object]: + payload = { + "id": approval["id"], + "tool": approval["tool"], + "risk": approval["risk"], + "arguments": approval["arguments"], + "audit_context": approval["audit_context"], + "summary": approval["summary"], + "cli_command": approval["cli_command"], + "chat_command": approval["chat_command"], + "status": approval["status"], + "execution": approval["execution"], + 
"error": approval["error"], + "created_at": approval["created_at"], + "updated_at": approval["updated_at"], + "resolved_at": approval["resolved_at"], + } + if required: + payload["required"] = True + return payload + + def _finalize_claimed_tool_approval( + self, + approval: dict[str, object] | None, + *, + status: str, + execution_payload: dict[str, object], + ) -> None: + if approval is None: + return + approval_id = approval.get("id") + if not isinstance(approval_id, int) or isinstance(approval_id, bool): + return + self.store.finish_tool_approval( + approval_id, + status=status, + execution_payload=execution_payload, + ) + def _build_recommendation_commands(self, recommendation_id: int) -> dict[str, str]: return { "cli_accept_command": f"mc recommendation {recommendation_id} accepted", diff --git a/src/master_control/interfaces/agent/session_summary.py b/src/master_control/interfaces/agent/session_summary.py index 364c2f4..9320f90 100644 --- a/src/master_control/interfaces/agent/session_summary.py +++ b/src/master_control/interfaces/agent/session_summary.py @@ -1,150 +1,3 @@ -from __future__ import annotations +"""Compatibility re-export for agent session summary helpers.""" -from collections import OrderedDict - -from master_control.agent.tool_result_views import build_tool_result_view -from master_control.shared.planning import ExecutionPlan -from master_control.shared.session_summary import parse_session_summary - -SUMMARY_ORDER = ( - "current_focus", - "tracked_unit", - "tracked_scope", - "tracked_path", - "last_intent", - "last_user_request", - "host", - "memory", - "disk", - "service", - "config", - "config_target", - "config_validation", - "last_backup_path", - "logs", - "processes", - "last_assistant_reply", -) -MAX_SUMMARY_LINES = 13 -MAX_VALUE_CHARS = 180 - - -def update_session_summary( - existing_summary: str | None, - *, - user_input: str, - plan: ExecutionPlan | None, - executions: list[dict[str, object]], - assistant_message: str, -) -> str: - 
summary = parse_session_summary(existing_summary) - - summary["last_user_request"] = _truncate(user_input) - summary["last_assistant_reply"] = _truncate(_first_paragraph(assistant_message)) - - if plan is not None: - summary["last_intent"] = _truncate(plan.intent) - if plan.steps: - first_step = plan.steps[0] - summary["current_focus"] = _truncate(first_step.rationale) - tracked_unit = _extract_tracked_unit(first_step.tool_name, first_step.arguments) - if tracked_unit: - summary["tracked_unit"] = _truncate(tracked_unit) - tracked_scope = _extract_tracked_scope(first_step.tool_name, first_step.arguments) - if tracked_scope: - summary["tracked_scope"] = tracked_scope - tracked_path = _extract_tracked_path(first_step.arguments) - if tracked_path: - summary["tracked_path"] = _truncate(tracked_path) - - for execution in executions: - _apply_execution_summary(summary, execution) - - return _render_summary(summary) - - -def _render_summary(summary: OrderedDict[str, str]) -> str: - ordered_items: list[tuple[str, str]] = [] - seen_keys = set() - - for key in SUMMARY_ORDER: - value = summary.get(key) - if value: - ordered_items.append((key, value)) - seen_keys.add(key) - - for key, value in summary.items(): - if key in seen_keys or not value: - continue - ordered_items.append((key, value)) - - lines = [f"{key}: {value}" for key, value in ordered_items[:MAX_SUMMARY_LINES]] - return "\n".join(lines) - - -def _apply_execution_summary( - summary: OrderedDict[str, str], - execution: dict[str, object], -) -> None: - if not execution.get("ok"): - return - - tool_name = execution.get("tool") - arguments = execution.get("arguments") - result = execution.get("result") - if not isinstance(tool_name, str) or not isinstance(result, dict): - return - resolved_arguments = arguments if isinstance(arguments, dict) else {} - view = build_tool_result_view(tool_name, resolved_arguments, result) - for key, value in view.summary_updates.items(): - if isinstance(value, str) and value: - 
summary[key] = _truncate(value) - - -def _extract_tracked_unit(tool_name: str, arguments: dict[str, object]) -> str | None: - candidate_keys = ["unit"] - if tool_name in { - "service_status", - "restart_service", - "reload_service", - "failed_services", - }: - candidate_keys.append("name") - for key in candidate_keys: - value = arguments.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - return None - - -def _extract_tracked_path(arguments: dict[str, object]) -> str | None: - value = arguments.get("path") - if isinstance(value, str) and value.strip(): - return value.strip() - return None - - -def _extract_tracked_scope(tool_name: str, arguments: dict[str, object]) -> str | None: - if tool_name not in { - "service_status", - "restart_service", - "reload_service", - "failed_services", - }: - return None - value = arguments.get("scope") - if isinstance(value, str) and value in {"system", "user"}: - return value - return None - - -def _first_paragraph(text: str) -> str: - lines = [line.strip() for line in text.splitlines() if line.strip()] - return lines[0] if lines else "" - - -def _truncate(value: str) -> str: - normalized = " ".join(value.split()) - if len(normalized) <= MAX_VALUE_CHARS: - return normalized - return normalized[: MAX_VALUE_CHARS - 3].rstrip() + "..." 
+from master_control.agent.session_summary import * # noqa: F401,F403 diff --git a/src/master_control/interfaces/agent/turn_planning.py b/src/master_control/interfaces/agent/turn_planning.py index ebe7f11..0e44d33 100644 --- a/src/master_control/interfaces/agent/turn_planning.py +++ b/src/master_control/interfaces/agent/turn_planning.py @@ -1,149 +1,3 @@ -from __future__ import annotations +"""Compatibility re-export for agent turn-planning helpers.""" -import json - -from master_control.agent.tool_result_views import build_tool_result_view -from master_control.core.observations import ObservationFreshness, observation_key_for_tool -from master_control.providers.base import ProviderError, ProviderResponse -from master_control.shared.planning import ExecutionPlan, PlanningDecision - - -def build_turn_planning_prompt( - *, - user_input: str, - iteration: int, - executions: list[dict[str, object]], -) -> str | None: - if not executions: - if iteration == 0: - return "\n".join( - [ - "Current-turn planning guardrails:", - f"- original_user_request: {user_input}", - "- For live host inspection requests, do not answer from memory alone.", - "- If the user asks about current memory, disk, processes, service state, logs, or host metadata, return decision.state=needs_tools and call the matching read-only tool first.", - "- Only return decision.state=complete on the first planning pass when the request is non-operational, already fully answered by the provided context, or safely unsupported.", - ] - ) - return ( - "This is a continuation of the same user request. " - "If enough information is already available, return decision.state=complete with no steps." 
- ) - - observation_lines = [summarize_execution_for_planner(execution) for execution in executions] - rendered_observations = "\n".join(f"- {line}" for line in observation_lines) - return "\n".join( - [ - "Current-turn planning context:", - f"- original_user_request: {user_input}", - f"- planning_iteration: {iteration + 1}", - "- Return an explicit planner decision: needs_tools, complete, or blocked.", - "- Do not repeat tool calls that already ran in this same turn unless the user explicitly asked to rerun them.", - "- If the observations below are already enough, return decision.state=complete, no steps, and summarize the findings.", - "- If the request cannot continue safely with the available tools, return decision.state=blocked.", - "- If a prior step failed or requires confirmation, do not propose dependent steps.", - "Execution observations:", - rendered_observations, - ] - ) - - -def summarize_execution_for_planner(execution: dict[str, object]) -> str: - tool_name = str(execution.get("tool", "unknown")) - arguments = execution.get("arguments", {}) - argument_text = json.dumps(arguments, sort_keys=True) - if execution.get("pending_confirmation"): - return f"{tool_name}({argument_text}) -> pending_confirmation" - if not execution.get("ok"): - return f"{tool_name}({argument_text}) -> error: {execution.get('error', 'unknown')}" - - result = execution.get("result") - if not isinstance(result, dict): - return f"{tool_name}({argument_text}) -> ok" - return build_tool_result_view(tool_name, _coerce_mapping(arguments), result).planner_summary - - -def _coerce_mapping(value: object) -> dict[str, object]: - if isinstance(value, dict): - return value - return {} - - -def validate_provider_response_for_loop(provider_response: ProviderResponse) -> PlanningDecision: - decision = provider_response.resolved_decision() - has_steps = bool(provider_response.plan and provider_response.plan.steps) - if decision.state == "needs_tools" and not has_steps: - raise 
ProviderError("Provider declared needs_tools without returning executable steps.") - if decision.state != "needs_tools" and has_steps: - raise ProviderError( - "Provider returned executable steps for a non-needs_tools decision." - ) - return decision - - -def should_continue_planning( - plan: ExecutionPlan | None, - *, - multi_step_intents: set[str], -) -> bool: - if plan is None: - return False - return plan.intent in multi_step_intents - - -def classify_turn_decision( - provider_response: ProviderResponse, - executions: list[dict[str, object]], - *, - multi_step_intents: set[str], -) -> PlanningDecision: - for execution in executions: - if execution.get("pending_confirmation"): - return PlanningDecision( - state="blocked", - kind="awaiting_confirmation", - reason="The next action is waiting for explicit confirmation before it can run.", - ) - for execution in executions: - if not execution.get("ok"): - return PlanningDecision( - state="blocked", - kind="execution_failed", - reason="A tool execution failed before the request could complete safely.", - ) - - plan_decision = provider_response.resolved_decision() - if executions and not should_continue_planning( - provider_response.plan, - multi_step_intents=multi_step_intents, - ): - return PlanningDecision( - state="complete", - kind="evidence_sufficient", - reason="Current-turn evidence is sufficient for the final response.", - ) - if plan_decision.state == "needs_tools" and collect_planned_refresh_keys(provider_response.plan): - return PlanningDecision( - state="needs_tools", - kind="refresh_required", - reason="Fresh host observations are required before the diagnosis can continue.", - ) - return plan_decision - - -def collect_planned_refresh_keys(plan: ExecutionPlan | None) -> list[str]: - if plan is None: - return [] - keys: list[str] = [] - for step in plan.steps: - observation_key = observation_key_for_tool(step.tool_name) - if observation_key is None or observation_key in keys: - continue - 
keys.append(observation_key) - return keys - - -def collect_stale_observation_keys( - observation_freshness: tuple[ObservationFreshness, ...], -) -> list[str]: - return sorted({item.key for item in observation_freshness if item.stale}) +from master_control.agent.turn_planning import * # noqa: F401,F403 diff --git a/src/master_control/interfaces/agent/turn_rendering.py b/src/master_control/interfaces/agent/turn_rendering.py index 002a54c..e330630 100644 --- a/src/master_control/interfaces/agent/turn_rendering.py +++ b/src/master_control/interfaces/agent/turn_rendering.py @@ -1,145 +1,3 @@ -from __future__ import annotations +"""Compatibility re-export for agent turn-rendering helpers.""" -from typing import cast - -from master_control.agent.tool_result_views import build_tool_result_view -from master_control.core.recommendation_sync import RecommendationSyncResult -from master_control.providers.base import ProviderResponse -from master_control.shared.planning import PlanningDecision - - -def render_chat_response( - provider_response: ProviderResponse, - executions: list[dict[str, object]], -) -> str: - sections = [provider_response.message] - rendered_results = collect_rendered_execution_summaries(executions) - if rendered_results: - sections.extend(rendered_results) - return "\n\n".join(sections) - - -def apply_turn_decision_guidance( - message: str, - executions: list[dict[str, object]], - turn_decision: PlanningDecision, -) -> str: - if turn_decision.state == "blocked" and turn_decision.kind == "awaiting_confirmation": - pending_execution = next( - (execution for execution in executions if execution.get("pending_confirmation")), - None, - ) - if isinstance(pending_execution, dict): - approval = pending_execution.get("approval") - if isinstance(approval, dict): - cli_command = approval.get("cli_command") - chat_command = approval.get("chat_command") - summary = approval.get("summary") - if ( - isinstance(cli_command, str) and cli_command and cli_command in message 
- ) or ( - isinstance(chat_command, str) and chat_command and chat_command in message - ): - if isinstance(summary, str) and summary.strip(): - return ( - f"{message}\n\nAção pendente de confirmação explícita. " - f"{summary.strip()}" - ) - return f"{message}\n\nAção pendente de confirmação explícita." - command_parts: list[str] = [] - if isinstance(cli_command, str) and cli_command.strip(): - command_parts.append(f"CLI: `{cli_command}`") - if isinstance(chat_command, str) and chat_command.strip(): - command_parts.append(f"Chat: `{chat_command}`") - if command_parts: - prefix = "Ação pendente de confirmação explícita." - if isinstance(summary, str) and summary.strip(): - prefix = f"{prefix} {summary.strip()}" - return ( - f"{message}\n\n{prefix} " - + " ".join(command_parts) - ) - return f"{message}\n\nAção pendente de confirmação explícita." - - if turn_decision.state == "blocked" and turn_decision.kind == "missing_safe_tool": - return ( - f"{message}\n\nEste runtime não expõe a tool segura necessária para esse pedido. " - "Use `mc tools` para conferir as capabilities disponíveis." - ) - - if turn_decision.state == "blocked" and turn_decision.kind == "execution_failed": - return ( - f"{message}\n\nO turno foi interrompido porque uma execução falhou antes da conclusão." - ) - - if turn_decision.state == "needs_tools" and turn_decision.kind == "refresh_required": - return f"{message}\n\nO agente ainda precisa atualizar sinais do host antes de concluir." 
- - return message - - -def collect_rendered_execution_summaries( - executions: list[dict[str, object]], -) -> list[str]: - rendered_results = [render_execution_summary(execution) for execution in executions] - return [item for item in rendered_results if item] - - -def append_recommendations_to_message( - message: str, - sync: RecommendationSyncResult, -) -> str: - highlighted = [*sync.new, *sync.reopened] - if not highlighted: - return message - - lines: list[str] = [] - for item in highlighted[:2]: - line = f"- [#{item['id']} {item['status']}] {item['message']}" - evidence = item.get("evidence_summary") - if isinstance(evidence, str) and evidence.strip(): - line += f" Evidência: {evidence.strip()}." - action_summary = item.get("action_summary") - if isinstance(action_summary, str) and action_summary.strip(): - line += f" Ação sugerida: {action_summary.strip()}" - next_step = item.get("next_step") - if isinstance(next_step, dict): - cli_command = next_step.get("cli_command") - if isinstance(cli_command, str) and cli_command.strip(): - line += f" Próximo passo: `{cli_command.strip()}`" - lines.append(line) - rendered = "\n".join(lines) - return f"{message}\n\nRecomendações da sessão:\n{rendered}" - - -def render_execution_summary(execution: dict[str, object]) -> str: - arguments = _coerce_mapping(execution.get("arguments")) - if not execution.get("ok"): - if execution.get("pending_confirmation"): - approval = execution.get("approval") - if isinstance(approval, dict): - cli_command = approval.get("cli_command") - chat_command = approval.get("chat_command") - summary = approval.get("summary") - prefix = ( - summary.strip() - if isinstance(summary, str) and summary.strip() - else f"A execução de `{execution['tool']}` exige confirmação explícita antes de prosseguir." - ) - return ( - f"{prefix} " - f"CLI: `{cli_command}`. Chat: `{chat_command}`." - ) - return f"A execução de `{execution['tool']}` exige confirmação explícita antes de prosseguir." 
- return f"Falha em `{execution['tool']}`: {execution.get('error', 'erro desconhecido')}." - - tool_name = str(execution["tool"]) - result = execution["result"] - assert isinstance(result, dict) - return build_tool_result_view(tool_name, arguments, result).rendered_summary - - -def _coerce_mapping(value: object) -> dict[str, object]: - if isinstance(value, dict): - return cast(dict[str, object], value) - return {} +from master_control.agent.turn_rendering import * # noqa: F401,F403 diff --git a/src/master_control/interfaces/cli/entrypoint.py b/src/master_control/interfaces/cli/entrypoint.py index 6240fac..e765ce6 100644 --- a/src/master_control/interfaces/cli/entrypoint.py +++ b/src/master_control/interfaces/cli/entrypoint.py @@ -232,7 +232,7 @@ def build_parser() -> argparse.ArgumentParser: subparsers.add_parser( "mcp-serve", - help="Run the experimental read-only MCP interface over runtime capabilities.", + help="Run the experimental MCP interface with approval-mediated write operations.", ) return parser @@ -462,6 +462,8 @@ def main(argv: Sequence[str] | None = None) -> int: f"{store_status} journal={store_diagnostics['journal_mode']} " f"integrity={store_diagnostics['integrity_check']}" ) + policy_diagnostics = cast(dict[str, Any], doctor_payload["policy_diagnostics"]) + print(f"policy: {policy_diagnostics['summary']}") bootstrap_diagnostics = cast( dict[str, Any], doctor_payload["bootstrap_python_diagnostics"], diff --git a/src/master_control/interfaces/mcp/server.py b/src/master_control/interfaces/mcp/server.py index 8707ca7..126e175 100644 --- a/src/master_control/interfaces/mcp/server.py +++ b/src/master_control/interfaces/mcp/server.py @@ -9,9 +9,20 @@ from master_control.config import Settings from master_control.core.runtime import MasterControlRuntime from master_control.logging_utils import configure_logging -from master_control.tools.base import RiskLevel -READ_ONLY_METHODS = frozenset({"initialize", "ping", "doctor", "tools/list", "tools/call"}) 
+SUPPORTED_METHODS = frozenset( + { + "initialize", + "ping", + "doctor", + "tools/list", + "tools/call", + "approvals/list", + "approvals/get", + "approvals/approve", + "approvals/reject", + } +) @dataclass(frozen=True, slots=True) @@ -27,7 +38,7 @@ def as_dict(self) -> dict[str, str]: class MasterControlMCPServer: - """Experimental stdio MCP interface for read-only runtime capabilities.""" + """Experimental stdio MCP interface with approval-mediated write operations.""" def __init__(self, runtime: MasterControlRuntime) -> None: self.runtime = runtime @@ -71,7 +82,7 @@ def _handle_line(self, line: str) -> dict[str, object]: request_id=request_id, error=MCPError("invalid_request", "Request is missing a string method."), ) - if method not in READ_ONLY_METHODS: + if method not in SUPPORTED_METHODS: return self._error_response( request_id=request_id, error=MCPError("unsupported_method", f"Unsupported method: {method}"), @@ -106,9 +117,13 @@ def _dispatch(self, method: str, params: object) -> dict[str, object]: }, "capabilities": { "tools": { - "mode": "read_only", + "mode": "approval_controlled", "count": len(self._list_exposed_tools()), - } + }, + "approvals": { + "mode": "explicit", + "statuses": ["pending", "executing", "completed", "failed", "rejected"], + }, }, } if method == "ping": @@ -125,24 +140,42 @@ def _dispatch(self, method: str, params: object) -> dict[str, object]: raise ValueError("tools/call requires params.name.") if not isinstance(tool_arguments, dict): raise ValueError("tools/call params.arguments must be an object.") - spec = self.runtime.registry.get(tool_name).spec - if spec.risk is not RiskLevel.READ_ONLY: - raise ValueError( - f"Tool '{tool_name}' is not exposed through the read-only MCP interface." 
- ) return self.runtime.run_tool( tool_name, tool_arguments, audit_context={"source": "mcp_stdio"}, ) + if method == "approvals/list": + arguments = params if isinstance(params, dict) else {} + status = arguments.get("status") + limit = arguments.get("limit", 100) + if status is not None and not isinstance(status, str): + raise ValueError("approvals/list params.status must be a string when provided.") + if not isinstance(limit, int) or isinstance(limit, bool) or limit <= 0: + raise ValueError("approvals/list params.limit must be a positive integer.") + return self.runtime.list_tool_approvals(status=status, limit=limit) + if method == "approvals/get": + arguments = params if isinstance(params, dict) else {} + approval_id = arguments.get("id") + if not isinstance(approval_id, int) or isinstance(approval_id, bool): + raise ValueError("approvals/get requires params.id as an integer.") + return self.runtime.get_tool_approval(approval_id) + if method == "approvals/approve": + arguments = params if isinstance(params, dict) else {} + approval_id = arguments.get("id") + if not isinstance(approval_id, int) or isinstance(approval_id, bool): + raise ValueError("approvals/approve requires params.id as an integer.") + return self.runtime.approve_tool_approval(approval_id) + if method == "approvals/reject": + arguments = params if isinstance(params, dict) else {} + approval_id = arguments.get("id") + if not isinstance(approval_id, int) or isinstance(approval_id, bool): + raise ValueError("approvals/reject requires params.id as an integer.") + return self.runtime.reject_tool_approval(approval_id) raise ValueError(f"Unsupported method: {method}") def _list_exposed_tools(self) -> list[dict[str, object]]: - return [ - spec.as_dict() - for spec in self.runtime.list_tools() - if spec.risk is RiskLevel.READ_ONLY - ] + return [spec.as_dict() for spec in self.runtime.list_tools()] def _error_response( self, @@ -160,7 +193,7 @@ def _error_response( def build_parser() -> 
argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="mc-mcp", - description="Run the experimental Master Control MCP interface.", + description="Run the experimental Master Control MCP interface with approval flow.", ) return parser diff --git a/src/master_control/policy/config.py b/src/master_control/policy/config.py new file mode 100644 index 0000000..6b852c6 --- /dev/null +++ b/src/master_control/policy/config.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import tomllib +from dataclasses import dataclass +from pathlib import Path + +from master_control.config_manager import ConfigTarget, build_default_config_targets + +SUPPORTED_POLICY_VERSION = 1 +SUPPORTED_VALIDATORS = frozenset({"ini_parse", "json_parse", "command"}) +SUPPORTED_SCOPES = frozenset({"system", "user"}) + + +@dataclass(frozen=True, slots=True) +class ToolPolicyRule: + enabled: bool | None = None + require_confirmation: bool = False + allowed_scopes: tuple[str, ...] = () + service_patterns: tuple[str, ...] = () + + def as_dict(self) -> dict[str, object]: + payload: dict[str, object] = { + "require_confirmation": self.require_confirmation, + } + if self.enabled is not None: + payload["enabled"] = self.enabled + if self.allowed_scopes: + payload["allowed_scopes"] = list(self.allowed_scopes) + if self.service_patterns: + payload["service_patterns"] = list(self.service_patterns) + return payload + + +@dataclass(frozen=True, slots=True) +class LoadedPolicy: + path: Path + exists: bool + version: int + using_default: bool + error: str | None + tool_rules: dict[str, ToolPolicyRule] + config_targets: tuple[ConfigTarget, ...] 
+ + def diagnostics(self) -> dict[str, object]: + summary = "loaded operator policy" + if self.error is not None: + summary = f"policy error: {self.error}" + elif self.using_default: + summary = "using default safe policy" + return { + "ok": self.error is None, + "path": str(self.path), + "exists": self.exists, + "version": self.version, + "using_default": self.using_default, + "summary": summary, + "error": self.error, + "tool_rule_count": len(self.tool_rules), + "tools_with_rules": sorted(self.tool_rules), + "config_target_count": len(self.config_targets), + "config_targets": [target.name for target in self.config_targets], + } + + +class PolicyLoader: + def __init__(self, path: Path, state_dir: Path) -> None: + self.path = path + self.state_dir = state_dir.resolve(strict=False) + self._cached_key: tuple[bool, int | None] | None = None + self._cached_policy: LoadedPolicy | None = None + + def load(self) -> LoadedPolicy: + exists = self.path.exists() + stamp = self.path.stat().st_mtime_ns if exists else None + cache_key = (exists, stamp) + if self._cached_key == cache_key and self._cached_policy is not None: + return self._cached_policy + + if not exists: + policy = LoadedPolicy( + path=self.path, + exists=False, + version=SUPPORTED_POLICY_VERSION, + using_default=True, + error=None, + tool_rules={}, + config_targets=build_default_config_targets(self.state_dir), + ) + else: + try: + raw_payload = tomllib.loads(self.path.read_text(encoding="utf-8")) + policy = self._parse_loaded_policy(raw_payload) + except Exception as exc: + policy = LoadedPolicy( + path=self.path, + exists=True, + version=SUPPORTED_POLICY_VERSION, + using_default=False, + error=str(exc), + tool_rules={}, + config_targets=(), + ) + + self._cached_key = cache_key + self._cached_policy = policy + return policy + + def diagnostics(self) -> dict[str, object]: + return self.load().diagnostics() + + def config_targets(self) -> tuple[ConfigTarget, ...]: + return self.load().config_targets + + def 
_parse_loaded_policy(self, raw_payload: object) -> LoadedPolicy: + if not isinstance(raw_payload, dict): + raise ValueError("Policy document must be a TOML table at the root.") + + version = raw_payload.get("version", SUPPORTED_POLICY_VERSION) + if not isinstance(version, int) or isinstance(version, bool): + raise ValueError("Policy field 'version' must be an integer.") + if version != SUPPORTED_POLICY_VERSION: + raise ValueError(f"Unsupported policy version: {version}") + + tool_rules = self._parse_tool_rules(raw_payload.get("tools")) + config_targets = self._parse_config_targets(raw_payload.get("config_targets")) + return LoadedPolicy( + path=self.path, + exists=True, + version=version, + using_default=False, + error=None, + tool_rules=tool_rules, + config_targets=config_targets, + ) + + def _parse_tool_rules(self, raw_tools: object) -> dict[str, ToolPolicyRule]: + if raw_tools is None: + return {} + if not isinstance(raw_tools, dict): + raise ValueError("Policy field 'tools' must be a TOML table.") + + tool_rules: dict[str, ToolPolicyRule] = {} + for tool_name, raw_rule in raw_tools.items(): + if not isinstance(tool_name, str) or not tool_name: + raise ValueError("Policy tool rule names must be non-empty strings.") + if not isinstance(raw_rule, dict): + raise ValueError(f"Policy rule for '{tool_name}' must be a TOML table.") + enabled = self._optional_bool(raw_rule, "enabled") + require_confirmation = self._bool_with_default( + raw_rule, + "require_confirmation", + default=False, + ) + allowed_scopes = tuple(self._optional_string_list(raw_rule, "allowed_scopes")) + for scope in allowed_scopes: + if scope not in SUPPORTED_SCOPES: + raise ValueError( + f"Policy rule for '{tool_name}' contains unsupported scope '{scope}'." 
+ ) + service_patterns = tuple(self._optional_string_list(raw_rule, "service_patterns")) + tool_rules[tool_name] = ToolPolicyRule( + enabled=enabled, + require_confirmation=require_confirmation, + allowed_scopes=allowed_scopes, + service_patterns=service_patterns, + ) + return tool_rules + + def _parse_config_targets(self, raw_targets: object) -> tuple[ConfigTarget, ...]: + if raw_targets is None: + return build_default_config_targets(self.state_dir) + if not isinstance(raw_targets, list): + raise ValueError("Policy field 'config_targets' must be an array of tables.") + + parsed_targets: list[ConfigTarget] = [] + for index, raw_target in enumerate(raw_targets, start=1): + if not isinstance(raw_target, dict): + raise ValueError(f"Config target #{index} must be a TOML table.") + name = self._required_string(raw_target, "name", index=index) + description = self._required_string(raw_target, "description", index=index) + roots = tuple( + self._resolve_policy_path(path_text) + for path_text in self._required_string_list(raw_target, "roots", index=index) + ) + file_globs = tuple(self._required_string_list(raw_target, "file_globs", index=index)) + validator_kind = self._required_string(raw_target, "validator", index=index) + if validator_kind not in SUPPORTED_VALIDATORS: + raise ValueError( + f"Config target '{name}' uses unsupported validator '{validator_kind}'." + ) + validator_command_items = self._optional_string_list(raw_target, "validator_command") + validator_command = tuple(validator_command_items) if validator_command_items else None + if validator_kind == "command" and not validator_command: + raise ValueError( + f"Config target '{name}' requires 'validator_command' for validator=command." + ) + if validator_kind != "command" and validator_command is not None: + raise ValueError( + f"Config target '{name}' may only set 'validator_command' when validator=command." 
+ ) + parsed_targets.append( + ConfigTarget( + name=name, + description=description, + roots=roots, + file_globs=file_globs, + validator_kind=validator_kind, + validator_command=validator_command, + ) + ) + return tuple(parsed_targets) + + def _resolve_policy_path(self, raw_path: str) -> Path: + expanded = raw_path.replace("$STATE_DIR", str(self.state_dir)) + candidate = Path(expanded).expanduser() + if not candidate.is_absolute(): + candidate = self.path.parent / candidate + return candidate.resolve(strict=False) + + def _required_string( + self, + payload: dict[str, object], + key: str, + *, + index: int | None = None, + ) -> str: + value = payload.get(key) + if not isinstance(value, str) or not value.strip(): + location = f" in config target #{index}" if index is not None else "" + raise ValueError(f"Policy field '{key}'{location} must be a non-empty string.") + return value.strip() + + def _required_string_list( + self, + payload: dict[str, object], + key: str, + *, + index: int | None = None, + ) -> list[str]: + value = payload.get(key) + if not isinstance(value, list) or not value: + location = f" in config target #{index}" if index is not None else "" + raise ValueError(f"Policy field '{key}'{location} must be a non-empty string list.") + items: list[str] = [] + for item in value: + if not isinstance(item, str) or not item.strip(): + raise ValueError(f"Policy field '{key}' contains an invalid string item.") + items.append(item.strip()) + return items + + def _optional_string_list(self, payload: dict[str, object], key: str) -> list[str]: + value = payload.get(key) + if value is None: + return [] + if not isinstance(value, list): + raise ValueError(f"Policy field '{key}' must be a string list when provided.") + items: list[str] = [] + for item in value: + if not isinstance(item, str) or not item.strip(): + raise ValueError(f"Policy field '{key}' contains an invalid string item.") + items.append(item.strip()) + return items + + def _optional_bool(self, payload: 
dict[str, object], key: str) -> bool | None: + value = payload.get(key) + if value is None: + return None + if not isinstance(value, bool): + raise ValueError(f"Policy field '{key}' must be a boolean when provided.") + return value + + def _bool_with_default( + self, + payload: dict[str, object], + key: str, + *, + default: bool, + ) -> bool: + value = payload.get(key, default) + if not isinstance(value, bool): + raise ValueError(f"Policy field '{key}' must be a boolean.") + return value diff --git a/src/master_control/policy/engine.py b/src/master_control/policy/engine.py index 26b0e60..4029d17 100644 --- a/src/master_control/policy/engine.py +++ b/src/master_control/policy/engine.py @@ -1,7 +1,12 @@ from __future__ import annotations +import fnmatch +from collections.abc import Mapping from dataclasses import dataclass +from pathlib import Path +from master_control.config_manager import ConfigTarget +from master_control.policy.config import PolicyLoader, ToolPolicyRule from master_control.tools.base import RiskLevel, ToolSpec @@ -20,23 +25,102 @@ def as_dict(self) -> dict[str, object]: class PolicyEngine: - def evaluate(self, spec: ToolSpec) -> PolicyDecision: - if spec.risk is RiskLevel.READ_ONLY: + def __init__(self, *, state_dir: Path, policy_path: Path) -> None: + self.loader = PolicyLoader(policy_path, state_dir) + + def diagnostics(self) -> dict[str, object]: + return self.loader.diagnostics() + + def config_targets(self) -> tuple[ConfigTarget, ...]: + return self.loader.config_targets() + + def evaluate( + self, + spec: ToolSpec, + arguments: Mapping[str, object] | None = None, + ) -> PolicyDecision: + policy = self.loader.load() + if policy.error is not None: + return PolicyDecision( + allowed=False, + needs_confirmation=False, + reason=f"Policy load error: {policy.error}", + ) + + rule = policy.tool_rules.get(spec.name) + if rule is not None and rule.enabled is False: return PolicyDecision( - allowed=True, + allowed=False, needs_confirmation=False, - 
reason="Read-only tool.", + reason=f"Tool `{spec.name}` is disabled by operator policy.", ) - if spec.risk is RiskLevel.MUTATING_SAFE: + denied_reason = self._evaluate_argument_constraints(spec.name, arguments or {}, rule) + if denied_reason is not None: return PolicyDecision( - allowed=True, - needs_confirmation=True, - reason="Mutating tool requires explicit confirmation.", + allowed=False, + needs_confirmation=False, + reason=denied_reason, ) + needs_confirmation = spec.risk is not RiskLevel.READ_ONLY + if rule is not None and rule.require_confirmation: + needs_confirmation = True + return PolicyDecision( allowed=True, - needs_confirmation=True, - reason="Privileged tool requires confirmation and preflight validation.", + needs_confirmation=needs_confirmation, + reason=self._build_reason(spec.risk, rule, needs_confirmation), ) + + def _evaluate_argument_constraints( + self, + tool_name: str, + arguments: Mapping[str, object], + rule: ToolPolicyRule | None, + ) -> str | None: + if rule is None: + return None + + if rule.allowed_scopes: + scope = self._normalize_scope(arguments.get("scope")) + if scope not in rule.allowed_scopes: + allowed_scopes = ", ".join(rule.allowed_scopes) + return f"Tool `{tool_name}` is limited by policy to scopes: {allowed_scopes}." + + if rule.service_patterns and tool_name in { + "service_status", + "restart_service", + "reload_service", + }: + service_name = arguments.get("name") + if not isinstance(service_name, str) or not service_name.strip(): + return f"Tool `{tool_name}` requires a valid service name for policy evaluation." + normalized_name = service_name.strip() + if not any( + fnmatch.fnmatch(normalized_name, pattern) for pattern in rule.service_patterns + ): + patterns = ", ".join(rule.service_patterns) + return ( + f"Tool `{tool_name}` is limited by policy to these service patterns: {patterns}." 
+ ) + return None + + def _normalize_scope(self, raw_scope: object) -> str: + if isinstance(raw_scope, str) and raw_scope.strip(): + return raw_scope.strip().lower() + return "system" + + def _build_reason( + self, + risk: RiskLevel, + rule: ToolPolicyRule | None, + needs_confirmation: bool, + ) -> str: + if rule is not None and rule.require_confirmation and risk is RiskLevel.READ_ONLY: + return "Operator policy requires explicit confirmation for this tool." + if risk is RiskLevel.READ_ONLY and not needs_confirmation: + return "Read-only tool." + if risk is RiskLevel.MUTATING_SAFE: + return "Mutating tool requires explicit confirmation." + return "Privileged tool requires confirmation and preflight validation." diff --git a/src/master_control/store/session_store.py b/src/master_control/store/session_store.py index 6b0590c..c6778eb 100644 --- a/src/master_control/store/session_store.py +++ b/src/master_control/store/session_store.py @@ -78,6 +78,23 @@ payload TEXT NOT NULL, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ); + +CREATE TABLE IF NOT EXISTS tool_approvals ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + tool_name TEXT NOT NULL, + risk TEXT NOT NULL, + arguments_json TEXT NOT NULL, + audit_context_json TEXT NOT NULL, + summary TEXT NOT NULL, + cli_command TEXT NOT NULL, + chat_command TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + execution_payload_json TEXT, + error_text TEXT, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + resolved_at TEXT +); """ @@ -156,6 +173,18 @@ def _ensure_indexes(self, connection: sqlite3.Connection) -> None: ON observations(session_id, key, id DESC) """ ) + connection.execute( + """ + CREATE INDEX IF NOT EXISTS idx_tool_approvals_status_id + ON tool_approvals(status, id DESC) + """ + ) + connection.execute( + """ + CREATE INDEX IF NOT EXISTS idx_tool_approvals_match + ON tool_approvals(tool_name, status, id DESC) + """ + ) def _ensure_columns( self, @@ 
-209,6 +238,301 @@ def count_audit_events(self) -> int: row = cursor.fetchone() return int(row[0]) if row is not None else 0 + def create_tool_approval( + self, + *, + tool_name: str, + risk: str, + arguments: dict[str, object], + audit_context: dict[str, object], + summary: str, + cli_command: str, + chat_command: str, + ) -> dict[str, object]: + with closing(self._connect()) as connection: + cursor = connection.execute( + """ + INSERT INTO tool_approvals ( + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command + ) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + tool_name, + risk, + json.dumps(arguments, sort_keys=True), + json.dumps(audit_context, sort_keys=True), + summary, + cli_command, + chat_command, + ), + ) + connection.commit() + approval_id = cursor.lastrowid + if approval_id is None: + raise RuntimeError("SQLite did not return a tool approval id.") + approval = self.get_tool_approval(int(approval_id)) + if approval is None: + raise RuntimeError(f"Tool approval {approval_id} disappeared after insert.") + return approval + + def list_tool_approvals( + self, + *, + status: str | None = None, + limit: int = 100, + ) -> list[dict[str, object]]: + with closing(self._connect()) as connection: + if status is None: + cursor = connection.execute( + """ + SELECT + id, + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command, + status, + execution_payload_json, + error_text, + created_at, + updated_at, + resolved_at + FROM tool_approvals + ORDER BY id DESC + LIMIT ? + """, + (limit,), + ) + else: + cursor = connection.execute( + """ + SELECT + id, + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command, + status, + execution_payload_json, + error_text, + created_at, + updated_at, + resolved_at + FROM tool_approvals + WHERE status = ? + ORDER BY id DESC + LIMIT ? 
+ """, + (status, limit), + ) + rows = cursor.fetchall() + return [self._row_to_tool_approval(row) for row in rows] + + def get_tool_approval(self, approval_id: int) -> dict[str, object] | None: + with closing(self._connect()) as connection: + cursor = connection.execute( + """ + SELECT + id, + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command, + status, + execution_payload_json, + error_text, + created_at, + updated_at, + resolved_at + FROM tool_approvals + WHERE id = ? + """, + (approval_id,), + ) + row = cursor.fetchone() + if row is None: + return None + return self._row_to_tool_approval(row) + + def claim_tool_approval(self, approval_id: int) -> dict[str, object] | None: + with closing(self._connect()) as connection: + connection.execute("BEGIN IMMEDIATE") + cursor = connection.execute( + """ + UPDATE tool_approvals + SET status = 'executing', updated_at = CURRENT_TIMESTAMP + WHERE id = ? AND status = 'pending' + """, + (approval_id,), + ) + if cursor.rowcount != 1: + connection.rollback() + return None + cursor = connection.execute( + """ + SELECT + id, + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command, + status, + execution_payload_json, + error_text, + created_at, + updated_at, + resolved_at + FROM tool_approvals + WHERE id = ? + """, + (approval_id,), + ) + row = cursor.fetchone() + connection.commit() + if row is None: + return None + return self._row_to_tool_approval(row) + + def claim_latest_matching_tool_approval( + self, + *, + tool_name: str, + arguments: dict[str, object], + audit_context: dict[str, object], + ) -> dict[str, object] | None: + with closing(self._connect()) as connection: + connection.execute("BEGIN IMMEDIATE") + cursor = connection.execute( + """ + SELECT id + FROM tool_approvals + WHERE tool_name = ? + AND status = 'pending' + AND arguments_json = ? + AND audit_context_json = ? 
+ ORDER BY id DESC + LIMIT 1 + """, + ( + tool_name, + json.dumps(arguments, sort_keys=True), + json.dumps(audit_context, sort_keys=True), + ), + ) + row = cursor.fetchone() + if row is None: + connection.rollback() + return None + approval_id = int(row[0]) + cursor = connection.execute( + """ + UPDATE tool_approvals + SET status = 'executing', updated_at = CURRENT_TIMESTAMP + WHERE id = ? AND status = 'pending' + """, + (approval_id,), + ) + if cursor.rowcount != 1: + connection.rollback() + return None + cursor = connection.execute( + """ + SELECT + id, + tool_name, + risk, + arguments_json, + audit_context_json, + summary, + cli_command, + chat_command, + status, + execution_payload_json, + error_text, + created_at, + updated_at, + resolved_at + FROM tool_approvals + WHERE id = ? + """, + (approval_id,), + ) + claimed_row = cursor.fetchone() + connection.commit() + if claimed_row is None: + return None + return self._row_to_tool_approval(claimed_row) + + def finish_tool_approval( + self, + approval_id: int, + *, + status: str, + execution_payload: dict[str, object], + ) -> dict[str, object] | None: + with closing(self._connect()) as connection: + cursor = connection.execute( + """ + UPDATE tool_approvals + SET + status = ?, + execution_payload_json = ?, + error_text = ?, + updated_at = CURRENT_TIMESTAMP, + resolved_at = CURRENT_TIMESTAMP + WHERE id = ? 
AND status = 'executing' + """, + ( + status, + json.dumps(execution_payload, sort_keys=True), + str(execution_payload.get("error")) + if isinstance(execution_payload.get("error"), str) + else None, + approval_id, + ), + ) + connection.commit() + if cursor.rowcount != 1: + return None + return self.get_tool_approval(approval_id) + + def reject_tool_approval(self, approval_id: int) -> dict[str, object] | None: + with closing(self._connect()) as connection: + cursor = connection.execute( + """ + UPDATE tool_approvals + SET + status = 'rejected', + updated_at = CURRENT_TIMESTAMP, + resolved_at = CURRENT_TIMESTAMP + WHERE id = ? AND status = 'pending' + """, + (approval_id,), + ) + connection.commit() + if cursor.rowcount != 1: + return None + return self.get_tool_approval(approval_id) + def create_session(self) -> int: with closing(self._connect()) as connection: cursor = connection.execute("INSERT INTO sessions DEFAULT VALUES") @@ -846,6 +1170,27 @@ def _row_to_recommendation(self, row: tuple[object, ...]) -> dict[str, object]: "last_seen_at": row[13], } + def _row_to_tool_approval(self, row: tuple[object, ...]) -> dict[str, object]: + arguments = _deserialize_json_object(row[3]) + audit_context = _deserialize_json_object(row[4]) + execution = _deserialize_json_object(row[9]) if row[9] is not None else None + return { + "id": row[0], + "tool": row[1], + "risk": row[2], + "arguments": arguments, + "audit_context": audit_context, + "summary": row[5], + "cli_command": row[6], + "chat_command": row[7], + "status": row[8], + "execution": execution, + "error": row[10], + "created_at": row[11], + "updated_at": row[12], + "resolved_at": row[13], + } + def _coerce_int(value: object, label: str) -> int: if isinstance(value, int) and not isinstance(value, bool): @@ -853,3 +1198,15 @@ def _coerce_int(value: object, label: str) -> int: if isinstance(value, str): return int(value) raise TypeError(f"Expected integer-compatible value for {label}, got {type(value).__name__}.") + + 
+def _deserialize_json_object(value: object) -> dict[str, object]: + if not isinstance(value, str) or not value: + return {} + try: + payload = json.loads(value) + except json.JSONDecodeError: + return {} + if isinstance(payload, dict): + return payload + return {} diff --git a/src/master_control/tools/registry.py b/src/master_control/tools/registry.py index 6110ff2..74ca014 100644 --- a/src/master_control/tools/registry.py +++ b/src/master_control/tools/registry.py @@ -1,8 +1,9 @@ from __future__ import annotations +from collections.abc import Callable from pathlib import Path -from master_control.config_manager import ConfigManager +from master_control.config_manager import ConfigManager, ConfigTarget from master_control.executor.command_runner import CommandRunner from master_control.tools.base import Tool from master_control.tools.disk_usage import DiskUsageTool @@ -37,9 +38,17 @@ def list_specs(self): return [self._tools[name].spec for name in sorted(self._tools)] -def build_default_registry(state_dir: Path) -> ToolRegistry: +def build_default_registry( + state_dir: Path, + *, + config_target_loader: Callable[[], tuple[ConfigTarget, ...]] | None = None, +) -> ToolRegistry: runner = CommandRunner() - config_manager = ConfigManager(state_dir, runner) + config_manager = ConfigManager( + state_dir, + runner, + target_loader=config_target_loader, + ) registry = ToolRegistry() registry.register(SystemInfoTool()) registry.register(DiskUsageTool()) diff --git a/tests/test_app.py b/tests/test_app.py index 87be545..ad6b182 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -47,7 +47,7 @@ def test_doctor_reports_unavailable_explicit_ollama_provider(self) -> None: app = MasterControlApp(settings) with patch( - "master_control.app.collect_provider_checks", + "master_control.core.runtime.collect_provider_checks", return_value={ "ollama": { "name": "ollama", diff --git a/tests/test_config_tools.py b/tests/test_config_tools.py index 45ab7f2..86831ae 100644 --- 
a/tests/test_config_tools.py +++ b/tests/test_config_tools.py @@ -59,6 +59,9 @@ def test_write_config_file_requires_confirmation_and_creates_backup(self) -> Non self.assertFalse(pending["ok"]) self.assertTrue(pending["pending_confirmation"]) self.assertIn("--confirm", pending["approval"]["cli_command"]) + approval_id = pending["approval"]["id"] + approval = app.get_tool_approval(int(approval_id)) + self.assertEqual(approval["status"], "pending") confirmed = app.run_tool( "write_config_file", @@ -69,6 +72,9 @@ def test_write_config_file_requires_confirmation_and_creates_backup(self) -> Non self.assertTrue(confirmed["result"]["changed"]) self.assertIsNotNone(confirmed["result"]["backup_path"]) self.assertEqual(config_path.read_text(encoding="utf-8"), "[main]\nkey=new\n") + resolved_approval = app.get_tool_approval(int(approval_id)) + self.assertEqual(resolved_approval["status"], "completed") + self.assertTrue(resolved_approval["execution"]["ok"]) def test_restore_config_backup_restores_previous_content(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index ef7e284..0eaa67a 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -11,18 +11,9 @@ class MCPServerTest(unittest.TestCase): - def test_list_tools_only_exposes_read_only_tools(self) -> None: + def test_list_tools_exposes_read_and_write_tools(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: - settings = Settings( - app_name="master-control", - log_level="DEBUG", - provider="heuristic", - state_dir=Path(tmp_dir) / "state", - db_path=Path(tmp_dir) / "state" / "mc.sqlite3", - ) - runtime = MasterControlRuntime(settings) - runtime.bootstrap() - server = MasterControlMCPServer(runtime) + server = self._build_server(Path(tmp_dir)) payload = server._handle_line(json.dumps({"id": 1, "method": "tools/list"})) @@ -33,26 +24,11 @@ def test_list_tools_only_exposes_read_only_tools(self) -> None: assert isinstance(tools, 
list) tool_names = [item["name"] for item in tools if isinstance(item, dict)] self.assertIn("system_info", tool_names) - self.assertNotIn("write_config_file", tool_names) - self.assertTrue( - all( - isinstance(item, dict) and item.get("risk") == "read_only" - for item in tools - ) - ) + self.assertIn("write_config_file", tool_names) def test_tools_call_runs_read_only_tool(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: - settings = Settings( - app_name="master-control", - log_level="DEBUG", - provider="heuristic", - state_dir=Path(tmp_dir) / "state", - db_path=Path(tmp_dir) / "state" / "mc.sqlite3", - ) - runtime = MasterControlRuntime(settings) - runtime.bootstrap() - server = MasterControlMCPServer(runtime) + server = self._build_server(Path(tmp_dir)) payload = server._handle_line( json.dumps( @@ -70,19 +46,15 @@ def test_tools_call_runs_read_only_tool(self) -> None: self.assertEqual(result["tool"], "system_info") self.assertTrue(result["ok"]) - def test_tools_call_blocks_mutating_tool(self) -> None: + def test_tools_call_returns_pending_approval_for_mutating_tool(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: - settings = Settings( - app_name="master-control", - log_level="DEBUG", - provider="heuristic", - state_dir=Path(tmp_dir) / "state", - db_path=Path(tmp_dir) / "state" / "mc.sqlite3", - ) - runtime = MasterControlRuntime(settings) - runtime.bootstrap() - server = MasterControlMCPServer(runtime) + root = Path(tmp_dir) + managed_root = root / "state" / "managed-configs" + managed_root.mkdir(parents=True, exist_ok=True) + config_path = managed_root / "demo.ini" + config_path.write_text("[main]\nkey=old\n", encoding="utf-8") + server = self._build_server(root) payload = server._handle_line( json.dumps( { @@ -90,16 +62,146 @@ def test_tools_call_blocks_mutating_tool(self) -> None: "method": "tools/call", "params": { "name": "write_config_file", - "arguments": {"path": "/tmp/demo.ini", "content": "x"}, + "arguments": { + "path": 
str(config_path), + "content": "[main]\nkey=new\n", + }, + }, + } + ) + ) + + self.assertTrue(payload["ok"]) + result = payload["result"] + assert isinstance(result, dict) + self.assertFalse(result["ok"]) + self.assertTrue(result["pending_confirmation"]) + approval = result["approval"] + assert isinstance(approval, dict) + self.assertEqual(approval["status"], "pending") + self.assertEqual(approval["tool"], "write_config_file") + + def test_approvals_approve_executes_pending_tool(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + root = Path(tmp_dir) + managed_root = root / "state" / "managed-configs" + managed_root.mkdir(parents=True, exist_ok=True) + config_path = managed_root / "demo.ini" + config_path.write_text("[main]\nkey=old\n", encoding="utf-8") + + server = self._build_server(root) + pending = server._handle_line( + json.dumps( + { + "id": "req-3", + "method": "tools/call", + "params": { + "name": "write_config_file", + "arguments": { + "path": str(config_path), + "content": "[main]\nkey=new\n", + }, + }, + } + ) + ) + approval_id = pending["result"]["approval"]["id"] + + listed = server._handle_line( + json.dumps( + { + "id": "req-4", + "method": "approvals/list", + "params": {"status": "pending"}, + } + ) + ) + self.assertTrue(listed["ok"]) + approvals = listed["result"]["approvals"] + assert isinstance(approvals, list) + self.assertEqual(approvals[0]["id"], approval_id) + + approved = server._handle_line( + json.dumps( + { + "id": "req-5", + "method": "approvals/approve", + "params": {"id": approval_id}, + } + ) + ) + + self.assertTrue(approved["ok"]) + result = approved["result"] + assert isinstance(result, dict) + self.assertTrue(result["execution"]["ok"]) + self.assertEqual(result["approval"]["status"], "completed") + self.assertEqual(config_path.read_text(encoding="utf-8"), "[main]\nkey=new\n") + + fetched = server._handle_line( + json.dumps( + { + "id": "req-6", + "method": "approvals/get", + "params": {"id": approval_id}, + } + ) + 
) + self.assertTrue(fetched["ok"]) + self.assertEqual(fetched["result"]["status"], "completed") + + def test_approvals_reject_closes_pending_tool(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + root = Path(tmp_dir) + managed_root = root / "state" / "managed-configs" + managed_root.mkdir(parents=True, exist_ok=True) + config_path = managed_root / "demo.ini" + config_path.write_text("[main]\nkey=old\n", encoding="utf-8") + + server = self._build_server(root) + pending = server._handle_line( + json.dumps( + { + "id": "req-7", + "method": "tools/call", + "params": { + "name": "write_config_file", + "arguments": { + "path": str(config_path), + "content": "[main]\nkey=new\n", + }, }, } ) ) + approval_id = pending["result"]["approval"]["id"] + + rejected = server._handle_line( + json.dumps( + { + "id": "req-8", + "method": "approvals/reject", + "params": {"id": approval_id}, + } + ) + ) + + self.assertTrue(rejected["ok"]) + self.assertEqual(rejected["result"]["status"], "rejected") + self.assertEqual(config_path.read_text(encoding="utf-8"), "[main]\nkey=old\n") - self.assertFalse(payload["ok"]) - error = payload["error"] - assert isinstance(error, dict) - self.assertEqual(error["code"], "invalid_params") + def _build_server(self, root: Path) -> MasterControlMCPServer: + state_dir = root / "state" + settings = Settings( + app_name="master-control", + log_level="DEBUG", + provider="heuristic", + state_dir=state_dir, + db_path=state_dir / "mc.sqlite3", + ) + runtime = MasterControlRuntime(settings) + runtime.bootstrap() + return MasterControlMCPServer(runtime) if __name__ == "__main__": diff --git a/tests/test_mcp_stdio_integration.py b/tests/test_mcp_stdio_integration.py new file mode 100644 index 0000000..e5efa1d --- /dev/null +++ b/tests/test_mcp_stdio_integration.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path +from typing import 
Any + + +class MCPStdioIntegrationTest(unittest.TestCase): + def test_stdio_server_round_trips_initialize_and_write_approval_flow(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) / "state" + managed_root = state_dir / "managed-configs" + managed_root.mkdir(parents=True, exist_ok=True) + config_path = managed_root / "demo.ini" + config_path.write_text("[main]\nkey=old\n", encoding="utf-8") + + with self._start_server(state_dir) as process: + initialize = self._request(process, {"id": 1, "method": "initialize"}) + self.assertTrue(initialize["ok"]) + self.assertEqual(initialize["result"]["server"]["transport"], "stdio") + + listed = self._request(process, {"id": 2, "method": "tools/list"}) + self.assertTrue(listed["ok"]) + tools = listed["result"]["tools"] + tool_names = [item["name"] for item in tools if isinstance(item, dict)] + self.assertIn("system_info", tool_names) + self.assertIn("write_config_file", tool_names) + + read_only = self._request( + process, + { + "id": 3, + "method": "tools/call", + "params": {"name": "system_info", "arguments": {}}, + }, + ) + self.assertTrue(read_only["ok"]) + self.assertTrue(read_only["result"]["ok"]) + + pending = self._request( + process, + { + "id": 4, + "method": "tools/call", + "params": { + "name": "write_config_file", + "arguments": { + "path": str(config_path), + "content": "[main]\nkey=new\n", + }, + }, + }, + ) + self.assertTrue(pending["ok"]) + self.assertFalse(pending["result"]["ok"]) + self.assertTrue(pending["result"]["pending_confirmation"]) + approval_id = pending["result"]["approval"]["id"] + + fetched = self._request( + process, + { + "id": 5, + "method": "approvals/get", + "params": {"id": approval_id}, + }, + ) + self.assertTrue(fetched["ok"]) + self.assertEqual(fetched["result"]["status"], "pending") + + approved = self._request( + process, + { + "id": 6, + "method": "approvals/approve", + "params": {"id": approval_id}, + }, + ) + self.assertTrue(approved["ok"]) + 
self.assertTrue(approved["result"]["execution"]["ok"]) + self.assertEqual(approved["result"]["approval"]["status"], "completed") + self.assertEqual(config_path.read_text(encoding="utf-8"), "[main]\nkey=new\n") + + def _request( + self, + process: subprocess.Popen[str], + payload: dict[str, Any], + ) -> dict[str, Any]: + assert process.stdin is not None + assert process.stdout is not None + process.stdin.write(json.dumps(payload) + "\n") + process.stdin.flush() + line = process.stdout.readline() + if not line: + stderr = "" + if process.stderr is not None: + stderr = process.stderr.read() + raise AssertionError(f"MCP server closed the pipe unexpectedly. stderr={stderr!r}") + return json.loads(line) + + def _start_server(self, state_dir: Path): + env = os.environ.copy() + env["MC_STATE_DIR"] = str(state_dir) + env["MC_DB_PATH"] = str(state_dir / "mc.sqlite3") + env["MC_PROVIDER"] = "heuristic" + command = [sys.executable, "-m", "master_control", "mcp-serve"] + return _ManagedProcess( + subprocess.Popen( + command, + cwd=Path(__file__).resolve().parents[1], + env=env, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + ) + + +class _ManagedProcess: + def __init__(self, process: subprocess.Popen[str]) -> None: + self.process = process + + def __enter__(self) -> subprocess.Popen[str]: + return self.process + + def __exit__(self, exc_type, exc, tb) -> None: + if self.process.stdin is not None: + self.process.stdin.close() + self.process.terminate() + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.process.kill() + self.process.wait(timeout=5) + if self.process.stdout is not None: + self.process.stdout.close() + if self.process.stderr is not None: + self.process.stderr.close() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_policy.py b/tests/test_policy.py index 0802b83..ff5781a 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -1,6 +1,8 @@ from __future__ 
import annotations +import tempfile import unittest +from pathlib import Path from master_control.policy.engine import PolicyEngine from master_control.tools.base import RiskLevel, ToolSpec @@ -8,7 +10,15 @@ class PolicyEngineTest(unittest.TestCase): def setUp(self) -> None: - self.engine = PolicyEngine() + self.tmp_dir = tempfile.TemporaryDirectory() + state_dir = Path(self.tmp_dir.name) + self.engine = PolicyEngine( + state_dir=state_dir, + policy_path=state_dir / "policy.toml", + ) + + def tearDown(self) -> None: + self.tmp_dir.cleanup() def test_read_only_does_not_require_confirmation(self) -> None: spec = ToolSpec( diff --git a/tests/test_runtime_policy_integration.py b/tests/test_runtime_policy_integration.py new file mode 100644 index 0000000..1e7915a --- /dev/null +++ b/tests/test_runtime_policy_integration.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import tempfile +import textwrap +import unittest +from pathlib import Path + +from master_control.app import MasterControlApp +from master_control.config import Settings + + +class RuntimePolicyIntegrationTest(unittest.TestCase): + def test_policy_can_disable_tool_execution(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) + policy_path = state_dir / "policy.toml" + policy_path.write_text( + textwrap.dedent( + """ + version = 1 + + [tools.system_info] + enabled = false + """ + ).strip() + + "\n", + encoding="utf-8", + ) + + app = self._build_app(state_dir, policy_path=policy_path) + payload = app.run_tool("system_info") + + self.assertFalse(payload["ok"]) + self.assertIn("disabled by operator policy", payload["error"]) + self.assertTrue(app.doctor()["policy_diagnostics"]["ok"]) + + def test_policy_can_require_confirmation_for_read_only_tool(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) + policy_path = state_dir / "policy.toml" + policy_path.write_text( + textwrap.dedent( + """ + version = 1 + + 
[tools.system_info] + require_confirmation = true + """ + ).strip() + + "\n", + encoding="utf-8", + ) + + app = self._build_app(state_dir, policy_path=policy_path) + pending = app.run_tool("system_info") + + self.assertFalse(pending["ok"]) + self.assertTrue(pending["pending_confirmation"]) + + confirmed = app.run_tool("system_info", confirmed=True) + self.assertTrue(confirmed["ok"]) + + def test_policy_can_constrain_service_targets_before_execution(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) + policy_path = state_dir / "policy.toml" + policy_path.write_text( + textwrap.dedent( + """ + version = 1 + + [tools.restart_service] + allowed_scopes = ["system"] + service_patterns = ["demo.service"] + """ + ).strip() + + "\n", + encoding="utf-8", + ) + + app = self._build_app(state_dir, policy_path=policy_path) + payload = app.run_tool( + "restart_service", + {"name": "nginx.service", "scope": "system"}, + confirmed=True, + ) + + self.assertFalse(payload["ok"]) + self.assertIn("limited by policy", payload["error"]) + + def test_policy_can_define_custom_config_targets(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) + policy_path = state_dir / "policy.toml" + custom_root = state_dir / "custom-configs" + custom_root.mkdir(parents=True, exist_ok=True) + config_path = custom_root / "service.ini" + config_path.write_text("[service]\nmode=old\n", encoding="utf-8") + + policy_path.write_text( + textwrap.dedent( + """ + version = 1 + + [[config_targets]] + name = "custom_ini" + description = "Operator-managed custom INI files." 
+ roots = ["$STATE_DIR/custom-configs"] + file_globs = ["*.ini"] + validator = "ini_parse" + """ + ).strip() + + "\n", + encoding="utf-8", + ) + + app = self._build_app(state_dir, policy_path=policy_path) + payload = app.run_tool( + "write_config_file", + {"path": str(config_path), "content": "[service]\nmode=new\n"}, + confirmed=True, + ) + + self.assertTrue(payload["ok"]) + self.assertEqual(payload["result"]["target"], "custom_ini") + self.assertEqual(config_path.read_text(encoding="utf-8"), "[service]\nmode=new\n") + + def test_invalid_policy_fails_closed_and_surfaces_in_doctor(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + state_dir = Path(tmp_dir) + policy_path = state_dir / "policy.toml" + policy_path.write_text('version = "broken"\n', encoding="utf-8") + + app = self._build_app(state_dir, policy_path=policy_path) + doctor = app.doctor() + payload = app.run_tool("system_info") + + self.assertFalse(doctor["ok"]) + self.assertFalse(doctor["policy_diagnostics"]["ok"]) + self.assertIn("must be an integer", doctor["policy_diagnostics"]["error"]) + self.assertFalse(payload["ok"]) + self.assertIn("Policy load error", payload["error"]) + + def _build_app(self, state_dir: Path, *, policy_path: Path) -> MasterControlApp: + settings = Settings( + app_name="master-control", + log_level="INFO", + provider="none", + state_dir=state_dir, + db_path=state_dir / "mc.sqlite3", + policy_path=policy_path, + ) + return MasterControlApp(settings) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_session_store.py b/tests/test_session_store.py index 2d9ad31..b63ec80 100644 --- a/tests/test_session_store.py +++ b/tests/test_session_store.py @@ -42,6 +42,72 @@ def test_store_connections_enable_foreign_keys(self) -> None: ) connection.commit() + def test_tool_approval_lifecycle_is_persisted(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + store = SessionStore(Path(tmp_dir) / "mc.sqlite3") + store.initialize() + + created = 
store.create_tool_approval( + tool_name="write_config_file", + risk="mutating_safe", + arguments={"path": "/tmp/demo.ini", "content": "key=value\n"}, + audit_context={"source": "test"}, + summary="Confirme a execução.", + cli_command="mc tool write_config_file --confirm", + chat_command="/tool write_config_file confirm", + ) + + self.assertEqual(created["status"], "pending") + self.assertIsNone(created["execution"]) + + claimed = store.claim_tool_approval(int(created["id"])) + assert claimed is not None + self.assertEqual(claimed["status"], "executing") + + finalized = store.finish_tool_approval( + int(created["id"]), + status="completed", + execution_payload={"ok": True, "result": {"changed": True}}, + ) + assert finalized is not None + self.assertEqual(finalized["status"], "completed") + self.assertEqual(finalized["execution"], {"ok": True, "result": {"changed": True}}) + + def test_claim_latest_matching_tool_approval_selects_pending_request(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + store = SessionStore(Path(tmp_dir) / "mc.sqlite3") + store.initialize() + + first = store.create_tool_approval( + tool_name="write_config_file", + risk="mutating_safe", + arguments={"path": "/tmp/demo.ini", "content": "old"}, + audit_context={"source": "test"}, + summary="first", + cli_command="first", + chat_command="first", + ) + second = store.create_tool_approval( + tool_name="write_config_file", + risk="mutating_safe", + arguments={"path": "/tmp/demo.ini", "content": "old"}, + audit_context={"source": "test"}, + summary="second", + cli_command="second", + chat_command="second", + ) + + claimed = store.claim_latest_matching_tool_approval( + tool_name="write_config_file", + arguments={"path": "/tmp/demo.ini", "content": "old"}, + audit_context={"source": "test"}, + ) + + assert claimed is not None + self.assertEqual(claimed["id"], second["id"]) + self.assertEqual(claimed["status"], "executing") + 
self.assertEqual(store.get_tool_approval(int(first["id"]))["status"], "pending") + if __name__ == "__main__": unittest.main()