uuhan · uuhan · Apr 11, 2026 · Apr 11, 2026 · Apr 11, 2026 · Apr 11, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -10,6 +10,17 @@ env:
   CARGO_TERM_COLOR: always
 
 jobs:
+  agent-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build cargo-work for CLI regression checks
+        run: cargo build --bin cargo-work
+      - name: Agent docs consistency checks
+        run: python3 scripts/check-agent-docs.py
+      - name: Agent regression checks
+        run: python3 scripts/check-agent-regression.py
+
   build-linux:
     runs-on: ubuntu-latest
 

diff --git a/AGENTS.md b/AGENTS.md
@@ -50,6 +50,8 @@ Notes:
 - Add regression tests for protocol, IPC, and path-handling fixes (common failure areas).
 
 ## Project Skills
+- `./AI_AGENT.md`: single entry point for AI agents, covering routing rules, standard commands, success criteria, and safety boundaries.
+- `./skills/index.json`: machine-readable skill index for deterministic task-to-skill routing and risk classification.
 - `./skills/workhorse/SKILL.md`: top-level dispatcher for this repo; use first when the task broadly mentions Workhorse.
 - `./skills/workhorse-cargo-work/SKILL.md`: entry skill for client-side `cargo work` usage.
 - `./skills/workhorse-remote-build/SKILL.md`: remote Cargo and `just` build/test/lint/run workflows.

diff --git a/AI_AGENT.md b/AI_AGENT.md
@@ -0,0 +1,60 @@
+# AI Agent Entry (Claude Code / Codex)
+
+This file is the single entry point for AI agents operating on this repository.
+
+## Primary Routing
+
+1. Read `skills/index.json`.
+2. Classify the task into exactly one of these domains:
+   - `cargo-work` client workflow
+   - `horsed` server workflow
+   - cross-boundary workflow
+3. Dispatch to the matching skill in `skills/`.
+4. Use standard playbooks in `docs/agent-playbooks.md`.
+
+## Task Classification -> Skill
+
+- General Workhorse triage -> `skills/workhorse/SKILL.md`
+- `cargo work` remote build/test/check/clippy/run/just -> `skills/workhorse-remote-build/SKILL.md`
+- `cargo work ssh` / raw remote commands / forwarding / proxy -> `skills/workhorse-remote-access/SKILL.md`
+- `get/scp/push/pull/ping/health/logs/job` -> `skills/workhorse-artifact-sync/SKILL.md`
+- `horsed` bootstrap / first user / setup mode -> `skills/workhorse-horsed-setup/SKILL.md`
+- `horsed` ops / service manager / troubleshooting -> `skills/workhorse-horsed-ops/SKILL.md`
+- `horsed` code, migration, protocol, tests -> `skills/workhorse-horsed-dev/SKILL.md`
+
+## Standard Commands
+
+- Build binaries: `cargo build --bin cargo-work --bin horsed`
+- Workspace tests: `cargo test --verbose`
+- Health (human): `cargo work health`
+- Health (machine): `cargo work health --json`
+- Logs: `cargo work logs` / `cargo work logs -f`
+- Jobs: `cargo work job list` / `cargo work job attach <job_id> -f`
+
+## Success Criteria
+
+- Build/test tasks: command exits with code `0`.
+- Remote build tasks: expected artifact exists on remote and can be fetched with `cargo work get`.
+- Health check (JSON mode): output parses as JSON and includes the `status`, `protocol`, and `ulimit_nofile` fields (`ulimit_nofile` is `null` when the limit is unknown).
+- Ops tasks: service state and logs match expected behavior.
+
+## Safety Boundaries
+
+- Low risk: read-only inspection (`ping`, `health`, `logs`, `job list`).
+- Medium risk: remote command execution, file sync, interactive shell, forwarding/proxy.
+- High risk: `horsed --dangerous`, service restart/replace, user/key admin mutation.
+
+## Confirmation Policy
+
+Require explicit confirmation from the user before high-risk actions:
+
+- enabling `--dangerous`
+- restarting/stopping production `horsed`
+- overwriting binaries/state in remote deploy paths
+- destructive user/key/admin operations
+
+## Related Assets
+
+- Agent skill index: `skills/index.json`
+- Agent playbooks: `docs/agent-playbooks.md`
+- Human-oriented guidance: `README.md`, `README.en.md`, `AGENTS.md`
diff --git a/README.en.md b/README.en.md
@@ -28,6 +28,12 @@ Recommended quick start:
 
 If you add or rename any skill, update the Project Skills section in `AGENTS.md` in the same change so docs and directory structure stay aligned.
 
+### AI Agent Entry (Claude Code / Codex)
+
+- Unified entry: `AI_AGENT.md`
+- Machine-readable skill index: `skills/index.json`
+- Standard task playbooks: `docs/agent-playbooks.md`
+
 ### Supported Platforms
 
 - Linux
@@ -269,6 +275,8 @@ cargo work health
 RUST_LOG=info cargo work health
 # For trace-stage diagnostics:
 RUST_LOG=info WH_DEBUG=1 cargo work health
+# Machine-readable output (recommended for AI agents):
+cargo work health --json
 ```
 
 Admins can manage users and public keys with the `admin` subcommand:

diff --git a/README.md b/README.md
@@ -32,6 +32,12 @@ n. 驮马，做粗工者，重负荷机器
 
 如果你在扩展工作流，请在新增或重命名 skill 后同步更新 `AGENTS.md` 的 Project Skills 列表，保持入口文档和实际目录一致。
 
+### AI Agent 入口（Claude Code / Codex）
+
+- 统一入口：`AI_AGENT.md`
+- 机器可读技能索引：`skills/index.json`
+- 标准任务配方：`docs/agent-playbooks.md`
+
 ### 支持的平台
 
 - Linux
@@ -265,6 +271,8 @@ cargo work health
 RUST_LOG=info cargo work health
 # 排障时可附加 trace 流：
 RUST_LOG=info WH_DEBUG=1 cargo work health
+# 机器可读输出（推荐给 AI Agent）：
+cargo work health --json
 ```
 
 管理员可以使用 `admin` 子命令管理用户和公钥：

diff --git a/cargo-work/src/command/health.rs b/cargo-work/src/command/health.rs
@@ -3,6 +3,7 @@ use crate::options::HealthOptions;
 use color_eyre::eyre::WrapErr;
 use color_eyre::eyre::{anyhow, ContextCompat, Result};
 use git2::Repository;
+use serde_json::json;
 use stable::data::v2::{self, Body};
 use std::path::Path;
 use tokio::io::AsyncWriteExt;
@@ -14,7 +15,7 @@ pub async fn run(sk: &Path, mut options: HealthOptions) -> Result<()> {
     super::log_stage(&trace_id, action, "resolve.start");
     let repo = Repository::discover(".")?;
 
-    if let Some(remote) = options.remote {
+    if let Some(remote) = options.host {
         options.horse.remote.replace(remote);
     }
 
@@ -38,7 +39,9 @@ pub async fn run(sk: &Path, mut options: HealthOptions) -> Result<()> {
         match call_health_once(sk, &options.horse, host, &trace_id, Body::HealthCheckV2).await {
             Ok(body) => body,
             Err(err) => {
-                tracing::warn!("health v2 失败, 回退到 v1: {}", err);
+                if !options.json {
+                    tracing::warn!("health v2 失败, 回退到 v1: {}", err);
+                }
                 call_health_once(sk, &options.horse, host, &trace_id, Body::HealthCheck).await?
             }
         };
@@ -52,25 +55,49 @@ pub async fn run(sk: &Path, mut options: HealthOptions) -> Result<()> {
             family,
             default_shell,
         } => {
-            tracing::info!("Health OK.");
-            tracing::info!("Server version: {} ({})", version, commit);
-            tracing::info!("Server OS: {} / {} ({})", os, arch, family);
-            tracing::info!(
-                "Server default shell: {}",
-                default_shell.unwrap_or_else(|| "unknown".to_string())
-            );
-            if let Some(lim) = ulimit {
-                tracing::info!("Server ulimit -n: {}", lim);
+            if options.json {
+                let out = json!({
+                    "status": "ok",
+                    "protocol": "v2",
+                    "version": version,
+                    "commit": commit,
+                    "os": os,
+                    "arch": arch,
+                    "family": family,
+                    "default_shell": default_shell.unwrap_or_else(|| "unknown".to_string()),
+                    "ulimit_nofile": ulimit,
+                });
+                println!("{}", serde_json::to_string_pretty(&out)?);
             } else {
-                tracing::info!("Server ulimit -n: unknown");
+                tracing::info!("Health OK.");
+                tracing::info!("Server version: {} ({})", version, commit);
+                tracing::info!("Server OS: {} / {} ({})", os, arch, family);
+                tracing::info!(
+                    "Server default shell: {}",
+                    default_shell.unwrap_or_else(|| "unknown".to_string())
+                );
+                if let Some(lim) = ulimit {
+                    tracing::info!("Server ulimit -n: {}", lim);
+                } else {
+                    tracing::info!("Server ulimit -n: unknown");
+                }
             }
         }
         Body::HealthStatus { ulimit } => {
-            tracing::info!("Health OK (legacy).");
-            if let Some(lim) = ulimit {
-                tracing::info!("Server ulimit -n: {}", lim);
+            if options.json {
+                let out = json!({
+                    "status": "ok",
+                    "protocol": "v1",
+                    "ulimit_nofile": ulimit,
+                });
+                println!("{}", serde_json::to_string_pretty(&out)?);
             } else {
-                tracing::info!("Server ulimit -n: unknown");
+                tracing::info!("Health OK (legacy).");
+                if let Some(lim) = ulimit {
+                    tracing::info!("Server ulimit -n: {}", lim);
+                } else {
+                    tracing::info!("Server ulimit -n: unknown");
+                }
             }
         }
         _ => {

diff --git a/cargo-work/src/command/ping.rs b/cargo-work/src/command/ping.rs
@@ -16,7 +16,7 @@ pub async fn run(sk: &Path, mut options: PingOptions) -> Result<()> {
     let repo = Repository::discover(".")?;
     let head = repo.head()?;
 
-    if let Some(remote) = options.remote {
+    if let Some(remote) = options.host {
         // arg comes first
         options.horse.remote.replace(remote);
     }

diff --git a/cargo-work/src/main.rs b/cargo-work/src/main.rs
@@ -219,7 +219,7 @@ async fn main() -> Result<()> {
                 let options = PingOptions {
                     horse: horse.clone(),
                     count: Some(3),
-                    remote: None,
+                    host: None,
                 };
                 if let Err(err) = ping::run(&key, options).await {
                     tracing::error!("执行失败: {}", err);

diff --git a/cargo-work/src/options.rs b/cargo-work/src/options.rs
@@ -230,7 +230,8 @@ pub struct PingOptions {
     pub horse: HorseOptions,
     #[clap(short, long, help = "指定次数")]
     pub count: Option<u32>,
-    pub remote: Option<String>,
+    #[clap(value_name = "REMOTE")]
+    pub host: Option<String>,
 }
 
 #[derive(Clone, Debug, Args)]
@@ -261,7 +262,10 @@ pub struct JustOptions {
 pub struct HealthOptions {
     #[clap(flatten)]
     pub horse: HorseOptions,
-    pub remote: Option<String>,
+    #[clap(value_name = "REMOTE")]
+    pub host: Option<String>,
+    #[clap(long, help = "以 JSON 格式输出健康信息")]
+    pub json: bool,
 }
 
 #[derive(Clone, Debug, Args)]

diff --git a/docs/agent-playbooks.md b/docs/agent-playbooks.md
@@ -0,0 +1,80 @@
+# Agent Playbooks
+
+## 1) Remote Build Playbook
+
+### Preconditions
+- Target server is resolvable via `--repo`, `--repo-name`, or git remote `horsed`.
+- SSH key is available (`--ssh-key` or default key path).
+
+### Steps
+1. `cargo work ping --count 1`
+2. `cargo work build --release` (or `cargo work test` for test tasks)
+3. If needed, attach output: `cargo work job list` then `cargo work job attach <job_id> -f`
+
+### Fallback
+- If repo/host resolve fails: provide `--repo ssh://git@HOST:2222/ns/repo.git`.
+- If command hangs: retry with `RUST_LOG=info WH_DEBUG=1` to collect staged traces.
+
+### Acceptance Signals
+- Exit code is `0`.
+- Cargo's output reports successful completion.
+
+## 2) Horsed Deploy Playbook
+
+### Preconditions
+- Remote branch is up to date.
+- Explicit user confirmation has been obtained if a service restart is required.
+
+### Steps (Linux/macOS)
+1. `just install-work`
+2. `HORSED_SHELL=/bin/bash cargo work just install-horsed`
+3. `HORSED_SHELL=/bin/bash cargo work -- systemctl --user restart horsed`
+4. `cargo work health --json`
+
+### Steps (Windows)
+1. `just install-work`
+2. `HORSED_SHELL=powershell.exe cargo work just deploy-horsed`
+3. `cargo work health --json`
+
+### Fallback
+- If `nu` is missing on the server, set `HORSED_SHELL` to `/bin/bash`, `/bin/sh`, or `powershell.exe`.
+- If post-restart health fails, inspect: `cargo work logs -f`.
+
+### Acceptance Signals
+- `health --json` returns parseable JSON with `status: "ok"`.
+
+## 3) Artifact Retrieval Playbook
+
+### Preconditions
+- Remote build already completed.
+
+### Steps
+1. Retrieve a file: `cargo work get target/release/<artifact> -f`
+2. Retrieve a directory: `cargo work get target -f`
+3. Alternative stream copy: `cargo work scp <remote_file> <local_file>`
+
+### Fallback
+- If local file exists and retrieval fails, use `-f` or `--outfile`.
+
+### Acceptance Signals
+- Retrieved path exists locally.
+- Artifact checksum/size matches expected output (when available).
+
+## 4) Health/Logs Troubleshooting Playbook
+
+### Preconditions
+- Server is reachable.
+
+### Steps
+1. `cargo work ping --count 3`
+2. `cargo work health --json`
+3. `cargo work logs` (or `cargo work logs -f`)
+4. `cargo work job list`
+
+### Fallback
+- If health output seems empty in normal mode: `RUST_LOG=info cargo work health`
+- For deeper traces: `RUST_LOG=info WH_DEBUG=1 cargo work health`
+
+### Acceptance Signals
+- JSON includes the stable fields `status`, `protocol`, and `ulimit_nofile` (`ulimit_nofile` may be `null` when the limit is unknown).
+- Logs show expected service state transitions.
diff --git a/horsed/src/ssh/mod.rs b/horsed/src/ssh/mod.rs
@@ -883,10 +883,18 @@ impl AppServer {
                                         // 渐进退避: 前几次快速重试, 之后逐渐放慢
                                         let wait = match idle_count {
                                             1..=3 => tokio::task::yield_now().await,
-                                            4..=20 => tokio::time::sleep(
-                                                std::time::Duration::from_micros(100)).await,
-                                            _ => tokio::time::sleep(
-                                                std::time::Duration::from_millis(1)).await,
+                                            4..=20 => {
+                                                tokio::time::sleep(
+                                                    std::time::Duration::from_micros(100),
+                                                )
+                                                .await
+                                            }
+                                            _ => {
+                                                tokio::time::sleep(
+                                                    std::time::Duration::from_millis(1),
+                                                )
+                                                .await
+                                            }
                                         };
                                     }
                                     Ok(buf) => {

diff --git a/justfile b/justfile
@@ -38,3 +38,7 @@ changes:
 get-release:
   cargo work get ./target/release/cargo-work -f
   cargo work get ./target/release/horsed -f
+
+agent-check:
+  @python3 scripts/check-agent-docs.py
+  @python3 scripts/check-agent-regression.py