From d6db9a3db9a9fed257a5f61bbb43e4cd86ac47c3 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 28 Jan 2026 17:54:51 +0200 Subject: [PATCH 01/23] Make subagent model customizable --- cli/src/commands/agent/run/mcp_init.rs | 7 +- cli/src/commands/agent/run/mode_async.rs | 1 + .../commands/agent/run/mode_interactive.rs | 1 + cli/src/main.rs | 8 +-- cli/subagents.toml | 38 ++++------- libs/mcp/server/src/lib.rs | 30 ++++++--- libs/mcp/server/src/subagent_tools.rs | 5 ++ libs/shared/src/models/subagent.rs | 66 +++++++++++++++++++ 8 files changed, 118 insertions(+), 38 deletions(-) diff --git a/cli/src/commands/agent/run/mcp_init.rs b/cli/src/commands/agent/run/mcp_init.rs index 843f95dd..357b1bd1 100644 --- a/cli/src/commands/agent/run/mcp_init.rs +++ b/cli/src/commands/agent/run/mcp_init.rs @@ -17,6 +17,7 @@ use stakpak_mcp_proxy::server::start_proxy_server; use stakpak_mcp_server::{EnabledToolsConfig, MCPServerConfig, ToolMode, start_server}; use stakpak_shared::cert_utils::CertificateChain; use stakpak_shared::models::integrations::openai::ToolCallResultProgress; +use stakpak_shared::models::subagent::SubagentConfigs; use std::collections::HashMap; use std::sync::Arc; use tokio::net::TcpListener; @@ -34,6 +35,8 @@ pub struct McpInitConfig { pub enabled_tools: EnabledToolsConfig, /// Whether to enable mTLS for secure communication pub enable_mtls: bool, + /// Subagent configurations + pub subagent_configs: Option, } impl Default for McpInitConfig { @@ -43,6 +46,7 @@ impl Default for McpInitConfig { privacy_mode: false, enabled_tools: EnabledToolsConfig { slack: false }, enable_mtls: true, + subagent_configs: None, } } } @@ -126,6 +130,7 @@ async fn start_mcp_server( let redact_secrets = mcp_config.redact_secrets; let privacy_mode = mcp_config.privacy_mode; let enabled_tools = mcp_config.enabled_tools.clone(); + let subagent_configs = mcp_config.subagent_configs.clone(); tokio::spawn(async move { let server_config = MCPServerConfig { @@ -135,7 +140,7 @@ async fn 
start_mcp_server( privacy_mode, enabled_tools, tool_mode: ToolMode::Combined, - subagent_configs: None, + subagent_configs, certificate_chain: cert_chain, }; diff --git a/cli/src/commands/agent/run/mode_async.rs b/cli/src/commands/agent/run/mode_async.rs index f4293a11..abe1f956 100644 --- a/cli/src/commands/agent/run/mode_async.rs +++ b/cli/src/commands/agent/run/mode_async.rs @@ -60,6 +60,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str privacy_mode: config.privacy_mode, enabled_tools: config.enabled_tools.clone(), enable_mtls: config.enable_mtls, + subagent_configs: config.subagent_configs.clone(), }; let mcp_init_result = initialize_mcp_server_and_tools(&ctx, mcp_init_config, None).await?; let mcp_client = mcp_init_result.client; diff --git a/cli/src/commands/agent/run/mode_interactive.rs b/cli/src/commands/agent/run/mode_interactive.rs index 80a8d78f..e06b3ce5 100644 --- a/cli/src/commands/agent/run/mode_interactive.rs +++ b/cli/src/commands/agent/run/mode_interactive.rs @@ -189,6 +189,7 @@ pub async fn run_interactive( privacy_mode, enabled_tools: enabled_tools.clone(), enable_mtls, + subagent_configs: subagent_configs.clone(), }; let (mcp_client, mcp_tools, _tools, _server_shutdown_tx, _proxy_shutdown_tx) = match mcp_init::initialize_mcp_server_and_tools( diff --git a/cli/src/main.rs b/cli/src/main.rs index 553bab4f..9ec64ac9 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -94,9 +94,9 @@ struct Cli { #[arg(long = "disable-mcp-mtls", default_value_t = false)] disable_mcp_mtls: bool, - /// Enable subagents - #[arg(long = "enable-subagents", default_value_t = false)] - enable_subagents: bool, + /// Disable subagents + #[arg(long = "disable-subagents", default_value_t = false)] + disable_subagents: bool, /// Subagent configuration file subagents.toml #[arg(long = "subagent-config")] @@ -331,7 +331,7 @@ async fn main() { let _ = update_result; let rulebooks = rulebooks_result; - let subagent_configs = if 
cli.enable_subagents { + let subagent_configs = if !cli.disable_subagents { if let Some(subagent_config_path) = &cli.subagent_config_path { SubagentConfigs::load_from_file(subagent_config_path) .map_err(|e| { diff --git a/cli/subagents.toml b/cli/subagents.toml index 0f0c1b4e..8084fbee 100644 --- a/cli/subagents.toml +++ b/cli/subagents.toml @@ -1,34 +1,23 @@ -[subagents.ResearchAgent] +[subagents.DiscoveryAgent] description = """ -Fast research agent for code exploration, documentation lookup, and analysis. Ideal for understanding codebases, searching documentation, and gathering information without executing commands. Uses read-only tools for quick insights. -""" -max_steps = 20 -allowed_tools = [ - "view", - "local_code_search", - "search_docs", - "search_memory", - "read_rulebook", -] - -[subagents.SandboxResearchAgent] -description = """ -Secure research agent that combines code analysis with safe command execution in an isolated container environment. -Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining security. +Research and Discovery agent with safe command execution in an isolated read-only environment. +Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining safety. Includes read-only access to cloud credentials (AWS, GCP, Azure, DigitalOcean) and your working directory. -Note: Slower than ResearchAgent due to containerization overhead, but provides additional command execution capabilities. 
+Tools: stakpak__run_command, stakpak__view, stakpak__search_docs, stakpak__search_memory, stakpak__read_rulebook, stakpak__paks__search_paks, stakpak__paks__get_pak_content """ +model = "eco" max_steps = 30 allowed_tools = [ - "run_command", - "view", - "local_code_search", - "search_docs", - "search_memory", - "read_rulebook", + "stakpak__run_command", + "stakpak__view", + "stakpak__search_docs", + "stakpak__search_memory", + "stakpak__read_rulebook", + "stakpak__paks__search_paks", + "stakpak__paks__get_pak_content", ] -[subagents.SandboxResearchAgent.warden] +[subagents.DiscoveryAgent.warden] enabled = true volumes = [ # stakpak config @@ -36,6 +25,7 @@ volumes = [ # working directory "./:/agent:ro", + "./.stakpak:/agent/.stakpak", # cloud credentials "~/.aws:/home/agent/.aws:ro", diff --git a/libs/mcp/server/src/lib.rs b/libs/mcp/server/src/lib.rs index 4815456c..7fbc7e3e 100644 --- a/libs/mcp/server/src/lib.rs +++ b/libs/mcp/server/src/lib.rs @@ -226,21 +226,33 @@ fn build_tool_container( task_manager_handle: Arc, ) -> Result { let tool_container = match config.tool_mode { - ToolMode::LocalOnly => ToolContainer::new( - None, - config.redact_secrets, - config.privacy_mode, - config.enabled_tools.clone(), - task_manager_handle.clone(), - config.subagent_configs.clone(), - ToolContainer::tool_router_local(), - ), + ToolMode::LocalOnly => { + let mut tool_router = ToolContainer::tool_router_local(); + + if config.subagent_configs.is_some() { + tool_router += ToolContainer::tool_router_subagent(); + } + + ToolContainer::new( + None, + config.redact_secrets, + config.privacy_mode, + config.enabled_tools.clone(), + task_manager_handle.clone(), + config.subagent_configs.clone(), + tool_router, + ) + } ToolMode::RemoteOnly => { let mut tool_router = ToolContainer::tool_router_remote(); if config.enabled_tools.slack { tool_router += ToolContainer::tool_router_slack(); } + if config.subagent_configs.is_some() { + tool_router += ToolContainer::tool_router_subagent(); + } 
+ ToolContainer::new( config.client.clone(), config.redact_secrets, diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index d3248cd3..ecb4c52a 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -132,6 +132,11 @@ The subagent runs asynchronously in the background. This tool returns immediatel prompt_file_path, subagent_config.max_steps ); + // Add model flag if specified + if let Some(model) = &subagent_config.model { + command.push_str(&format!(" --model {}", model)); + } + for tool in &subagent_config.allowed_tools { command.push_str(&format!(" -t {}", tool)); } diff --git a/libs/shared/src/models/subagent.rs b/libs/shared/src/models/subagent.rs index 488e76cd..f03c4ce7 100644 --- a/libs/shared/src/models/subagent.rs +++ b/libs/shared/src/models/subagent.rs @@ -14,6 +14,9 @@ pub struct SubagentConfig { pub description: String, pub max_steps: usize, pub allowed_tools: Vec, + /// Model to use for the subagent (e.g., "eco", "smart") + #[serde(default)] + pub model: Option, #[serde(default)] pub warden: Option, } @@ -67,3 +70,66 @@ impl SubagentConfigs { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_subagent_config_with_model() { + let content = r#" +[subagents.DiscoveryAgent] +description = "Test agent" +model = "eco" +max_steps = 30 +allowed_tools = ["stakpak__run_command", "stakpak__view"] + +[subagents.DiscoveryAgent.warden] +enabled = true +volumes = ["./:/agent:ro"] +"#; + let configs = SubagentConfigs::load_from_str(content) + .expect("Failed to parse subagent config"); + + let agent = configs.get_config("DiscoveryAgent") + .expect("DiscoveryAgent not found"); + + assert_eq!(agent.model, Some("eco".to_string())); + assert_eq!(agent.max_steps, 30); + assert_eq!(agent.allowed_tools, vec!["stakpak__run_command", "stakpak__view"]); + assert!(agent.warden.is_some()); + } + + #[test] + fn test_parse_subagent_config_without_model() { + let content = 
r#" +[subagents.BasicAgent] +description = "Agent without model" +max_steps = 10 +allowed_tools = ["stakpak__view"] +"#; + let configs = SubagentConfigs::load_from_str(content) + .expect("Failed to parse subagent config"); + + let agent = configs.get_config("BasicAgent") + .expect("BasicAgent not found"); + + assert_eq!(agent.model, None); + assert_eq!(agent.max_steps, 10); + } + + #[test] + fn test_parse_default_subagents_toml() { + // Test parsing the actual default config + let content = include_str!("../../../../cli/subagents.toml"); + let configs = SubagentConfigs::load_from_str(content) + .expect("Failed to parse default subagents.toml"); + + let discovery = configs.get_config("DiscoveryAgent") + .expect("DiscoveryAgent not found in default config"); + + assert_eq!(discovery.model, Some("eco".to_string())); + assert!(discovery.allowed_tools.contains(&"stakpak__run_command".to_string())); + assert!(discovery.allowed_tools.contains(&"stakpak__view".to_string())); + } +} From ecece7e5a3789dbe65795ffc9109083053bfa797 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 17:29:37 +0200 Subject: [PATCH 02/23] Add session id namespaced subagent temp data --- libs/mcp/server/src/subagent_tools.rs | 53 ++++++++++++++++----------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index ecb4c52a..29a7ff95 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -2,8 +2,8 @@ use std::path::Path; use crate::tool_container::ToolContainer; use rmcp::{ - ErrorData as McpError, handler::server::wrapper::Parameters, model::*, schemars, tool, - tool_router, + ErrorData as McpError, RoleServer, handler::server::wrapper::Parameters, model::*, schemars, + service::RequestContext, tool, tool_router, }; use serde::{Deserialize, Serialize}; use serde_json::json; @@ -58,21 +58,24 @@ The subagent runs asynchronously in the background. 
This tool returns immediatel )] pub async fn subagent_task( &self, + ctx: RequestContext, Parameters(TaskRequest { description, prompt, subagent_type, }): Parameters, ) -> Result { - let subagent_command = match self.build_subagent_command(&prompt, &subagent_type) { - Ok(command) => command, - Err(e) => { - return Ok(CallToolResult::error(vec![Content::text(format!( - "COMMAND_BUILD_FAILED: Failed to build subagent command: {}", - e - ))])); - } - }; + let session_id = self.get_session_id(&ctx); + let subagent_command = + match self.build_subagent_command(&prompt, &subagent_type, session_id.as_deref()) { + Ok(command) => command, + Err(e) => { + return Ok(CallToolResult::error(vec![Content::text(format!( + "COMMAND_BUILD_FAILED: Failed to build subagent command: {}", + e + ))])); + } + }; // Start the subagent as a background task using existing task manager let task_info = match self @@ -99,6 +102,7 @@ The subagent runs asynchronously in the background. This tool returns immediatel &self, prompt: &str, subagent_type: &str, + session_id: Option<&str>, ) -> Result { let subagent_config = if let Some(subagent_configs) = self.get_subagent_configs() { subagent_configs.get_config(subagent_type) @@ -113,19 +117,24 @@ The subagent runs asynchronously in the background. 
This tool returns immediatel })?; let prompt_filename = format!("prompt_{}.txt", Uuid::new_v4()); - let prompt_file_path = LocalStore::write_session_data( - Path::new("subagents") + let prompt_subpath = match session_id { + Some(sid) => Path::new(sid) + .join("subagents") .join(&prompt_filename) .to_string_lossy() - .as_ref(), - prompt, - ) - .map_err(|e| { - McpError::internal_error( - "Failed to create prompt file", - Some(json!({"error": e.to_string()})), - ) - })?; + .to_string(), + None => Path::new("subagents") + .join(&prompt_filename) + .to_string_lossy() + .to_string(), + }; + let prompt_file_path = + LocalStore::write_session_data(&prompt_subpath, prompt).map_err(|e| { + McpError::internal_error( + "Failed to create prompt file", + Some(json!({"error": e.to_string()})), + ) + })?; let mut command = format!( r#"stakpak -a --prompt-file {} --max-steps {}"#, From 867d0d2137ca18520d8a82467c75905948107330 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 17:30:29 +0200 Subject: [PATCH 03/23] Add a sanboxed version of the discovery subagent --- cli/subagents.toml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/cli/subagents.toml b/cli/subagents.toml index 8084fbee..ed54be4a 100644 --- a/cli/subagents.toml +++ b/cli/subagents.toml @@ -1,9 +1,26 @@ [subagents.DiscoveryAgent] description = """ +Fast Research and Discovery agent with read-only tools. +Perfect for tasks requiring filesystem exploration, knowledge base search, and web research. +Delagate research and discovery tasks to this agent. +""" +model = "eco" +max_steps = 30 +allowed_tools = [ + "stakpak__view", + "stakpak__search_docs", + "stakpak__search_memory", + "stakpak__read_rulebook", + "stakpak__paks__search_paks", + "stakpak__paks__get_pak_content", +] + +[subagents.SandboxDiscoveryAgent] +description = """ Research and Discovery agent with safe command execution in an isolated read-only environment. 
Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining safety. Includes read-only access to cloud credentials (AWS, GCP, Azure, DigitalOcean) and your working directory. -Tools: stakpak__run_command, stakpak__view, stakpak__search_docs, stakpak__search_memory, stakpak__read_rulebook, stakpak__paks__search_paks, stakpak__paks__get_pak_content +This agent is slower than the regular DiscoveryAgent, but can run read-only commands. """ model = "eco" max_steps = 30 @@ -17,11 +34,13 @@ allowed_tools = [ "stakpak__paks__get_pak_content", ] -[subagents.DiscoveryAgent.warden] +[subagents.SandboxDiscoveryAgent.warden] enabled = true volumes = [ # stakpak config "~/.stakpak/config.toml:/home/agent/.stakpak/config.toml:ro", + "~/.stakpak/data/local.db:/home/agent/.stakpak/data/local.db", + "~/.agent-board/data.db:/home/agent/.agent-board/data.db", # working directory "./:/agent:ro", @@ -34,3 +53,4 @@ volumes = [ "~/.azure:/home/agent/.azure:ro", "~/.kube:/home/agent/.kube:ro", ] + From 3ae45294c53c21c1cb8ae0cb42698536eaf60e6d Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 17:31:01 +0200 Subject: [PATCH 04/23] Add agentboard and local state data to warden write whitelist --- cli/src/config/warden.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/src/config/warden.rs b/cli/src/config/warden.rs index 06aa677a..c11cb2da 100644 --- a/cli/src/config/warden.rs +++ b/cli/src/config/warden.rs @@ -20,6 +20,8 @@ impl WardenConfig { volumes: vec![ "~/.stakpak/config.toml:/home/agent/.stakpak/config.toml:ro".to_string(), "~/.stakpak/auth.toml:/home/agent/.stakpak/auth.toml:ro".to_string(), + "~/.stakpak/data/local.db:/home/agent/.stakpak/data/local.db".to_string(), + "~/.agent-board/data.db:/home/agent/.agent-board/data.db".to_string(), "./:/agent:ro".to_string(), "./.stakpak:/agent/.stakpak".to_string(), "~/.aws:/home/agent/.aws:ro".to_string(), From c06e0426252a7096ece7c719e18b785b71f2a34b Mon Sep 17 00:00:00 
2001 From: George Date: Tue, 3 Feb 2026 17:31:55 +0200 Subject: [PATCH 05/23] Make sure allowed tools usage is consistent across modes --- cli/src/commands/agent/run/mcp_init.rs | 10 +++++++++- cli/src/commands/agent/run/mode_async.rs | 13 +++---------- cli/src/commands/agent/run/mode_interactive.rs | 8 ++++---- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cli/src/commands/agent/run/mcp_init.rs b/cli/src/commands/agent/run/mcp_init.rs index 357b1bd1..a40f56be 100644 --- a/cli/src/commands/agent/run/mcp_init.rs +++ b/cli/src/commands/agent/run/mcp_init.rs @@ -37,6 +37,8 @@ pub struct McpInitConfig { pub enable_mtls: bool, /// Subagent configurations pub subagent_configs: Option, + /// Optional list of allowed tool names (filters tools if specified) + pub allowed_tools: Option>, } impl Default for McpInitConfig { @@ -47,6 +49,7 @@ impl Default for McpInitConfig { enabled_tools: EnabledToolsConfig { slack: false }, enable_mtls: true, subagent_configs: None, + allowed_tools: None, } } } @@ -328,7 +331,12 @@ pub async fn initialize_mcp_server_and_tools( .await .map_err(|e| format!("Failed to get tools: {}", e))?; - let tools = convert_tools_with_filter(&mcp_tools, app_config.allowed_tools.as_ref()); + // Use allowed_tools from mcp_config if provided, otherwise fall back to app_config + let allowed_tools_ref = mcp_config + .allowed_tools + .as_ref() + .or(app_config.allowed_tools.as_ref()); + let tools = convert_tools_with_filter(&mcp_tools, allowed_tools_ref); Ok(McpInitResult { client: mcp_client, diff --git a/cli/src/commands/agent/run/mode_async.rs b/cli/src/commands/agent/run/mode_async.rs index d8d6bab3..49baa3c9 100644 --- a/cli/src/commands/agent/run/mode_async.rs +++ b/cli/src/commands/agent/run/mode_async.rs @@ -58,6 +58,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str enabled_tools: config.enabled_tools.clone(), enable_mtls: config.enable_mtls, subagent_configs: config.subagent_configs.clone(), + 
allowed_tools: config.allowed_tools.clone(), }; let mcp_init_result = initialize_mcp_server_and_tools(&ctx, mcp_init_config, None).await?; let mcp_client = mcp_init_result.client; @@ -65,16 +66,8 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str let server_shutdown_tx = mcp_init_result.server_shutdown_tx; let proxy_shutdown_tx = mcp_init_result.proxy_shutdown_tx; - // Filter tools if allowed_tools is specified - let tools = if let Some(allowed) = &config.allowed_tools { - mcp_init_result - .tools - .into_iter() - .filter(|t| allowed.contains(&t.function.name)) - .collect() - } else { - mcp_init_result.tools - }; + // Tools are already filtered by initialize_mcp_server_and_tools + let tools = mcp_init_result.tools; // Build unified AgentClient config let providers = ctx.get_llm_provider_config(); diff --git a/cli/src/commands/agent/run/mode_interactive.rs b/cli/src/commands/agent/run/mode_interactive.rs index e06b3ce5..a499a59d 100644 --- a/cli/src/commands/agent/run/mode_interactive.rs +++ b/cli/src/commands/agent/run/mode_interactive.rs @@ -4,7 +4,7 @@ use crate::commands::agent::run::checkpoint::{ get_checkpoint_messages, resume_session_from_checkpoint, }; use crate::commands::agent::run::helpers::{ - add_agents_md, add_local_context, add_rulebooks, add_subagents, convert_tools_with_filter, + add_agents_md, add_local_context, add_rulebooks, add_subagents, refresh_billing_info, tool_call_history_string, tool_result, user_message, }; use crate::commands::agent::run::mcp_init; @@ -190,8 +190,10 @@ pub async fn run_interactive( enabled_tools: enabled_tools.clone(), enable_mtls, subagent_configs: subagent_configs.clone(), + allowed_tools: allowed_tools_for_tui.clone(), }; - let (mcp_client, mcp_tools, _tools, _server_shutdown_tx, _proxy_shutdown_tx) = + // Tools are already filtered by initialize_mcp_server_and_tools (same as async mode) + let (mcp_client, mcp_tools, tools, _server_shutdown_tx, _proxy_shutdown_tx) = match 
mcp_init::initialize_mcp_server_and_tools( &ctx_clone, mcp_init_config, @@ -215,8 +217,6 @@ pub async fn run_interactive( } }; - let tools = convert_tools_with_filter(&mcp_tools, allowed_tools_for_tui.as_ref()); - let data = client.get_my_account().await?; send_input_event(&input_tx, InputEvent::GetStatus(data.to_text())).await?; From a1c690d98aa02ea8ecf3c31028a40f2a77ab4009 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 18:04:16 +0200 Subject: [PATCH 06/23] Remove sandbox subagent --- cli/subagents.toml | 73 +++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/cli/subagents.toml b/cli/subagents.toml index ed54be4a..839132ae 100644 --- a/cli/subagents.toml +++ b/cli/subagents.toml @@ -1,6 +1,6 @@ [subagents.DiscoveryAgent] description = """ -Fast Research and Discovery agent with read-only tools. +Fast Research and Discovery agent with read-only tools. Perfect for tasks requiring filesystem exploration, knowledge base search, and web research. Delagate research and discovery tasks to this agent. """ @@ -15,42 +15,41 @@ allowed_tools = [ "stakpak__paks__get_pak_content", ] -[subagents.SandboxDiscoveryAgent] -description = """ -Research and Discovery agent with safe command execution in an isolated read-only environment. -Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining safety. -Includes read-only access to cloud credentials (AWS, GCP, Azure, DigitalOcean) and your working directory. -This agent is slower than the regular DiscoveryAgent, but can run read-only commands. 
-""" -model = "eco" -max_steps = 30 -allowed_tools = [ - "stakpak__run_command", - "stakpak__view", - "stakpak__search_docs", - "stakpak__search_memory", - "stakpak__read_rulebook", - "stakpak__paks__search_paks", - "stakpak__paks__get_pak_content", -] - -[subagents.SandboxDiscoveryAgent.warden] -enabled = true -volumes = [ - # stakpak config - "~/.stakpak/config.toml:/home/agent/.stakpak/config.toml:ro", - "~/.stakpak/data/local.db:/home/agent/.stakpak/data/local.db", - "~/.agent-board/data.db:/home/agent/.agent-board/data.db", +# [subagents.SandboxDiscoveryAgent] +# description = """ +# Research and Discovery agent with safe command execution in an isolated read-only environment. +# Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining safety. +# Includes read-only access to cloud credentials (AWS, GCP, Azure, DigitalOcean) and your working directory. +# This agent is slower than the regular DiscoveryAgent, but can run read-only commands. 
+# """ +# model = "eco" +# max_steps = 30 +# allowed_tools = [ +# "stakpak__run_command", +# "stakpak__view", +# "stakpak__search_docs", +# "stakpak__search_memory", +# "stakpak__read_rulebook", +# "stakpak__paks__search_paks", +# "stakpak__paks__get_pak_content", +# ] - # working directory - "./:/agent:ro", - "./.stakpak:/agent/.stakpak", +# [subagents.SandboxDiscoveryAgent.warden] +# enabled = true +# volumes = [ +# # stakpak config +# "~/.stakpak/config.toml:/home/agent/.stakpak/config.toml:ro", +# "~/.stakpak/data/local.db:/home/agent/.stakpak/data/local.db", +# "~/.agent-board/data.db:/home/agent/.agent-board/data.db", - # cloud credentials - "~/.aws:/home/agent/.aws:ro", - "~/.config/gcloud:/home/agent/.config/gcloud:ro", - "~/.digitalocean:/home/agent/.digitalocean:ro", - "~/.azure:/home/agent/.azure:ro", - "~/.kube:/home/agent/.kube:ro", -] +# # working directory +# "./:/agent:ro", +# "./.stakpak:/agent/.stakpak", +# # cloud credentials +# "~/.aws:/home/agent/.aws:ro", +# "~/.config/gcloud:/home/agent/.config/gcloud:ro", +# "~/.digitalocean:/home/agent/.digitalocean:ro", +# "~/.azure:/home/agent/.azure:ro", +# "~/.kube:/home/agent/.kube:ro", +# ] From f85014b56165e573834d55084acf8893d12bac49 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 08:53:36 -0800 Subject: [PATCH 07/23] fix: cargo fmt --- .../commands/agent/run/mode_interactive.rs | 4 +- libs/shared/src/models/subagent.rs | 46 ++++++++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/cli/src/commands/agent/run/mode_interactive.rs b/cli/src/commands/agent/run/mode_interactive.rs index b02bd0de..ef387e8a 100644 --- a/cli/src/commands/agent/run/mode_interactive.rs +++ b/cli/src/commands/agent/run/mode_interactive.rs @@ -4,8 +4,8 @@ use crate::commands::agent::run::checkpoint::{ get_checkpoint_messages, resume_session_from_checkpoint, }; use crate::commands::agent::run::helpers::{ - add_agents_md, add_local_context, add_rulebooks, add_subagents, - 
refresh_billing_info, tool_call_history_string, tool_result, user_message, + add_agents_md, add_local_context, add_rulebooks, add_subagents, refresh_billing_info, + tool_call_history_string, tool_result, user_message, }; use crate::commands::agent::run::mcp_init; use crate::commands::agent::run::renderer::{OutputFormat, OutputRenderer}; diff --git a/libs/shared/src/models/subagent.rs b/libs/shared/src/models/subagent.rs index 26988c83..d85c0b89 100644 --- a/libs/shared/src/models/subagent.rs +++ b/libs/shared/src/models/subagent.rs @@ -85,15 +85,19 @@ allowed_tools = ["stakpak__run_command", "stakpak__view"] enabled = true volumes = ["./:/agent:ro"] "#; - let configs = SubagentConfigs::load_from_str(content) - .expect("Failed to parse subagent config"); - - let agent = configs.get_config("DiscoveryAgent") + let configs = + SubagentConfigs::load_from_str(content).expect("Failed to parse subagent config"); + + let agent = configs + .get_config("DiscoveryAgent") .expect("DiscoveryAgent not found"); - + assert_eq!(agent.model, Some("eco".to_string())); assert_eq!(agent.max_steps, 30); - assert_eq!(agent.allowed_tools, vec!["stakpak__run_command", "stakpak__view"]); + assert_eq!( + agent.allowed_tools, + vec!["stakpak__run_command", "stakpak__view"] + ); assert!(agent.warden.is_some()); } @@ -105,12 +109,13 @@ description = "Agent without model" max_steps = 10 allowed_tools = ["stakpak__view"] "#; - let configs = SubagentConfigs::load_from_str(content) - .expect("Failed to parse subagent config"); - - let agent = configs.get_config("BasicAgent") + let configs = + SubagentConfigs::load_from_str(content).expect("Failed to parse subagent config"); + + let agent = configs + .get_config("BasicAgent") .expect("BasicAgent not found"); - + assert_eq!(agent.model, None); assert_eq!(agent.max_steps, 10); } @@ -121,12 +126,21 @@ allowed_tools = ["stakpak__view"] let content = include_str!("../../../../cli/subagents.toml"); let configs = SubagentConfigs::load_from_str(content) 
.expect("Failed to parse default subagents.toml"); - - let discovery = configs.get_config("DiscoveryAgent") + + let discovery = configs + .get_config("DiscoveryAgent") .expect("DiscoveryAgent not found in default config"); - + assert_eq!(discovery.model, Some("eco".to_string())); - assert!(discovery.allowed_tools.contains(&"stakpak__run_command".to_string())); - assert!(discovery.allowed_tools.contains(&"stakpak__view".to_string())); + assert!( + discovery + .allowed_tools + .contains(&"stakpak__run_command".to_string()) + ); + assert!( + discovery + .allowed_tools + .contains(&"stakpak__view".to_string()) + ); } } From dcfffe71157e916361d6500fbab3c4a4cc65b5c9 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 3 Feb 2026 09:03:10 -0800 Subject: [PATCH 08/23] fix: update test to match actual subagents.toml config --- libs/shared/src/models/subagent.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/shared/src/models/subagent.rs b/libs/shared/src/models/subagent.rs index d85c0b89..f3b03205 100644 --- a/libs/shared/src/models/subagent.rs +++ b/libs/shared/src/models/subagent.rs @@ -135,12 +135,12 @@ allowed_tools = ["stakpak__view"] assert!( discovery .allowed_tools - .contains(&"stakpak__run_command".to_string()) + .contains(&"stakpak__view".to_string()) ); assert!( discovery .allowed_tools - .contains(&"stakpak__view".to_string()) + .contains(&"stakpak__search_docs".to_string()) ); } } From 1e677d96316ce452b6766a2b0dce045078c8f879 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 10:32:28 -0800 Subject: [PATCH 09/23] Add subagent usage instructions in system prompt --- .../task_board_context/system_prompt.txt | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/libs/api/src/local/hooks/task_board_context/system_prompt.txt b/libs/api/src/local/hooks/task_board_context/system_prompt.txt index 30a76ba5..58e779da 100644 --- a/libs/api/src/local/hooks/task_board_context/system_prompt.txt +++ 
b/libs/api/src/local/hooks/task_board_context/system_prompt.txt @@ -108,6 +108,7 @@ When asked about what you can support or do always search documentation first # Plan When presented with a problem or task, follow this systematic approach: 1. Problem Analysis: + - **Parallelization check**: Before executing, identify if the task contains 2+ independent read-only investigation paths (different directories, codebases, topics, or data sources). If yes → delegate each path to a subagent and synthesize their results. - Gather all relevant information about the current system state - List the key components and systems you need to examine - Note the technologies, platforms, and environments involved @@ -207,6 +208,35 @@ When providing solutions: - Create a new batch starting from the failed step with corrections - Don't repeat successful operations from the original batch +# Subagents for Parallel Work + +Delegate parallelizable tasks to subagents to save context and increase throughput. Subagent outputs are summarized—you won't see their internal reasoning or tool calls. 
+ +## When to Use Subagents +- **Codebase exploration**: Multiple directories/modules to analyze simultaneously +- **Iterative research**: Searching docs, comparing options, gathering info from multiple sources +- **Bulk read operations**: Reading many files, checking multiple configs +- **Independent investigations**: Tasks that don't depend on each other's results + +## When NOT to Use Subagents +- **Sequential dependencies**: When step N needs output from step N-1 +- **Mutating operations**: File writes, deployments, infrastructure changes (keep in main context) +- **Simple single lookups**: One file read or one doc search—overhead not worth it +- **Debugging with unknowns**: When you need to see full context to diagnose issues + +## Subagent Tradeoffs +| Benefit | Cost | +|---------|------| +| Saves main context tokens | Lose internal reasoning visibility | +| Parallel execution | Can't course-correct mid-task | +| Focused task completion | Must define clear, self-contained tasks | + +## Best Practices +- Write clear, self-contained prompts—subagent has no prior context +- Request structured output (lists, summaries) for easy consumption +- Batch related research into one subagent vs. 
many tiny tasks +- Use for read-heavy work; keep writes in main agent + # Tool Usage - Call tools directly when you have all required information - For tools requiring additional information: From 912d40b5c0d54fa267a96ea9a4656b2e8d4c8da7 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 11:29:49 -0800 Subject: [PATCH 10/23] Improve wait TUI for background tasks --- libs/mcp/server/src/local_tools.rs | 75 ++++++- libs/mcp/server/src/subagent_tools.rs | 2 +- libs/shared/src/models/integrations/openai.rs | 36 +++ libs/shared/src/remote_connection.rs | 6 + libs/shared/src/task_manager.rs | 33 ++- tui/src/services/bash_block.rs | 211 ++++++++++++++++++ tui/src/services/handlers/tool.rs | 75 ++++++- tui/src/services/message.rs | 59 +++++ 8 files changed, 479 insertions(+), 18 deletions(-) diff --git a/libs/mcp/server/src/local_tools.rs b/libs/mcp/server/src/local_tools.rs index 1c29fa5a..cd9c4dbd 100644 --- a/libs/mcp/server/src/local_tools.rs +++ b/libs/mcp/server/src/local_tools.rs @@ -18,7 +18,9 @@ use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use serde_json::json; use similar::TextDiff; use stakpak_shared::models::integrations::mcp::CallToolResultExt; -use stakpak_shared::models::integrations::openai::ToolCallResultProgress; +use stakpak_shared::models::integrations::openai::{ + ProgressType, TaskUpdate, ToolCallResultProgress, +}; use stakpak_shared::task_manager::TaskInfo; use stakpak_shared::tls_client::{TlsClientConfig, create_tls_client}; use stakpak_shared::utils::{ @@ -305,7 +307,7 @@ Use the get_all_tasks tool to monitor task progress, or the cancel_task tool to _ctx: RequestContext, Parameters(RunCommandRequest { command, - description: _, + description, timeout, remote, password, @@ -329,12 +331,17 @@ Use the get_all_tasks tool to monitor task progress, or the cancel_task tool to }; self.get_task_manager() - .start_task(actual_command, timeout_duration, Some(remote_connection)) + .start_task( + actual_command, + description, + 
timeout_duration, + Some(remote_connection), + ) .await } else { // Local async command (existing logic) self.get_task_manager() - .start_task(actual_command, timeout_duration, None) + .start_task(actual_command, description, timeout_duration, None) .await }; @@ -1196,6 +1203,9 @@ SAFETY NOTES: serde_json::to_string(&ToolCallResultProgress { id: progress_id, message: format!("{}\n", line), + progress_type: Some(ProgressType::CommandOutput), + task_updates: None, + progress: None, }) .unwrap_or_default(), ), @@ -1236,6 +1246,9 @@ SAFETY NOTES: message: Some(serde_json::to_string(&ToolCallResultProgress { id: progress_id, message: stall_msg, + progress_type: Some(ProgressType::CommandOutput), + task_updates: None, + progress: None, }).unwrap_or_default()), }).await; } @@ -2667,6 +2680,9 @@ SAFETY NOTES: let wait_operation = async { loop { let all_tasks = self.get_task_manager().get_all_tasks().await?; + + // Calculate real progress based on completed target tasks + let mut completed_count = 0; let mut target_tasks_completed = true; for task_id in task_ids { @@ -2675,25 +2691,70 @@ SAFETY NOTES: stakpak_shared::task_manager::TaskStatus::Pending | stakpak_shared::task_manager::TaskStatus::Running => { target_tasks_completed = false; - break; } - _ => {} + _ => { + completed_count += 1; + } } } } + // Calculate progress percentage + let progress_pct = if task_ids.is_empty() { + 100.0 + } else { + (completed_count as f64 / task_ids.len() as f64) * 100.0 + }; + + // Build structured task updates + let task_updates: Vec = all_tasks + .iter() + .filter(|t| task_ids.contains(&t.id)) + .map(|t| { + let duration_secs = t.duration.map(|d| d.as_secs_f64()); + let output_preview = t.output.as_ref().and_then(|o| { + let lines: Vec<&str> = o.lines().collect(); + if lines.is_empty() { + None + } else { + // Get last non-empty line, truncated + lines.iter().rev().find(|l| !l.is_empty()).map(|l| { + if l.len() > 50 { + format!("{}...", &l[..50]) + } else { + l.to_string() + } + }) + } 
+ }); + + TaskUpdate { + task_id: t.id.clone(), + status: format!("{:?}", t.status), + description: t.description.clone(), + duration_secs, + output_preview, + is_target: true, + } + }) + .collect(); + + // Also include fallback message for backwards compatibility let progress_table = self.format_tasks_table(&all_tasks, task_ids); let _ = ctx .peer .notify_progress(ProgressNotificationParam { progress_token: ProgressToken(NumberOrString::Number(0)), - progress: if target_tasks_completed { 100.0 } else { 50.0 }, + progress: progress_pct, total: Some(100.0), message: Some( serde_json::to_string(&ToolCallResultProgress { id: progress_id, message: progress_table, + progress_type: Some(ProgressType::TaskWait), + task_updates: Some(task_updates), + progress: Some(progress_pct), }) .unwrap_or_default(), ), diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index 29a7ff95..e823d283 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -80,7 +80,7 @@ The subagent runs asynchronously in the background. 
This tool returns immediatel // Start the subagent as a background task using existing task manager let task_info = match self .get_task_manager() - .start_task(subagent_command, None, None) // No timeout, no remote + .start_task(subagent_command, Some(description.clone()), None, None) // description, no timeout, no remote .await { Ok(task_info) => task_info, diff --git a/libs/shared/src/models/integrations/openai.rs b/libs/shared/src/models/integrations/openai.rs index 9edf5d64..fbe49a13 100644 --- a/libs/shared/src/models/integrations/openai.rs +++ b/libs/shared/src/models/integrations/openai.rs @@ -546,6 +546,42 @@ pub struct ToolCallResult { pub struct ToolCallResultProgress { pub id: Uuid, pub message: String, + /// Type of progress update for specialized handling + #[serde(skip_serializing_if = "Option::is_none")] + pub progress_type: Option, + /// Structured task updates for task wait progress + #[serde(skip_serializing_if = "Option::is_none")] + pub task_updates: Option>, + /// Overall progress percentage (0.0 - 100.0) + #[serde(skip_serializing_if = "Option::is_none")] + pub progress: Option, +} + +/// Type of progress update +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum ProgressType { + /// Command output streaming + CommandOutput, + /// Task wait progress with structured updates + TaskWait, + /// Generic progress + Generic, +} + +/// Structured task status update +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct TaskUpdate { + pub task_id: String, + pub status: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub duration_secs: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub output_preview: Option, + /// Whether this is a target task being waited on + #[serde(default)] + pub is_target: bool, } // ============================================================================= diff --git 
a/libs/shared/src/remote_connection.rs b/libs/shared/src/remote_connection.rs index 6f435b64..400fe195 100644 --- a/libs/shared/src/remote_connection.rs +++ b/libs/shared/src/remote_connection.rs @@ -514,6 +514,9 @@ impl RemoteConnection { message: Some(serde_json::to_string(&crate::models::integrations::openai::ToolCallResultProgress { id: progress_id, message: text, + progress_type: None, + task_updates: None, + progress: None, }).unwrap_or_default()), }).await; } @@ -537,6 +540,9 @@ impl RemoteConnection { message: Some(serde_json::to_string(&crate::models::integrations::openai::ToolCallResultProgress { id: progress_id, message: text, + progress_type: None, + task_updates: None, + progress: None, }).unwrap_or_default()), }).await; } diff --git a/libs/shared/src/task_manager.rs b/libs/shared/src/task_manager.rs index 0953fa72..6531c2eb 100644 --- a/libs/shared/src/task_manager.rs +++ b/libs/shared/src/task_manager.rs @@ -93,6 +93,7 @@ pub struct Task { pub id: TaskId, pub status: TaskStatus, pub command: String, + pub description: Option, pub remote_connection: Option, pub output: Option, pub error: Option, @@ -113,6 +114,7 @@ pub struct TaskInfo { pub id: TaskId, pub status: TaskStatus, pub command: String, + pub description: Option, pub output: Option, pub start_time: DateTime, pub duration: Option, @@ -137,6 +139,7 @@ impl From<&Task> for TaskInfo { id: task.id.clone(), status: task.status.clone(), command: task.command.clone(), + description: task.description.clone(), output: task.output.clone(), start_time: task.start_time, duration, @@ -172,6 +175,7 @@ pub enum TaskMessage { Start { id: Option, command: String, + description: Option, remote_connection: Option, timeout: Option, response_tx: oneshot::Sender>, @@ -270,13 +274,20 @@ impl TaskManager { TaskMessage::Start { id, command, + description, remote_connection, timeout, response_tx, } => { let task_id = id.unwrap_or_else(|| generate_simple_id(6)); let result = self - .start_task(task_id.clone(), command, 
timeout, remote_connection) + .start_task( + task_id.clone(), + command, + description, + timeout, + remote_connection, + ) .await; let _ = response_tx.send(result.map(|_| task_id.clone())); false @@ -351,6 +362,7 @@ impl TaskManager { &mut self, id: TaskId, command: String, + description: Option, timeout: Option, remote_connection: Option, ) -> Result<(), TaskError> { @@ -362,6 +374,7 @@ impl TaskManager { id: id.clone(), status: TaskStatus::Running, command: command.clone(), + description, remote_connection: remote_connection.clone(), output: None, error: None, @@ -743,6 +756,7 @@ impl TaskManagerHandle { pub async fn start_task( &self, command: String, + description: Option, timeout: Option, remote_connection: Option, ) -> Result { @@ -752,6 +766,7 @@ impl TaskManagerHandle { .send(TaskMessage::Start { id: None, command: command.clone(), + description, remote_connection: remote_connection.clone(), timeout, response_tx, @@ -874,7 +889,7 @@ mod tests { // Start a background task let task_info = handle - .start_task("sleep 5".to_string(), None, None) + .start_task("sleep 5".to_string(), None, None, None) .await .expect("Failed to start task"); @@ -910,7 +925,7 @@ mod tests { // Start a long-running background task let task_info = handle - .start_task("sleep 10".to_string(), None, None) + .start_task("sleep 10".to_string(), None, None, None) .await .expect("Failed to start task"); @@ -946,7 +961,7 @@ mod tests { // Start a simple task let task_info = handle - .start_task("echo 'Hello, World!'".to_string(), None, None) + .start_task("echo 'Hello, World!'".to_string(), None, None, None) .await .expect("Failed to start task"); @@ -987,7 +1002,7 @@ mod tests { // Start a task that will fail immediately let result = handle - .start_task("nonexistent_command_12345".to_string(), None, None) + .start_task("nonexistent_command_12345".to_string(), None, None, None) .await; // Should get a TaskFailedOnStart error @@ -1011,7 +1026,7 @@ mod tests { // Start a long-running task 
let _task_info = handle - .start_task("sleep 30".to_string(), None, None) + .start_task("sleep 30".to_string(), None, None, None) .await .expect("Failed to start task"); @@ -1041,7 +1056,7 @@ mod tests { // Start a task that writes a marker file while running let marker = format!("/tmp/stakpak_test_drop_{}", std::process::id()); let task_info = handle - .start_task(format!("touch {} && sleep 30", marker), None, None) + .start_task(format!("touch {} && sleep 30", marker), None, None, None) .await .expect("Failed to start task"); @@ -1071,7 +1086,9 @@ mod tests { }); // Start a task that will exit with non-zero code immediately - let result = handle.start_task("exit 1".to_string(), None, None).await; + let result = handle + .start_task("exit 1".to_string(), None, None, None) + .await; // Should get a TaskFailedOnStart error assert!(matches!(result, Err(TaskError::TaskFailedOnStart(_)))); diff --git a/tui/src/services/bash_block.rs b/tui/src/services/bash_block.rs index 7bb7f51d..89594f8a 100644 --- a/tui/src/services/bash_block.rs +++ b/tui/src/services/bash_block.rs @@ -2239,3 +2239,214 @@ pub fn render_run_command_block( owned_lines } + +/// Render a task wait block showing progress of background tasks +/// Displays a bordered box with task statuses and overall progress +pub fn render_task_wait_block( + task_updates: &[stakpak_shared::models::integrations::openai::TaskUpdate], + progress: f64, + target_task_ids: &[String], + terminal_width: usize, +) -> Vec> { + let content_width = if terminal_width > 4 { + terminal_width - 4 + } else { + 40 + }; + let inner_width = content_width; + let horizontal_line = "─".repeat(inner_width + 2); + + // Border color - gray for all states (could differentiate later) + let border_color = term_color(Color::Gray); + + // Check if all tasks are completed + let all_completed = progress >= 100.0; + + // Dot color and title suffix based on progress + let (dot_color, title_suffix, suffix_color) = if all_completed { + (Color::LightGreen, 
"".to_string(), None) + } else { + let completed_count = task_updates + .iter() + .filter(|t| { + t.is_target + && (t.status == "Completed" + || t.status == "Failed" + || t.status == "Cancelled" + || t.status == "TimedOut") + }) + .count(); + let total_count = target_task_ids.len(); + ( + Color::Yellow, + format!(" - Waiting ({}/{})", completed_count, total_count), + Some(Color::Yellow), + ) + }; + + // Build title + let base_title = "Wait for Tasks"; + let title_text = format!("{}{}", base_title, title_suffix); + let title_display_len = calculate_display_width(&title_text); + let remaining_dashes = inner_width.saturating_sub(title_display_len + 2); + + // Build title spans + let title_border = if let Some(color) = suffix_color { + Line::from(vec![ + Span::styled("╭─", Style::default().fg(border_color)), + Span::styled( + "●", + Style::default().fg(dot_color).add_modifier(Modifier::BOLD), + ), + Span::styled( + format!(" {} ", base_title), + Style::default() + .fg(term_color(Color::White)) + .add_modifier(Modifier::BOLD), + ), + Span::styled( + format!("{} ", title_suffix.trim_start()), + Style::default().fg(color).add_modifier(Modifier::BOLD), + ), + Span::styled( + format!("{}╮", "─".repeat(remaining_dashes)), + Style::default().fg(border_color), + ), + ]) + } else { + Line::from(vec![ + Span::styled("╭─", Style::default().fg(border_color)), + Span::styled( + "●", + Style::default().fg(dot_color).add_modifier(Modifier::BOLD), + ), + Span::styled( + format!(" {} ", title_text), + Style::default() + .fg(term_color(Color::White)) + .add_modifier(Modifier::BOLD), + ), + Span::styled( + format!("{}╮", "─".repeat(remaining_dashes)), + Style::default().fg(border_color), + ), + ]) + }; + + let bottom_border = Line::from(vec![Span::styled( + format!("╰{}╯", horizontal_line), + Style::default().fg(border_color), + )]); + + let mut formatted_lines = Vec::new(); + formatted_lines.push(title_border); + + // Filter to show only target tasks, sorted by status (running first, 
then completed) + let mut target_tasks: Vec<_> = task_updates.iter().filter(|t| t.is_target).collect(); + + // Sort: Running tasks first, then by task_id + target_tasks.sort_by(|a, b| { + let a_running = a.status == "Running" || a.status == "Pending"; + let b_running = b.status == "Running" || b.status == "Pending"; + match (a_running, b_running) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.task_id.cmp(&b.task_id), + } + }); + + // Render each task + for task in &target_tasks { + let (status_icon, status_color) = match task.status.as_str() { + "Running" => ("◐", Color::Yellow), + "Pending" => ("○", Color::DarkGray), + "Completed" => ("✓", Color::LightGreen), + "Failed" => ("✗", Color::LightRed), + "Cancelled" => ("⊘", Color::LightRed), + "TimedOut" => ("⏱", Color::LightRed), + _ => ("?", Color::DarkGray), + }; + + // Format duration + let duration_str = task + .duration_secs + .map(|d| { + if d < 60.0 { + format!("{:.1}s", d) + } else { + format!("{:.0}m{:.0}s", d / 60.0, d % 60.0) + } + }) + .unwrap_or_else(|| "...".to_string()); + + // Truncate task_id for display (show first 8 chars) + let task_id_display = if task.task_id.len() > 8 { + format!("{}…", &task.task_id[..8]) + } else { + task.task_id.clone() + }; + + // Get description or fall back to truncated task_id + let display_name = task + .description + .as_ref() + .map(|d| { + // Truncate description if too long + if d.len() > 30 { + format!("{}…", &d[..30]) + } else { + d.clone() + } + }) + .unwrap_or_else(|| task_id_display.clone()); + + // Build the task line: "│ ● description [duration] status │" + let task_content = format!("{} {} [{}]", display_name, task.status, duration_str); + let content_display_width = calculate_display_width(&task_content) + 2; // +2 for icon and space + let padding_needed = inner_width.saturating_sub(content_display_width); + + let line_spans = vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" 
"), + Span::styled( + status_icon.to_string(), + Style::default() + .fg(status_color) + .add_modifier(Modifier::BOLD), + ), + Span::from(" "), + Span::styled(display_name, Style::default().fg(AdaptiveColors::text())), + Span::styled( + format!(" [{}]", duration_str), + Style::default().fg(Color::DarkGray), + ), + Span::styled( + format!(" {}", task.status), + Style::default().fg(status_color), + ), + Span::from(" ".repeat(padding_needed)), + Span::styled(" │", Style::default().fg(border_color)), + ]; + formatted_lines.push(Line::from(line_spans)); + } + + // If no target tasks, show a message + if target_tasks.is_empty() { + let msg = "No tasks to display"; + let padding = inner_width.saturating_sub(msg.len()); + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" "), + Span::styled(msg.to_string(), Style::default().fg(Color::DarkGray)), + Span::from(" ".repeat(padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + } + + formatted_lines.push(bottom_border); + + // Add spacing marker + formatted_lines.push(Line::from(vec![Span::from("SPACING_MARKER")])); + + formatted_lines +} diff --git a/tui/src/services/handlers/tool.rs b/tui/src/services/handlers/tool.rs index 00df807c..aea05d9f 100644 --- a/tui/src/services/handlers/tool.rs +++ b/tui/src/services/handlers/tool.rs @@ -7,7 +7,7 @@ use crate::services::commands::{CommandAction, CommandContext, execute_command, use crate::services::helper_block::push_error_message; use crate::services::message::{Message, invalidate_message_lines_cache}; use stakpak_shared::models::integrations::openai::{ - ToolCall, ToolCallResult, ToolCallResultProgress, ToolCallResultStatus, + ProgressType, ToolCall, ToolCallResult, ToolCallResultProgress, ToolCallResultStatus, }; use tokio::sync::mpsc::Sender; @@ -75,6 +75,11 @@ pub fn handle_stream_tool_result( return None; // Don't add this marker to the streaming buffer } + // Handle TaskWait progress type 
specially - use replace mode instead of append + if matches!(progress.progress_type, Some(ProgressType::TaskWait)) { + return handle_task_wait_progress(state, progress); + } + // Ensure loading state is true during streaming tool results // Only set it if it's not already true to avoid unnecessary state changes if !state.loading { @@ -82,7 +87,7 @@ pub fn handle_stream_tool_result( } state.is_streaming = true; state.streaming_tool_result_id = Some(tool_call_id); - // 1. Update the buffer for this tool_call_id + // 1. Update the buffer for this tool_call_id (append mode for command output) state .streaming_tool_results .entry(tool_call_id) @@ -148,6 +153,72 @@ pub fn handle_stream_tool_result( None } +/// Handle TaskWait progress type with replace mode and dedicated UI +fn handle_task_wait_progress( + state: &mut AppState, + progress: ToolCallResultProgress, +) -> Option { + let tool_call_id = progress.id; + + // Ensure loading state is true + if !state.loading { + state.loading = true; + } + state.is_streaming = true; + state.streaming_tool_result_id = Some(tool_call_id); + + // Remove the pending message if exists + if let Some(pending_id) = state.pending_bash_message_id { + state.messages.retain(|m| m.id != pending_id); + } + // Remove any old message with this id (replace mode) + state.messages.retain(|m| m.id != tool_call_id); + + // Use structured task updates if available, otherwise fall back to message + if let Some(task_updates) = progress.task_updates { + // Extract target task IDs from task updates + let target_task_ids: Vec = task_updates + .iter() + .filter(|t| t.is_target) + .map(|t| t.task_id.clone()) + .collect(); + + let overall_progress = progress.progress.unwrap_or(0.0); + + // Use dedicated task wait block + state.messages.push(Message::render_task_wait_block( + task_updates, + overall_progress, + target_task_ids, + Some(tool_call_id), + )); + } else { + // Fallback: use generic streaming block with replace mode + // Store message directly (not 
appending) + state + .streaming_tool_results + .insert(tool_call_id, progress.message.clone()); + + state.messages.push(Message::render_streaming_border_block( + &progress.message, + "Wait for Tasks", + "Progress", + None, + "TaskWait", + Some(tool_call_id), + )); + } + + invalidate_message_lines_cache(state); + + // If content changed while user is scrolled up, mark it + if !state.stay_at_bottom { + state.content_changed_while_scrolled_up = true; + } + + None +} + /// Handle message tool calls event pub fn handle_message_tool_calls(state: &mut AppState, tool_calls: Vec) { // exclude any tool call that is already executed diff --git a/tui/src/services/message.rs b/tui/src/services/message.rs index af7c2e05..3d2e9b17 100644 --- a/tui/src/services/message.rs +++ b/tui/src/services/message.rs @@ -70,6 +70,13 @@ pub enum MessageContent { /// View file block - compact display showing file path, line count, and optional grep/glob /// (file_path: String, total_lines: usize, grep: Option, glob: Option) RenderViewFileBlock(String, usize, Option, Option), + /// Task wait block - shows progress of background tasks being waited on + /// (task_updates: Vec, overall_progress: f64, target_task_ids: Vec) + RenderTaskWaitBlock( + Vec, + f64, + Vec, + ), } /// Compute a hash of the MessageContent for cache invalidation. 
@@ -205,6 +212,22 @@ pub fn hash_message_content(content: &MessageContent) -> u64 { 18u8.hash(&mut hasher); text.hash(&mut hasher); } + MessageContent::RenderTaskWaitBlock(task_updates, progress, target_ids) => { + 19u8.hash(&mut hasher); + task_updates.len().hash(&mut hasher); + // Hash progress as integer to avoid float hashing issues + (*progress as u64).hash(&mut hasher); + target_ids.hash(&mut hasher); + // Hash task statuses and durations for change detection + for task in task_updates { + task.task_id.hash(&mut hasher); + task.status.hash(&mut hasher); + // Hash duration as integer seconds for change detection + if let Some(d) = task.duration_secs { + (d as u64).hash(&mut hasher); + } + } + } } hasher.finish() @@ -535,6 +558,21 @@ impl Message { is_collapsed: Some(true), } } + + /// Create a task wait block message + /// Shows progress of background tasks being waited on with status indicators + pub fn render_task_wait_block( + task_updates: Vec, + progress: f64, + target_task_ids: Vec, + message_id: Option, + ) -> Self { + Message { + id: message_id.unwrap_or_else(Uuid::new_v4), + content: MessageContent::RenderTaskWaitBlock(task_updates, progress, target_task_ids), + is_collapsed: None, + } + } } pub fn get_wrapped_plain_lines<'a>( @@ -1353,6 +1391,16 @@ fn render_single_message_internal(msg: &Message, width: usize) -> Vec<(Line<'sta let borrowed = get_wrapped_styled_block_lines(&rendered, width); lines.extend(convert_to_owned_lines(borrowed)); } + MessageContent::RenderTaskWaitBlock(task_updates, progress, target_task_ids) => { + let rendered = crate::services::bash_block::render_task_wait_block( + task_updates, + *progress, + target_task_ids, + width, + ); + let borrowed = get_wrapped_styled_block_lines(&rendered, width); + lines.extend(convert_to_owned_lines(borrowed)); + } } lines @@ -1965,6 +2013,17 @@ fn get_wrapped_message_lines_internal( let owned_lines = convert_to_owned_lines(borrowed_lines); all_lines.extend(owned_lines); } + 
MessageContent::RenderTaskWaitBlock(task_updates, progress, target_task_ids) => { + let rendered_lines = crate::services::bash_block::render_task_wait_block( + task_updates, + *progress, + target_task_ids, + width, + ); + let borrowed_lines = get_wrapped_styled_block_lines(&rendered_lines, width); + let owned_lines = convert_to_owned_lines(borrowed_lines); + all_lines.extend(owned_lines); + } }; agent_mode_removed = false; checkpoint_id_removed = false; From daed210be6e313ed0ea39b1cc9b6713ebce006e8 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 11:46:48 -0800 Subject: [PATCH 11/23] Rename DiscoveryAgent to ReadOnlyAgent --- cli/subagents.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/subagents.toml b/cli/subagents.toml index 839132ae..6c43b254 100644 --- a/cli/subagents.toml +++ b/cli/subagents.toml @@ -1,4 +1,4 @@ -[subagents.DiscoveryAgent] +[subagents.ReadOnlyAgent] description = """ Fast Research and Discovery agent with read-only tools. Perfect for tasks requiring filesystem exploration, knowledge base search, and web research. 
From b238ec90d1177c5216b7f4490fba58bd7a573dd4 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 13:55:10 -0800 Subject: [PATCH 12/23] Add dynamic subagents based on AOrchestra --- cli/src/commands/agent/run/helpers.rs | 22 -- cli/src/commands/agent/run/mcp_init.rs | 11 +- cli/src/commands/agent/run/mode_async.rs | 10 +- .../commands/agent/run/mode_interactive.rs | 12 +- cli/src/commands/mcp/server.rs | 2 +- cli/src/main.rs | 31 +- cli/subagents.toml | 55 --- libs/mcp/server/src/lib.rs | 12 +- libs/mcp/server/src/subagent_tools.rs | 314 +++++++++++++----- libs/mcp/server/src/tool_container.rs | 8 - libs/shared/src/models/mod.rs | 1 - libs/shared/src/models/subagent.rs | 146 -------- 12 files changed, 255 insertions(+), 369 deletions(-) delete mode 100644 cli/subagents.toml delete mode 100644 libs/shared/src/models/subagent.rs diff --git a/cli/src/commands/agent/run/helpers.rs b/cli/src/commands/agent/run/helpers.rs index 9d6dda6e..5db0d1a1 100644 --- a/cli/src/commands/agent/run/helpers.rs +++ b/cli/src/commands/agent/run/helpers.rs @@ -4,7 +4,6 @@ use stakpak_api::models::ListRuleBook; use stakpak_shared::models::integrations::openai::{ ChatMessage, FunctionDefinition, MessageContent, Role, Tool, ToolCallResult, }; -use stakpak_shared::models::subagent::SubagentConfigs; pub fn convert_tools_with_filter( tools: &[rmcp::model::Tool], @@ -132,27 +131,6 @@ pub fn add_rulebooks(user_input: &str, rulebooks: &[ListRuleBook]) -> (String, O (formatted_input, Some(rulebooks_text)) } -pub fn add_subagents( - messages: &[ChatMessage], - user_input: &str, - subagent_configs: &Option, -) -> (String, Option) { - if let Some(subagent_configs) = subagent_configs { - let subagents_text = subagent_configs.format_for_context(); - - if messages.is_empty() { - let formatted_input = format!( - "{}\n\n{}\n", - user_input, subagents_text - ); - (formatted_input, Some(subagents_text)) - } else { - (user_input.to_string(), None) - } - } else { - (user_input.to_string(), None) - } 
-} pub fn tool_call_history_string(tool_calls: &[ToolCallResult]) -> Option { if tool_calls.is_empty() { diff --git a/cli/src/commands/agent/run/mcp_init.rs b/cli/src/commands/agent/run/mcp_init.rs index a40f56be..153ed362 100644 --- a/cli/src/commands/agent/run/mcp_init.rs +++ b/cli/src/commands/agent/run/mcp_init.rs @@ -17,7 +17,6 @@ use stakpak_mcp_proxy::server::start_proxy_server; use stakpak_mcp_server::{EnabledToolsConfig, MCPServerConfig, ToolMode, start_server}; use stakpak_shared::cert_utils::CertificateChain; use stakpak_shared::models::integrations::openai::ToolCallResultProgress; -use stakpak_shared::models::subagent::SubagentConfigs; use std::collections::HashMap; use std::sync::Arc; use tokio::net::TcpListener; @@ -35,8 +34,8 @@ pub struct McpInitConfig { pub enabled_tools: EnabledToolsConfig, /// Whether to enable mTLS for secure communication pub enable_mtls: bool, - /// Subagent configurations - pub subagent_configs: Option, + /// Whether to enable subagent tools + pub enable_subagents: bool, /// Optional list of allowed tool names (filters tools if specified) pub allowed_tools: Option>, } @@ -48,7 +47,7 @@ impl Default for McpInitConfig { privacy_mode: false, enabled_tools: EnabledToolsConfig { slack: false }, enable_mtls: true, - subagent_configs: None, + enable_subagents: true, allowed_tools: None, } } @@ -133,7 +132,7 @@ async fn start_mcp_server( let redact_secrets = mcp_config.redact_secrets; let privacy_mode = mcp_config.privacy_mode; let enabled_tools = mcp_config.enabled_tools.clone(); - let subagent_configs = mcp_config.subagent_configs.clone(); + let enable_subagents = mcp_config.enable_subagents; tokio::spawn(async move { let server_config = MCPServerConfig { @@ -143,7 +142,7 @@ async fn start_mcp_server( privacy_mode, enabled_tools, tool_mode: ToolMode::Combined, - subagent_configs, + enable_subagents, certificate_chain: cert_chain, }; diff --git a/cli/src/commands/agent/run/mode_async.rs b/cli/src/commands/agent/run/mode_async.rs 
index 08e0092d..44c5a13a 100644 --- a/cli/src/commands/agent/run/mode_async.rs +++ b/cli/src/commands/agent/run/mode_async.rs @@ -1,6 +1,6 @@ use crate::agent::run::helpers::system_message; use crate::commands::agent::run::helpers::{ - add_agents_md, add_local_context, add_rulebooks, add_subagents, tool_result, user_message, + add_agents_md, add_local_context, add_rulebooks, tool_result, user_message, }; use crate::commands::agent::run::mcp_init::{McpInitConfig, initialize_mcp_server_and_tools}; use crate::commands::agent::run::renderer::{OutputFormat, OutputRenderer}; @@ -15,7 +15,6 @@ use stakpak_mcp_server::EnabledToolsConfig; use stakpak_shared::local_store::LocalStore; use stakpak_shared::models::integrations::openai::ChatMessage; use stakpak_shared::models::llm::LLMTokenUsage; -use stakpak_shared::models::subagent::SubagentConfigs; use std::time::Instant; use uuid::Uuid; @@ -27,7 +26,7 @@ pub struct RunAsyncConfig { pub redact_secrets: bool, pub privacy_mode: bool, pub rulebooks: Option>, - pub subagent_configs: Option, + pub enable_subagents: bool, pub max_steps: Option, pub output_format: OutputFormat, pub allowed_tools: Option>, @@ -59,7 +58,7 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str privacy_mode: config.privacy_mode, enabled_tools: config.enabled_tools.clone(), enable_mtls: config.enable_mtls, - subagent_configs: config.subagent_configs.clone(), + enable_subagents: config.enable_subagents, allowed_tools: config.allowed_tools.clone(), }; let mcp_init_result = initialize_mcp_server_and_tools(&ctx, mcp_init_config, None).await?; @@ -144,9 +143,6 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str (user_input, None) }; - let (user_input, _subagents_text) = - add_subagents(&chat_messages, &user_input, &config.subagent_configs); - let user_input = if chat_messages.is_empty() && let Some(agents_md) = &config.agents_md { diff --git a/cli/src/commands/agent/run/mode_interactive.rs 
b/cli/src/commands/agent/run/mode_interactive.rs index 275a03f5..ad0ea206 100644 --- a/cli/src/commands/agent/run/mode_interactive.rs +++ b/cli/src/commands/agent/run/mode_interactive.rs @@ -4,7 +4,7 @@ use crate::commands::agent::run::checkpoint::{ get_checkpoint_messages, resume_session_from_checkpoint, }; use crate::commands::agent::run::helpers::{ - add_agents_md, add_local_context, add_rulebooks, add_subagents, refresh_billing_info, + add_agents_md, add_local_context, add_rulebooks, refresh_billing_info, tool_call_history_string, tool_result, user_message, }; use crate::commands::agent::run::mcp_init; @@ -27,7 +27,6 @@ use stakpak_shared::models::integrations::openai::{ ChatMessage, MessageContent, Role, ToolCall, ToolCallResultStatus, }; use stakpak_shared::models::llm::{LLMTokenUsage, PromptTokensDetails}; -use stakpak_shared::models::subagent::SubagentConfigs; use stakpak_shared::telemetry::{TelemetryEvent, capture_event}; use stakpak_tui::{InputEvent, LoadingOperation, OutputEvent}; use std::sync::Arc; @@ -114,7 +113,7 @@ pub struct RunInteractiveConfig { pub redact_secrets: bool, pub privacy_mode: bool, pub rulebooks: Option>, - pub subagent_configs: Option, + pub enable_subagents: bool, pub enable_mtls: bool, pub is_git_repo: bool, pub study_mode: bool, @@ -153,7 +152,7 @@ pub async fn run_interactive( let mut rulebooks = config.rulebooks.clone(); let mut all_available_rulebooks: Option> = None; let system_prompt = config.system_prompt.clone(); - let subagent_configs = config.subagent_configs.clone(); + let enable_subagents = config.enable_subagents; let agents_md = config.agents_md.clone(); let checkpoint_id = config.checkpoint_id.clone(); let allowed_tools = config.allowed_tools.clone(); @@ -268,7 +267,7 @@ pub async fn run_interactive( privacy_mode, enabled_tools: enabled_tools.clone(), enable_mtls, - subagent_configs: subagent_configs.clone(), + enable_subagents, allowed_tools: allowed_tools_for_tui.clone(), }; // Tools are already filtered by 
initialize_mcp_server_and_tools (same as async mode) @@ -404,9 +403,6 @@ pub async fn run_interactive( (user_input.to_string(), None::) }; - let (user_input, _) = - add_subagents(&messages, &user_input, &subagent_configs); - let user_input = if messages.is_empty() && let Some(agents_md_info) = &agents_md { diff --git a/cli/src/commands/mcp/server.rs b/cli/src/commands/mcp/server.rs index 0f173f63..9f9b3f81 100644 --- a/cli/src/commands/mcp/server.rs +++ b/cli/src/commands/mcp/server.rs @@ -56,7 +56,7 @@ pub async fn run_server( slack: enable_slack_tools, }, tool_mode, - subagent_configs: None, + enable_subagents: true, bind_address, certificate_chain: Arc::new(certificate_chain), }, diff --git a/cli/src/main.rs b/cli/src/main.rs index c5b9efad..3b94ce37 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,7 +3,6 @@ use names::{self, Name}; use rustls::crypto::CryptoProvider; use stakpak_api::{AgentClient, AgentClientConfig, AgentProvider}; use stakpak_mcp_server::EnabledToolsConfig; -use stakpak_shared::models::subagent::SubagentConfigs; use std::{ env, path::{Path, PathBuf}, @@ -98,10 +97,6 @@ struct Cli { #[arg(long = "disable-subagents", default_value_t = false)] disable_subagents: bool, - /// Subagent configuration file subagents.toml - #[arg(long = "subagent-config")] - subagent_config_path: Option, - /// Ignore AGENTS.md files (skip discovery and injection) #[arg(long = "ignore-agents-md", default_value_t = false)] ignore_agents_md: bool, @@ -133,8 +128,6 @@ struct Cli { command: Option, } -static DEFAULT_SUBAGENT_CONFIG: &str = include_str!("../subagents.toml"); - #[tokio::main] async fn main() { // Initialize rustls crypto provider @@ -335,25 +328,7 @@ async fn main() { let _ = update_result; let rulebooks = rulebooks_result; - let subagent_configs = if !cli.disable_subagents { - if let Some(subagent_config_path) = &cli.subagent_config_path { - SubagentConfigs::load_from_file(subagent_config_path) - .map_err(|e| { - eprintln!("Warning: Failed to load 
subagent configs: {}", e); - e - }) - .ok() - } else { - SubagentConfigs::load_from_str(DEFAULT_SUBAGENT_CONFIG) - .map_err(|e| { - eprintln!("Warning: Failed to load subagent configs: {}", e); - e - }) - .ok() - } - } else { - None - }; + let enable_subagents = !cli.disable_subagents; // match get_or_build_local_code_index(&config, None, cli.index_big_project) // .await @@ -449,7 +424,7 @@ async fn main() { redact_secrets: !cli.disable_secret_redaction, privacy_mode: cli.privacy_mode, rulebooks, - subagent_configs, + enable_subagents, max_steps, output_format: cli.output_format, enable_mtls: !cli.disable_mcp_mtls, @@ -475,7 +450,7 @@ async fn main() { redact_secrets: !cli.disable_secret_redaction, privacy_mode: cli.privacy_mode, rulebooks, - subagent_configs, + enable_subagents, enable_mtls: !cli.disable_mcp_mtls, is_git_repo: gitignore::is_git_repo(), study_mode: cli.study_mode, diff --git a/cli/subagents.toml b/cli/subagents.toml deleted file mode 100644 index 6c43b254..00000000 --- a/cli/subagents.toml +++ /dev/null @@ -1,55 +0,0 @@ -[subagents.ReadOnlyAgent] -description = """ -Fast Research and Discovery agent with read-only tools. -Perfect for tasks requiring filesystem exploration, knowledge base search, and web research. -Delagate research and discovery tasks to this agent. -""" -model = "eco" -max_steps = 30 -allowed_tools = [ - "stakpak__view", - "stakpak__search_docs", - "stakpak__search_memory", - "stakpak__read_rulebook", - "stakpak__paks__search_paks", - "stakpak__paks__get_pak_content", -] - -# [subagents.SandboxDiscoveryAgent] -# description = """ -# Research and Discovery agent with safe command execution in an isolated read-only environment. -# Perfect for tasks requiring system inspection, file analysis, or running diagnostic commands while maintaining safety. -# Includes read-only access to cloud credentials (AWS, GCP, Azure, DigitalOcean) and your working directory. 
-# This agent is slower than the regular DiscoveryAgent, but can run read-only commands. -# """ -# model = "eco" -# max_steps = 30 -# allowed_tools = [ -# "stakpak__run_command", -# "stakpak__view", -# "stakpak__search_docs", -# "stakpak__search_memory", -# "stakpak__read_rulebook", -# "stakpak__paks__search_paks", -# "stakpak__paks__get_pak_content", -# ] - -# [subagents.SandboxDiscoveryAgent.warden] -# enabled = true -# volumes = [ -# # stakpak config -# "~/.stakpak/config.toml:/home/agent/.stakpak/config.toml:ro", -# "~/.stakpak/data/local.db:/home/agent/.stakpak/data/local.db", -# "~/.agent-board/data.db:/home/agent/.agent-board/data.db", - -# # working directory -# "./:/agent:ro", -# "./.stakpak:/agent/.stakpak", - -# # cloud credentials -# "~/.aws:/home/agent/.aws:ro", -# "~/.config/gcloud:/home/agent/.config/gcloud:ro", -# "~/.digitalocean:/home/agent/.digitalocean:ro", -# "~/.azure:/home/agent/.azure:ro", -# "~/.kube:/home/agent/.kube:ro", -# ] diff --git a/libs/mcp/server/src/lib.rs b/libs/mcp/server/src/lib.rs index 7fbc7e3e..70b016fd 100644 --- a/libs/mcp/server/src/lib.rs +++ b/libs/mcp/server/src/lib.rs @@ -14,7 +14,6 @@ use tracing::error; use stakpak_api::AgentProvider; use stakpak_shared::cert_utils::CertificateChain; -use stakpak_shared::models::subagent::SubagentConfigs; use stakpak_shared::task_manager::{TaskManager, TaskManagerHandle}; pub mod integrations; @@ -126,7 +125,7 @@ pub struct MCPServerConfig { pub privacy_mode: bool, pub enabled_tools: EnabledToolsConfig, pub tool_mode: ToolMode, - pub subagent_configs: Option, + pub enable_subagents: bool, pub certificate_chain: Arc>, } @@ -229,7 +228,7 @@ fn build_tool_container( ToolMode::LocalOnly => { let mut tool_router = ToolContainer::tool_router_local(); - if config.subagent_configs.is_some() { + if config.enable_subagents { tool_router += ToolContainer::tool_router_subagent(); } @@ -239,7 +238,6 @@ fn build_tool_container( config.privacy_mode, config.enabled_tools.clone(), 
task_manager_handle.clone(), - config.subagent_configs.clone(), tool_router, ) } @@ -249,7 +247,7 @@ fn build_tool_container( tool_router += ToolContainer::tool_router_slack(); } - if config.subagent_configs.is_some() { + if config.enable_subagents { tool_router += ToolContainer::tool_router_subagent(); } @@ -259,7 +257,6 @@ fn build_tool_container( config.privacy_mode, config.enabled_tools.clone(), task_manager_handle.clone(), - config.subagent_configs.clone(), tool_router, ) } @@ -271,7 +268,7 @@ fn build_tool_container( tool_router += ToolContainer::tool_router_slack(); } - if config.subagent_configs.is_some() { + if config.enable_subagents { tool_router += ToolContainer::tool_router_subagent(); } @@ -281,7 +278,6 @@ fn build_tool_container( config.privacy_mode, config.enabled_tools.clone(), task_manager_handle.clone(), - config.subagent_configs.clone(), tool_router, ) } diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index e823d283..1aa2699c 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -5,117 +5,247 @@ use rmcp::{ ErrorData as McpError, RoleServer, handler::server::wrapper::Parameters, model::*, schemars, service::RequestContext, tool, tool_router, }; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use serde_json::json; use stakpak_shared::local_store::LocalStore; use uuid::Uuid; -#[derive(Debug, Deserialize, Serialize, schemars::JsonSchema, Clone)] -pub struct SubagentType(pub String); -impl std::fmt::Display for SubagentType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } +/// Model selection for dynamic subagents +#[derive(Debug, Clone, Deserialize, schemars::JsonSchema)] +#[serde(rename_all = "lowercase")] +pub enum SubagentModel { + /// Fast, cost-effective model for simple tasks + Eco, + /// More capable model for complex reasoning tasks + Smart, } -#[derive(Debug, Serialize, Deserialize)] 
-pub struct SubagentResult { - pub success: bool, - pub description: String, - pub agent_type: SubagentType, - pub steps_taken: usize, - pub artifacts_generated: Vec, - pub final_response: String, - pub execution_time_seconds: f64, - pub checkpoint_id: Option, +impl std::fmt::Display for SubagentModel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SubagentModel::Eco => write!(f, "eco"), + SubagentModel::Smart => write!(f, "smart"), + } + } } +/// Request for creating a dynamic subagent with full control over its configuration. +/// Based on the AOrchestra 4-tuple model: (Instruction, Context, Tools, Model) #[derive(Debug, Deserialize, schemars::JsonSchema)] -pub struct TaskRequest { +pub struct DynamicSubagentRequest { + /// A short (3-5 word) description of what the task accomplishes #[schemars(description = "A short (3-5 word) description of the task")] pub description: String, - #[schemars(description = "The task for the agent to perform")] - pub prompt: String, - #[schemars(description = "The type of specialized agent to use for this task")] - pub subagent_type: String, + + /// The task instruction - what the subagent should do (the "I" in the 4-tuple). + /// Should be specific, actionable, and include success criteria. + #[schemars( + description = "The task instruction specifying what the subagent should accomplish. Be specific and include success criteria." + )] + pub instruction: String, + + /// Curated context from previous work (the "C" in the 4-tuple). + /// Include: relevant findings, key artifacts/references, what didn't work. + /// Exclude: full conversation history, irrelevant tangents, raw tool outputs. + #[schemars( + description = "Curated context from previous attempts/findings. Include: relevant discoveries, key references (file paths, URLs, IDs), failed approaches to avoid. Keep concise - don't pass full history." + )] + pub context: Option, + + /// Tools to grant the subagent (the "T" in the 4-tuple). 
+ /// Follow least-privilege: only include tools necessary for the task. + /// Use tool names like: stakpak__view, stakpak__run_command, stakpak__search_docs, etc. + #[schemars( + description = "Array of tool names to grant the subagent. Follow least-privilege principle - only include tools necessary for the task. Examples: stakpak__view, stakpak__run_command, stakpak__search_docs, stakpak__str_replace" + )] + pub tools: Vec, + + /// Model to use (the "M" in the 4-tuple). + /// - eco: Fast, cost-effective for simple tasks (file reading, searches) + /// - smart: More capable for complex reasoning, multi-step analysis + #[schemars( + description = "Model selection: 'eco' for fast/discovery/exploratory/research tasks, 'smart' for complex reasoning" + )] + pub model: SubagentModel, + + /// Maximum steps the subagent can take (default: 30) + #[schemars(description = "Maximum steps the subagent can take (default: 30)")] + pub max_steps: Option, + + /// Enable sandbox mode using warden container isolation. + /// When enabled, the subagent runs in an isolated Docker container with: + /// - Read-only access to the current working directory + /// - Read-only access to cloud credentials (AWS, GCP, Azure, etc.) + /// - Network isolation and security policies + /// Use this when the subagent needs to run potentially unsafe commands. + #[schemars( + description = "Enable sandbox mode for isolated execution. Runs subagent in a warden container with read-only filesystem access and security policies. Recommended when using run_command tool." + )] + #[serde(default)] + pub enable_sandbox: bool, } #[tool_router(router = tool_router_subagent, vis = "pub")] impl ToolContainer { + /// Create and execute a dynamic subagent with full control over its configuration. + /// Based on the AOrchestra 4-tuple model: (Instruction, Context, Tools, Model) #[tool( - description = "Execute a task using a specialized subagent. 
This tool allows you to delegate specific tasks to specialized agents based on the task type and requirements. + description = "Create a dynamic subagent with full control over its configuration. This implements the AOrchestra 4-tuple model (Instruction, Context, Tools, Model) for on-demand agent specialization. PARAMETERS: -- description: A short (3-5 word) description of what the task accomplishes -- prompt: Detailed instructions for the agent to perform the task -- subagent_type: The type of specialized agent to use from the available options +- description: A short (3-5 word) description of the task +- instruction: What the subagent should do - be specific and include success criteria +- context: (Optional) Curated context from previous work - include relevant findings, key references, failed approaches +- tools: Array of tool names to grant (follow least-privilege - minimum required) +- model: 'eco' for simple tasks, 'smart' for complex reasoning +- max_steps: (Optional) Maximum steps, default 30 +- enable_sandbox: (Optional) Run in isolated warden container with security policies + +WHEN TO USE: +- When you need fine-grained control over subagent capabilities +- When passing context from previous attempts would help +- When the pre-defined subagent types don't fit your needs + +CONTEXT GUIDELINES (the key differentiator): +Include: +- Relevant findings from previous attempts ('Found that config is in /etc/app/config.yaml') +- Key references discovered (file paths, URLs, IDs, names) +- Failed approaches to avoid ('API v1 endpoint returned 404, use v2') +- Constraints or clarifications -USAGE: -Use this tool when you need to delegate a specific task to a specialized agent that can handle the requirements better than general-purpose processing. The subagent will execute the task according to the provided prompt and return the results. 
+Exclude: +- Full conversation history (causes context degradation) +- Raw tool outputs (summarize instead) +- Irrelevant tangents from other subtasks -The subagent runs asynchronously in the background. This tool returns immediately with a task ID that you can use to monitor progress and get results using the get_task_details and get_all_tasks tools." +TOOL SELECTION (least-privilege): +- Read-only tasks: stakpak__view, stakpak__search_docs +- Research tasks: stakpak__view, stakpak__search_docs, stakpak__view_web_page +- Code changes: stakpak__view, stakpak__str_replace, stakpak__create +- System tasks: stakpak__view, stakpak__run_command (use enable_sandbox=true for safety) + +SANDBOX MODE (enable_sandbox=true): +- Runs subagent in isolated Docker container via warden +- Read-only access to working directory and cloud credentials +- Recommended when using run_command tool for untrusted operations +- Adds ~5-10s startup overhead for container initialization + +The subagent runs asynchronously. Use get_task_details to monitor progress." )] - pub async fn subagent_task( + pub async fn dynamic_subagent_task( &self, ctx: RequestContext, - Parameters(TaskRequest { + Parameters(DynamicSubagentRequest { description, - prompt, - subagent_type, - }): Parameters, + instruction, + context, + tools, + model, + max_steps, + enable_sandbox, + }): Parameters, ) -> Result { + // Validate tools array is not empty + if tools.is_empty() { + return Ok(CallToolResult::error(vec![Content::text( + "VALIDATION_ERROR: tools array cannot be empty. 
Provide at least one tool for the subagent.", + )])); + } + let session_id = self.get_session_id(&ctx); - let subagent_command = - match self.build_subagent_command(&prompt, &subagent_type, session_id.as_deref()) { - Ok(command) => command, - Err(e) => { - return Ok(CallToolResult::error(vec![Content::text(format!( - "COMMAND_BUILD_FAILED: Failed to build subagent command: {}", - e - ))])); - } - }; + let max_steps = max_steps.unwrap_or(30); - // Start the subagent as a background task using existing task manager + // Build the dynamic subagent command + let subagent_command = match self.build_dynamic_subagent_command( + &instruction, + context.as_deref(), + &tools, + &model, + max_steps, + enable_sandbox, + session_id.as_deref(), + ) { + Ok(command) => command, + Err(e) => { + return Ok(CallToolResult::error(vec![Content::text(format!( + "COMMAND_BUILD_FAILED: Failed to build dynamic subagent command: {}", + e + ))])); + } + }; + + // Start the subagent as a background task let task_info = match self .get_task_manager() - .start_task(subagent_command, Some(description.clone()), None, None) // description, no timeout, no remote + .start_task(subagent_command, Some(description.clone()), None, None) .await { Ok(task_info) => task_info, Err(e) => { return Ok(CallToolResult::error(vec![Content::text(format!( - "TASK_START_FAILED: Failed to start subagent task: {}", + "TASK_START_FAILED: Failed to start dynamic subagent task: {}", e ))])); } }; + // Format tools list for display + let tools_display = tools.join(", "); + let context_display = context + .as_ref() + .map(|c| format!("\nContext: {} chars provided", c.len())) + .unwrap_or_default(); + let sandbox_display = if enable_sandbox { + "\nSandbox: enabled (warden isolation)" + } else { + "" + }; + Ok(CallToolResult::success(vec![Content::text(format!( - "🤖 Subagent Task Started\n\nTask ID: {}\nDescription: {}\nAgent Type: {}\nStatus: {:?}\n\nThe subagent is now running in the background. 
Use get_task_details to monitor progress and get results.", - task_info.id, description, subagent_type, task_info.status + "🤖 Dynamic Subagent Created\n\n\ + Task ID: {}\n\ + Description: {}\n\ + Model: {}\n\ + Tools: [{}]\n\ + Max Steps: {}{}{}\n\ + Status: {:?}\n\n\ + The subagent is now running in the background with the specified configuration.\n\ + Use get_task_details to monitor progress and get results.", + task_info.id, + description, + model, + tools_display, + max_steps, + context_display, + sandbox_display, + task_info.status ))])) } - fn build_subagent_command( + /// Build command for dynamic subagent with full 4-tuple configuration + fn build_dynamic_subagent_command( &self, - prompt: &str, - subagent_type: &str, + instruction: &str, + context: Option<&str>, + tools: &[String], + model: &SubagentModel, + max_steps: usize, + enable_sandbox: bool, session_id: Option<&str>, ) -> Result { - let subagent_config = if let Some(subagent_configs) = self.get_subagent_configs() { - subagent_configs.get_config(subagent_type) - } else { - None - } - .ok_or_else(|| { - McpError::internal_error( - "Unknown subagent type", - Some(json!({"subagent_type": subagent_type})), - ) - })?; + // Combine instruction and context into the prompt + let full_prompt = match context { + Some(ctx) if !ctx.is_empty() => { + format!( + "=== CONTEXT (from previous work) ===\n{}\n\n=== YOUR TASK ===\n{}", + ctx, instruction + ) + } + _ => instruction.to_string(), + }; + // Write prompt to file let prompt_filename = format!("prompt_{}.txt", Uuid::new_v4()); let prompt_subpath = match session_id { Some(sid) => Path::new(sid) @@ -128,31 +258,28 @@ The subagent runs asynchronously in the background. 
This tool returns immediatel .to_string_lossy() .to_string(), }; - let prompt_file_path = - LocalStore::write_session_data(&prompt_subpath, prompt).map_err(|e| { + + let prompt_file_path = LocalStore::write_session_data(&prompt_subpath, &full_prompt) + .map_err(|e| { McpError::internal_error( "Failed to create prompt file", Some(json!({"error": e.to_string()})), ) })?; + // Build the base stakpak command let mut command = format!( - r#"stakpak -a --prompt-file {} --max-steps {}"#, - prompt_file_path, subagent_config.max_steps + r#"stakpak -a --prompt-file {} --max-steps {} --model {}"#, + prompt_file_path, max_steps, model ); - // Add model flag if specified - if let Some(model) = &subagent_config.model { - command.push_str(&format!(" --model {}", model)); - } - - for tool in &subagent_config.allowed_tools { + // Add each tool + for tool in tools { command.push_str(&format!(" -t {}", tool)); } - if let Some(warden) = &subagent_config.warden - && warden.enabled - { + // If sandbox mode is enabled, wrap the command in warden + if enable_sandbox { let stakpak_image = format!( "ghcr.io/stakpak/agent-warden:v{}", env!("CARGO_PKG_VERSION") @@ -160,14 +287,43 @@ The subagent runs asynchronously in the background. 
This tool returns immediatel let mut warden_command = format!("stakpak warden run --image {}", stakpak_image); + // Mount the prompt file into the container let warden_prompt_path = format!("/tmp/{}", prompt_filename); - warden_command.push_str(&format!(" -v {}:{}", prompt_file_path, warden_prompt_path)); - for volume in &warden.volumes { - warden_command.push_str(&format!(" -v {}", volume)); + // Add default sandbox volumes for read-only access + // Working directory (read-only) + warden_command.push_str(" -v ./:/agent:ro"); + // Session data directory (read-write for subagent state) + warden_command.push_str(" -v ./.stakpak:/agent/.stakpak"); + + // Cloud credentials (read-only) - only mount if they exist + let cloud_volumes = [ + ("~/.aws", "/home/agent/.aws:ro"), + ("~/.config/gcloud", "/home/agent/.config/gcloud:ro"), + ("~/.azure", "/home/agent/.azure:ro"), + ("~/.kube", "/home/agent/.kube:ro"), + ]; + + for (host_path, container_path) in cloud_volumes { + // Expand ~ to home directory + let expanded_path = if host_path.starts_with("~/") { + if let Ok(home) = std::env::var("HOME") { + host_path.replacen("~", &home, 1) + } else { + continue; + } + } else { + host_path.to_string() + }; + + // Only add volume if the path exists + if Path::new(&expanded_path).exists() { + warden_command.push_str(&format!(" -v {}:{}", expanded_path, container_path)); + } } + // Wrap the stakpak command, replacing the prompt path with the container path command = format!( "{} '{}'", warden_command, diff --git a/libs/mcp/server/src/tool_container.rs b/libs/mcp/server/src/tool_container.rs index 3be45fbb..491148db 100644 --- a/libs/mcp/server/src/tool_container.rs +++ b/libs/mcp/server/src/tool_container.rs @@ -5,7 +5,6 @@ use rmcp::{ service::RequestContext, tool_router, }; use stakpak_api::AgentProvider; -use stakpak_shared::models::subagent::SubagentConfigs; use stakpak_shared::remote_connection::RemoteConnectionManager; use stakpak_shared::secret_manager::SecretManager; use 
stakpak_shared::task_manager::TaskManagerHandle; @@ -17,7 +16,6 @@ pub struct ToolContainer { pub secret_manager: SecretManager, pub task_manager: Arc, pub remote_connection_manager: Arc, - pub subagent_configs: Option, pub enabled_tools: EnabledToolsConfig, pub tool_router: ToolRouter, } @@ -30,7 +28,6 @@ impl ToolContainer { privacy_mode: bool, enabled_tools: EnabledToolsConfig, task_manager: Arc, - subagent_configs: Option, tool_router: ToolRouter, ) -> Result { Ok(Self { @@ -38,7 +35,6 @@ impl ToolContainer { secret_manager: SecretManager::new(redact_secrets, privacy_mode), task_manager, remote_connection_manager: Arc::new(RemoteConnectionManager::new()), - subagent_configs, enabled_tools, tool_router, }) @@ -60,10 +56,6 @@ impl ToolContainer { &self.remote_connection_manager } - pub fn get_subagent_configs(&self) -> &Option { - &self.subagent_configs - } - pub fn get_session_id(&self, ctx: &RequestContext) -> Option { ctx.meta .get("session_id") diff --git a/libs/shared/src/models/mod.rs b/libs/shared/src/models/mod.rs index 3f1da599..d125dc09 100644 --- a/libs/shared/src/models/mod.rs +++ b/libs/shared/src/models/mod.rs @@ -6,4 +6,3 @@ pub mod integrations; pub mod llm; pub mod model_pricing; pub mod stakai_adapter; -pub mod subagent; diff --git a/libs/shared/src/models/subagent.rs b/libs/shared/src/models/subagent.rs deleted file mode 100644 index f3b03205..00000000 --- a/libs/shared/src/models/subagent.rs +++ /dev/null @@ -1,146 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::Path; - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct WardenConfig { - pub enabled: bool, - #[serde(default)] - pub volumes: Vec, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct SubagentConfig { - pub description: String, - pub max_steps: usize, - pub allowed_tools: Vec, - /// Model to use for the subagent (e.g., "eco", "smart") - #[serde(default)] - pub model: Option, - #[serde(default)] - pub warden: 
Option, -} - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct SubagentConfigs { - pub subagents: HashMap, -} - -impl SubagentConfigs { - pub fn load_from_file>(path: P) -> Result> { - let content = std::fs::read_to_string(path)?; - Self::load_from_str(&content) - } - - pub fn load_from_str(content: &str) -> Result> { - let config: SubagentConfigs = toml::from_str(content)?; - Ok(config) - } - - pub fn get_available_types(&self) -> Vec { - self.subagents.keys().cloned().collect() - } - - pub fn get_config(&self, subagent_type: &str) -> Option<&SubagentConfig> { - self.subagents.get(subagent_type) - } - - pub fn format_for_context(&self) -> String { - if self.subagents.is_empty() { - "# No Subagents Available".to_string() - } else { - let subagents_text = self - .subagents - .iter() - .map(|(name, config)| { - format!( - " - Name: {}\n Description: {}\n Tools: {}", - name, - config.description, - config.allowed_tools.join(", ") - ) - }) - .collect::>() - .join("\n"); - - format!("# Available Subagents:\n\n{}", subagents_text) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_subagent_config_with_model() { - let content = r#" -[subagents.DiscoveryAgent] -description = "Test agent" -model = "eco" -max_steps = 30 -allowed_tools = ["stakpak__run_command", "stakpak__view"] - -[subagents.DiscoveryAgent.warden] -enabled = true -volumes = ["./:/agent:ro"] -"#; - let configs = - SubagentConfigs::load_from_str(content).expect("Failed to parse subagent config"); - - let agent = configs - .get_config("DiscoveryAgent") - .expect("DiscoveryAgent not found"); - - assert_eq!(agent.model, Some("eco".to_string())); - assert_eq!(agent.max_steps, 30); - assert_eq!( - agent.allowed_tools, - vec!["stakpak__run_command", "stakpak__view"] - ); - assert!(agent.warden.is_some()); - } - - #[test] - fn test_parse_subagent_config_without_model() { - let content = r#" -[subagents.BasicAgent] -description = "Agent without model" -max_steps = 10 
-allowed_tools = ["stakpak__view"] -"#; - let configs = - SubagentConfigs::load_from_str(content).expect("Failed to parse subagent config"); - - let agent = configs - .get_config("BasicAgent") - .expect("BasicAgent not found"); - - assert_eq!(agent.model, None); - assert_eq!(agent.max_steps, 10); - } - - #[test] - fn test_parse_default_subagents_toml() { - // Test parsing the actual default config - let content = include_str!("../../../../cli/subagents.toml"); - let configs = SubagentConfigs::load_from_str(content) - .expect("Failed to parse default subagents.toml"); - - let discovery = configs - .get_config("DiscoveryAgent") - .expect("DiscoveryAgent not found in default config"); - - assert_eq!(discovery.model, Some("eco".to_string())); - assert!( - discovery - .allowed_tools - .contains(&"stakpak__view".to_string()) - ); - assert!( - discovery - .allowed_tools - .contains(&"stakpak__search_docs".to_string()) - ); - } -} From f3a9350a4b5395f98de84e96c7ac5cf7e31b83ea Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 18:17:27 -0800 Subject: [PATCH 13/23] Tweak prompts and add model selection heuristics --- cli/src/commands/acp/server.rs | 1 + cli/src/commands/agent/run/mode_async.rs | 11 +++- .../commands/agent/run/mode_interactive.rs | 1 + cli/src/commands/agent/run/tooling.rs | 16 ++++- .../task_board_context/system_prompt.txt | 1 + libs/mcp/client/src/lib.rs | 9 +-- libs/mcp/server/src/subagent_tools.rs | 58 +++++++------------ 7 files changed, 51 insertions(+), 46 deletions(-) diff --git a/cli/src/commands/acp/server.rs b/cli/src/commands/acp/server.rs index f6adf81e..1cfbafff 100644 --- a/cli/src/commands/acp/server.rs +++ b/cli/src/commands/acp/server.rs @@ -975,6 +975,7 @@ impl StakpakAcpAgent { &tool_call, tool_cancel_rx, self.current_session_id.get(), + Some(self.model.read().await.id.clone()), ) .await .map_err(|e| { diff --git a/cli/src/commands/agent/run/mode_async.rs b/cli/src/commands/agent/run/mode_async.rs index 44c5a13a..7e9711f6 100644 
--- a/cli/src/commands/agent/run/mode_async.rs +++ b/cli/src/commands/agent/run/mode_async.rs @@ -275,8 +275,15 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str // Add timeout for tool execution let tool_execution = async { - run_tool_call(&mcp_client, &mcp_tools, tool_call, None, current_session_id) - .await + run_tool_call( + &mcp_client, + &mcp_tools, + tool_call, + None, + current_session_id, + Some(config.model.id.clone()), + ) + .await }; let result = match tokio::time::timeout( diff --git a/cli/src/commands/agent/run/mode_interactive.rs b/cli/src/commands/agent/run/mode_interactive.rs index ad0ea206..04a927d9 100644 --- a/cli/src/commands/agent/run/mode_interactive.rs +++ b/cli/src/commands/agent/run/mode_interactive.rs @@ -486,6 +486,7 @@ pub async fn run_interactive( &tool_call, Some(cancel_rx.resubscribe()), current_session_id, + Some(model.id.clone()), ) .await? } else { diff --git a/cli/src/commands/agent/run/tooling.rs b/cli/src/commands/agent/run/tooling.rs index af1f7ae3..1cb010f4 100644 --- a/cli/src/commands/agent/run/tooling.rs +++ b/cli/src/commands/agent/run/tooling.rs @@ -39,6 +39,7 @@ pub async fn run_tool_call( tool_call: &ToolCall, cancel_rx: Option>, session_id: Option, + model_id: Option, ) -> Result, String> { let tool_name = &tool_call.function.name; let tool_exists = tools.iter().any(|tool| tool.name == *tool_name); @@ -58,13 +59,26 @@ pub async fn run_tool_call( }; // Call tool and handle errors gracefully + let metadata = Some({ + let mut meta = serde_json::Map::new(); + if let Some(session_id) = session_id { + meta.insert( + "session_id".to_string(), + serde_json::Value::String(session_id.to_string()), + ); + } + if let Some(model_id) = model_id { + meta.insert("model_id".to_string(), serde_json::Value::String(model_id)); + } + meta + }); let handle = match stakpak_mcp_client::call_tool( mcp_client, CallToolRequestParam { name: tool_name.clone().into(), arguments, }, - session_id, + metadata, ) 
.await { diff --git a/libs/api/src/local/hooks/task_board_context/system_prompt.txt b/libs/api/src/local/hooks/task_board_context/system_prompt.txt index 58e779da..8f92affb 100644 --- a/libs/api/src/local/hooks/task_board_context/system_prompt.txt +++ b/libs/api/src/local/hooks/task_board_context/system_prompt.txt @@ -236,6 +236,7 @@ Delegate parallelizable tasks to subagents to save context and increase throughp - Request structured output (lists, summaries) for easy consumption - Batch related research into one subagent vs. many tiny tasks - Use for read-heavy work; keep writes in main agent +- Use principle of least-privilege when choosing subagent tools (only give them what they need, ESPECIALLY for potentially dangerous tools like run_command) # Tool Usage - Call tools directly when you have all required information diff --git a/libs/mcp/client/src/lib.rs b/libs/mcp/client/src/lib.rs index fda146de..90bdbb38 100644 --- a/libs/mcp/client/src/lib.rs +++ b/libs/mcp/client/src/lib.rs @@ -10,7 +10,6 @@ use stakpak_shared::cert_utils::CertificateChain; use stakpak_shared::models::integrations::openai::ToolCallResultProgress; use std::sync::Arc; use tokio::sync::mpsc::Sender; -use uuid::Uuid; mod local; @@ -74,14 +73,10 @@ pub async fn get_tools(client: &McpClient) -> Result> { pub async fn call_tool( client: &McpClient, params: CallToolRequestParam, - session_id: Option, + metadata: Option>, ) -> Result, String> { - let mut meta_map = serde_json::Map::new(); - if let Some(session_id) = session_id { - meta_map.insert("session_id".to_string(), serde_json::json!(session_id)); - } let options = PeerRequestOptions { - meta: Some(Meta(meta_map)), + meta: Some(Meta(metadata.unwrap_or_default())), ..Default::default() }; client diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index 1aa2699c..c836c09a 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -10,26 +10,6 @@ use 
serde_json::json; use stakpak_shared::local_store::LocalStore; use uuid::Uuid; - -/// Model selection for dynamic subagents -#[derive(Debug, Clone, Deserialize, schemars::JsonSchema)] -#[serde(rename_all = "lowercase")] -pub enum SubagentModel { - /// Fast, cost-effective model for simple tasks - Eco, - /// More capable model for complex reasoning tasks - Smart, -} - -impl std::fmt::Display for SubagentModel { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SubagentModel::Eco => write!(f, "eco"), - SubagentModel::Smart => write!(f, "smart"), - } - } -} - /// Request for creating a dynamic subagent with full control over its configuration. /// Based on the AOrchestra 4-tuple model: (Instruction, Context, Tools, Model) #[derive(Debug, Deserialize, schemars::JsonSchema)] @@ -61,14 +41,11 @@ pub struct DynamicSubagentRequest { )] pub tools: Vec, - /// Model to use (the "M" in the 4-tuple). - /// - eco: Fast, cost-effective for simple tasks (file reading, searches) - /// - smart: More capable for complex reasoning, multi-step analysis - #[schemars( - description = "Model selection: 'eco' for fast/discovery/exploratory/research tasks, 'smart' for complex reasoning" - )] - pub model: SubagentModel, - + // /// Model to use (the "M" in the 4-tuple). 
+ // #[schemars( + // description = "Model selection: small cheap models for fast/exploratory/research tasks or large more expensive models for complex reasoning" + // )] + // pub model_id: Option, /// Maximum steps the subagent can take (default: 30) #[schemars(description = "Maximum steps the subagent can take (default: 30)")] pub max_steps: Option, @@ -97,8 +74,7 @@ PARAMETERS: - description: A short (3-5 word) description of the task - instruction: What the subagent should do - be specific and include success criteria - context: (Optional) Curated context from previous work - include relevant findings, key references, failed approaches -- tools: Array of tool names to grant (follow least-privilege - minimum required) -- model: 'eco' for simple tasks, 'smart' for complex reasoning +- tools: Array of tool names to grant (follow least-privilege - minimum tools required) - max_steps: (Optional) Maximum steps, default 30 - enable_sandbox: (Optional) Run in isolated warden container with security policies @@ -120,10 +96,7 @@ Exclude: - Irrelevant tangents from other subtasks TOOL SELECTION (least-privilege): -- Read-only tasks: stakpak__view, stakpak__search_docs -- Research tasks: stakpak__view, stakpak__search_docs, stakpak__view_web_page -- Code changes: stakpak__view, stakpak__str_replace, stakpak__create -- System tasks: stakpak__view, stakpak__run_command (use enable_sandbox=true for safety) +- Always prefer read only tools / tasks for subagents SANDBOX MODE (enable_sandbox=true): - Runs subagent in isolated Docker container via warden @@ -141,7 +114,6 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." instruction, context, tools, - model, max_steps, enable_sandbox, }): Parameters, @@ -156,6 +128,20 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." 
let session_id = self.get_session_id(&ctx); let max_steps = max_steps.unwrap_or(30); + let model = if let Some(serde_json::Value::String(model_id)) = ctx.meta.get("model_id") { + if model_id.contains("claude-opus-4-6") { + model_id.replace("opus-4-6", "haiku-4-5") + } else if model_id.contains("claude-opus") { + model_id.replace("opus", "haiku") + } else if model_id.contains("claude-sonnet") { + model_id.replace("sonnet", "haiku") + } else { + model_id.clone() + } + } else { + "claude-haiku-4-5".to_string() + }; + // Build the dynamic subagent command let subagent_command = match self.build_dynamic_subagent_command( &instruction, @@ -229,7 +215,7 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." instruction: &str, context: Option<&str>, tools: &[String], - model: &SubagentModel, + model: &str, max_steps: usize, enable_sandbox: bool, session_id: Option<&str>, From af77a2742f43d36b25154857f33dd4e98c328725 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 18:27:54 -0800 Subject: [PATCH 14/23] Fix linter issues --- cli/src/commands/agent/run/helpers.rs | 1 - libs/mcp/server/src/subagent_tools.rs | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/commands/agent/run/helpers.rs b/cli/src/commands/agent/run/helpers.rs index 5db0d1a1..3e264142 100644 --- a/cli/src/commands/agent/run/helpers.rs +++ b/cli/src/commands/agent/run/helpers.rs @@ -131,7 +131,6 @@ pub fn add_rulebooks(user_input: &str, rulebooks: &[ListRuleBook]) -> (String, O (formatted_input, Some(rulebooks_text)) } - pub fn tool_call_history_string(tool_calls: &[ToolCallResult]) -> Option { if tool_calls.is_empty() { return None; diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index c836c09a..7ab922cc 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -55,6 +55,7 @@ pub struct DynamicSubagentRequest { /// - Read-only access to the current working directory /// 
- Read-only access to cloud credentials (AWS, GCP, Azure, etc.) /// - Network isolation and security policies + /// /// Use this when the subagent needs to run potentially unsafe commands. #[schemars( description = "Enable sandbox mode for isolated execution. Runs subagent in a warden container with read-only filesystem access and security policies. Recommended when using run_command tool." @@ -210,6 +211,7 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." } /// Build command for dynamic subagent with full 4-tuple configuration + #[allow(clippy::too_many_arguments)] fn build_dynamic_subagent_command( &self, instruction: &str, From edf4a1942f36f8adbcf0bc4269dc5bbb5baee3b3 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 5 Feb 2026 18:48:45 -0800 Subject: [PATCH 15/23] Tweak system prompt and tool --- .../task_board_context/system_prompt.txt | 88 ++++++++++++++----- libs/mcp/server/src/subagent_tools.rs | 16 ++-- 2 files changed, 74 insertions(+), 30 deletions(-) diff --git a/libs/api/src/local/hooks/task_board_context/system_prompt.txt b/libs/api/src/local/hooks/task_board_context/system_prompt.txt index 8f92affb..39a5696e 100644 --- a/libs/api/src/local/hooks/task_board_context/system_prompt.txt +++ b/libs/api/src/local/hooks/task_board_context/system_prompt.txt @@ -210,33 +210,77 @@ When providing solutions: # Subagents for Parallel Work -Delegate parallelizable tasks to subagents to save context and increase throughput. Subagent outputs are summarized—you won't see their internal reasoning or tool calls. +Delegate parallelizable tasks to subagents to save context and increase throughput. Subagent results are not visible to the user—summarize key findings in your response. 
## When to Use Subagents -- **Codebase exploration**: Multiple directories/modules to analyze simultaneously -- **Iterative research**: Searching docs, comparing options, gathering info from multiple sources -- **Bulk read operations**: Reading many files, checking multiple configs -- **Independent investigations**: Tasks that don't depend on each other's results +- **Parallel exploration**: Multiple directories, modules, or sources to analyze simultaneously +- **Iterative research**: Multi-round searches, doc lookups, comparing options +- **Open-ended searches**: When you're not confident you'll find the match quickly ## When NOT to Use Subagents - **Sequential dependencies**: When step N needs output from step N-1 -- **Mutating operations**: File writes, deployments, infrastructure changes (keep in main context) -- **Simple single lookups**: One file read or one doc search—overhead not worth it -- **Debugging with unknowns**: When you need to see full context to diagnose issues - -## Subagent Tradeoffs -| Benefit | Cost | -|---------|------| -| Saves main context tokens | Lose internal reasoning visibility | -| Parallel execution | Can't course-correct mid-task | -| Focused task completion | Must define clear, self-contained tasks | - -## Best Practices -- Write clear, self-contained prompts—subagent has no prior context -- Request structured output (lists, summaries) for easy consumption -- Batch related research into one subagent vs. 
many tiny tasks -- Use for read-heavy work; keep writes in main agent -- Use principle of least-privilege when choosing subagent tools (only give them what they need, ESPECIALLY for potentially dangerous tools like run_command) +- **Mutating operations**: File writes, deployments, infra changes (keep in main context) +- **Simple lookups**: One file read, one doc search, known file paths—just do it directly +- **Known patterns**: Searching for a specific class/function name—use grep/glob directly + +## Parallel Execution +Launch multiple independent subagents in a single message: +``` +[ + subagent: "Analyze frontend architecture in /src/web", + subagent: "Analyze backend API in /src/api", + subagent: "Review infra setup in /terraform" +] +``` + +## Tool Selection (Critical) +Subagents require explicit tool lists. **Apply principle of least-privilege**—only grant tools the task actually needs. + +**Read-only tools** (safe for research): +`view`, `search_docs`, `view_web_page`, `search_memory`, `read_rulebook`, `search_paks`, `get_pak_content` + +**Mutating tools** (grant sparingly): +`create`, `str_replace`, `remove`, `run_command`, `run_command_task` + +**Tool selection by task type:** +| Task | Tools | +|------|-------| +| Codebase exploration | `view` | +| Doc/web research | `view`, `search_docs`, `read_rulebook`, `view_web_page`, `search_paks`, `get_pak_content` | +| Write code | `view`, `create`, `str_replace`, `remove` | +| Write + validate | `view`, `str_replace`, `run_command` | +| Run diagnostics | `view`, `run_command` | + +**`run_command` is dangerous:** +- NEVER grant for pure research/reading tasks +- When granted, constrain in prompt: "Only run `terraform validate` and `terraform fmt`" +- Use `view` instead of shell commands like `cat`, `ls`, `find` +- Consider if `view` with grep/glob args can replace the command +- Consider enabling sandbox mode so you can run commands safely without risk of destructive operations + +## Writing Effective Prompts 
+Subagents have no prior context. Make prompts self-contained: + +**Bad:** "Find where the error is handled" + +**Good:** "Search /src/services for error handling patterns (try/catch, middleware, exception handlers). Return: 1) files with error handling, 2) main approach used, 3) coverage gaps." + +**Always include:** +- Specific paths or file patterns to search +- Expected output format (list, summary, table) +- Whether to research only OR write code + +## Example +``` +User: "How does auth work in this app?" + +# Parallel subagents with tools: [view] +1. "Find auth files in /src—login, session, JWT patterns. List files + purposes." +2. "Find auth middleware and route guards. Document the flow." +3. "Find auth config and related env vars." + +# Synthesize results into cohesive answer +``` # Tool Usage - Call tools directly when you have all required information diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index 7ab922cc..63520169 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -18,12 +18,12 @@ pub struct DynamicSubagentRequest { #[schemars(description = "A short (3-5 word) description of the task")] pub description: String, - /// The task instruction - what the subagent should do (the "I" in the 4-tuple). + /// The task instructions - what the subagent should do (the "I" in the 4-tuple). /// Should be specific, actionable, and include success criteria. #[schemars( - description = "The task instruction specifying what the subagent should accomplish. Be specific and include success criteria." + description = "The task instructions specifying what the subagent should accomplish. Be specific and include success criteria." )] - pub instruction: String, + pub instructions: String, /// Curated context from previous work (the "C" in the 4-tuple). /// Include: relevant findings, key artifacts/references, what didn't work. @@ -112,7 +112,7 @@ The subagent runs asynchronously. 
Use get_task_details to monitor progress." ctx: RequestContext, Parameters(DynamicSubagentRequest { description, - instruction, + instructions, context, tools, max_steps, @@ -145,7 +145,7 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." // Build the dynamic subagent command let subagent_command = match self.build_dynamic_subagent_command( - &instruction, + &instructions, context.as_deref(), &tools, &model, @@ -214,7 +214,7 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." #[allow(clippy::too_many_arguments)] fn build_dynamic_subagent_command( &self, - instruction: &str, + instructions: &str, context: Option<&str>, tools: &[String], model: &str, @@ -227,10 +227,10 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." Some(ctx) if !ctx.is_empty() => { format!( "=== CONTEXT (from previous work) ===\n{}\n\n=== YOUR TASK ===\n{}", - ctx, instruction + ctx, instructions ) } - _ => instruction.to_string(), + _ => instructions.to_string(), }; // Write prompt to file From fd058205eb3971df8f9bfb72dd7b098a5f327edb Mon Sep 17 00:00:00 2001 From: George Date: Fri, 6 Feb 2026 19:15:26 -0800 Subject: [PATCH 16/23] Add resume flow and approvals --- cli/src/commands/agent/run/mod.rs | 2 + cli/src/commands/agent/run/mode_async.rs | 436 ++++++++++++++++-- cli/src/commands/agent/run/pause.rs | 182 ++++++++ cli/src/main.rs | 69 ++- libs/mcp/server/src/local_tools.rs | 46 +- libs/mcp/server/src/subagent_tools.rs | 187 +++++++- libs/shared/src/models/integrations/openai.rs | 23 + libs/shared/src/task_manager.rs | 136 +++++- tui/src/app.rs | 6 + tui/src/services/approval_bar.rs | 7 +- tui/src/services/bash_block.rs | 400 ++++++++++++++++ tui/src/services/handlers/dialog.rs | 20 + tui/src/services/handlers/tool.rs | 31 ++ tui/src/services/message.rs | 69 +++ 14 files changed, 1569 insertions(+), 45 deletions(-) create mode 100644 cli/src/commands/agent/run/pause.rs diff --git 
a/cli/src/commands/agent/run/mod.rs b/cli/src/commands/agent/run/mod.rs index a4753ca5..aafa22b4 100644 --- a/cli/src/commands/agent/run/mod.rs +++ b/cli/src/commands/agent/run/mod.rs @@ -3,6 +3,7 @@ pub mod helpers; pub mod mcp_init; pub mod mode_async; pub mod mode_interactive; +pub mod pause; pub mod profile_switch; pub mod renderer; pub mod stream; @@ -11,4 +12,5 @@ pub mod tui; pub use mode_async::{RunAsyncConfig, run_async}; pub use mode_interactive::{RunInteractiveConfig, run_interactive}; +pub use pause::{AsyncOutcome, ResumeInput}; pub use renderer::OutputFormat; diff --git a/cli/src/commands/agent/run/mode_async.rs b/cli/src/commands/agent/run/mode_async.rs index 7e9711f6..7cd58f23 100644 --- a/cli/src/commands/agent/run/mode_async.rs +++ b/cli/src/commands/agent/run/mode_async.rs @@ -3,6 +3,10 @@ use crate::commands::agent::run::helpers::{ add_agents_md, add_local_context, add_rulebooks, tool_result, user_message, }; use crate::commands::agent::run::mcp_init::{McpInitConfig, initialize_mcp_server_and_tools}; +use crate::commands::agent::run::pause::{ + AsyncManifest, AsyncOutcome, PauseReason, PendingToolCall, ResumeInput, build_resume_hint, + detect_pending_tool_calls, write_pause_manifest, +}; use crate::commands::agent::run::renderer::{OutputFormat, OutputRenderer}; use crate::commands::agent::run::tooling::run_tool_call; use crate::config::AppConfig; @@ -13,8 +17,9 @@ use stakpak_api::{ }; use stakpak_mcp_server::EnabledToolsConfig; use stakpak_shared::local_store::LocalStore; -use stakpak_shared::models::integrations::openai::ChatMessage; +use stakpak_shared::models::integrations::openai::{ChatMessage, MessageContent, Role}; use stakpak_shared::models::llm::LLMTokenUsage; +use std::collections::HashMap; use std::time::Instant; use uuid::Uuid; @@ -35,17 +40,150 @@ pub struct RunAsyncConfig { pub enabled_tools: EnabledToolsConfig, pub model: Model, pub agents_md: Option, + /// When true, respect auto-approve config and pause when tools require 
approval. + pub pause_on_approval: bool, + /// Resume input (tool decisions or text prompt) when resuming from a paused checkpoint. + pub resume_input: Option, + /// Auto-approve tool overrides from profile config. + pub auto_approve_tools: Option>, } // All print functions have been moved to the renderer module and are no longer needed here -pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), String> { +/// Simple auto-approve policy for async mode. +/// Mirrors the TUI's AutoApprovePolicy without depending on the TUI crate. +#[derive(Debug, Clone, PartialEq)] +enum AsyncApprovePolicy { + Auto, + Prompt, + Never, +} + +/// Lightweight auto-approve config for async mode. +struct AsyncAutoApproveConfig { + enabled: bool, + default_policy: AsyncApprovePolicy, + tools: HashMap, +} + +impl AsyncAutoApproveConfig { + fn new(auto_approve_tools: Option<&Vec>) -> Self { + let mut tools = HashMap::new(); + + // Auto-approve tools (read-only, safe): + for name in &[ + "view", + "generate_password", + "search_docs", + "search_memory", + "read_rulebook", + "local_code_search", + "get_all_tasks", + "get_task_details", + "wait_for_tasks", + ] { + tools.insert(name.to_string(), AsyncApprovePolicy::Auto); + } + + // Prompt tools (mutating, require approval): + for name in &[ + "create", + "str_replace", + "generate_code", + "run_command", + "run_command_task", + "subagent_task", + "dynamic_subagent_task", + "cancel_task", + "remove", + ] { + tools.insert(name.to_string(), AsyncApprovePolicy::Prompt); + } + + // Apply profile overrides + if let Some(profile_tools) = auto_approve_tools { + for tool_name in profile_tools { + tools.insert(tool_name.clone(), AsyncApprovePolicy::Auto); + } + } + + // Try to load session config from disk + let config_path = std::path::Path::new(".stakpak/session/auto_approve.json"); + if config_path.exists() + && let Ok(content) = std::fs::read_to_string(config_path) + && let Ok(session_config) = serde_json::from_str::(&content) 
+ && let Some(session_tools) = session_config.get("tools").and_then(|t| t.as_object()) + { + for (name, policy_val) in session_tools { + // Don't override profile-specified tools + if auto_approve_tools + .map(|pt| pt.contains(name)) + .unwrap_or(false) + { + continue; + } + let policy = match policy_val.as_str() { + Some("Auto") => AsyncApprovePolicy::Auto, + Some("Never") => AsyncApprovePolicy::Never, + _ => AsyncApprovePolicy::Prompt, + }; + tools.insert(name.clone(), policy); + } + } + + AsyncAutoApproveConfig { + enabled: true, + default_policy: AsyncApprovePolicy::Prompt, + tools, + } + } + + /// Strip MCP server prefix from tool name (e.g., "stakpak__run_command" -> "run_command"). + fn strip_prefix(name: &str) -> &str { + if let Some(pos) = name.find("__") + && pos + 2 < name.len() + { + return &name[pos + 2..]; + } + name + } + + fn get_policy(&self, tool_name: &str) -> &AsyncApprovePolicy { + let stripped = Self::strip_prefix(tool_name); + self.tools.get(stripped).unwrap_or(&self.default_policy) + } + + fn should_auto_approve(&self, tool_name: &str) -> bool { + if !self.enabled { + return false; + } + matches!(self.get_policy(tool_name), AsyncApprovePolicy::Auto) + } + + /// Check if any tool in the batch requires approval (Prompt or Never policy). 
+ fn any_requires_approval(&self, tool_names: &[&str]) -> bool { + tool_names + .iter() + .any(|name| !self.should_auto_approve(name)) + } +} + +pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result { let start_time = Instant::now(); let mut llm_response_time = std::time::Duration::new(0, 0); let mut chat_messages: Vec = Vec::new(); let mut total_usage = LLMTokenUsage::default(); let renderer = OutputRenderer::new(config.output_format.clone(), config.verbose); + // Build auto-approve config if pause_on_approval is enabled + let auto_approve = if config.pause_on_approval { + Some(AsyncAutoApproveConfig::new( + config.auto_approve_tools.as_ref(), + )) + } else { + None + }; + print!("{}", renderer.render_title("Stakpak Agent - Async Mode")); print!( "{}", @@ -95,13 +233,14 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str let mut current_session_id: Option = None; let mut current_checkpoint_id: Option = None; + let mut prior_steps: usize = 0; // Load checkpoint messages if provided - if let Some(checkpoint_id_str) = config.checkpoint_id { + if let Some(checkpoint_id_str) = &config.checkpoint_id { let checkpoint_start = Instant::now(); // Parse checkpoint UUID - let checkpoint_uuid = Uuid::parse_str(&checkpoint_id_str) + let checkpoint_uuid = Uuid::parse_str(checkpoint_id_str) .map_err(|_| format!("Invalid checkpoint ID: {}", checkpoint_id_str))?; // Get checkpoint with session info @@ -109,6 +248,12 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str Ok(checkpoint) => { current_session_id = Some(checkpoint.session_id); current_checkpoint_id = Some(checkpoint_uuid); + prior_steps = checkpoint + .state + .messages + .iter() + .filter(|m| m.role == Role::Assistant) + .count(); chat_messages.extend(checkpoint.state.messages); } Err(e) => { @@ -123,13 +268,117 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str ); } + // Handle resume from paused state + if 
let Some(resume_input) = &config.resume_input { + let pending_tool_calls = detect_pending_tool_calls(&chat_messages); + + if !pending_tool_calls.is_empty() && resume_input.has_tool_decisions() { + // Resume with tool decisions + print!( + "{}", + renderer.render_info(&format!( + "Resuming with tool decisions for {} pending tool call(s)", + pending_tool_calls.len() + )) + ); + + for tool_call in &pending_tool_calls { + if resume_input.is_approved(&tool_call.id) { + // Execute approved tool + print!( + "{}", + renderer.render_tool_execution( + &tool_call.function.name, + &tool_call.function.arguments, + 0, + 1, + ) + ); + + let tool_execution = async { + run_tool_call( + &mcp_client, + &mcp_tools, + tool_call, + None, + current_session_id, + Some(config.model.id.clone()), + ) + .await + }; + + let result = match tokio::time::timeout( + std::time::Duration::from_secs(60 * 60), + tool_execution, + ) + .await + { + Ok(result) => result?, + Err(_) => { + let error_msg = format!( + "Tool '{}' timed out after 60 minutes", + tool_call.function.name + ); + print!("{}", renderer.render_error(&error_msg)); + chat_messages.push(tool_result(tool_call.id.clone(), error_msg)); + continue; + } + }; + + if let Some(result) = result { + let result_content = result + .content + .iter() + .map(|c| match c.raw.as_text() { + Some(text) => text.text.clone(), + None => String::new(), + }) + .collect::>() + .join("\n"); + + print!("{}", renderer.render_tool_result(&result_content)); + chat_messages.push(tool_result(tool_call.id.clone(), result_content)); + } else { + chat_messages + .push(tool_result(tool_call.id.clone(), "No result".to_string())); + } + } else { + // Reject tool + print!( + "{}", + renderer.render_info(&format!( + "Rejected tool call: {} ({})", + tool_call.function.name, tool_call.id + )) + ); + chat_messages.push(tool_result( + tool_call.id.clone(), + "TOOL_CALL_REJECTED".to_string(), + )); + } + } + } else if let Some(_prompt) = &resume_input.prompt { + // Resume with 
text input + print!("{}", renderer.render_info("Resuming with user input")); + // Don't add prompt here — it will be added below via the normal prompt path + } + } + if let Some(system_prompt) = config.system_prompt { chat_messages.insert(0, system_message(system_prompt)); print!("{}", renderer.render_info("System prompt loaded")); } - // Add user prompt if provided - if !config.prompt.is_empty() { + // Add user prompt if provided (and not resuming with tool decisions) + let should_add_prompt = if let Some(resume_input) = &config.resume_input { + // When resuming with tool decisions, don't add the prompt as a user message + // When resuming with text input, the prompt IS the resume text + !resume_input.has_tool_decisions() + } else { + true + }; + + if should_add_prompt && !config.prompt.is_empty() { let (user_input, _local_context) = add_local_context(&chat_messages, &config.prompt, &config.local_context, false) .await @@ -230,23 +479,28 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str print!("{}", renderer.render_step_header(step, tool_count)); + // Extract agent message content + let agent_message = + response.choices[0] + .message + .content + .as_ref() + .map(|content| match content { + MessageContent::String(s) => s.clone(), + MessageContent::Array(parts) => parts + .iter() + .filter_map(|part| part.text.as_ref()) + .map(|text| text.as_str()) + .filter(|text| !text.starts_with("")) + .collect::>() + .join("\n"), + }); + // Show assistant response - if let Some(content) = &response.choices[0].message.content { - let content_str = match content { - stakpak_shared::models::integrations::openai::MessageContent::String(s) => { - s.clone() - } - stakpak_shared::models::integrations::openai::MessageContent::Array(parts) => parts - .iter() - .filter_map(|part| part.text.as_ref()) - .map(|text| text.as_str()) - .filter(|text| !text.starts_with("")) - .collect::>() - .join("\n"), - }; - if !content_str.trim().is_empty() { - 
print!("{}", renderer.render_assistant_message(&content_str, false)); - } + if let Some(content_str) = &agent_message + && !content_str.trim().is_empty() + { + print!("{}", renderer.render_assistant_message(content_str, false)); } // Check if there are tool calls to execute @@ -260,7 +514,78 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str break; } - // Execute all tool calls + // Check if pause_on_approval is enabled and any tools require approval + if let Some(ref auto_approve_config) = auto_approve { + let tool_names: Vec<&str> = tool_calls + .iter() + .map(|tc| tc.function.name.as_str()) + .collect(); + + if auto_approve_config.any_requires_approval(&tool_names) { + // PAUSE: tools require approval + let pending: Vec = + tool_calls.iter().map(PendingToolCall::from).collect(); + + let checkpoint_id_str = current_checkpoint_id.map(|id| id.to_string()); + let session_id_str = current_session_id.map(|id| id.to_string()); + + let pause_reason = PauseReason::ToolApprovalRequired { + pending_tool_calls: pending, + }; + + let resume_hint = checkpoint_id_str + .as_ref() + .map(|cid| build_resume_hint(cid, &pause_reason)); + + let manifest = AsyncManifest { + outcome: "paused".to_string(), + checkpoint_id: checkpoint_id_str.clone(), + session_id: session_id_str.clone(), + model: config.model.id.clone(), + agent_message: agent_message.clone(), + steps: step, + total_steps: prior_steps + step, + usage: total_usage.clone(), + pause_reason: Some(pause_reason.clone()), + resume_hint, + }; + + // Write pause manifest + if let Err(e) = write_pause_manifest(&manifest) { + print!( + "{}", + renderer + .render_warning(&format!("Failed to write pause manifest: {}", e)) + ); + } + + // Output JSON to stdout if in JSON mode + if config.output_format == OutputFormat::Json + && let Ok(json) = serde_json::to_string_pretty(&manifest) + { + println!("{}", json); + } + + print!( + "{}", + renderer.render_info("Agent paused - tools require approval") + ); 
+ + // Shutdown MCP + let _ = server_shutdown_tx.send(()); + let _ = proxy_shutdown_tx.send(()); + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + + return Ok(AsyncOutcome::Paused { + checkpoint_id: checkpoint_id_str, + session_id: session_id_str, + pause_reason, + agent_message, + }); + } + } + + // Execute all tool calls (either auto-approved or pause_on_approval is disabled) for (i, tool_call) in tool_calls.iter().enumerate() { // Print tool start with arguments print!( @@ -342,6 +667,27 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str let elapsed = start_time.elapsed(); let tool_execution_time = elapsed.saturating_sub(llm_response_time); + // Build completion output + let checkpoint_id_str = current_checkpoint_id.map(|id| id.to_string()); + let session_id_str = current_session_id.map(|id| id.to_string()); + + // Extract final message + let final_message = chat_messages + .iter() + .rev() + .find(|m| m.role == Role::Assistant) + .and_then(|m| m.content.as_ref()) + .map(|content| match content { + MessageContent::String(s) => s.clone(), + MessageContent::Array(parts) => parts + .iter() + .filter_map(|part| part.text.as_ref()) + .map(|text| text.as_str()) + .filter(|text| !text.starts_with("")) + .collect::>() + .join("\n"), + }); + // Use generic renderer functions to build the completion output print!("{}", renderer.render_section_break()); print!("{}", renderer.render_title("Execution Summary")); @@ -377,11 +723,30 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str ) ); - print!("{}", renderer.render_final_completion(&chat_messages)); - println!(); + // Output JSON completion manifest if in JSON mode + if config.output_format == OutputFormat::Json { + let manifest = AsyncManifest { + outcome: "completed".to_string(), + checkpoint_id: checkpoint_id_str.clone(), + session_id: session_id_str.clone(), + model: config.model.id.clone(), + agent_message: final_message.clone(), + 
steps: step, + total_steps: prior_steps + step, + usage: total_usage.clone(), + pause_reason: None, + resume_hint: None, + }; + if let Ok(json) = serde_json::to_string_pretty(&manifest) { + println!("{}", json); + } + } else { + print!("{}", renderer.render_final_completion(&chat_messages)); + println!(); - // Print token usage at the end - print!("{}", renderer.render_token_usage_stats(&total_usage)); + // Print token usage at the end + print!("{}", renderer.render_token_usage_stats(&total_usage)); + } // Save conversation to file let conversation_json = serde_json::to_string_pretty(&chat_messages).unwrap_or_default(); @@ -423,7 +788,9 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str } // Print resume command - println!("\nTo resume, run:\nstakpak -c {}\n", checkpoint_id); + if config.output_format != OutputFormat::Json { + println!("\nTo resume, run:\nstakpak -c {}\n", checkpoint_id); + } } else { print!( "{}", @@ -432,7 +799,9 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str } // Print session ID if available - if let Some(session_id) = current_session_id { + if let Some(session_id) = current_session_id + && config.output_format != OutputFormat::Json + { println!("Session ID: {}", session_id); } @@ -447,5 +816,12 @@ pub async fn run_async(ctx: AppConfig, config: RunAsyncConfig) -> Result<(), Str tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; print!("{}", renderer.render_success("Shutdown complete")); - Ok(()) + let outcome: AsyncOutcome = AsyncOutcome::Completed { + checkpoint_id: checkpoint_id_str, + session_id: session_id_str, + agent_message: final_message, + steps: step - 1, + }; + + Ok(outcome) } diff --git a/cli/src/commands/agent/run/pause.rs b/cli/src/commands/agent/run/pause.rs new file mode 100644 index 00000000..1da9d222 --- /dev/null +++ b/cli/src/commands/agent/run/pause.rs @@ -0,0 +1,182 @@ +use serde::{Deserialize, Serialize}; +use 
stakpak_shared::local_store::LocalStore; +use stakpak_shared::models::integrations::openai::{ChatMessage, Role, ToolCall}; +use stakpak_shared::models::llm::LLMTokenUsage; +use std::collections::HashSet; + +/// Exit code indicating the agent has paused and needs input or approval to resume. +pub const EXIT_CODE_PAUSED: i32 = 10; + +/// The outcome of an async agent run. +#[allow(dead_code)] +#[derive(Debug, Clone)] +pub enum AsyncOutcome { + /// Agent completed successfully. + Completed { + checkpoint_id: Option, + session_id: Option, + agent_message: Option, + steps: usize, + }, + /// Agent paused and needs input or approval. + Paused { + checkpoint_id: Option, + session_id: Option, + pause_reason: PauseReason, + agent_message: Option, + }, + /// Agent failed. + Failed { error: String }, +} + +/// Why the agent paused. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type")] +pub enum PauseReason { + /// One or more tool calls require approval before execution. + #[serde(rename = "tool_approval_required")] + ToolApprovalRequired { + pending_tool_calls: Vec, + }, + /// The agent responded with text only (asking a question or requesting input). + #[serde(rename = "input_required")] + InputRequired, +} + +/// A tool call pending approval. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PendingToolCall { + pub id: String, + pub name: String, + pub arguments: serde_json::Value, +} + +impl From<&ToolCall> for PendingToolCall { + fn from(tc: &ToolCall) -> Self { + let arguments = serde_json::from_str(&tc.function.arguments) + .unwrap_or(serde_json::Value::String(tc.function.arguments.clone())); + PendingToolCall { + id: tc.id.clone(), + name: tc.function.name.clone(), + arguments, + } + } +} + +/// Unified JSON output for async agent runs (both pause and completion). +/// All fields are always present for consistent parsing. 
/// Resume input provided via CLI flags when resuming from a paused checkpoint.
#[derive(Debug, Clone, Default)]
pub struct ResumeInput {
    /// Tool call IDs explicitly approved.
    pub approved: HashSet<String>,
    /// Tool call IDs explicitly rejected.
    pub rejected: HashSet<String>,
    /// Approve all pending tool calls.
    pub approve_all: bool,
    /// Reject all pending tool calls.
    pub reject_all: bool,
    /// Text prompt for input-required pauses.
    pub prompt: Option<String>,
}

impl ResumeInput {
    /// Returns true if this resume input contains any tool decisions.
    pub fn has_tool_decisions(&self) -> bool {
        self.approve_all
            || self.reject_all
            || !self.approved.is_empty()
            || !self.rejected.is_empty()
    }

    /// Determine whether a specific tool call ID should be approved.
    ///
    /// Precedence, highest first:
    /// 1. An ID listed in `rejected` is never approved, even if it is also
    ///    listed in `approved` or `approve_all` is set (fail closed on
    ///    conflicting input).
    /// 2. `approve_all` approves everything else.
    /// 3. `reject_all` rejects everything else.
    /// 4. Otherwise only IDs listed in `approved` are approved; unspecified
    ///    tools are rejected (per design decision).
    pub fn is_approved(&self, tool_call_id: &str) -> bool {
        // An explicit rejection always wins so that contradictory flags can
        // never result in a tool being executed.
        if self.rejected.contains(tool_call_id) {
            return false;
        }
        if self.approve_all {
            return true;
        }
        if self.reject_all {
            return false;
        }
        self.approved.contains(tool_call_id)
    }
}
+pub fn build_resume_hint(checkpoint_id: &str, pause_reason: &PauseReason) -> String { + match pause_reason { + PauseReason::ToolApprovalRequired { pending_tool_calls } => { + if pending_tool_calls.len() == 1 { + format!( + "stakpak -c {} --approve {}", + checkpoint_id, pending_tool_calls[0].id + ) + } else { + format!("stakpak -c {} --approve-all", checkpoint_id) + } + } + PauseReason::InputRequired => { + format!("stakpak -c {} \"your input here\"", checkpoint_id) + } + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs index 265b607f..55246a6a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -20,7 +20,10 @@ use commands::{ Commands, agent::{ self, - run::{OutputFormat, RunAsyncConfig, RunInteractiveConfig}, + run::{ + AsyncOutcome, OutputFormat, ResumeInput, RunAsyncConfig, RunInteractiveConfig, + pause::EXIT_CODE_PAUSED, + }, }, }; use config::{AppConfig, ModelsCache}; @@ -97,6 +100,26 @@ struct Cli { #[arg(long = "disable-subagents", default_value_t = false)] disable_subagents: bool, + /// Pause when tools require approval (async mode only) + #[arg(long = "pause-on-approval", default_value_t = false)] + pause_on_approval: bool, + + /// Approve a specific tool call by ID when resuming (can be repeated) + #[arg(long = "approve", action = clap::ArgAction::Append)] + approve: Option>, + + /// Reject a specific tool call by ID when resuming (can be repeated) + #[arg(long = "reject", action = clap::ArgAction::Append)] + reject: Option>, + + /// Approve all pending tool calls when resuming + #[arg(long = "approve-all", default_value_t = false)] + approve_all: bool, + + /// Reject all pending tool calls when resuming + #[arg(long = "reject-all", default_value_t = false)] + reject_all: bool, + /// Ignore AGENTS.md files (skip discovery and injection) #[arg(long = "ignore-agents-md", default_value_t = false)] ignore_agents_md: bool, @@ -383,7 +406,9 @@ async fn main() { let prompt = if let Some(prompt_file_path) = &cli.prompt_file { match 
std::fs::read_to_string(prompt_file_path) { Ok(content) => { - println!("📖 Reading prompt from file: {}", prompt_file_path); + if cli.output_format != OutputFormat::Json { + println!("📖 Reading prompt from file: {}", prompt_file_path); + } content.trim().to_string() } Err(e) => { @@ -418,7 +443,7 @@ async fn main() { let result = match use_async_mode { // Async mode: run continuously until no more tool calls (or max_steps=1 for single-step) true => { - agent::run::run_async( + let async_result = agent::run::run_async( config, RunAsyncConfig { prompt, @@ -439,9 +464,45 @@ async fn main() { }, model: default_model.clone(), agents_md: agents_md.clone(), + pause_on_approval: cli.pause_on_approval, + resume_input: if cli.approve.is_some() + || cli.reject.is_some() + || cli.approve_all + || cli.reject_all + { + Some(ResumeInput { + approved: cli + .approve + .unwrap_or_default() + .into_iter() + .collect(), + rejected: cli + .reject + .unwrap_or_default() + .into_iter() + .collect(), + approve_all: cli.approve_all, + reject_all: cli.reject_all, + prompt: None, + }) + } else { + None + }, + auto_approve_tools: None, }, ) - .await + .await; + + // Handle AsyncOutcome → exit code + match async_result { + Ok(AsyncOutcome::Paused { .. }) => { + cache_task.abort(); + std::process::exit(EXIT_CODE_PAUSED); + } + Ok(AsyncOutcome::Completed { .. 
}) => Ok(()), + Ok(AsyncOutcome::Failed { error }) => Err(error), + Err(e) => Err(e), + } } // Interactive mode: run in TUI diff --git a/libs/mcp/server/src/local_tools.rs b/libs/mcp/server/src/local_tools.rs index cd9c4dbd..06b1afdb 100644 --- a/libs/mcp/server/src/local_tools.rs +++ b/libs/mcp/server/src/local_tools.rs @@ -19,7 +19,7 @@ use serde_json::json; use similar::TextDiff; use stakpak_shared::models::integrations::mcp::CallToolResultExt; use stakpak_shared::models::integrations::openai::{ - ProgressType, TaskUpdate, ToolCallResultProgress, + PendingToolCall, ProgressType, TaskPauseInfo, TaskUpdate, ToolCallResultProgress, }; use stakpak_shared::task_manager::TaskInfo; use stakpak_shared::tls_client::{TlsClientConfig, create_tls_client}; @@ -2728,6 +2728,49 @@ SAFETY NOTES: } }); + // Extract pause info if task is paused + let pause_info = t.pause_info.as_ref().and_then(|pi| { + pi.raw_output.as_ref().and_then(|raw| { + // Parse the JSON output to extract agent_message and pending_tool_calls + serde_json::from_str::(raw) + .ok() + .and_then(|json| { + let agent_message = json + .get("agent_message") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let pending_tool_calls = json + .get("pause_reason") + .and_then(|pr| pr.get("pending_tool_calls")) + .and_then(|ptc| ptc.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|tc| { + Some(PendingToolCall { + id: tc.get("id")?.as_str()?.to_string(), + name: tc + .get("name")? + .as_str()? 
+ .to_string(), + arguments: tc.get("arguments").cloned(), + }) + }) + .collect() + }); + + if agent_message.is_some() || pending_tool_calls.is_some() { + Some(TaskPauseInfo { + agent_message, + pending_tool_calls, + }) + } else { + None + } + }) + }) + }); + TaskUpdate { task_id: t.id.clone(), status: format!("{:?}", t.status), @@ -2735,6 +2778,7 @@ SAFETY NOTES: duration_secs, output_preview, is_target: true, + pause_info, } }) .collect(); diff --git a/libs/mcp/server/src/subagent_tools.rs b/libs/mcp/server/src/subagent_tools.rs index 63520169..30ee6500 100644 --- a/libs/mcp/server/src/subagent_tools.rs +++ b/libs/mcp/server/src/subagent_tools.rs @@ -1,3 +1,4 @@ +use std::env; use std::path::Path; use crate::tool_container::ToolContainer; @@ -8,6 +9,7 @@ use rmcp::{ use serde::Deserialize; use serde_json::json; use stakpak_shared::local_store::LocalStore; +use tracing::error; use uuid::Uuid; /// Request for creating a dynamic subagent with full control over its configuration. @@ -64,6 +66,41 @@ pub struct DynamicSubagentRequest { pub enable_sandbox: bool, } +/// Request for resuming a paused or completed subagent task. +#[derive(Debug, Deserialize, schemars::JsonSchema)] +pub struct ResumeSubagentTaskRequest { + #[schemars(description = "The task ID of the paused subagent task to resume")] + pub task_id: String, + #[schemars( + description = "Tool call IDs to approve (e.g., [\"tc_1\", \"tc_2\"]). Unspecified tool calls are rejected." 
+ )] + pub approve: Option>, + #[schemars(description = "Tool call IDs to explicitly reject (e.g., [\"tc_3\"])")] + pub reject: Option>, + #[schemars( + description = "Approve all pending tool calls (overrides individual approve/reject)" + )] + pub approve_all: Option, + #[schemars(description = "Reject all pending tool calls")] + pub reject_all: Option, + #[schemars( + description = "Text input to provide when the subagent paused for input (input_required pause reason)" + )] + pub input: Option, +} + +/// Get the current executable path for spawning subagents +fn get_current_exe() -> Result { + env::current_exe() + .map_err(|e| { + McpError::internal_error( + "Failed to get current executable path", + Some(json!({"error": e.to_string()})), + ) + }) + .map(|p| p.to_string_lossy().to_string()) +} + #[tool_router(router = tool_router_subagent, vis = "pub")] impl ToolContainer { /// Create and execute a dynamic subagent with full control over its configuration. @@ -210,6 +247,144 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." ))])) } + /// Resume a paused or completed subagent task with approval decisions or follow-up input. + #[tool( + description = "Resume a paused or completed subagent task. Subagents pause when they need tool approval or user input. + +PARAMETERS: +- task_id: The task ID of the paused subagent +- approve: List of tool call IDs to approve +- reject: List of tool call IDs to reject +- approve_all: Approve all pending tool calls +- reject_all: Reject all pending tool calls +- input: Text input to continue the conversation (for input_required pauses or completed tasks) + +WORKFLOW: +1. Start subagent: dynamic_subagent_task — subagents automatically pause on tool approval +2. Monitor with get_task_details — check for status 'Paused' or 'Completed' +3. Read pause_info.raw_output to see pending_tool_calls or the agent's message +4. Resume with approval decisions or follow-up input +5. 
The subagent continues execution from where it stopped + +NOTES: +- Works on tasks with status 'Paused' or 'Completed' +- The checkpoint ID is automatically extracted from the task's internal state +- For tool_approval_required pauses: use approve/reject/approve_all/reject_all +- For input_required pauses or completed tasks: use the input parameter +- Unspecified tool calls are rejected by default" + )] + pub async fn resume_subagent_task( + &self, + Parameters(ResumeSubagentTaskRequest { + task_id, + approve, + reject, + approve_all, + reject_all, + input, + }): Parameters, + ) -> Result { + // Look up the paused task to extract checkpoint_id from pause_info + let task_info = self + .get_task_manager() + .get_task_details(task_id.clone()) + .await + .map_err(|e| { + McpError::internal_error( + "Failed to get task details", + Some(json!({"error": e.to_string()})), + ) + })? + .ok_or_else(|| { + McpError::invalid_params("Task not found", Some(json!({"task_id": task_id}))) + })?; + + if !matches!( + task_info.status, + stakpak_shared::task_manager::TaskStatus::Paused + | stakpak_shared::task_manager::TaskStatus::Completed + ) { + return Ok(CallToolResult::error(vec![ + Content::text("RESUME_TASK_ERROR"), + Content::text(format!( + "Task '{}' cannot be resumed (status: {:?}). 
Only paused or completed tasks can be resumed.", + task_id, task_info.status + )), + ])); + } + + let checkpoint_id = task_info + .pause_info + .as_ref() + .and_then(|pi| pi.checkpoint_id.as_ref()) + .ok_or_else(|| { + McpError::internal_error( + "Paused task has no checkpoint ID in pause_info", + Some(json!({"task_id": task_id})), + ) + })?; + + // Get the current executable path for resuming + let current_exe = get_current_exe()?; + + // Build the stakpak CLI command for resuming + let mut command = format!("{} -a --output json -c {}", current_exe, checkpoint_id); + + if approve_all.unwrap_or(false) { + command.push_str(" --approve-all"); + } + if reject_all.unwrap_or(false) { + command.push_str(" --reject-all"); + } + if let Some(approve_ids) = &approve { + for id in approve_ids { + command.push_str(&format!(" --approve {}", id)); + } + } + if let Some(reject_ids) = &reject { + for id in reject_ids { + command.push_str(&format!(" --reject {}", id)); + } + } + if let Some(input_text) = &input { + // Write input to a temp file and pass via --prompt-file to avoid shell escaping issues + let input_filename = format!("resume_input_{}.txt", Uuid::new_v4()); + match LocalStore::write_session_data( + &format!("subagents/{}", input_filename), + input_text, + ) { + Ok(path) => { + command.push_str(&format!(" --prompt-file {}", path)); + } + Err(e) => { + return Ok(CallToolResult::error(vec![ + Content::text("RESUME_TASK_ERROR"), + Content::text(format!("Failed to write input file: {}", e)), + ])); + } + } + } + + match self + .get_task_manager() + .resume_task(task_id.clone(), command.clone()) + .await + { + Ok(task_info) => Ok(CallToolResult::success(vec![Content::text(format!( + "🤖 Subagent Task Resumed\n\nTask ID: {}\nStatus: {:?}\n\nThe subagent is now running. 
Use get_task_details to monitor progress.", + task_info.id, task_info.status + ))])), + Err(e) => { + error!("Failed to resume subagent task: {}", e); + + Ok(CallToolResult::error(vec![ + Content::text("RESUME_TASK_ERROR"), + Content::text(format!("Failed to resume subagent task: {}", e)), + ])) + } + } + } + /// Build command for dynamic subagent with full 4-tuple configuration #[allow(clippy::too_many_arguments)] fn build_dynamic_subagent_command( @@ -255,10 +430,13 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." ) })?; - // Build the base stakpak command + // Get the current executable path to use for subagent + let current_exe = get_current_exe()?; + + // Build the base stakpak command using current executable let mut command = format!( - r#"stakpak -a --prompt-file {} --max-steps {} --model {}"#, - prompt_file_path, max_steps, model + r#"{} -a --pause-on-approval --output json --prompt-file {} --max-steps {} --model {}"#, + current_exe, prompt_file_path, max_steps, model ); // Add each tool @@ -273,7 +451,8 @@ The subagent runs asynchronously. Use get_task_details to monitor progress." 
env!("CARGO_PKG_VERSION") ); - let mut warden_command = format!("stakpak warden run --image {}", stakpak_image); + let mut warden_command = + format!("{} warden run --image {}", current_exe, stakpak_image); // Mount the prompt file into the container let warden_prompt_path = format!("/tmp/{}", prompt_filename); diff --git a/libs/shared/src/models/integrations/openai.rs b/libs/shared/src/models/integrations/openai.rs index 89a59cb0..145157eb 100644 --- a/libs/shared/src/models/integrations/openai.rs +++ b/libs/shared/src/models/integrations/openai.rs @@ -585,6 +585,29 @@ pub struct TaskUpdate { /// Whether this is a target task being waited on #[serde(default)] pub is_target: bool, + /// Pause information for paused subagent tasks + #[serde(skip_serializing_if = "Option::is_none")] + pub pause_info: Option, +} + +/// Pause information for subagent tasks awaiting approval +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct TaskPauseInfo { + /// The agent's message before pausing + #[serde(skip_serializing_if = "Option::is_none")] + pub agent_message: Option, + /// Pending tool calls awaiting approval + #[serde(skip_serializing_if = "Option::is_none")] + pub pending_tool_calls: Option>, +} + +/// A pending tool call awaiting approval +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct PendingToolCall { + pub id: String, + pub name: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub arguments: Option, } // ============================================================================= diff --git a/libs/shared/src/task_manager.rs b/libs/shared/src/task_manager.rs index 6531c2eb..2c797888 100644 --- a/libs/shared/src/task_manager.rs +++ b/libs/shared/src/task_manager.rs @@ -86,6 +86,7 @@ pub enum TaskStatus { Failed, Cancelled, TimedOut, + Paused, } #[derive(Debug, Clone)] @@ -100,6 +101,7 @@ pub struct Task { pub start_time: DateTime, pub duration: Option, pub timeout: Option, + pub pause_info: Option, } pub struct 
TaskEntry { @@ -118,6 +120,7 @@ pub struct TaskInfo { pub output: Option, pub start_time: DateTime, pub duration: Option, + pub pause_info: Option, } impl From<&Task> for TaskInfo { @@ -143,6 +146,7 @@ impl From<&Task> for TaskInfo { output: task.output.clone(), start_time: task.start_time, duration, + pause_info: task.pause_info.clone(), } } } @@ -153,6 +157,12 @@ pub struct TaskCompletion { pub final_status: TaskStatus, } +#[derive(Debug, Clone, serde::Serialize)] +pub struct PauseInfo { + pub checkpoint_id: Option, + pub raw_output: Option, +} + #[derive(Debug, thiserror::Error)] pub enum TaskError { #[error("Task not found: {0}")] @@ -169,6 +179,8 @@ pub enum TaskError { TaskCancelled, #[error("Task failed on start: {0}")] TaskFailedOnStart(String), + #[error("Task not paused: {0}")] + TaskNotPaused(TaskId), } pub enum TaskMessage { @@ -206,6 +218,11 @@ pub enum TaskMessage { id: TaskId, output: String, }, + Resume { + id: TaskId, + command: String, + response_tx: oneshot::Sender>, + }, } pub struct TaskManager { @@ -319,8 +336,8 @@ impl TaskManager { } TaskMessage::TaskUpdate { id, completion } => { if let Some(entry) = self.tasks.get_mut(&id) { - entry.task.status = completion.final_status; - entry.task.output = Some(completion.output); + entry.task.status = completion.final_status.clone(); + entry.task.output = Some(completion.output.clone()); entry.task.error = completion.error; entry.task.duration = Some( Utc::now() @@ -329,6 +346,25 @@ impl TaskManager { .unwrap_or_default(), ); + // Extract checkpoint info for paused and completed tasks + if matches!( + completion.final_status, + TaskStatus::Paused | TaskStatus::Completed + ) { + let checkpoint_id = + serde_json::from_str::(&completion.output) + .ok() + .and_then(|v| { + v.get("checkpoint_id") + .and_then(|c| c.as_str()) + .map(|s| s.to_string()) + }); + entry.task.pause_info = Some(PauseInfo { + checkpoint_id, + raw_output: Some(completion.output), + }); + } + // Keep completed tasks in the list so they 
can be viewed with get_all_tasks // TODO: Consider implementing a cleanup mechanism for old completed tasks // if matches!(entry.task.status, TaskStatus::Completed | TaskStatus::Failed | TaskStatus::Cancelled | TaskStatus::TimedOut) { @@ -350,6 +386,15 @@ impl TaskManager { } false } + TaskMessage::Resume { + id, + command, + response_tx, + } => { + let result = self.resume_task(id, command).await; + let _ = response_tx.send(result); + false + } TaskMessage::Shutdown { response_tx } => { self.shutdown_all_tasks().await; let _ = response_tx.send(()); @@ -381,6 +426,7 @@ impl TaskManager { start_time: Utc::now(), duration: None, timeout, + pause_info: None, }; let (cancel_tx, cancel_rx) = oneshot::channel(); @@ -423,6 +469,59 @@ impl TaskManager { Ok(()) } + async fn resume_task(&mut self, id: TaskId, command: String) -> Result<(), TaskError> { + // Verify the task exists and is in a resumable state + if let Some(entry) = self.tasks.get(&id) { + if !matches!( + entry.task.status, + TaskStatus::Paused | TaskStatus::Completed + ) { + return Err(TaskError::TaskNotPaused(id)); + } + } else { + return Err(TaskError::TaskNotFound(id)); + } + + // Update the task to Running and start a new execution + let entry = self.tasks.get_mut(&id).unwrap(); + entry.task.status = TaskStatus::Running; + entry.task.command = command.clone(); + entry.task.pause_info = None; + entry.task.output = None; + entry.task.error = None; + + let (cancel_tx, cancel_rx) = oneshot::channel(); + let (process_tx, process_rx) = oneshot::channel(); + let task_tx = self.tx.clone(); + + let remote_connection = entry.task.remote_connection.clone(); + let timeout = entry.task.timeout; + + let handle = tokio::spawn(Self::execute_task( + id.clone(), + command, + remote_connection.clone(), + timeout, + cancel_rx, + process_tx, + task_tx, + )); + + entry.handle = handle; + entry.cancel_tx = Some(cancel_tx); + entry.process_id = None; + + // Wait for process ID for local tasks + if remote_connection.is_none() + && 
let Ok(process_id) = process_rx.await + && let Some(entry) = self.tasks.get_mut(&id) + { + entry.process_id = Some(process_id); + } + + Ok(()) + } + async fn cancel_task(&mut self, id: &TaskId) -> Result<(), TaskError> { if let Some(mut entry) = self.tasks.remove(id) { entry.task.status = TaskStatus::Cancelled; @@ -592,6 +691,12 @@ impl TaskManager { error: final_error, final_status: TaskStatus::Completed, } + } else if exit_status.code() == Some(10) { + TaskCompletion { + output: final_output, + error: None, + final_status: TaskStatus::Paused, + } } else { TaskCompletion { output: final_output, @@ -831,6 +936,33 @@ impl TaskManagerHandle { }) } + pub async fn resume_task(&self, id: TaskId, command: String) -> Result { + let (response_tx, response_rx) = oneshot::channel(); + + self.tx + .send(TaskMessage::Resume { + id: id.clone(), + command, + response_tx, + }) + .map_err(|_| TaskError::ManagerShutdown)?; + + response_rx + .await + .map_err(|_| TaskError::ManagerShutdown)??; + + // Wait for the task to start + tokio::time::sleep(START_TASK_WAIT_TIME).await; + + let task_info = self + .get_task_details(id.clone()) + .await + .map_err(|_| TaskError::ManagerShutdown)? 
+ .ok_or(TaskError::TaskNotFound(id))?; + + Ok(task_info) + } + pub async fn get_task_status(&self, id: TaskId) -> Result, TaskError> { let (response_tx, response_rx) = oneshot::channel(); diff --git a/tui/src/app.rs b/tui/src/app.rs index 73541b83..f1815757 100644 --- a/tui/src/app.rs +++ b/tui/src/app.rs @@ -242,6 +242,11 @@ pub struct AppState { /// Billing info for the side panel pub billing_info: Option, + + /// Cached pause info for subagent tasks (task_id -> pause_info) + /// Used to display what subagents want to do in the approval bar + pub subagent_pause_info: + HashMap, } pub struct AppStateOptions<'a> { @@ -495,6 +500,7 @@ impl AppState { pending_editor_open: None, billing_info: None, auth_display_info, + subagent_pause_info: HashMap::new(), } } diff --git a/tui/src/services/approval_bar.rs b/tui/src/services/approval_bar.rs index 6756e29f..af9f85bd 100644 --- a/tui/src/services/approval_bar.rs +++ b/tui/src/services/approval_bar.rs @@ -250,10 +250,9 @@ impl ApprovalBar { if !self.is_visible() { return 0; } - // For now, estimate max height needed - // Top border (1) + up to 3 button rows with spacing (5) + empty line (1) + footer (1) + bottom border (1) = 9 - // But cap at reasonable height - 8 + // Base height: top border (1) + button row (1) + empty line (1) + footer (1) + bottom border (1) = 5 + // Cap at reasonable height + 5 } /// Render the approval bar with wrapping support diff --git a/tui/src/services/bash_block.rs b/tui/src/services/bash_block.rs index 89594f8a..cfa708af 100644 --- a/tui/src/services/bash_block.rs +++ b/tui/src/services/bash_block.rs @@ -2360,6 +2360,7 @@ pub fn render_task_wait_block( let (status_icon, status_color) = match task.status.as_str() { "Running" => ("◐", Color::Yellow), "Pending" => ("○", Color::DarkGray), + "Paused" => ("⏸", Color::Magenta), "Completed" => ("✓", Color::LightGreen), "Failed" => ("✗", Color::LightRed), "Cancelled" => ("⊘", Color::LightRed), @@ -2428,6 +2429,85 @@ pub fn render_task_wait_block( 
Span::styled(" │", Style::default().fg(border_color)), ]; formatted_lines.push(Line::from(line_spans)); + + // If task is paused, show pause info (agent message and pending tool calls) + if let Some(pause_info) = &task.pause_info { + // Show agent message if present + if let Some(agent_msg) = &pause_info.agent_message { + let trimmed_msg = agent_msg.trim(); + if !trimmed_msg.is_empty() { + // Truncate long messages + let display_msg = if trimmed_msg.len() > inner_width.saturating_sub(6) { + format!("{}…", &trimmed_msg[..inner_width.saturating_sub(7)]) + } else { + trimmed_msg.to_string() + }; + let msg_padding = inner_width.saturating_sub(display_msg.len() + 4); + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" "), + Span::styled( + display_msg, + Style::default() + .fg(Color::DarkGray) + .add_modifier(Modifier::ITALIC), + ), + Span::from(" ".repeat(msg_padding)), + Span::styled("│", Style::default().fg(border_color)), + ])); + } + } + + // Show pending tool calls + if let Some(tool_calls) = &pause_info.pending_tool_calls { + for tc in tool_calls { + // Format: " → tool_name(args_preview)" + let args_preview = tc + .arguments + .as_ref() + .map(|args| { + let args_str = args.to_string(); + if args_str.len() > 40 { + format!("{}…", &args_str[..40]) + } else { + args_str + } + }) + .unwrap_or_default(); + + let tool_display = if args_preview.is_empty() { + format!("→ {}", tc.name) + } else { + format!("→ {}({})", tc.name, args_preview) + }; + + let tool_display_width = calculate_display_width(&tool_display) + 4; + let tool_padding = inner_width.saturating_sub(tool_display_width); + + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" "), + Span::styled("→ ", Style::default().fg(Color::Magenta)), + Span::styled( + tc.name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + Span::styled( + if args_preview.is_empty() { 
+ String::new() + } else { + format!("({})", args_preview) + }, + Style::default().fg(Color::DarkGray), + ), + Span::from(" ".repeat(tool_padding)), + Span::styled("│", Style::default().fg(border_color)), + ])); + } + } + } } // If no target tasks, show a message @@ -2450,3 +2530,323 @@ pub fn render_task_wait_block( formatted_lines } + +/// Render a pending block for resume_subagent_task showing what the subagent wants to do +pub fn render_subagent_resume_pending_block<'a>( + tool_call: &ToolCall, + is_auto_approved: bool, + pause_info: Option<&stakpak_shared::models::integrations::openai::TaskPauseInfo>, + width: usize, +) -> Vec> { + let mut formatted_lines: Vec> = Vec::new(); + + let border_color = if is_auto_approved { + Color::Green + } else { + Color::Cyan + }; + let inner_width = width.saturating_sub(4); + + // Parse arguments to determine resume type + let args = serde_json::from_str::(&tool_call.function.arguments).ok(); + + // Extract task_id from arguments + let task_id = args + .as_ref() + .and_then(|a| a.get("task_id").and_then(|v| v.as_str()).map(String::from)) + .unwrap_or_else(|| "unknown".to_string()); + + // Check if this is an input-based resume (for completed agents) or tool approval resume + let input_text = args + .as_ref() + .and_then(|a| a.get("input").and_then(|v| v.as_str()).map(String::from)); + + let has_approve_all = args + .as_ref() + .and_then(|a| a.get("approve_all").and_then(|v| v.as_bool())) + .unwrap_or(false); + + // Title + let title = format!(" Resume Subagent [{}] ", task_id); + let title_len = calculate_display_width(&title); + let dashes_after = inner_width.saturating_sub(title_len + 1); + + // Top border with title + let top_border = Line::from(vec![ + Span::styled("╭─", Style::default().fg(border_color)), + Span::styled( + title, + Style::default() + .fg(Color::Magenta) + .add_modifier(Modifier::BOLD), + ), + Span::styled( + format!("{}╮", "─".repeat(dashes_after)), + Style::default().fg(border_color), + ), + ]); + 
formatted_lines.push(top_border); + + // Handle input-based resume (completed agent, continuing with user input) + if let Some(input) = input_text { + let header = "Continue with input:"; + let header_padding = inner_width.saturating_sub(calculate_display_width(header)); + formatted_lines.push(Line::from(vec![ + Span::styled("│ ", Style::default().fg(border_color)), + Span::styled( + header.to_string(), + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + ), + Span::from(" ".repeat(header_padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + + // Empty line + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" ".repeat(inner_width + 2)), + Span::styled("│", Style::default().fg(border_color)), + ])); + + // Show the input text, wrapped if necessary + let input_lines = wrap_text_to_lines(&input, inner_width.saturating_sub(4)); + for line in input_lines { + let line_width = calculate_display_width(&line); + let line_padding = inner_width.saturating_sub(line_width + 2); + formatted_lines.push(Line::from(vec![ + Span::styled("│ ", Style::default().fg(border_color)), + Span::styled(" ", Style::default()), + Span::styled(line, Style::default().fg(Color::White)), + Span::from(" ".repeat(line_padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + } + + // Empty line + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" ".repeat(inner_width + 2)), + Span::styled("│", Style::default().fg(border_color)), + ])); + } else if has_approve_all || pause_info.is_some() { + // Handle tool approval resume - show what the subagent wants to execute + if let Some(pi) = pause_info { + // Header line + let header = "Subagent wants to execute:"; + let header_padding = inner_width.saturating_sub(calculate_display_width(header)); + formatted_lines.push(Line::from(vec![ + Span::styled("│ ", 
Style::default().fg(border_color)), + Span::styled( + header.to_string(), + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + ), + Span::from(" ".repeat(header_padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + + // Empty line + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" ".repeat(inner_width + 2)), + Span::styled("│", Style::default().fg(border_color)), + ])); + + // Show pending tool calls + if let Some(tool_calls) = &pi.pending_tool_calls { + for tc in tool_calls { + // Tool name line + let tool_header = format!(" → {}", tc.name); + let tool_header_width = calculate_display_width(&tool_header); + let tool_header_padding = inner_width.saturating_sub(tool_header_width); + + formatted_lines.push(Line::from(vec![ + Span::styled("│ ", Style::default().fg(border_color)), + Span::styled(" → ", Style::default().fg(Color::Magenta)), + Span::styled( + tc.name.clone(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + Span::from(" ".repeat(tool_header_padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + + // Show arguments in a readable format + if let Some(args) = &tc.arguments { + let formatted_args = format_tool_arguments_readable(args, inner_width - 6); + for arg_line in formatted_args { + let arg_display_width = calculate_display_width(&arg_line); + let arg_padding = inner_width.saturating_sub(arg_display_width + 4); + + formatted_lines.push(Line::from(vec![ + Span::styled("│ ", Style::default().fg(border_color)), + Span::from(" "), + Span::styled(arg_line, Style::default().fg(Color::DarkGray)), + Span::from(" ".repeat(arg_padding)), + Span::styled(" │", Style::default().fg(border_color)), + ])); + } + } + + // Empty line between tool calls + formatted_lines.push(Line::from(vec![ + Span::styled("│", Style::default().fg(border_color)), + Span::from(" ".repeat(inner_width + 2)), + Span::styled("│", 
/// Wrap text to fit within a given width, respecting word boundaries.
///
/// Words are split on any whitespace and greedily packed into lines of at most
/// `max_width` characters (counted as `char`s), joined by single spaces. A
/// single word longer than `max_width` is not split across lines: it is cut to
/// `max_width - 1` characters and suffixed with `…`.
///
/// At most five lines are returned. When the text needs more, the first four
/// are kept and a final `"..."` line marks the omission.
fn wrap_text_to_lines(text: &str, max_width: usize) -> Vec<String> {
    const MAX_LINES: usize = 5;

    let mut lines: Vec<String> = Vec::new();
    let mut current_line = String::new();
    // Width of `current_line` in chars, maintained incrementally so the line
    // does not have to be re-scanned for every word.
    let mut current_width = 0usize;

    for word in text.split_whitespace() {
        let word_width = word.chars().count();

        // Append to the line in progress when the word (plus a space) fits.
        if !current_line.is_empty() && current_width + 1 + word_width <= max_width {
            current_line.push(' ');
            current_line.push_str(word);
            current_width += 1 + word_width;
            continue;
        }

        // Otherwise close the line in progress, if any, and start afresh.
        if !current_line.is_empty() {
            lines.push(std::mem::take(&mut current_line));
            current_width = 0;
        }

        if word_width > max_width {
            // Word is too long for any line: truncate it onto its own line.
            let truncated: String = word.chars().take(max_width.saturating_sub(1)).collect();
            lines.push(format!("{}…", truncated));
        } else {
            current_line.push_str(word);
            current_width = word_width;
        }
    }

    if !current_line.is_empty() {
        lines.push(current_line);
    }

    // Limit to 5 lines max
    if lines.len() > MAX_LINES {
        lines.truncate(MAX_LINES - 1);
        lines.push("...".to_string());
    }

    lines
}
Truncate if still too long (respecting char boundaries) + if line.chars().count() > max_width { + let truncated: String = line.chars().take(max_width.saturating_sub(1)).collect(); + lines.push(format!("{}…", truncated)); + } else { + lines.push(line); + } + } + } else { + // Not an object, just show the raw value truncated + let s = args.to_string(); + if s.chars().count() > max_width { + let truncated: String = s.chars().take(max_width.saturating_sub(1)).collect(); + lines.push(format!("{}…", truncated)); + } else { + lines.push(s); + } + } + + lines +} diff --git a/tui/src/services/handlers/dialog.rs b/tui/src/services/handlers/dialog.rs index 3a0c31a3..7a2d29a4 100644 --- a/tui/src/services/handlers/dialog.rs +++ b/tui/src/services/handlers/dialog.rs @@ -363,6 +363,26 @@ pub fn handle_show_confirmation_dialog( crate::services::bash_block::RunCommandState::Pending, Some(message_id), )); + } else if tool_name == "resume_subagent_task" { + // For resume_subagent_task, use the special subagent pending block + // Try to get pause info from cached subagent state + let pause_info = serde_json::from_str::(&tool_call.function.arguments) + .ok() + .and_then(|args| { + args.get("task_id") + .and_then(|v| v.as_str()) + .map(String::from) + }) + .and_then(|task_id| state.subagent_pause_info.get(&task_id).cloned()); + + state + .messages + .push(Message::render_subagent_resume_pending_block( + tool_call.clone(), + is_auto_approved, + pause_info, + Some(message_id), + )); } else { state.messages.push(Message::render_pending_border_block( tool_call.clone(), diff --git a/tui/src/services/handlers/tool.rs b/tui/src/services/handlers/tool.rs index aea05d9f..b1d2299a 100644 --- a/tui/src/services/handlers/tool.rs +++ b/tui/src/services/handlers/tool.rs @@ -183,6 +183,17 @@ fn handle_task_wait_progress( .map(|t| t.task_id.clone()) .collect(); + // Cache pause info for paused subagent tasks (for approval bar display) + for task in &task_updates { + if task.status == "Paused" + && let 
Some(pause_info) = &task.pause_info + { + state + .subagent_pause_info + .insert(task.task_id.clone(), pause_info.clone()); + } + } + let overall_progress = progress.progress.unwrap_or(0.0); // Use dedicated task wait block @@ -769,6 +780,26 @@ fn update_pending_tool_display(state: &mut AppState) { let msg = Message::render_run_command_block(command, None, run_state, None); state.pending_bash_message_id = Some(msg.id); state.messages.push(msg); + } else if tool_name == "resume_subagent_task" { + // For resume_subagent_task, use the special subagent pending block + let pause_info = + serde_json::from_str::(&tool_call.function.arguments) + .ok() + .and_then(|args| { + args.get("task_id") + .and_then(|v| v.as_str()) + .map(String::from) + }) + .and_then(|task_id| state.subagent_pause_info.get(&task_id).cloned()); + + let msg = Message::render_subagent_resume_pending_block( + tool_call.clone(), + auto_approve, + pause_info, + None, + ); + state.pending_bash_message_id = Some(msg.id); + state.messages.push(msg); } else { // For other tools, use the standard pending block let msg = Message::render_pending_border_block(tool_call.clone(), auto_approve, None); diff --git a/tui/src/services/message.rs b/tui/src/services/message.rs index 6225d719..0b7d7080 100644 --- a/tui/src/services/message.rs +++ b/tui/src/services/message.rs @@ -77,6 +77,13 @@ pub enum MessageContent { f64, Vec, ), + /// Subagent resume pending block - shows what the subagent wants to do + /// (tool_call: ToolCall, is_auto_approved: bool, pause_info: Option) + RenderSubagentResumePendingBlock( + ToolCall, + bool, + Option, + ), } /// Compute a hash of the MessageContent for cache invalidation. 
@@ -228,6 +235,19 @@ pub fn hash_message_content(content: &MessageContent) -> u64 { } } } + MessageContent::RenderSubagentResumePendingBlock(tool_call, is_auto, pause_info) => { + 20u8.hash(&mut hasher); + tool_call.id.hash(&mut hasher); + is_auto.hash(&mut hasher); + if let Some(pi) = pause_info { + if let Some(msg) = &pi.agent_message { + msg.hash(&mut hasher); + } + if let Some(calls) = &pi.pending_tool_calls { + calls.len().hash(&mut hasher); + } + } + } } hasher.finish() @@ -573,6 +593,25 @@ impl Message { is_collapsed: None, } } + + /// Create a subagent resume pending block message + /// Shows what the subagent wants to do (pending tool calls) + pub fn render_subagent_resume_pending_block( + tool_call: ToolCall, + is_auto_approved: bool, + pause_info: Option, + message_id: Option, + ) -> Self { + Message { + id: message_id.unwrap_or_else(Uuid::new_v4), + content: MessageContent::RenderSubagentResumePendingBlock( + tool_call, + is_auto_approved, + pause_info, + ), + is_collapsed: None, + } + } } pub fn get_wrapped_plain_lines<'a>( @@ -1401,6 +1440,20 @@ fn render_single_message_internal(msg: &Message, width: usize) -> Vec<(Line<'sta let borrowed = get_wrapped_styled_block_lines(&rendered, width); lines.extend(convert_to_owned_lines(borrowed)); } + MessageContent::RenderSubagentResumePendingBlock( + tool_call, + is_auto_approved, + pause_info, + ) => { + let rendered = crate::services::bash_block::render_subagent_resume_pending_block( + tool_call, + *is_auto_approved, + pause_info.as_ref(), + width, + ); + let borrowed = get_wrapped_styled_block_lines(&rendered, width); + lines.extend(convert_to_owned_lines(borrowed)); + } } lines @@ -2024,6 +2077,22 @@ fn get_wrapped_message_lines_internal( let owned_lines = convert_to_owned_lines(borrowed_lines); all_lines.extend(owned_lines); } + MessageContent::RenderSubagentResumePendingBlock( + tool_call, + is_auto_approved, + pause_info, + ) => { + let rendered_lines = + 
crate::services::bash_block::render_subagent_resume_pending_block( + tool_call, + *is_auto_approved, + pause_info.as_ref(), + width, + ); + let borrowed_lines = get_wrapped_styled_block_lines(&rendered_lines, width); + let owned_lines = convert_to_owned_lines(borrowed_lines); + all_lines.extend(owned_lines); + } }; agent_mode_removed = false; checkpoint_id_removed = false; From da659b1dbdada06bbc315b03bdcf2154ad3f1579 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 18:19:40 -0800 Subject: [PATCH 17/23] fix(ci): replace actions/cache with Swatinem/rust-cache - Switch to Swatinem/rust-cache@v2 for proper Rust artifact caching - Remove redundant cargo check step (clippy covers it) - Add --all-targets and -D warnings to clippy - Fixes runner crashes caused by stale target/ cache exhausting disk --- .github/workflows/ci.yml | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 502d34dc..b9d50477 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,25 +15,13 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Cache Cargo Dependencies - uses: actions/cache@v4 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - ./target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} - - - name: Check - run: cargo check --verbose --all-targets + - uses: Swatinem/rust-cache@v2 - name: Format Check run: cargo fmt -- --check - name: Lint - run: cargo clippy + run: cargo clippy --all-targets -- -D warnings - name: Build run: cargo build --verbose From 08b6d08a6c30ed12f057d51bb21b8bafaa22a86a Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 19:02:20 -0800 Subject: [PATCH 18/23] Fix ci issues --- libs/ai/src/types/cache.rs | 8 ++------ libs/ai/src/types/cache_strategy.rs | 9 ++------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/libs/ai/src/types/cache.rs 
b/libs/ai/src/types/cache.rs index 1edcd095..0931d880 100644 --- a/libs/ai/src/types/cache.rs +++ b/libs/ai/src/types/cache.rs @@ -108,11 +108,13 @@ impl Default for CacheControl { /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] +#[derive(Default)] pub enum PromptCacheRetention { /// Standard in-memory caching (default) /// /// Cached prompts may persist for 5-10 minutes during normal operation, /// or up to an hour during off-peak periods. + #[default] InMemory, /// Extended 24-hour caching @@ -123,12 +125,6 @@ pub enum PromptCacheRetention { Extended24h, } -impl Default for PromptCacheRetention { - fn default() -> Self { - Self::InMemory - } -} - impl std::fmt::Display for PromptCacheRetention { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/libs/ai/src/types/cache_strategy.rs b/libs/ai/src/types/cache_strategy.rs index 5aed8592..9cbc2471 100644 --- a/libs/ai/src/types/cache_strategy.rs +++ b/libs/ai/src/types/cache_strategy.rs @@ -51,7 +51,7 @@ use serde::{Deserialize, Serialize}; /// 2. **Request level**: Override via `GenerateOptions::with_cache_strategy()` /// /// Request-level configuration takes precedence over provider defaults. 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(tag = "type", rename_all = "snake_case")] pub enum CacheStrategy { /// Automatic caching optimized for the provider (default) @@ -59,6 +59,7 @@ pub enum CacheStrategy { /// - **Anthropic**: Caches last tool + last system + last 2 messages /// - **OpenAI**: Uses session_id as prompt_cache_key if provided /// - **Google**: No-op (implicit caching) + #[default] Auto, /// Custom Anthropic-style caching configuration @@ -71,12 +72,6 @@ pub enum CacheStrategy { None, } -impl Default for CacheStrategy { - fn default() -> Self { - Self::Auto - } -} - /// Anthropic-specific cache configuration /// /// Controls which components receive cache breakpoints. From d2a7f51da78cff56ca55b0c2fc055d093901ee78 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 19:08:06 -0800 Subject: [PATCH 19/23] Fix ci issues --- libs/api/src/stakpak/models.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/libs/api/src/stakpak/models.rs b/libs/api/src/stakpak/models.rs index 90a5a94f..52737f9f 100644 --- a/libs/api/src/stakpak/models.rs +++ b/libs/api/src/stakpak/models.rs @@ -12,33 +12,23 @@ use uuid::Uuid; // ============================================================================= /// Session visibility -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] #[serde(rename_all = "UPPERCASE")] pub enum SessionVisibility { + #[default] Private, Public, } -impl Default for SessionVisibility { - fn default() -> Self { - Self::Private - } -} - /// Session status -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] #[serde(rename_all = "UPPERCASE")] pub enum SessionStatus { + #[default] Active, Deleted, } -impl Default for SessionStatus { - fn default() -> Self { 
- Self::Active - } -} - /// Full session with active checkpoint #[derive(Debug, Clone, Deserialize)] pub struct Session { From 06bb69db9c2d1c1fd785f4bb2bdf37d1c8bf9277 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 19:13:49 -0800 Subject: [PATCH 20/23] chore: pin rust toolchain to 1.89.0 --- rust-toolchain.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..b67e7d53 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "1.89.0" From c08a9a9e167be5a53b2fdc1c4c957cf64888dd5b Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 19:15:16 -0800 Subject: [PATCH 21/23] ci: add rust toolchain setup with rustfmt and clippy --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9d50477..120d62b8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Setup Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.89.0" + components: rustfmt, clippy + - uses: Swatinem/rust-cache@v2 - name: Format Check From 59fde2f638dc48dd687459a38d74b4484d8929dd Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 21:51:47 -0800 Subject: [PATCH 22/23] Sync with main's daemon --- cli/src/commands/daemon/agent.rs | 237 ++++++++++++-------- cli/src/commands/daemon/commands/history.rs | 1 + cli/src/commands/daemon/commands/run.rs | 13 ++ cli/src/commands/daemon/commands/status.rs | 1 + cli/src/commands/daemon/commands/trigger.rs | 1 + cli/src/commands/daemon/config.rs | 20 ++ cli/src/commands/daemon/db.rs | 4 + cli/src/commands/daemon/prompt.rs | 2 + cli/src/commands/daemon/scheduler.rs | 1 + 9 files changed, 186 insertions(+), 94 deletions(-) diff --git a/cli/src/commands/daemon/agent.rs b/cli/src/commands/daemon/agent.rs index 
5b8333c6..1ab18cf0 100644 --- a/cli/src/commands/daemon/agent.rs +++ b/cli/src/commands/daemon/agent.rs @@ -1,26 +1,15 @@ //! Agent spawner for daemon triggers. //! //! Spawns the stakpak agent as a child process when a trigger fires, -//! capturing session and checkpoint information from the output. +//! capturing session and checkpoint information from JSON output. -use regex::Regex; +use crate::commands::agent::run::pause::{AsyncManifest, PauseReason, EXIT_CODE_PAUSED}; use std::process::Stdio; -use std::sync::LazyLock; use std::time::Duration; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Command; use tracing::{debug, info, warn}; -/// Regex pattern to extract session ID from agent output. -/// Matches: "Session ID: {uuid}" -static SESSION_ID_REGEX: LazyLock> = - LazyLock::new(|| Regex::new(r"Session ID:\s*([0-9a-fA-F-]{36})").ok()); - -/// Regex pattern to extract checkpoint ID from agent output. -/// Matches: "stakpak -c {uuid}" in the resume command output -static CHECKPOINT_ID_REGEX: LazyLock> = - LazyLock::new(|| Regex::new(r"stakpak -c\s+([0-9a-fA-F-]{36})").ok()); - /// Result of spawning and running the agent. #[derive(Debug, Clone)] pub struct AgentResult { @@ -32,6 +21,12 @@ pub struct AgentResult { pub checkpoint_id: Option, /// Whether the agent was killed due to timeout. pub timed_out: bool, + /// Whether the agent paused (needs approval or input). + pub paused: bool, + /// Pause reason if the agent paused. + pub pause_reason: Option, + /// Resume hint command if the agent paused. + pub resume_hint: Option, /// Combined stdout output from the agent. pub stdout: String, /// Combined stderr output from the agent. @@ -44,9 +39,15 @@ impl AgentResult { self.exit_code == Some(0) } - /// Returns true if the agent failed (non-zero exit or timeout). + /// Returns true if the agent paused (exit code 10). 
+ pub fn is_paused(&self) -> bool { + self.paused || self.exit_code == Some(EXIT_CODE_PAUSED) + } + + /// Returns true if the agent failed (non-zero exit, not paused, or timeout). pub fn failed(&self) -> bool { - self.timed_out || matches!(self.exit_code, Some(code) if code != 0) + self.timed_out + || matches!(self.exit_code, Some(code) if code != 0 && code != EXIT_CODE_PAUSED) } } @@ -78,18 +79,21 @@ pub struct SpawnConfig { pub enable_slack_tools: bool, /// Enable subagents. pub enable_subagents: bool, + /// Pause when tools require approval instead of auto-approving. + pub pause_on_approval: bool, } /// Spawn the stakpak agent with the given configuration. /// -/// The agent is run in async mode (`-a`) to completion. Output is captured -/// and parsed for session ID and checkpoint ID. +/// The agent is run in async mode (`-a`) with JSON output (`-o json`). +/// Output is parsed from the JSON manifest for session ID, checkpoint ID, +/// and pause state. /// /// # Arguments /// * `config` - Configuration for spawning the agent /// /// # Returns -/// * `Ok(AgentResult)` - Agent completed (possibly with timeout) +/// * `Ok(AgentResult)` - Agent completed (possibly with timeout or pause) /// * `Err(AgentError)` - Failed to spawn or run the agent pub async fn spawn_agent(config: SpawnConfig) -> Result { // Find the stakpak binary @@ -104,9 +108,11 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result "Spawning agent" ); - // Build the command + // Build the command with JSON output for robust parsing let mut cmd = Command::new(&binary); cmd.arg("-a") // async mode + .arg("-o") + .arg("json") // JSON output for robust parsing .arg("--profile") .arg(&config.profile) .arg(&config.prompt) @@ -121,6 +127,9 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result if config.enable_subagents { cmd.arg("--enable-subagents"); } + if config.pause_on_approval { + cmd.arg("--pause-on-approval"); + } // Set working directory if specified if let Some(workdir) = 
&config.workdir { @@ -140,33 +149,13 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result let result = tokio::time::timeout(config.timeout, async { let mut stdout_lines = Vec::new(); let mut stderr_lines = Vec::new(); - let mut session_id: Option = None; - let mut checkpoint_id: Option = None; - // Read stdout line by line to capture session/checkpoint IDs + // Read stdout line by line if let Some(stdout) = stdout_handle { let reader = BufReader::new(stdout); let mut lines = reader.lines(); while let Ok(Some(line)) = lines.next_line().await { - // Check for session ID - if let Some(regex) = SESSION_ID_REGEX.as_ref() - && let Some(caps) = regex.captures(&line) - && let Some(id) = caps.get(1) - { - session_id = Some(id.as_str().to_string()); - debug!(session_id = %id.as_str(), "Captured session ID"); - } - - // Check for checkpoint ID - if let Some(regex) = CHECKPOINT_ID_REGEX.as_ref() - && let Some(caps) = regex.captures(&line) - && let Some(id) = caps.get(1) - { - checkpoint_id = Some(id.as_str().to_string()); - debug!(checkpoint_id = %id.as_str(), "Captured checkpoint ID"); - } - stdout_lines.push(line); } } @@ -184,24 +173,35 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result // Wait for the process to exit let status = child.wait().await; - ( - stdout_lines.join("\n"), - stderr_lines.join("\n"), - status, - session_id, - checkpoint_id, - ) + (stdout_lines.join("\n"), stderr_lines.join("\n"), status) }) .await; match result { - Ok((stdout, stderr, status, session_id, checkpoint_id)) => { + Ok((stdout, stderr, status)) => { let exit_code = status.ok().and_then(|s| s.code()); + // Try to parse JSON manifest from stdout + let manifest = parse_json_manifest(&stdout); + + let (session_id, checkpoint_id, paused, pause_reason, resume_hint) = + if let Some(m) = &manifest { + ( + m.session_id.clone(), + m.checkpoint_id.clone(), + m.outcome == "paused", + m.pause_reason.clone(), + m.resume_hint.clone(), + ) + } else { + (None, None, false, None, None) + }; + 
info!( exit_code = ?exit_code, session_id = ?session_id, checkpoint_id = ?checkpoint_id, + paused = paused, "Agent completed" ); @@ -210,6 +210,9 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result session_id, checkpoint_id, timed_out: false, + paused, + pause_reason, + resume_hint, stdout, stderr, }) @@ -226,6 +229,9 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result session_id: None, checkpoint_id: None, timed_out: true, + paused: false, + pause_reason: None, + resume_hint: None, stdout: String::new(), stderr: String::new(), }) @@ -233,22 +239,28 @@ pub async fn spawn_agent(config: SpawnConfig) -> Result } } -/// Parse session ID from agent output text. -pub fn parse_session_id(output: &str) -> Option { - SESSION_ID_REGEX - .as_ref()? - .captures(output) - .and_then(|caps| caps.get(1)) - .map(|m| m.as_str().to_string()) -} +/// Parse JSON manifest from agent stdout. +/// The manifest is the last valid JSON object in the output. +fn parse_json_manifest(stdout: &str) -> Option { + // In JSON mode, the agent outputs a single JSON object at the end + // Try to find and parse it by looking for lines that start with '{' + for line in stdout.lines().rev() { + let trimmed = line.trim(); + if trimmed.starts_with('{') + && let Ok(manifest) = serde_json::from_str::(trimmed) + { + return Some(manifest); + } + } + + // Try parsing from the beginning if stdout starts with '{' + if stdout.trim().starts_with('{') + && let Ok(manifest) = serde_json::from_str::(stdout.trim()) + { + return Some(manifest); + } -/// Parse checkpoint ID from agent output text. -pub fn parse_checkpoint_id(output: &str) -> Option { - CHECKPOINT_ID_REGEX - .as_ref()? - .captures(output) - .and_then(|caps| caps.get(1)) - .map(|m| m.as_str().to_string()) + None } #[cfg(test)] @@ -256,49 +268,53 @@ mod tests { use super::*; #[test] - fn test_parse_session_id() { - let output = r#" -[info] Starting agent... 
-Some output here -Session ID: 550e8400-e29b-41d4-a716-446655440000 -More output -"#; - let session_id = parse_session_id(output); + fn test_parse_json_manifest_completed() { + let output = r#"{"outcome":"completed","checkpoint_id":"abc12345-e29b-41d4-a716-446655440000","session_id":"550e8400-e29b-41d4-a716-446655440000","model":"claude-sonnet-4-5-20250929","agent_message":"Done!","steps":3,"total_steps":3,"usage":{"prompt_tokens":100,"completion_tokens":50,"total_tokens":150}}"#; + + let manifest = parse_json_manifest(output); + assert!(manifest.is_some()); + let m = manifest.unwrap(); + assert_eq!(m.outcome, "completed"); assert_eq!( - session_id, + m.session_id, Some("550e8400-e29b-41d4-a716-446655440000".to_string()) ); + assert_eq!( + m.checkpoint_id, + Some("abc12345-e29b-41d4-a716-446655440000".to_string()) + ); + assert!(m.pause_reason.is_none()); } #[test] - fn test_parse_session_id_no_match() { - let output = "No session ID here"; - let session_id = parse_session_id(output); - assert_eq!(session_id, None); + fn test_parse_json_manifest_paused() { + let output = r#"{"outcome":"paused","checkpoint_id":"abc12345-e29b-41d4-a716-446655440000","session_id":"550e8400-e29b-41d4-a716-446655440000","model":"claude-sonnet-4-5-20250929","agent_message":"Need approval","steps":2,"total_steps":2,"usage":{"prompt_tokens":100,"completion_tokens":50,"total_tokens":150},"pause_reason":{"type":"tool_approval_required","pending_tool_calls":[{"id":"call_123","name":"run_command","arguments":{"command":"ls"}}]},"resume_hint":"stakpak -c abc12345-e29b-41d4-a716-446655440000 --approve-all"}"#; + + let manifest = parse_json_manifest(output); + assert!(manifest.is_some()); + let m = manifest.unwrap(); + assert_eq!(m.outcome, "paused"); + assert!(m.pause_reason.is_some()); + assert!(m.resume_hint.is_some()); } #[test] - fn test_parse_checkpoint_id() { - let output = r#" -[success] Checkpoint abc12345-e29b-41d4-a716-446655440000 saved to /path/to/file - -To resume, run: -stakpak -c 
abc12345-e29b-41d4-a716-446655440000 - -Session ID: 550e8400-e29b-41d4-a716-446655440000 -"#; - let checkpoint_id = parse_checkpoint_id(output); - assert_eq!( - checkpoint_id, - Some("abc12345-e29b-41d4-a716-446655440000".to_string()) - ); + fn test_parse_json_manifest_no_match() { + let output = "No JSON here, just text output"; + let manifest = parse_json_manifest(output); + assert!(manifest.is_none()); } #[test] - fn test_parse_checkpoint_id_no_match() { - let output = "No checkpoint here"; - let checkpoint_id = parse_checkpoint_id(output); - assert_eq!(checkpoint_id, None); + fn test_parse_json_manifest_with_prefix() { + // JSON output may have some text before it + let output = r#"[info] Starting... +[info] Processing... +{"outcome":"completed","checkpoint_id":"abc12345","session_id":"def67890","model":"test","agent_message":null,"steps":1,"total_steps":1,"usage":{"prompt_tokens":10,"completion_tokens":5,"total_tokens":15}}"#; + + let manifest = parse_json_manifest(output); + assert!(manifest.is_some()); + assert_eq!(manifest.unwrap().outcome, "completed"); } #[test] @@ -308,12 +324,16 @@ Session ID: 550e8400-e29b-41d4-a716-446655440000 session_id: Some("test-session".to_string()), checkpoint_id: Some("test-checkpoint".to_string()), timed_out: false, + paused: false, + pause_reason: None, + resume_hint: None, stdout: String::new(), stderr: String::new(), }; assert!(result.success()); assert!(!result.failed()); + assert!(!result.is_paused()); } #[test] @@ -323,12 +343,16 @@ Session ID: 550e8400-e29b-41d4-a716-446655440000 session_id: None, checkpoint_id: None, timed_out: false, + paused: false, + pause_reason: None, + resume_hint: None, stdout: String::new(), stderr: "Error occurred".to_string(), }; assert!(!result.success()); assert!(result.failed()); + assert!(!result.is_paused()); } #[test] @@ -338,12 +362,37 @@ Session ID: 550e8400-e29b-41d4-a716-446655440000 session_id: None, checkpoint_id: None, timed_out: true, + paused: false, + pause_reason: None, + 
resume_hint: None, stdout: String::new(), stderr: String::new(), }; assert!(!result.success()); assert!(result.failed()); + assert!(!result.is_paused()); + } + + #[test] + fn test_agent_result_paused() { + let result = AgentResult { + exit_code: Some(EXIT_CODE_PAUSED), + session_id: Some("test-session".to_string()), + checkpoint_id: Some("test-checkpoint".to_string()), + timed_out: false, + paused: true, + pause_reason: Some(PauseReason::ToolApprovalRequired { + pending_tool_calls: vec![], + }), + resume_hint: Some("stakpak -c test-checkpoint --approve-all".to_string()), + stdout: String::new(), + stderr: String::new(), + }; + + assert!(!result.success()); + assert!(!result.failed()); // Paused is not a failure + assert!(result.is_paused()); } // Integration tests would require mocking the stakpak binary diff --git a/cli/src/commands/daemon/commands/history.rs b/cli/src/commands/daemon/commands/history.rs index 566deed1..2e9eb6b2 100644 --- a/cli/src/commands/daemon/commands/history.rs +++ b/cli/src/commands/daemon/commands/history.rs @@ -231,6 +231,7 @@ fn format_status(status: &RunStatus) -> String { RunStatus::Failed => "\x1b[31mfailed\x1b[0m".to_string(), RunStatus::Skipped => "\x1b[90mskipped\x1b[0m".to_string(), RunStatus::TimedOut => "\x1b[31mtimed out\x1b[0m".to_string(), + RunStatus::Paused => "\x1b[33mpaused\x1b[0m".to_string(), } } diff --git a/cli/src/commands/daemon/commands/run.rs b/cli/src/commands/daemon/commands/run.rs index 606073f9..ae7e9b80 100644 --- a/cli/src/commands/daemon/commands/run.rs +++ b/cli/src/commands/daemon/commands/run.rs @@ -345,6 +345,7 @@ async fn handle_trigger_event( workdir: None, enable_slack_tools: trigger.effective_enable_slack_tools(&config.defaults), enable_subagents: trigger.effective_enable_subagents(&config.defaults), + pause_on_approval: trigger.effective_pause_on_approval(&config.defaults), }; match spawn_agent(spawn_config).await { @@ -366,6 +367,17 @@ async fn handle_trigger_event( let (status, error_msg) = if 
result.timed_out { print_event("timeout", &trigger.name, "Agent timed out"); (RunStatus::TimedOut, Some("Agent timed out".to_string())) + } else if result.is_paused() { + let resume_hint = result + .resume_hint + .as_deref() + .unwrap_or("stakpak daemon resume "); + print_event( + "pause", + &trigger.name, + &format!("Agent paused - resume with: {}", resume_hint), + ); + (RunStatus::Paused, None) } else if result.success() { print_event("done", &trigger.name, "Agent completed successfully"); (RunStatus::Completed, None) @@ -407,6 +419,7 @@ async fn handle_trigger_event( trigger = %trigger.name, status = ?status, session_id = ?result.session_id, + paused = result.is_paused(), "Agent completed" ); } diff --git a/cli/src/commands/daemon/commands/status.rs b/cli/src/commands/daemon/commands/status.rs index a0b6539b..e404f0af 100644 --- a/cli/src/commands/daemon/commands/status.rs +++ b/cli/src/commands/daemon/commands/status.rs @@ -129,6 +129,7 @@ async fn get_last_run_info(db: &DaemonDb, trigger_name: &str) -> Option<(DateTim RunStatus::Failed => "failed", RunStatus::Skipped => "skipped", RunStatus::TimedOut => "timed out", + RunStatus::Paused => "paused", }; ( run.finished_at.unwrap_or(run.started_at), diff --git a/cli/src/commands/daemon/commands/trigger.rs b/cli/src/commands/daemon/commands/trigger.rs index f0d85d23..f3577262 100644 --- a/cli/src/commands/daemon/commands/trigger.rs +++ b/cli/src/commands/daemon/commands/trigger.rs @@ -89,6 +89,7 @@ pub async fn show_trigger(name: &str) -> Result<(), String> { RunStatus::Failed => "\x1b[31mfailed\x1b[0m", RunStatus::Skipped => "\x1b[90mskipped\x1b[0m", RunStatus::TimedOut => "\x1b[31mtimed out\x1b[0m", + RunStatus::Paused => "\x1b[33mpaused\x1b[0m", }; let time_str = run.started_at.format("%Y-%m-%d %H:%M:%S"); println!(" #{:<4} {} {}", run.id, time_str, status_str); diff --git a/cli/src/commands/daemon/config.rs b/cli/src/commands/daemon/config.rs index d95a1d1e..ab0e35f7 100644 --- 
a/cli/src/commands/daemon/config.rs +++ b/cli/src/commands/daemon/config.rs @@ -79,6 +79,11 @@ pub struct DaemonDefaults { /// Enable subagents for agent. #[serde(default)] pub enable_subagents: bool, + + /// Pause when tools require approval instead of auto-approving. + /// When true, the agent will pause and exit with code 10 when tools need approval. + #[serde(default = "default_pause_on_approval")] + pub pause_on_approval: bool, } impl Default for DaemonDefaults { @@ -89,6 +94,7 @@ impl Default for DaemonDefaults { check_timeout: default_check_timeout(), enable_slack_tools: false, enable_subagents: false, + pause_on_approval: default_pause_on_approval(), } } } @@ -105,6 +111,10 @@ fn default_check_timeout() -> Duration { Duration::from_secs(30) // 30 seconds } +fn default_pause_on_approval() -> bool { + false // Default to auto-approve, matching async mode default +} + /// A scheduled trigger that can wake the agent. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Trigger { @@ -145,6 +155,10 @@ pub struct Trigger { /// Enable subagents for agent. /// Falls back to defaults.enable_subagents if not specified. pub enable_subagents: Option, + + /// Pause when tools require approval instead of auto-approving. + /// Falls back to defaults.pause_on_approval if not specified. + pub pause_on_approval: Option, } impl Trigger { @@ -173,6 +187,12 @@ impl Trigger { pub fn effective_enable_subagents(&self, defaults: &DaemonDefaults) -> bool { self.enable_subagents.unwrap_or(defaults.enable_subagents) } + + /// Get the effective pause_on_approval, falling back to defaults. + pub fn effective_pause_on_approval(&self, defaults: &DaemonDefaults) -> bool { + self.pause_on_approval + .unwrap_or(defaults.pause_on_approval) + } } /// Custom serde module for Option with humantime format. 
diff --git a/cli/src/commands/daemon/db.rs b/cli/src/commands/daemon/db.rs index 207b5a8b..f0e75808 100644 --- a/cli/src/commands/daemon/db.rs +++ b/cli/src/commands/daemon/db.rs @@ -20,6 +20,8 @@ pub enum RunStatus { Skipped, /// Timed out TimedOut, + /// Paused (agent needs approval or input to continue) + Paused, } impl std::fmt::Display for RunStatus { @@ -30,6 +32,7 @@ impl std::fmt::Display for RunStatus { RunStatus::Failed => write!(f, "failed"), RunStatus::Skipped => write!(f, "skipped"), RunStatus::TimedOut => write!(f, "timed_out"), + RunStatus::Paused => write!(f, "paused"), } } } @@ -44,6 +47,7 @@ impl std::str::FromStr for RunStatus { "failed" => Ok(RunStatus::Failed), "skipped" => Ok(RunStatus::Skipped), "timed_out" => Ok(RunStatus::TimedOut), + "paused" => Ok(RunStatus::Paused), _ => Err(format!("Unknown run status: {}", s)), } } diff --git a/cli/src/commands/daemon/prompt.rs b/cli/src/commands/daemon/prompt.rs index 0a800f2c..f63e2229 100644 --- a/cli/src/commands/daemon/prompt.rs +++ b/cli/src/commands/daemon/prompt.rs @@ -94,6 +94,7 @@ mod tests { timeout: Some(Duration::from_secs(1800)), enable_slack_tools: None, enable_subagents: None, + pause_on_approval: None, } } @@ -110,6 +111,7 @@ mod tests { timeout: None, enable_slack_tools: None, enable_subagents: None, + pause_on_approval: None, } } diff --git a/cli/src/commands/daemon/scheduler.rs b/cli/src/commands/daemon/scheduler.rs index f51a44f9..7ff3f4ae 100644 --- a/cli/src/commands/daemon/scheduler.rs +++ b/cli/src/commands/daemon/scheduler.rs @@ -223,6 +223,7 @@ mod tests { timeout: None, enable_slack_tools: None, enable_subagents: None, + pause_on_approval: None, } } From cd9af67bb14ae29f635800f4cca0101cbea65dd3 Mon Sep 17 00:00:00 2001 From: George Date: Sat, 7 Feb 2026 22:01:27 -0800 Subject: [PATCH 23/23] Cargo fmt --- cli/src/commands/daemon/agent.rs | 2 +- cli/src/commands/daemon/config.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/cli/src/commands/daemon/agent.rs b/cli/src/commands/daemon/agent.rs index 1ab18cf0..a14931c8 100644 --- a/cli/src/commands/daemon/agent.rs +++ b/cli/src/commands/daemon/agent.rs @@ -3,7 +3,7 @@ //! Spawns the stakpak agent as a child process when a trigger fires, //! capturing session and checkpoint information from JSON output. -use crate::commands::agent::run::pause::{AsyncManifest, PauseReason, EXIT_CODE_PAUSED}; +use crate::commands::agent::run::pause::{AsyncManifest, EXIT_CODE_PAUSED, PauseReason}; use std::process::Stdio; use std::time::Duration; use tokio::io::{AsyncBufReadExt, BufReader}; diff --git a/cli/src/commands/daemon/config.rs b/cli/src/commands/daemon/config.rs index ab0e35f7..50047033 100644 --- a/cli/src/commands/daemon/config.rs +++ b/cli/src/commands/daemon/config.rs @@ -190,8 +190,7 @@ impl Trigger { /// Get the effective pause_on_approval, falling back to defaults. pub fn effective_pause_on_approval(&self, defaults: &DaemonDefaults) -> bool { - self.pause_on_approval - .unwrap_or(defaults.pause_on_approval) + self.pause_on_approval.unwrap_or(defaults.pause_on_approval) } }