diff --git a/.github/issues/async-mode-exit-bug.md b/.github/issues/async-mode-exit-bug.md new file mode 100644 index 00000000..6a41e137 --- /dev/null +++ b/.github/issues/async-mode-exit-bug.md @@ -0,0 +1,47 @@ +# Async mode fails to exit gracefully and doesn't report session ID/stats + +## Problem + +When running `stakpak --async` (especially inside Docker containers for harbor eval), the agent: + +1. **Doesn't exit when background tasks (child processes ) complete** - uses `std::process::exit(0)` as a workaround +2. **Doesn't fetch/display session stats** - unlike interactive mode which calls `get_session_stats()` +3. **Doesn't print session URL** - missing `https://stakpak.dev/{user}/agent-sessions/{id}` + +## Reproduction + +```dockerfile +# Example: Running async mode in Docker +FROM stakpak/agent:latest +RUN stakpak --async --prompt "Run background task" +# Agent may hang or force-exit without reporting stats +``` + +## Current vs Expected + +| Feature | Interactive Mode | Async Mode (Bug) | +|---------|-----------------|------------------| +| Session Stats | ✅ Fetched | ❌ Missing | +| Session URL | ✅ Printed | ❌ Missing | +| Graceful Exit | ✅ Clean | ❌ `process::exit(0)` | + +## Key Files + +- `mode_async.rs:437-472` - Only prints session ID, then force-exits +- `mode_interactive.rs:1149-1192` - Properly fetches stats and prints URL + +## Fix + +Add to `mode_async.rs` before shutdown: +```rust +if let Some(session_id) = current_session_id { + if let Ok(stats) = client.get_session_stats(session_id).await { + print!("{}", renderer.render_session_stats(&stats)); + } + if let Ok(account) = client.get_my_account().await { + println!("https://stakpak.dev/{}/agent-sessions/{}", account.username, session_id); + } +} +``` + +**Labels:** `bug`, `async-mode` diff --git a/.github/issues/tool-result-anthropic-400.md b/.github/issues/tool-result-anthropic-400.md new file mode 100644 index 00000000..3c5065de --- /dev/null +++ b/.github/issues/tool-result-anthropic-400.md @@ -0,0 +1,35 @@ +# Stakpak API: Anthropic 400 — Orphaned tool_result blocks on multi-tool turns + +## Problem + +When using the Stakpak API (`STAKPAK_API_KEY`) with tool calling, the backend returns Anthropic API 400 errors when an assistant message contains multiple tool calls and each tool result is sent as a separate message. + +``` +Anthropic API error 400: messages.N.content.0: unexpected `tool_use_id` found in `tool_result` blocks. +Each `tool_result` block must have a corresponding `tool_use` block in the previous message. +``` + +## Root Cause + +Anthropic requires all tool results for a given assistant turn to be in **one user message**. The Stakpak backend currently forwards separate tool-result messages, so the 2nd+ tool_result sees a user message (previous tool result) instead of the assistant message with `tool_use` blocks. + +**Wrong:** assistant → user(tool_A) → user(tool_B) +**Correct:** assistant → user(tool_A, tool_B) + +## Reproduction + +1. Use `STAKPAK_API_KEY` with tool calling enabled. +2. Send a prompt that triggers multiple tool calls in one turn. +3. Execute tools and return results; observe 400 on follow-up request. + +## Fix (Stakpak backend) + +When converting to Anthropic format, merge consecutive tool-result messages into a single user message with multiple `tool_result` blocks. + +## Impact + +- **Severity:** High — breaks multi-tool-call turns via Stakpak API. +- **Client workaround:** None; fix must be in Stakpak backend. +- **Direct Anthropic:** Fixed in `libs/ai` (merge in Anthropic convert). + +**Labels:** `bug`, `stakpak-api`, `tool-calling`, `anthropic` diff --git a/.gitignore b/.gitignore index ffceb842..57dc0cfd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,12 @@ target debug.log -.stakpak/session* - # Stakpak local files +Fix_rfc.md +stakpak_FIX_ISSUE.md +.github/issues/anthropic-400-tool-result-blocks.md .stakpak .DS_Store debug_acp.sh -PR_Description.md \ No newline at end of file +PR_Description.md +cost-stats.md +token-stats.md \ No newline at end of file diff --git a/libs/ai/src/providers/anthropic/convert.rs b/libs/ai/src/providers/anthropic/convert.rs index 4c53f0b0..ed3be26f 100644 --- a/libs/ai/src/providers/anthropic/convert.rs +++ b/libs/ai/src/providers/anthropic/convert.rs @@ -300,17 +300,24 @@ fn build_tools_with_caching( /// /// Caches the last N non-system messages to maximize cache hits /// on subsequent requests in a conversation. +/// +/// Also merges consecutive Tool messages into a single user message with multiple +/// tool_result blocks, since Anthropic requires each tool_result to have a +/// corresponding tool_use in the *immediately preceding* assistant message. +/// Separate Tool messages would make the 2nd+ tool_result reference a user message +/// (the previous tool result) instead of the assistant, causing API errors. fn build_messages_with_caching( messages: &[Message], validator: &mut CacheControlValidator, tail_count: usize, ) -> Result> { let non_system: Vec<&Message> = messages.iter().filter(|m| m.role != Role::System).collect(); + let merged = merge_consecutive_tool_messages(&non_system); - let len = non_system.len(); + let len = merged.len(); let cache_start_index = len.saturating_sub(tail_count); - non_system + merged .iter() .enumerate() .map(|(i, msg)| { @@ -320,6 +327,51 @@ fn build_messages_with_caching( .collect() } +/// Merge consecutive Tool messages into single messages. +/// +/// Anthropic requires: each tool_result block must have a corresponding tool_use +/// in the *previous* message. With separate Tool messages (assistant → tool_A → +/// tool_B), the "previous" message for tool_B is tool_A (a user message with +/// tool_result), not the assistant—causing "unexpected tool_use_id" errors. +/// +/// We merge consecutive Tool messages so we get: assistant → user(tool_A, tool_B). +fn merge_consecutive_tool_messages(messages: &[&Message]) -> Vec { + use crate::types::{ContentPart, MessageContent}; + + let mut result = Vec::new(); + let mut i = 0; + + while i < messages.len() { + let msg = messages[i]; + if msg.role != Role::Tool { + result.push((*msg).clone()); + i += 1; + continue; + } + + // Collect all consecutive Tool messages and merge their tool_result parts + let mut tool_result_parts: Vec = Vec::new(); + while i < messages.len() && messages[i].role == Role::Tool { + let parts = messages[i].parts(); + for part in parts { + if let ContentPart::ToolResult { .. } = &part { + tool_result_parts.push(part); + } + } + i += 1; + } + + if !tool_result_parts.is_empty() { + result.push(Message::new( + Role::Tool, + MessageContent::Parts(tool_result_parts), + )); + } + } + + result +} + /// Convert unified message to Anthropic message with optional auto-caching fn to_anthropic_message_with_caching( msg: &Message, @@ -775,4 +827,72 @@ mod tests { _ => panic!("Expected string content for simple user message"), } } + + #[test] + fn test_consecutive_tool_messages_merged_for_anthropic() { + // Anthropic requires tool_results in a single user message following the assistant. + // Separate Tool messages cause "unexpected tool_use_id" errors because the 2nd+ + // tool_result's "previous message" would be another tool result, not the assistant. + use crate::Model; + use crate::types::{GenerateRequest, MessageContent}; + + let messages = vec![ + Message::new( + Role::Assistant, + MessageContent::Parts(vec![ + ContentPart::tool_call("toolu_1", "tool_a", serde_json::json!({})), + ContentPart::tool_call("toolu_2", "tool_b", serde_json::json!({})), + ]), + ), + Message::new( + Role::Tool, + MessageContent::Parts(vec![ContentPart::tool_result( + "toolu_1", + serde_json::json!("result_a"), + )]), + ), + Message::new( + Role::Tool, + MessageContent::Parts(vec![ContentPart::tool_result( + "toolu_2", + serde_json::json!("result_b"), + )]), + ), + ]; + + let req = GenerateRequest::new( + Model::custom("claude-3-5-sonnet-20241022", "anthropic"), + messages, + ); + + let config = super::super::AnthropicConfig::default(); + let result = to_anthropic_request(&req, &config, false).unwrap(); + + // Should have 3 messages: assistant, user (merged tool results) + assert_eq!( + result.request.messages.len(), + 2, + "Consecutive tool messages should be merged" + ); + assert_eq!(result.request.messages[0].role, "assistant"); + assert_eq!(result.request.messages[1].role, "user"); + + // The merged user message should have 2 tool_result blocks + match &result.request.messages[1].content { + AnthropicMessageContent::Blocks(blocks) => { + assert_eq!(blocks.len(), 2, "Should have 2 tool_result blocks"); + if let AnthropicContent::ToolResult { tool_use_id, .. } = &blocks[0] { + assert_eq!(tool_use_id, "toolu_1"); + } else { + panic!("First block should be ToolResult"); + } + if let AnthropicContent::ToolResult { tool_use_id, .. } = &blocks[1] { + assert_eq!(tool_use_id, "toolu_2"); + } else { + panic!("Second block should be ToolResult"); + } + } + _ => panic!("Expected Blocks content with merged tool results"), + } + } }