stakpak · abdalla1912mohamed · Jan 28, 2026 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/.github/issues/async-mode-exit-bug.md b/.github/issues/async-mode-exit-bug.md
@@ -0,0 +1,47 @@
+# Async mode fails to exit gracefully and doesn't report session ID/stats
+
+## Problem
+
+When running `stakpak --async` (especially inside Docker containers for harbor eval), the agent:
+
+1. **Doesn't exit when background tasks (child processes ) complete** - uses `std::process::exit(0)` as a workaround
+2. **Doesn't fetch/display session stats** - unlike interactive mode which calls `get_session_stats()`
+3. **Doesn't print session URL** - missing `https://stakpak.dev/{user}/agent-sessions/{id}`
+
+## Reproduction
+
+```dockerfile
+# Example: Running async mode in Docker
+FROM stakpak/agent:latest
+RUN stakpak --async --prompt "Run background task"
+# Agent may hang or force-exit without reporting stats
+```
+
+## Current vs Expected
+
+| Feature | Interactive Mode | Async Mode (Bug) |
+|---------|-----------------|------------------|
+| Session Stats | ✅ Fetched | ❌ Missing |
+| Session URL | ✅ Printed | ❌ Missing |
+| Graceful Exit | ✅ Clean | ❌ `process::exit(0)` |
+
+## Key Files
+
+- `mode_async.rs:437-472` - Only prints session ID, then force-exits
+- `mode_interactive.rs:1149-1192` - Properly fetches stats and prints URL
+
+## Fix
+
+Add to `mode_async.rs` before shutdown:
+```rust
+if let Some(session_id) = current_session_id {
+    if let Ok(stats) = client.get_session_stats(session_id).await {
+        print!("{}", renderer.render_session_stats(&stats));
+    }
+    if let Ok(account) = client.get_my_account().await {
+        println!("https://stakpak.dev/{}/agent-sessions/{}", account.username, session_id);
+    }
+}
+```
+
+**Labels:** `bug`, `async-mode`
diff --git a/.github/issues/tool-result-anthropic-400.md b/.github/issues/tool-result-anthropic-400.md
@@ -0,0 +1,35 @@
+# Stakpak API: Anthropic 400 — Orphaned tool_result blocks on multi-tool turns
+
+## Problem
+
+When using the Stakpak API (`STAKPAK_API_KEY`) with tool calling, the backend returns Anthropic API 400 errors when an assistant message contains multiple tool calls and each tool result is sent as a separate message.
+
+```
+Anthropic API error 400: messages.N.content.0: unexpected `tool_use_id` found in `tool_result` blocks. 
+Each `tool_result` block must have a corresponding `tool_use` block in the previous message.
+```
+
+## Root Cause
+
+Anthropic requires all tool results for a given assistant turn to be in **one user message**. The Stakpak backend currently forwards separate tool-result messages, so the 2nd+ tool_result sees a user message (previous tool result) instead of the assistant message with `tool_use` blocks.
+
+**Wrong:** assistant → user(tool_A) → user(tool_B)  
+**Correct:** assistant → user(tool_A, tool_B)
+
+## Reproduction
+
+1. Use `STAKPAK_API_KEY` with tool calling enabled.
+2. Send a prompt that triggers multiple tool calls in one turn.
+3. Execute tools and return results; observe 400 on follow-up request.
+
+## Fix (Stakpak backend)
+
+When converting to Anthropic format, merge consecutive tool-result messages into a single user message with multiple `tool_result` blocks.
+
+## Impact
+
+- **Severity:** High — breaks multi-tool-call turns via Stakpak API.
+- **Client workaround:** None; fix must be in Stakpak backend.
+- **Direct Anthropic:** Fixed in `libs/ai` (merge in Anthropic convert).
+
+**Labels:** `bug`, `stakpak-api`, `tool-calling`, `anthropic`
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,12 @@
 target
 debug.log
-.stakpak/session*
-
 # Stakpak local files
+Fix_rfc.md
+stakpak_FIX_ISSUE.md
+.github/issues/anthropic-400-tool-result-blocks.md
 .stakpak
 .DS_Store
 debug_acp.sh
-PR_Description.md
+PR_Description.md
+cost-stats.md
+token-stats.md
diff --git a/libs/ai/src/providers/anthropic/convert.rs b/libs/ai/src/providers/anthropic/convert.rs
@@ -300,17 +300,24 @@ fn build_tools_with_caching(
 ///
 /// Caches the last N non-system messages to maximize cache hits
 /// on subsequent requests in a conversation.
+///
+/// Also merges consecutive Tool messages into a single user message with multiple
+/// tool_result blocks, since Anthropic requires each tool_result to have a
+/// corresponding tool_use in the *immediately preceding* assistant message.
+/// Separate Tool messages would make the 2nd+ tool_result reference a user message
+/// (the previous tool result) instead of the assistant, causing API errors.
 fn build_messages_with_caching(
     messages: &[Message],
     validator: &mut CacheControlValidator,
     tail_count: usize,
 ) -> Result<Vec<AnthropicMessage>> {
     let non_system: Vec<&Message> = messages.iter().filter(|m| m.role != Role::System).collect();
+    let merged = merge_consecutive_tool_messages(&non_system);
 
-    let len = non_system.len();
+    let len = merged.len();
     let cache_start_index = len.saturating_sub(tail_count);
 
-    non_system
+    merged
         .iter()
         .enumerate()
         .map(|(i, msg)| {
@@ -320,6 +327,51 @@ fn build_messages_with_caching(
         .collect()
 }
 
+/// Merge consecutive Tool messages into single messages.
+///
+/// Anthropic requires: each tool_result block must have a corresponding tool_use
+/// in the *previous* message. With separate Tool messages (assistant → tool_A →
+/// tool_B), the "previous" message for tool_B is tool_A (a user message with
+/// tool_result), not the assistant—causing "unexpected tool_use_id" errors.
+///
+/// We merge consecutive Tool messages so we get: assistant → user(tool_A, tool_B).
+fn merge_consecutive_tool_messages(messages: &[&Message]) -> Vec<Message> {
+    use crate::types::{ContentPart, MessageContent};
+
+    let mut result = Vec::new();
+    let mut i = 0;
+
+    while i < messages.len() {
+        let msg = messages[i];
+        if msg.role != Role::Tool {
+            result.push((*msg).clone());
+            i += 1;
+            continue;
+        }
+
+        // Collect all consecutive Tool messages and merge their tool_result parts
+        let mut tool_result_parts: Vec<ContentPart> = Vec::new();
+        while i < messages.len() && messages[i].role == Role::Tool {
+            let parts = messages[i].parts();
+            for part in parts {
+                if let ContentPart::ToolResult { .. } = &part {
+                    tool_result_parts.push(part);
+                }
+            }
+            i += 1;
+        }
+
+        if !tool_result_parts.is_empty() {
+            result.push(Message::new(
+                Role::Tool,
+                MessageContent::Parts(tool_result_parts),
+            ));
+        }
+    }
+
+    result
+}
+
 /// Convert unified message to Anthropic message with optional auto-caching
 fn to_anthropic_message_with_caching(
     msg: &Message,
@@ -775,4 +827,72 @@ mod tests {
             _ => panic!("Expected string content for simple user message"),
         }
     }
+
+    #[test]
+    fn test_consecutive_tool_messages_merged_for_anthropic() {
+        // Anthropic requires tool_results in a single user message following the assistant.
+        // Separate Tool messages cause "unexpected tool_use_id" errors because the 2nd+
+        // tool_result's "previous message" would be another tool result, not the assistant.
+        use crate::Model;
+        use crate::types::{GenerateRequest, MessageContent};
+
+        let messages = vec![
+            Message::new(
+                Role::Assistant,
+                MessageContent::Parts(vec![
+                    ContentPart::tool_call("toolu_1", "tool_a", serde_json::json!({})),
+                    ContentPart::tool_call("toolu_2", "tool_b", serde_json::json!({})),
+                ]),
+            ),
+            Message::new(
+                Role::Tool,
+                MessageContent::Parts(vec![ContentPart::tool_result(
+                    "toolu_1",
+                    serde_json::json!("result_a"),
+                )]),
+            ),
+            Message::new(
+                Role::Tool,
+                MessageContent::Parts(vec![ContentPart::tool_result(
+                    "toolu_2",
+                    serde_json::json!("result_b"),
+                )]),
+            ),
+        ];
+
+        let req = GenerateRequest::new(
+            Model::custom("claude-3-5-sonnet-20241022", "anthropic"),
+            messages,
+        );
+
+        let config = super::super::AnthropicConfig::default();
+        let result = to_anthropic_request(&req, &config, false).unwrap();
+
+        // Should have 3 messages: assistant, user (merged tool results)
+        assert_eq!(
+            result.request.messages.len(),
+            2,
+            "Consecutive tool messages should be merged"
+        );
+        assert_eq!(result.request.messages[0].role, "assistant");
+        assert_eq!(result.request.messages[1].role, "user");
+
+        // The merged user message should have 2 tool_result blocks
+        match &result.request.messages[1].content {
+            AnthropicMessageContent::Blocks(blocks) => {
+                assert_eq!(blocks.len(), 2, "Should have 2 tool_result blocks");
+                if let AnthropicContent::ToolResult { tool_use_id, .. } = &blocks[0] {
+                    assert_eq!(tool_use_id, "toolu_1");
+                } else {
+                    panic!("First block should be ToolResult");
+                }
+                if let AnthropicContent::ToolResult { tool_use_id, .. } = &blocks[1] {
+                    assert_eq!(tool_use_id, "toolu_2");
+                } else {
+                    panic!("Second block should be ToolResult");
+                }
+            }
+            _ => panic!("Expected Blocks content with merged tool results"),
+        }
+    }
 }