diff --git a/CHANGELOG.md b/CHANGELOG.md index 597b7f927..9c558d357 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed +- **"Finished" activity no longer leaks raw tool-input JSON when a turn ends on a backgrounded tool call** — When an agent turn ended on `ScheduleWakeup` or a background `Bash` call with no trailing assistant text, Cyrus posted a "Finished" response activity whose body was the raw tool-input JSON. The response activity is now skipped entirely when there is no real assistant text to post, and tool-use content is no longer buffered as a fallback response body. ([CYPACK-1177](https://linear.app/ceedar/issue/CYPACK-1177/finished-activity-shows-raw-tool-input-json-when-turn-ends-on)) + ### Changed - **Slack mention prompt nudges agents toward `linear_agent_give_feedback` for live child sessions** — When responding in Slack, Cyrus is now told to send mid-flight corrections to a running child agent session via `mcp__cyrus-tools__linear_agent_give_feedback` instead of falling back to `mcp__linear__save_comment`. Produces a stronger signal when correcting work that is already in progress. ([CYPACK-1189](https://linear.app/ceedar/issue/CYPACK-1189), [#1198](https://github.com/cyrusagents/cyrus/pull/1198)) diff --git a/packages/edge-worker/src/AgentSessionManager.ts b/packages/edge-worker/src/AgentSessionManager.ts index eb66e245f..344dfc95a 100644 --- a/packages/edge-worker/src/AgentSessionManager.ts +++ b/packages/edge-worker/src/AgentSessionManager.ts @@ -511,16 +511,14 @@ export class AgentSessionManager extends EventEmitter { sessionId, message as SDKAssistantMessage, ); - // Buffer the text content so addResultEntry can post it as the response - if (assistantEntry.content) { - this.lastAssistantBodyBySession.set( - sessionId, - assistantEntry.content, - ); - } if (assistantEntry.metadata?.toolUseId) { // Tool-use message: flush any buffered text first (preserves ordering), - // then post immediately for real-time "in progress" display + // then post immediately for real-time "in progress" display. + // Do NOT buffer tool-use content into lastAssistantBodyBySession — + // its `content` is JSON.stringify of the tool input (see extractContent), + // and using it as the final response body produces the CYPACK-1177 bug + // where "Finished" activities render raw tool-input JSON when a turn + // ends on a backgrounded tool call (ScheduleWakeup, background Bash). await this.flushBufferedAssistant(sessionId); await this.syncEntryToActivitySink(assistantEntry, sessionId); } else { @@ -532,6 +530,10 @@ export class AgentSessionManager extends EventEmitter { // between activities (e.g. between "Using model: ..." and the // first real assistant turn). if (assistantEntry.content?.trim()) { + this.lastAssistantBodyBySession.set( + sessionId, + assistantEntry.content, + ); this.bufferedAssistantEntryBySession.set( sessionId, assistantEntry, @@ -664,6 +666,18 @@ export class AgentSessionManager extends EventEmitter { : "")) ).trim(); + // CYPACK-1177: if the turn ends on a backgrounded tool call (ScheduleWakeup, + // background Bash, etc.) with no trailing assistant text, there is no real + // content to post. Skip the response activity entirely rather than emitting + // a "Finished" entry with raw tool-input JSON or an empty body. The session + // status is already updated by the caller (completeSession). + if (!content && !resultMessage.is_error) { + this.sessionLog(sessionId).debug( + "Skipping result activity — no assistant text to post (turn ended on tool call)", + ); + return; + } + const resultEntry: CyrusAgentSessionEntry = { // Set the appropriate session ID based on runner type ...(runnerType === "gemini" @@ -1418,7 +1432,7 @@ export class AgentSessionManager extends EventEmitter { const log = this.sessionLog(sessionId); const session = this.sessions.get(sessionId); - if (!session || !session.externalSessionId) { + if (!session?.externalSessionId) { log.debug( `Skipping ${label} - no external session ID (platform: ${session?.issueContext?.trackerId || "unknown"})`, ); @@ -1681,7 +1695,7 @@ export class AgentSessionManager extends EventEmitter { message: SDKStatusMessage, ): Promise { const session = this.sessions.get(sessionId); - if (!session || !session.externalSessionId) { + if (!session?.externalSessionId) { const log = this.sessionLog(sessionId); log.debug( `Skipping status message - no external session ID (platform: ${session?.issueContext?.trackerId || "unknown"})`, diff --git a/packages/edge-worker/test/AgentSessionManager.result-no-trailing-text.test.ts b/packages/edge-worker/test/AgentSessionManager.result-no-trailing-text.test.ts new file mode 100644 index 000000000..86d425880 --- /dev/null +++ b/packages/edge-worker/test/AgentSessionManager.result-no-trailing-text.test.ts @@ -0,0 +1,198 @@ +import type { + SDKAssistantMessage, + SDKResultMessage, +} from "@anthropic-ai/claude-agent-sdk"; +import { ClaudeMessageFormatter } from "cyrus-claude-runner"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { AgentSessionManager } from "../src/AgentSessionManager"; +import type { IActivitySink } from "../src/sinks/IActivitySink"; + +/** + * Regression test for CYPACK-1177: + * When an SDK turn ends on a tool call (ScheduleWakeup or background Bash) + * with NO trailing assistant text, the "Finished" response activity was + * being posted with raw tool-input JSON as its body. The fix: + * + * 1. Don't buffer tool-use content into lastAssistantBodyBySession — its + * `content` is JSON.stringify(tool input). + * 2. If addResultEntry has no assistant text to post and the result is not + * an error, skip emitting the response activity entirely. + */ +describe("AgentSessionManager - result activity when turn ends on tool call (CYPACK-1177)", () => { + let manager: AgentSessionManager; + let mockActivitySink: IActivitySink; + let postActivitySpy: ReturnType; + const sessionId = "test-session-1177"; + const issueId = "issue-1177"; + + beforeEach(() => { + mockActivitySink = { + id: "test-workspace", + postActivity: vi.fn().mockResolvedValue({ activityId: "activity-1" }), + createAgentSession: vi.fn().mockResolvedValue("ext-session-1"), + }; + postActivitySpy = mockActivitySink.postActivity as ReturnType; + + manager = new AgentSessionManager(); + manager.createCyrusAgentSession( + sessionId, + issueId, + { + id: issueId, + identifier: "CYPACK-1177", + title: "Finished activity bug", + description: "", + branchName: "test-branch", + }, + { path: "/tmp/workspace", isGitWorktree: false }, + ); + manager.setActivitySink(sessionId, mockActivitySink); + + const formatter = new ClaudeMessageFormatter(); + const runnerStub = { + getFormatter: () => formatter, + constructor: { name: "ClaudeRunner" }, + } as unknown as Parameters[1]; + manager.addAgentRunner(sessionId, runnerStub); + }); + + function buildToolUseAssistantMessage( + uuid: string, + toolUseId: string, + name: string, + input: Record, + ): SDKAssistantMessage { + return { + type: "assistant", + session_id: "claude-session", + parent_tool_use_id: null, + uuid, + message: { + id: `msg_${toolUseId}`, + type: "message", + role: "assistant", + model: "claude", + stop_reason: "tool_use", + stop_sequence: null, + usage: { + input_tokens: 0, + output_tokens: 0, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + content: [{ type: "tool_use", id: toolUseId, name, input }], + }, + } as unknown as SDKAssistantMessage; + } + + function buildSuccessResult(result = ""): SDKResultMessage { + return { + type: "result", + subtype: "success", + session_id: "claude-session", + duration_ms: 1000, + duration_api_ms: 800, + is_error: false, + num_turns: 1, + result, + total_cost_usd: 0, + usage: { + input_tokens: 0, + output_tokens: 0, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + } as unknown as SDKResultMessage; + } + + it("does not emit a response activity with raw tool-input JSON when turn ends on ScheduleWakeup", async () => { + await manager.handleClaudeMessage( + sessionId, + buildToolUseAssistantMessage("uuid-1", "toolu_wakeup", "ScheduleWakeup", { + delaySeconds: 270, + reason: "Wait for full RSpec suite to finish before committing", + prompt: + "Continue with QC-5209 — check RSpec results and commit/push/PR", + }), + ); + await manager.handleClaudeMessage(sessionId, buildSuccessResult("")); + + const postedContents = postActivitySpy.mock.calls.map( + ([, content]) => content, + ); + + // Must NOT emit a response activity at all (no real text to post). + const responses = postedContents.filter((c: any) => c?.type === "response"); + expect(responses).toEqual([]); + + // Must NOT emit a response whose body contains the tool input. + const leakedJson = postedContents.filter( + (c: any) => + typeof c?.body === "string" && + c.body.includes("delaySeconds") && + c.body.includes("270"), + ); + expect(leakedJson).toEqual([]); + }); + + it("does not emit a response activity when turn ends on a background Bash tool call", async () => { + await manager.handleClaudeMessage( + sessionId, + buildToolUseAssistantMessage("uuid-2", "toolu_bash", "Bash", { + command: "echo 'waiting for repo...'", + description: "idle", + }), + ); + await manager.handleClaudeMessage(sessionId, buildSuccessResult("")); + + const postedContents = postActivitySpy.mock.calls.map( + ([, content]) => content, + ); + + const responses = postedContents.filter((c: any) => c?.type === "response"); + expect(responses).toEqual([]); + + const leakedJson = postedContents.filter( + (c: any) => + typeof c?.body === "string" && + c.body.includes("echo 'waiting for repo...'"), + ); + expect(leakedJson).toEqual([]); + }); + + it("still emits a response activity when assistant text precedes the result", async () => { + // Trailing assistant text path — should produce a real response. + const textMsg: SDKAssistantMessage = { + type: "assistant", + session_id: "claude-session", + parent_tool_use_id: null, + uuid: "uuid-text", + message: { + id: "msg_text", + type: "message", + role: "assistant", + model: "claude", + stop_reason: "end_turn", + stop_sequence: null, + usage: { + input_tokens: 0, + output_tokens: 0, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + content: [{ type: "text", text: "All done — PR created." }], + }, + } as unknown as SDKAssistantMessage; + + await manager.handleClaudeMessage(sessionId, textMsg); + await manager.handleClaudeMessage(sessionId, buildSuccessResult("")); + + const postedContents = postActivitySpy.mock.calls.map( + ([, content]) => content, + ); + + const responses = postedContents.filter((c: any) => c?.type === "response"); + expect(responses).toHaveLength(1); + expect(responses[0].body).toBe("All done — PR created."); + }); +});