Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Fixed
- **"Finished" activity no longer leaks raw tool-input JSON when a turn ends on a backgrounded tool call** — When an agent turn ended on `ScheduleWakeup` or a background `Bash` call with no trailing assistant text, Cyrus posted a "Finished" response activity whose body was the raw tool-input JSON. The response activity is now skipped entirely when there is no real assistant text to post, and tool-use content is no longer buffered as a fallback response body. ([CYPACK-1177](https://linear.app/ceedar/issue/CYPACK-1177/finished-activity-shows-raw-tool-input-json-when-turn-ends-on))

### Changed
- **Slack mention prompt nudges agents toward `linear_agent_give_feedback` for live child sessions** — When responding in Slack, Cyrus is now told to send mid-flight corrections to a running child agent session via `mcp__cyrus-tools__linear_agent_give_feedback` instead of falling back to `mcp__linear__save_comment`. Produces a stronger signal when correcting work that is already in progress. ([CYPACK-1189](https://linear.app/ceedar/issue/CYPACK-1189), [#1198](https://github.com/cyrusagents/cyrus/pull/1198))

Expand Down
34 changes: 24 additions & 10 deletions packages/edge-worker/src/AgentSessionManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -511,16 +511,14 @@ export class AgentSessionManager extends EventEmitter {
sessionId,
message as SDKAssistantMessage,
);
// Buffer the text content so addResultEntry can post it as the response
if (assistantEntry.content) {
this.lastAssistantBodyBySession.set(
sessionId,
assistantEntry.content,
);
}
if (assistantEntry.metadata?.toolUseId) {
// Tool-use message: flush any buffered text first (preserves ordering),
// then post immediately for real-time "in progress" display
// then post immediately for real-time "in progress" display.
// Do NOT buffer tool-use content into lastAssistantBodyBySession —
// its `content` is JSON.stringify of the tool input (see extractContent),
// and using it as the final response body produces the CYPACK-1177 bug
// where "Finished" activities render raw tool-input JSON when a turn
// ends on a backgrounded tool call (ScheduleWakeup, background Bash).
await this.flushBufferedAssistant(sessionId);
await this.syncEntryToActivitySink(assistantEntry, sessionId);
} else {
Expand All @@ -532,6 +530,10 @@ export class AgentSessionManager extends EventEmitter {
// between activities (e.g. between "Using model: ..." and the
// first real assistant turn).
if (assistantEntry.content?.trim()) {
this.lastAssistantBodyBySession.set(
sessionId,
assistantEntry.content,
);
this.bufferedAssistantEntryBySession.set(
sessionId,
assistantEntry,
Expand Down Expand Up @@ -664,6 +666,18 @@ export class AgentSessionManager extends EventEmitter {
: ""))
).trim();

// CYPACK-1177: if the turn ends on a backgrounded tool call (ScheduleWakeup,
// background Bash, etc.) with no trailing assistant text, there is no real
// content to post. Skip the response activity entirely rather than emitting
// a "Finished" entry with raw tool-input JSON or an empty body. The session
// status is already updated by the caller (completeSession).
if (!content && !resultMessage.is_error) {
this.sessionLog(sessionId).debug(
"Skipping result activity — no assistant text to post (turn ended on tool call)",
);
return;
}

const resultEntry: CyrusAgentSessionEntry = {
// Set the appropriate session ID based on runner type
...(runnerType === "gemini"
Expand Down Expand Up @@ -1418,7 +1432,7 @@ export class AgentSessionManager extends EventEmitter {
const log = this.sessionLog(sessionId);
const session = this.sessions.get(sessionId);

if (!session || !session.externalSessionId) {
if (!session?.externalSessionId) {
log.debug(
`Skipping ${label} - no external session ID (platform: ${session?.issueContext?.trackerId || "unknown"})`,
);
Expand Down Expand Up @@ -1681,7 +1695,7 @@ export class AgentSessionManager extends EventEmitter {
message: SDKStatusMessage,
): Promise<void> {
const session = this.sessions.get(sessionId);
if (!session || !session.externalSessionId) {
if (!session?.externalSessionId) {
const log = this.sessionLog(sessionId);
log.debug(
`Skipping status message - no external session ID (platform: ${session?.issueContext?.trackerId || "unknown"})`,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import type {
SDKAssistantMessage,
SDKResultMessage,
} from "@anthropic-ai/claude-agent-sdk";
import { ClaudeMessageFormatter } from "cyrus-claude-runner";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { AgentSessionManager } from "../src/AgentSessionManager";
import type { IActivitySink } from "../src/sinks/IActivitySink";

/**
* Regression test for CYPACK-1177:
* When an SDK turn ends on a tool call (ScheduleWakeup or background Bash)
* with NO trailing assistant text, the "Finished" response activity was
* being posted with raw tool-input JSON as its body. The fix:
*
* 1. Don't buffer tool-use content into lastAssistantBodyBySession — its
* `content` is JSON.stringify(tool input).
* 2. If addResultEntry has no assistant text to post and the result is not
* an error, skip emitting the response activity entirely.
*/
describe("AgentSessionManager - result activity when turn ends on tool call (CYPACK-1177)", () => {
let manager: AgentSessionManager;
let mockActivitySink: IActivitySink;
let postActivitySpy: ReturnType<typeof vi.fn>;
const sessionId = "test-session-1177";
const issueId = "issue-1177";

beforeEach(() => {
mockActivitySink = {
id: "test-workspace",
postActivity: vi.fn().mockResolvedValue({ activityId: "activity-1" }),
createAgentSession: vi.fn().mockResolvedValue("ext-session-1"),
};
postActivitySpy = mockActivitySink.postActivity as ReturnType<typeof vi.fn>;

manager = new AgentSessionManager();
manager.createCyrusAgentSession(
sessionId,
issueId,
{
id: issueId,
identifier: "CYPACK-1177",
title: "Finished activity bug",
description: "",
branchName: "test-branch",
},
{ path: "/tmp/workspace", isGitWorktree: false },
);
manager.setActivitySink(sessionId, mockActivitySink);

const formatter = new ClaudeMessageFormatter();
const runnerStub = {
getFormatter: () => formatter,
constructor: { name: "ClaudeRunner" },
} as unknown as Parameters<typeof manager.addAgentRunner>[1];
manager.addAgentRunner(sessionId, runnerStub);
});

function buildToolUseAssistantMessage(
uuid: string,
toolUseId: string,
name: string,
input: Record<string, unknown>,
): SDKAssistantMessage {
return {
type: "assistant",
session_id: "claude-session",
parent_tool_use_id: null,
uuid,
message: {
id: `msg_${toolUseId}`,
type: "message",
role: "assistant",
model: "claude",
stop_reason: "tool_use",
stop_sequence: null,
usage: {
input_tokens: 0,
output_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
content: [{ type: "tool_use", id: toolUseId, name, input }],
},
} as unknown as SDKAssistantMessage;
}

function buildSuccessResult(result = ""): SDKResultMessage {
return {
type: "result",
subtype: "success",
session_id: "claude-session",
duration_ms: 1000,
duration_api_ms: 800,
is_error: false,
num_turns: 1,
result,
total_cost_usd: 0,
usage: {
input_tokens: 0,
output_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
} as unknown as SDKResultMessage;
}

it("does not emit a response activity with raw tool-input JSON when turn ends on ScheduleWakeup", async () => {
await manager.handleClaudeMessage(
sessionId,
buildToolUseAssistantMessage("uuid-1", "toolu_wakeup", "ScheduleWakeup", {
delaySeconds: 270,
reason: "Wait for full RSpec suite to finish before committing",
prompt:
"Continue with QC-5209 — check RSpec results and commit/push/PR",
}),
);
await manager.handleClaudeMessage(sessionId, buildSuccessResult(""));

const postedContents = postActivitySpy.mock.calls.map(
([, content]) => content,
);

// Must NOT emit a response activity at all (no real text to post).
const responses = postedContents.filter((c: any) => c?.type === "response");
expect(responses).toEqual([]);

// Must NOT emit a response whose body contains the tool input.
const leakedJson = postedContents.filter(
(c: any) =>
typeof c?.body === "string" &&
c.body.includes("delaySeconds") &&
c.body.includes("270"),
);
expect(leakedJson).toEqual([]);
});

it("does not emit a response activity when turn ends on a background Bash tool call", async () => {
await manager.handleClaudeMessage(
sessionId,
buildToolUseAssistantMessage("uuid-2", "toolu_bash", "Bash", {
command: "echo 'waiting for repo...'",
description: "idle",
}),
);
await manager.handleClaudeMessage(sessionId, buildSuccessResult(""));

const postedContents = postActivitySpy.mock.calls.map(
([, content]) => content,
);

const responses = postedContents.filter((c: any) => c?.type === "response");
expect(responses).toEqual([]);

const leakedJson = postedContents.filter(
(c: any) =>
typeof c?.body === "string" &&
c.body.includes("echo 'waiting for repo...'"),
);
expect(leakedJson).toEqual([]);
});

it("still emits a response activity when assistant text precedes the result", async () => {
// Trailing assistant text path — should produce a real response.
const textMsg: SDKAssistantMessage = {
type: "assistant",
session_id: "claude-session",
parent_tool_use_id: null,
uuid: "uuid-text",
message: {
id: "msg_text",
type: "message",
role: "assistant",
model: "claude",
stop_reason: "end_turn",
stop_sequence: null,
usage: {
input_tokens: 0,
output_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
content: [{ type: "text", text: "All done — PR created." }],
},
} as unknown as SDKAssistantMessage;

await manager.handleClaudeMessage(sessionId, textMsg);
await manager.handleClaudeMessage(sessionId, buildSuccessResult(""));

const postedContents = postActivitySpy.mock.calls.map(
([, content]) => content,
);

const responses = postedContents.filter((c: any) => c?.type === "response");
expect(responses).toHaveLength(1);
expect(responses[0].body).toBe("All done — PR created.");
});
});
Loading