diff --git a/src/agent/cognitive.gleam b/src/agent/cognitive.gleam index cbc6798..5eddabf 100644 --- a/src/agent/cognitive.gleam +++ b/src/agent/cognitive.gleam @@ -36,6 +36,7 @@ import gleam/int import gleam/list import gleam/option.{None, Some} import gleam/string +import llm/message_history import llm/response import llm/types as llm_types import meta/log as meta_log @@ -121,7 +122,7 @@ pub fn start( max_tokens: cfg.max_tokens, max_context_messages: cfg.max_context_messages, tools:, - messages: cfg.initial_messages, + messages: message_history.from_list(cfg.initial_messages), registry: cfg.registry, pending: dict.new(), status: Idle, @@ -344,7 +345,9 @@ fn handle_message( types.SetScheduler(scheduler) -> CognitiveState(..state, scheduler: Some(scheduler)) types.GetMessages(reply_to:) -> { - process.send(reply_to, state.messages) + // GetMessages is the WS / TUI snapshot path — they want a flat + // List(Message) for rendering. for_send is the wire export. + process.send(reply_to, message_history.for_send(state.messages)) state } types.Ping(reply_to:) -> { @@ -1147,13 +1150,12 @@ fn handle_think_complete( state.cycle_id, ) let new_task_id = cycle_log.generate_uuid() - let nudge_msg = - llm_types.Message(role: llm_types.User, content: [ + let retry_messages = + message_history.add_user(state.messages, [ llm_types.TextContent( "Your previous response was empty. 
Please provide a substantive response.", ), ]) - let retry_messages = list.append(state.messages, [nudge_msg]) let req = cognitive_llm.build_request_with_model( state, @@ -1200,8 +1202,8 @@ fn handle_think_complete( state.cycle_id, ) let new_task_id = cycle_log.generate_uuid() - let nudge_msg = - llm_types.Message(role: llm_types.User, content: [ + let retry_messages = + message_history.add_user(state.messages, [ llm_types.TextContent( "Your previous response was cut off at the token cap" <> " (output_tokens=" @@ -1222,7 +1224,6 @@ fn handle_think_complete( <> " a different result.", ), ]) - let retry_messages = list.append(state.messages, [nudge_msg]) let req = cognitive_llm.build_request_with_model( state, @@ -1299,12 +1300,8 @@ fn handle_think_complete( ) None -> #(text, req_model) } - let assistant_msg = - llm_types.Message( - role: llm_types.Assistant, - content: resp.content, - ) - let messages = list.append(state.messages, [assistant_msg]) + let messages = + message_history.add_assistant(state.messages, resp.content) // Output gate strategy: // - Autonomous (scheduler) cycles: full LLM scorer + normative // calculus — nobody's watching, quality matters before delivery diff --git a/src/agent/cognitive/agents.gleam b/src/agent/cognitive/agents.gleam index da53af7..adf554b 100644 --- a/src/agent/cognitive/agents.gleam +++ b/src/agent/cognitive/agents.gleam @@ -36,6 +36,7 @@ import gleam/list import gleam/option.{type Option, None, Some} import gleam/string import knowledge/search as knowledge_search +import llm/message_history import llm/response import llm/types as llm_types import narrative/appraiser @@ -211,9 +212,7 @@ fn handle_own_human_input( frontdoor_types.CognitiveLoopOrigin, ) - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let messages = list.append(state.messages, [assistant_msg]) + let messages = message_history.add_assistant(state.messages, resp.content) let ctx = OwnToolWaiting(tool_use_id: call.id) 
@@ -472,11 +471,10 @@ fn handle_memory_tools( case remaining_calls { [] -> { // Only memory calls — add results to messages and re-think - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let user_msg = - llm_types.Message(role: llm_types.User, content: memory_results) - let messages = list.append(state.messages, [assistant_msg, user_msg]) + let messages = + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user(memory_results) let new_task_id = cycle_log.generate_uuid() let cycle_id = option.unwrap(state.cycle_id, new_task_id) @@ -537,11 +535,10 @@ fn handle_memory_tools( case agent_calls { [] -> { // No agent calls either — just memory + unknown tools, re-think - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let user_msg = - llm_types.Message(role: llm_types.User, content: initial) - let messages = list.append(state.messages, [assistant_msg, user_msg]) + let messages = + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user(initial) let new_task_id = cycle_log.generate_uuid() let cycle_id = option.unwrap(state.cycle_id, new_task_id) // Check for mid-cycle escalation before re-thinking @@ -625,9 +622,7 @@ fn dispatch_agent_calls( "" -> "No agent tools matched." t -> t } - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let messages = list.append(state.messages, [assistant_msg]) + let messages = message_history.add_assistant(state.messages, resp.content) output.send_reply( state, reply_text, @@ -996,8 +991,6 @@ fn do_dispatch_agents( // that, every subsequent cycle re-sends this orphaned tool_use // and the API 400s. Emit a user message with synthesised error // tool_result blocks for every tool_use we refused to dispatch. 
- let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) let tool_use_calls = list.filter_map(resp.content, fn(block) { case block { @@ -1014,11 +1007,12 @@ fn do_dispatch_agents( is_error: True, ) }) - let user_msg = - llm_types.Message(role: llm_types.User, content: error_results) let messages = case error_results { - [] -> list.append(state.messages, [assistant_msg]) - _ -> list.append(state.messages, [assistant_msg, user_msg]) + [] -> message_history.add_assistant(state.messages, resp.content) + _ -> + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user(error_results) } CognitiveState( ..state, @@ -1045,9 +1039,7 @@ fn do_dispatch_agents( let pending_ids = list.append(agent_pending_ids, coder_pending_ids) // Add assistant message with tool use content - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let messages = list.append(state.messages, [assistant_msg]) + let messages = message_history.add_assistant(state.messages, resp.content) // Insert new pending entries (agents + coder dispatches) into the dict let new_pending = @@ -1488,10 +1480,11 @@ pub fn handle_agent_complete( _ -> [tool_result_block] } - // Build ONE user message with ALL accumulated results - let user_msg = - llm_types.Message(role: llm_types.User, content: all_results) - let messages = list.append(state.messages, [user_msg]) + // Build ONE user message with ALL accumulated results. + // MessageHistory.add_user strips any orphan tool_result whose + // tool_use_id isn't paired with a tool_use in the prior assistant + // message — invariant maintained by construction. 
+ let messages = message_history.add_user(state.messages, all_results) // Spawn post-execution D' re-check if enabled let result_text = @@ -1661,9 +1654,7 @@ pub fn handle_coder_dispatch_complete( list.append(accumulated_results, [tool_result_block]) _ -> [tool_result_block] } - let user_msg = - llm_types.Message(role: llm_types.User, content: all_results) - let messages = list.append(state.messages, [user_msg]) + let messages = message_history.add_user(state.messages, all_results) let new_task_id = cycle_log.generate_uuid() let cycle_id = option.unwrap(state.cycle_id, new_task_id) let state = CognitiveState(..state, messages:, pending: remaining) @@ -1922,9 +1913,8 @@ pub fn handle_user_answer( content: answer, is_error: False, ) - let user_msg = - llm_types.Message(role: llm_types.User, content: [tool_result_block]) - let messages = list.append(state.messages, [user_msg]) + let messages = + message_history.add_user(state.messages, [tool_result_block]) // Spawn a continuation think worker let new_task_id = cycle_log.generate_uuid() @@ -2342,7 +2332,7 @@ pub fn dispatch_deferred( }) CognitiveState( ..state, - messages: list.append(state.messages, error_results), + messages: message_history.add_all(state.messages, error_results), ) } True -> { diff --git a/src/agent/cognitive/llm.gleam b/src/agent/cognitive/llm.gleam index 54bf2c3..751c2ea 100644 --- a/src/agent/cognitive/llm.gleam +++ b/src/agent/cognitive/llm.gleam @@ -21,7 +21,7 @@ import gleam/int import gleam/list import gleam/option.{None, Some} import gleam/string -import llm/message_repair +import llm/message_history.{type MessageHistory} import llm/request import llm/types as llm_types import meta/types as meta_types @@ -60,7 +60,7 @@ pub fn proceed_with_model( queue_depth: list.length(state.input_queue), session_since: state.identity.session_since, agents_active: registry.count_running(state.registry), - message_count: list.length(state.messages), + message_count: message_history.length(state.messages), 
sensory_events: state.pending_sensory_events, active_delegations: dict.values(state.active_delegations), sandbox_enabled: state.config.sandbox_enabled, @@ -81,11 +81,7 @@ pub fn proceed_with_model( // Consume pending Layer 3b meta intervention if any let state = consume_meta_intervention(state, cycle_id) - let msg = - llm_types.Message(role: llm_types.User, content: [ - llm_types.TextContent(text:), - ]) - let messages = list.append(state.messages, [msg]) + let messages = message_history.add_user_text(state.messages, text) let task_id = cycle_id let req = build_request_with_model(state, model, messages) @@ -230,11 +226,10 @@ pub fn handle_think_error( // well-formed (alternating user/assistant). Without this, the // next user input would create two consecutive user messages // and the API would reject the request. - let error_msg = - llm_types.Message(role: llm_types.Assistant, content: [ + let messages = + message_history.add_assistant(state.messages, [ llm_types.TextContent(text: user_text), ]) - let messages = list.append(state.messages, [error_msg]) CognitiveState( ..state, messages:, @@ -267,11 +262,10 @@ pub fn handle_think_down( ) let user_text = render_user_error(InternalCrash) output.send_reply(state, user_text, state.model, None, []) - let error_msg = - llm_types.Message(role: llm_types.Assistant, content: [ + let messages = + message_history.add_assistant(state.messages, [ llm_types.TextContent(text: user_text), ]) - let messages = list.append(state.messages, [error_msg]) CognitiveState( ..state, messages:, @@ -286,29 +280,34 @@ pub fn handle_think_down( /// Build an LLM request using the current model. pub fn build_request( state: CognitiveState, - messages: List(llm_types.Message), + messages: MessageHistory, ) -> llm_types.LlmRequest { build_request_with_model(state, state.model, messages) } /// Build an LLM request with a specific model. 
+/// +/// The `MessageHistory` is invariant-bearing by construction (see +/// `llm/message_history.gleam`): orphan tool_uses, orphan +/// tool_results, leading-assistant, and same-role-runs are all +/// impossible to introduce. The reactive `repair_orphans_and_warn` +/// pipeline this function used to call is gone — there's nothing +/// left to repair. +/// +/// What remains here are the *quantitative* trims that depend on +/// runtime knobs (`max_context_messages`) and the hard token-budget +/// safety net. Those still apply to the wire-side `List(Message)`. pub fn build_request_with_model( state: CognitiveState, model: String, - messages: List(llm_types.Message), + messages: MessageHistory, ) -> llm_types.LlmRequest { - // Defensive repair: inject synthetic tool_result blocks for any - // orphaned tool_use ids. Anthropic's API rejects histories where an - // assistant tool_use isn't immediately followed by a matching - // tool_result; once an orphan lands in state.messages, every cycle - // keeps sending it until something repairs it. This is the last - // line of defence — upstream paths still need to be tidy. We - // slog.warn on any repair so orphan sources stay visible. - let repaired = repair_orphans_and_warn(messages, state.cycle_id) - // Message count trim (configurable) + let raw = message_history.for_send(messages) + // Message count trim (configurable). `context.trim` keeps the + // tool_use/tool_result pairing intact even after dropping by count. let trimmed = case state.max_context_messages { - None -> context.ensure_alternation(repaired) - Some(max) -> context.trim(repaired, max) + None -> context.ensure_alternation(raw) + Some(max) -> context.trim(raw, max) } // Token budget safety net — hard cap to prevent API 400 errors. // System prompt + tools + response budget need headroom, so cap messages @@ -329,30 +328,6 @@ pub fn build_request_with_model( } } -/// Wrap `message_repair.repair` with a warn log when it has to act. 
-/// Each orphan is a bug upstream — we want the logs to point operators -/// (and us) at the code path that left it dangling. -fn repair_orphans_and_warn( - messages: List(llm_types.Message), - cycle_id: option.Option(String), -) -> List(llm_types.Message) { - case message_repair.find_orphans(messages) { - [] -> messages - orphans -> { - slog.warn( - "cognitive/llm", - "build_request", - "Repairing " - <> int.to_string(list.length(orphans)) - <> " orphaned tool_use id(s): " - <> string.join(orphans, ", "), - cycle_id, - ) - message_repair.repair(messages) - } - } -} - // --------------------------------------------------------------------------- // Layer 3b meta intervention // --------------------------------------------------------------------------- diff --git a/src/agent/cognitive/safety.gleam b/src/agent/cognitive/safety.gleam index 20171c2..ad4db43 100644 --- a/src/agent/cognitive/safety.gleam +++ b/src/agent/cognitive/safety.gleam @@ -30,6 +30,7 @@ import gleam/int import gleam/list import gleam/option.{None, Some} import gleam/string +import llm/message_history import llm/provider import llm/types as llm_types import narrative/librarian @@ -44,11 +45,12 @@ fn get_datetime() -> String /// Add a synthetic assistant message to state so message history stays /// well-formed (alternating user/assistant) when going to Idle after an error. 
fn with_assistant_error(state: CognitiveState, text: String) -> CognitiveState { - let msg = - llm_types.Message(role: llm_types.Assistant, content: [ + CognitiveState( + ..state, + messages: message_history.add_assistant(state.messages, [ llm_types.TextContent(text:), - ]) - CognitiveState(..state, messages: list.append(state.messages, [msg])) + ]), + ) } /// Technical rejection notice for the agent — goes to notification channel, @@ -174,7 +176,7 @@ pub fn spawn_safety_gate( |> string.join("; ") // Build context from recent messages (character-budget walker, all content types) - let ctx = build_context_string(state.messages, 2000) + let ctx = build_context_string(message_history.to_list(state.messages), 2000) let det_config = state.config.deterministic_config let redact_secrets = state.redact_secrets @@ -418,8 +420,6 @@ pub fn handle_safety_gate_complete( <> "]", ) }) - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) let result_blocks = list.map(error_results, fn(r) { case r { @@ -437,9 +437,10 @@ pub fn handle_safety_gate_complete( ) } }) - let user_msg = - llm_types.Message(role: llm_types.User, content: result_blocks) - let messages = list.append(state.messages, [assistant_msg, user_msg]) + let messages = + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user(result_blocks) let new_task_id = cycle_log.generate_uuid() let req = cognitive_llm.build_request(state, messages) worker.spawn_think( @@ -470,17 +471,15 @@ pub fn handle_safety_gate_complete( } _ -> { // Normal MODIFY — append caution instruction and re-think - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) - let modify_msg = - llm_types.Message(role: llm_types.User, content: [ + let messages = + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user([ llm_types.TextContent( text: build_rejection_notice("tool", result, "tool dispatch") <> " 
Please reconsider your approach and proceed with additional caution.", ), ]) - let messages = - list.append(state.messages, [assistant_msg, modify_msg]) let new_task_id = cycle_log.generate_uuid() let req = cognitive_llm.build_request(state, messages) case state.verbose { @@ -518,8 +517,6 @@ pub fn handle_safety_gate_complete( dprime_types.Reject -> { // Generate error tool results for all calls and continue - let assistant_msg = - llm_types.Message(role: llm_types.Assistant, content: resp.content) let error_blocks = list.map(calls, fn(call) { llm_types.ToolResultContent( @@ -532,9 +529,10 @@ pub fn handle_safety_gate_complete( is_error: True, ) }) - let user_msg = - llm_types.Message(role: llm_types.User, content: error_blocks) - let messages = list.append(state.messages, [assistant_msg, user_msg]) + let messages = + state.messages + |> message_history.add_assistant(resp.content) + |> message_history.add_user(error_blocks) let new_task_id = cycle_log.generate_uuid() let req = cognitive_llm.build_request(state, messages) @@ -591,7 +589,8 @@ pub fn spawn_input_safety_gate( let scorer_model = state.task_model let verbose = state.verbose let instruction = text - let base_ctx = build_context_string(state.messages, 2000) + let base_ctx = + build_context_string(message_history.to_list(state.messages), 2000) // Phase D follow-up — meta-learning. 
Prepend any persisted // affect-performance correlation warnings (negative r ≤ -0.4) so the // input gate's LLM scorer can weight risk against the agent's known @@ -963,14 +962,13 @@ pub fn handle_input_safety_gate_complete( dprime_types.Modify -> { // Inject a caution message into history, then proceed - let caution_msg = - llm_types.Message(role: llm_types.User, content: [ + let messages = + message_history.add_user(state.messages, [ llm_types.TextContent( text: build_rejection_notice("input", result, "user query") <> " Please proceed with additional caution.", ), ]) - let messages = list.append(state.messages, [caution_msg]) cognitive_llm.proceed_with_model( CognitiveState(..state, messages:), model, @@ -1137,7 +1135,7 @@ pub fn handle_post_execution_gate_complete( pub fn check_deterministic_only( state: CognitiveState, reply_text: String, - messages: List(llm_types.Message), + messages: message_history.MessageHistory, task_id: String, usage: llm_types.Usage, ) -> CognitiveState { @@ -1237,7 +1235,7 @@ pub fn spawn_output_gate( state: CognitiveState, output_state: dprime_types.DprimeState, report_text: String, - messages: List(llm_types.Message), + messages: message_history.MessageHistory, task_id: String, modification_count: Int, ) -> CognitiveState { @@ -1249,7 +1247,7 @@ pub fn spawn_output_gate( // Use the most recent user message as query context, not the potentially // stale last_user_input which may predate multiple tool turns (BF-05). let query = - list.reverse(state.messages) + list.reverse(message_history.to_list(state.messages)) |> list.find_map(fn(m) { case m.role { llm_types.User -> @@ -1465,14 +1463,13 @@ pub fn handle_output_gate_complete( "Output gate: MODIFY (" <> explanation <> ")", state.cycle_id, ) - let correction_msg = - llm_types.Message(role: llm_types.User, content: [ + let messages = + message_history.add_user(state.messages, [ llm_types.TextContent( text: "[SYSTEM: Your response was NOT delivered to the user. 
The quality gate flagged specific issues listed below. IMPORTANT: Fix ONLY the flagged issues. Preserve all other content, structure, and tone from your original response. Do not remove information that was not flagged. Do not add unnecessary hedging or caveats. Produce a corrected version of your full response.]\n\nFlagged issues:\n" <> explanation, ), ]) - let messages = list.append(state.messages, [correction_msg]) let new_state = CognitiveState(..state, messages:) let task_id = cycle_log.generate_uuid() let req = cognitive_llm.build_request(new_state, messages) diff --git a/src/agent/cognitive_state.gleam b/src/agent/cognitive_state.gleam index fc8a5b0..74aabea 100644 --- a/src/agent/cognitive_state.gleam +++ b/src/agent/cognitive_state.gleam @@ -24,6 +24,7 @@ import gleam/dict.{type Dict} import gleam/erlang/process.{type Subject} import gleam/list import gleam/option.{type Option, None, Some} +import llm/message_history.{type MessageHistory} import llm/provider.{type Provider} import llm/retry import llm/types as llm_types @@ -165,7 +166,7 @@ pub type CognitiveState { system: String, max_context_messages: Option(Int), tools: List(llm_types.Tool), - messages: List(llm_types.Message), + messages: MessageHistory, // --- Loop control --- status: CognitiveStatus, cycle_id: Option(String), diff --git a/src/llm/message_history.gleam b/src/llm/message_history.gleam new file mode 100644 index 0000000..abf4840 --- /dev/null +++ b/src/llm/message_history.gleam @@ -0,0 +1,442 @@ +//// Opaque, invariant-bearing wrapper around the cognitive loop's +//// message list. +//// +//// **Why this exists.** Anthropic's API rejects requests whose history +//// violates one of these rules: +//// +//// 1. Every assistant `tool_use` block must be answered by a +//// `tool_result` (with matching `tool_use_id`) in the very next +//// user message. +//// 2. 
Conversely, every user `tool_result` block's `tool_use_id` +//// must match a `tool_use` in the immediately-prior assistant +//// message — no orphan tool_results. +//// 3. Messages must alternate user/assistant. +//// 4. The first message must be user-role. +//// +//// Violations get returned as 400 errors that look like +//// "messages.40.content.0: unexpected `tool_use_id` ..." and they +//// poison every subsequent cycle until something repairs the +//// stored history. +//// +//// Historically the cog kept `state.messages: List(Message)` and let +//// every handler `list.append` directly. A reactive sweep at the LLM +//// boundary patched up *some* violations, but new code paths kept +//// introducing new shapes and the boundary sweep didn't always cover +//// them. The cog would die mid-cycle with an opaque API error and the +//// operator would have to restart it. +//// +//// The fix: opaque `MessageHistory` with one chokepoint (`append`) +//// that maintains every invariant by construction. Nothing outside +//// this module can build or modify a `MessageHistory` except via the +//// typed API. The reactive sweep becomes redundant — it has nothing +//// left to repair. + +// Copyright (C) 2026 Seamus Brady +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +import gleam/list +import gleam/option.{type Option, None, Some} +import gleam/set.{type Set} +import gleam/string +import llm/types.{ + type ContentBlock, type Message, Assistant, Message, ToolResultContent, + ToolUseContent, User, +} + +// --------------------------------------------------------------------------- +// Opaque type +// --------------------------------------------------------------------------- + +/// Append-only message history with provider-API invariants enforced +/// at every mutation. Construction is via `new/0`, `from_list/1`, or +/// any of the `add_*` functions — `MessageHistory(...)` is not exposed. +pub opaque type MessageHistory { + MessageHistory(messages: List(Message)) +} + +// --------------------------------------------------------------------------- +// Construction +// --------------------------------------------------------------------------- + +/// Empty history. Caller's first append should usually be a user +/// message; an assistant-first append is silently dropped — there is +/// no valid prior turn for it to pair with (see `add/2`). +pub fn new() -> MessageHistory { + MessageHistory(messages: []) +} + +/// Lift a raw `List(Message)` into a sanitised `MessageHistory`. +/// Used at startup to load persisted history off disk and at any +/// other boundary where untyped messages cross in (e.g. tests). The +/// repair pipeline runs once during ingest: +/// +/// 1. Drop a leading assistant message +/// 2. Coalesce consecutive same-role messages +/// 3. Strip orphan tool_results whose tool_use_id isn't in any +/// prior assistant message +/// 4. Inject synthetic tool_results for any orphan tool_uses +pub fn from_list(messages: List(Message)) -> MessageHistory { + MessageHistory(messages: sanitise(messages)) +} + +/// Equivalent to folding `add` over `msgs`. The same invariants apply; +/// each message goes through the chokepoint individually. 
+pub fn from_messages(msgs: List(Message)) -> MessageHistory { + list.fold(msgs, new(), fn(h, m) { add(h, m) }) +} + +// --------------------------------------------------------------------------- +// Read +// --------------------------------------------------------------------------- + +/// Number of messages in the history. +pub fn length(h: MessageHistory) -> Int { + list.length(h.messages) +} + +pub fn is_empty(h: MessageHistory) -> Bool { + case h.messages { + [] -> True + _ -> False + } +} + +/// Most-recent message, if any. Useful for "did the last turn end +/// with an assistant tool_use?" decisions. +pub fn last(h: MessageHistory) -> Option(Message) { + case list.reverse(h.messages) { + [m, ..] -> Some(m) + [] -> None + } +} + +/// Read-only iteration. The list is the canonical chronological order. +pub fn to_list(h: MessageHistory) -> List(Message) { + h.messages +} + +/// Wire-ready message list for the LLM provider. Currently identical +/// to `to_list/1` because the invariants are maintained at append +/// time; the separate function exists so future safety nets (e.g. +/// last-resort hard token trimming) can live on the send path without +/// changing the audit/log surface. +pub fn for_send(h: MessageHistory) -> List(Message) { + h.messages +} + +// --------------------------------------------------------------------------- +// Mutation — the chokepoint +// --------------------------------------------------------------------------- + +/// Append a message. The invariants are enforced here: +/// +/// * Leading assistant → silently dropped. +/// * Consecutive same-role → coalesced into one message (content +/// blocks concatenated). This prevents the "messages must +/// alternate" 400 from the API. +/// * User message containing tool_result blocks → any tool_result +/// whose tool_use_id has no matching tool_use in the +/// immediately-prior assistant message is dropped. If that empties +/// the message, the message itself is dropped. 
+/// +/// The function is total: every input produces a valid `MessageHistory`. +pub fn add(h: MessageHistory, msg: Message) -> MessageHistory { + case h.messages, msg.role { + [], Assistant -> h + [], _ -> MessageHistory(messages: [msg]) + prior_messages, _ -> { + let prior_assistant_tool_use_ids = + last_assistant_tool_use_ids(prior_messages) + let cleaned = clean_msg(msg, prior_assistant_tool_use_ids) + case cleaned { + None -> h + Some(c) -> { + case list.reverse(prior_messages) { + [last_msg, ..rest_rev] if last_msg.role == c.role -> + MessageHistory( + messages: list.reverse([ + Message( + role: last_msg.role, + content: list.append(last_msg.content, c.content), + ), + ..rest_rev + ]), + ) + _ -> MessageHistory(messages: list.append(prior_messages, [c])) + } + } + } + } + } +} + +/// Append several messages in order. Each goes through `add/2`. +pub fn add_all(h: MessageHistory, msgs: List(Message)) -> MessageHistory { + list.fold(msgs, h, fn(acc, m) { add(acc, m) }) +} + +/// Append a plain user-text turn. Convenience for the most common +/// case (operator typing or scheduler prepending a context block). +pub fn add_user_text(h: MessageHistory, text: String) -> MessageHistory { + add(h, Message(role: User, content: [types.TextContent(text: text)])) +} + +/// Append an assistant turn from raw content blocks. Prefer this +/// over `add` when constructing a fresh assistant message at the +/// call site — it makes the role explicit. +pub fn add_assistant( + h: MessageHistory, + content: List(ContentBlock), +) -> MessageHistory { + add(h, Message(role: Assistant, content: content)) +} + +/// Append a user turn from raw content blocks. Tool_results inside +/// `content` are vetted against the prior assistant's tool_use ids; +/// orphans are stripped. 
+pub fn add_user( + h: MessageHistory, + content: List(ContentBlock), +) -> MessageHistory { + add(h, Message(role: User, content: content)) +} + +// --------------------------------------------------------------------------- +// Internal — invariant enforcement +// --------------------------------------------------------------------------- + +/// Pull the tool_use IDs from the last assistant message in the list, +/// if there is one. Returns an empty set if the last message is user +/// or there is no message — in either case any tool_result_id offered +/// by the next user message is orphan. +fn last_assistant_tool_use_ids(messages: List(Message)) -> Set(String) { + case list.reverse(messages) { + [Message(role: Assistant, content: c), ..] -> tool_use_ids_in(c) + _ -> set.new() + } +} + +fn tool_use_ids_in(content: List(ContentBlock)) -> Set(String) { + list.fold(content, set.new(), fn(acc, block) { + case block { + ToolUseContent(id: id, ..) -> set.insert(acc, id) + _ -> acc + } + }) +} + +/// Strip orphan tool_result blocks from a user message. If the +/// resulting block list is empty, return None to signal "drop the +/// message entirely". Non-user messages pass through unchanged. +fn clean_msg(msg: Message, valid_tool_use_ids: Set(String)) -> Option(Message) { + case msg.role { + User -> { + let cleaned_blocks = + list.filter(msg.content, fn(block) { + case block { + ToolResultContent(tool_use_id: id, ..) -> + set.contains(valid_tool_use_ids, id) + _ -> True + } + }) + case cleaned_blocks { + [] -> None + _ -> Some(Message(role: User, content: cleaned_blocks)) + } + } + Assistant -> Some(msg) + } +} + +// --------------------------------------------------------------------------- +// Sanitisation — for `from_list` +// --------------------------------------------------------------------------- + +/// Bring an arbitrary message list into a state where every API +/// invariant holds. Used once at ingest; downstream `add` calls +/// preserve the invariants. 
+fn sanitise(messages: List(Message)) -> List(Message) { + messages + |> drop_leading_assistant + |> coalesce_same_role + |> strip_orphan_tool_results + |> inject_orphan_tool_use_stubs +} + +fn drop_leading_assistant(messages: List(Message)) -> List(Message) { + case messages { + [Message(role: Assistant, ..), ..rest] -> rest + _ -> messages + } +} + +fn coalesce_same_role(messages: List(Message)) -> List(Message) { + case messages { + [] -> [] + [first, ..rest] -> coalesce_loop(rest, first, []) + } +} + +fn coalesce_loop( + remaining: List(Message), + current: Message, + acc: List(Message), +) -> List(Message) { + case remaining { + [] -> list.reverse([current, ..acc]) + [next, ..rest] -> + case current.role == next.role { + True -> + coalesce_loop( + rest, + Message( + role: current.role, + content: list.append(current.content, next.content), + ), + acc, + ) + False -> coalesce_loop(rest, next, [current, ..acc]) + } + } +} + +fn strip_orphan_tool_results(messages: List(Message)) -> List(Message) { + let valid_ids = + list.fold(messages, set.new(), fn(acc, msg) { + case msg.role { + Assistant -> + list.fold(msg.content, acc, fn(s, block) { + case block { + ToolUseContent(id: id, ..) -> set.insert(s, id) + _ -> s + } + }) + _ -> acc + } + }) + list.filter_map(messages, fn(msg) { + case msg.role { + User -> { + let filtered = + list.filter(msg.content, fn(block) { + case block { + ToolResultContent(tool_use_id: id, ..) -> + set.contains(valid_ids, id) + _ -> True + } + }) + case filtered { + [] -> Error(Nil) + _ -> Ok(Message(role: User, content: filtered)) + } + } + _ -> Ok(msg) + } + }) +} + +const orphan_stub_content = "[internal: tool call did not complete]" + +/// For each assistant tool_use whose immediately-following user +/// message has no matching tool_result, inject a synthetic stub. +/// This handles the OPPOSITE direction from `strip_orphan_tool_results`. 
+fn inject_orphan_tool_use_stubs(messages: List(Message)) -> List(Message) { + do_inject_stubs(messages, []) +} + +fn do_inject_stubs(messages: List(Message), acc: List(Message)) -> List(Message) { + case messages { + [] -> list.reverse(acc) + [ + Message(role: Assistant, content: a) as assistant, + Message(role: User, content: u) as user_msg, + ..rest + ] -> { + let tool_use_ids = list.filter_map(a, extract_tool_use_id) + let result_ids = result_ids_set(u) + let orphans = + list.filter(tool_use_ids, fn(id) { !set.contains(result_ids, id) }) + case orphans { + [] -> do_inject_stubs(rest, [user_msg, assistant, ..acc]) + _ -> { + let stubs = list.map(orphans, stub_block) + let patched = Message(role: User, content: list.append(stubs, u)) + do_inject_stubs(rest, [patched, assistant, ..acc]) + } + } + } + [Message(role: Assistant, content: a) as assistant, ..rest] -> { + // Trailing assistant — fabricate a follow-up user. + let tool_use_ids = list.filter_map(a, extract_tool_use_id) + case tool_use_ids { + [] -> do_inject_stubs(rest, [assistant, ..acc]) + ids -> { + let stubs = list.map(ids, stub_block) + let patched_user = Message(role: User, content: stubs) + do_inject_stubs(rest, [patched_user, assistant, ..acc]) + } + } + } + [other, ..rest] -> do_inject_stubs(rest, [other, ..acc]) + } +} + +fn extract_tool_use_id(block: ContentBlock) -> Result(String, Nil) { + case block { + ToolUseContent(id: id, ..) -> Ok(id) + _ -> Error(Nil) + } +} + +fn result_ids_set(content: List(ContentBlock)) -> Set(String) { + list.fold(content, set.new(), fn(acc, block) { + case block { + ToolResultContent(tool_use_id: id, ..) 
-> set.insert(acc, id) + _ -> acc + } + }) +} + +fn stub_block(id: String) -> ContentBlock { + ToolResultContent( + tool_use_id: id, + content: orphan_stub_content, + is_error: True, + ) +} + +// --------------------------------------------------------------------------- +// Diagnostics +// --------------------------------------------------------------------------- + +/// Compact one-line description of the history shape. Useful in +/// debug logs when investigating "why did the cog send a poisoned +/// history" — except now it can't. +pub fn describe(h: MessageHistory) -> String { + let counts = + list.fold(h.messages, #(0, 0), fn(acc, msg) { + let #(u, a) = acc + case msg.role { + User -> #(u + 1, a) + Assistant -> #(u, a + 1) + } + }) + let #(u, a) = counts + string.concat([ + "MessageHistory(", + "n=", + int_to_string(list.length(h.messages)), + ", user=", + int_to_string(u), + ", assistant=", + int_to_string(a), + ")", + ]) +} + +@external(erlang, "erlang", "integer_to_binary") +fn int_to_string(n: Int) -> String diff --git a/src/llm/message_repair.gleam b/src/llm/message_repair.gleam deleted file mode 100644 index af4a3c6..0000000 --- a/src/llm/message_repair.gleam +++ /dev/null @@ -1,169 +0,0 @@ -//// Defensive repair for message history before it's handed to a -//// provider. -//// -//// Anthropic's API rejects any request whose message history contains -//// a `tool_use` block without a matching `tool_result` block in the -//// immediately-following user message — the familiar 400 error -//// -//// tool_use ids were found without tool_result blocks immediately -//// after: toolu_... -//// -//// Every code path that appends an assistant message with tool_use -//// content is supposed to append a follow-up user message containing -//// tool_result blocks for each id. Most paths do; some don't. Once an -//// orphan lands in `CognitiveState.messages`, every subsequent cycle -//// re-sends the poisoned history and the API keeps rejecting. 
-//// -//// This module is the last line of defence: a pure function that -//// walks a messages list, detects orphaned tool_use ids, and injects -//// synthesised `tool_result` stubs so the provider sees a -//// well-formed history. -//// -//// The repair is a safety net, not a license to write sloppy upstream -//// code. Callers should `slog.warn` when `repair/1` actually changes -//// anything — each repair represents an upstream bug worth fixing. - -// Copyright (C) 2026 Seamus Brady -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -import gleam/list -import gleam/set.{type Set} -import llm/types.{ - type ContentBlock, type Message, Assistant, Message, ToolResultContent, - ToolUseContent, User, -} - -/// Synthetic content for an orphan repair. `is_error: True` so the -/// LLM knows the call didn't complete normally. -const orphan_stub_content = "[internal: tool call did not complete]" - -// --------------------------------------------------------------------------- -// Public API -// --------------------------------------------------------------------------- - -/// Scan the messages list and return the list of orphaned tool_use -/// ids — tool_use blocks in an assistant message whose immediately -/// following user message does not contain a `tool_result` with a -/// matching id. Empty list = well-formed. -pub fn find_orphans(messages: List(Message)) -> List(String) { - collect_orphans(messages, []) - |> list.reverse -} - -/// Return a messages list where every orphan identified by -/// `find_orphans` has a matching synthetic `tool_result` block in a -/// user message immediately after the offending assistant message. 
-/// -/// If the orphan-assistant is already followed by a user message, the -/// stub blocks are prepended to that message's content. Otherwise a -/// fresh user message is inserted. -pub fn repair(messages: List(Message)) -> List(Message) { - // Single pass. Each assistant message is checked against the next - // message; any tool_use whose id is missing from the following - // user's tool_results gets a stub prepended. A trailing assistant - // with tool_use and no follower gets a synthetic user message - // appended. - do_repair(messages, []) -} - -// --------------------------------------------------------------------------- -// Internal -// --------------------------------------------------------------------------- - -fn collect_orphans(messages: List(Message), acc: List(String)) -> List(String) { - case messages { - [] -> acc - [ - Message(role: Assistant, content: content), - Message(role: User, content: next_content), - ..rest - ] -> { - let tool_use_ids = tool_use_ids_in(content) - let result_ids = result_ids_in(next_content) - let orphans = - list.filter(tool_use_ids, fn(id) { !set.contains(result_ids, id) }) - let next_acc = list.fold(orphans, acc, fn(a, id) { [id, ..a] }) - // The user message has been "consumed" as the expected match; - // continue from after it. - collect_orphans(rest, next_acc) - } - [Message(role: Assistant, content: content), ..rest] -> { - // Assistant with no following user — every tool_use in content - // is an orphan. 
- let tool_use_ids = tool_use_ids_in(content) - let next_acc = list.fold(tool_use_ids, acc, fn(a, id) { [id, ..a] }) - collect_orphans(rest, next_acc) - } - [_, ..rest] -> collect_orphans(rest, acc) - } -} - -fn do_repair(messages: List(Message), acc: List(Message)) -> List(Message) { - case messages { - [] -> list.reverse(acc) - [ - Message(role: Assistant, content: a_content) as assistant, - Message(role: User, content: u_content) as user_msg, - ..rest - ] -> { - let tool_use_ids = tool_use_ids_in(a_content) - let result_ids = result_ids_in(u_content) - let orphan_ids = - list.filter(tool_use_ids, fn(id) { !set.contains(result_ids, id) }) - case orphan_ids { - [] -> do_repair(rest, [user_msg, assistant, ..acc]) - _ -> { - let stubs = list.map(orphan_ids, stub_block) - let patched_user = - Message(role: User, content: list.append(stubs, u_content)) - do_repair(rest, [patched_user, assistant, ..acc]) - } - } - } - [Message(role: Assistant, content: a_content) as assistant, ..rest] -> { - // Assistant with no following user at all — inject a synthetic - // user message containing stubs for every tool_use. - let tool_use_ids = tool_use_ids_in(a_content) - case tool_use_ids { - [] -> do_repair(rest, [assistant, ..acc]) - ids -> { - let stubs = list.map(ids, stub_block) - let patched_user = Message(role: User, content: stubs) - do_repair(rest, [patched_user, assistant, ..acc]) - } - } - } - [other, ..rest] -> do_repair(rest, [other, ..acc]) - } -} - -fn tool_use_ids_in(content: List(ContentBlock)) -> List(String) { - list.filter_map(content, fn(block) { - case block { - ToolUseContent(id: id, ..) -> Ok(id) - _ -> Error(Nil) - } - }) -} - -fn result_ids_in(content: List(ContentBlock)) -> Set(String) { - list.filter_map(content, fn(block) { - case block { - ToolResultContent(tool_use_id: id, ..) 
-> Ok(id) - _ -> Error(Nil) - } - }) - |> set.from_list -} - -fn stub_block(id: String) -> ContentBlock { - ToolResultContent( - tool_use_id: id, - content: orphan_stub_content, - is_error: True, - ) -} diff --git a/test/llm/message_history_test.gleam b/test/llm/message_history_test.gleam new file mode 100644 index 0000000..ad77d68 --- /dev/null +++ b/test/llm/message_history_test.gleam @@ -0,0 +1,260 @@ +//// Tests for the invariant-bearing MessageHistory wrapper. +//// +//// The whole point of this module is that the API contracts are +//// impossible to violate via its public surface. Each invariant gets a +//// dedicated test that constructs the exact malformation that used to +//// poison `state.messages` in the old `List(Message)` design and +//// confirms the new API silently corrects it (or refuses, depending +//// on the case). + +// Copyright (C) 2026 Seamus Brady +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +import gleam/list +import gleeunit/should +import llm/message_history as mh +import llm/types.{ + Assistant, Message, TextContent, ToolResultContent, ToolUseContent, User, +} + +// ── Construction ────────────────────────────────────────────────────────── + +pub fn new_is_empty_test() { + mh.length(mh.new()) |> should.equal(0) + mh.is_empty(mh.new()) |> should.be_true +} + +pub fn from_list_empty_is_empty_test() { + mh.length(mh.from_list([])) |> should.equal(0) +} + +// ── Invariant 1: first message must be User ─────────────────────────────── + +pub fn add_assistant_to_empty_history_is_dropped_test() { + // Anthropic rejects a history that starts with an assistant message. + // The old List(Message) design relied on a sweep at the LLM boundary; + // here, add() refuses up-front. 
+ let h = mh.new() |> mh.add_assistant([TextContent("hi")]) + mh.length(h) |> should.equal(0) +} + +pub fn from_list_drops_leading_assistant_test() { + let raw = [ + Message(role: Assistant, content: [TextContent("orphan")]), + Message(role: User, content: [TextContent("hello")]), + ] + let h = mh.from_list(raw) + case mh.to_list(h) { + [Message(role: User, ..)] -> Nil + _ -> { + should.fail() + Nil + } + } +} + +// ── Invariant 2: alternation (no consecutive same-role) ─────────────────── + +pub fn consecutive_user_messages_coalesce_test() { + let h = + mh.new() + |> mh.add_user_text("first") + |> mh.add_user_text("second") + mh.length(h) |> should.equal(1) + case mh.to_list(h) { + [Message(role: User, content: blocks)] -> + list.length(blocks) |> should.equal(2) + _ -> { + should.fail() + Nil + } + } +} + +pub fn consecutive_assistant_messages_coalesce_test() { + // First an assistant lands inside a valid user/assistant pair, then a + // second assistant arrives — they should coalesce. + let h = + mh.new() + |> mh.add_user_text("question") + |> mh.add_assistant([TextContent("answer part 1")]) + |> mh.add_assistant([TextContent("answer part 2")]) + mh.length(h) |> should.equal(2) +} + +// ── Invariant 3: orphan tool_result stripping ───────────────────────────── +// +// This is the bug class that caused the operator's cog to die with +// "messages.40.content.0: unexpected `tool_use_id`". The fix is +// structural: add() refuses to insert a tool_result whose tool_use_id +// isn't in the immediately-prior assistant message. + +pub fn add_user_with_orphan_tool_result_strips_it_test() { + let h = + mh.new() + |> mh.add_user_text("hi") + // Add an assistant turn with no tool_use blocks. + |> mh.add_assistant([TextContent("hi back")]) + // Now try to inject a user message with a tool_result whose + // tool_use_id has no matching tool_use in the prior assistant. 
+ |> mh.add_user([ + ToolResultContent(tool_use_id: "ghost", content: "x", is_error: False), + ]) + // The stripped tool_result emptied the message; the message was + // dropped entirely. History stays clean. + mh.length(h) |> should.equal(2) +} + +pub fn add_user_keeps_paired_tool_result_test() { + let h = + mh.new() + |> mh.add_user_text("call a tool") + |> mh.add_assistant([ + ToolUseContent(id: "real", name: "calc", input_json: "{}"), + ]) + |> mh.add_user([ + ToolResultContent(tool_use_id: "real", content: "42", is_error: False), + ]) + mh.length(h) |> should.equal(3) +} + +pub fn add_user_drops_orphan_keeps_valid_test() { + // Mixed: one valid tool_result, one orphan. Strip orphan, keep valid. + let h = + mh.new() + |> mh.add_user_text("call a tool") + |> mh.add_assistant([ + ToolUseContent(id: "real", name: "calc", input_json: "{}"), + ]) + |> mh.add_user([ + ToolResultContent(tool_use_id: "real", content: "42", is_error: False), + ToolResultContent(tool_use_id: "ghost", content: "x", is_error: False), + ]) + case mh.to_list(h) { + [_, _, Message(role: User, content: blocks)] -> { + list.length(blocks) |> should.equal(1) + case blocks { + [ToolResultContent(tool_use_id: id, ..)] -> id |> should.equal("real") + _ -> { + should.fail() + Nil + } + } + } + _ -> { + should.fail() + Nil + } + } +} + +// ── from_list: ingest sanitisation handles every direction ──────────────── +// +// from_list is used at startup (load persisted history off disk) and +// elsewhere where untyped messages cross the boundary. It runs the +// full repair pipeline: drop leading assistant, coalesce, strip orphan +// tool_results, inject stubs for orphan tool_uses. + +pub fn from_list_strips_orphan_tool_result_at_ingest_test() { + // Direct reproduction of the cog-killing bug: a persisted history + // contains a user message with a tool_result whose matching + // tool_use was lost (e.g. context trimmed and never repaired). The + // ingest path must clean it before construction returns. 
+ let raw = [ + Message(role: User, content: [TextContent("first")]), + Message(role: Assistant, content: [TextContent("hello")]), + Message(role: User, content: [ + ToolResultContent(tool_use_id: "ghost", content: "x", is_error: False), + ]), + ] + let h = mh.from_list(raw) + // The orphan-only user message gets emptied → dropped entirely. + // Coalescing then merges the leading user with... no following user + // (the assistant remains). + let after = mh.to_list(h) + list.any(after, fn(msg) { + case msg.role { + User -> + list.any(msg.content, fn(b) { + case b { + ToolResultContent(tool_use_id: "ghost", ..) -> True + _ -> False + } + }) + _ -> False + } + }) + |> should.be_false +} + +pub fn from_list_injects_stub_for_orphan_tool_use_test() { + // Opposite direction: an assistant emitted a tool_use but the + // matching user with tool_result is missing. The ingest pipeline + // synthesises a stub so the next API call doesn't 400. + let raw = [ + Message(role: User, content: [TextContent("call calc")]), + Message(role: Assistant, content: [ + ToolUseContent(id: "abandoned", name: "calc", input_json: "{}"), + ]), + ] + let h = mh.from_list(raw) + let after = mh.to_list(h) + // Should now have 3 messages: user, assistant, synthetic-user-with-stub. + list.length(after) |> should.equal(3) + case list.last(after) { + Ok(Message(role: User, content: blocks)) -> + list.any(blocks, fn(b) { + case b { + ToolResultContent(tool_use_id: "abandoned", is_error: True, ..) 
-> + True + _ -> False + } + }) + |> should.be_true + _ -> { + should.fail() + Nil + } + } +} + +// ── for_send is wire-ready, to_list is identical (today) ────────────────── + +pub fn for_send_equals_to_list_test() { + let h = + mh.new() + |> mh.add_user_text("hi") + |> mh.add_assistant([TextContent("yes")]) + mh.for_send(h) |> should.equal(mh.to_list(h)) +} + +// ── Last + length helpers ───────────────────────────────────────────────── + +pub fn last_returns_most_recent_test() { + let h = + mh.new() + |> mh.add_user_text("first") + |> mh.add_assistant([TextContent("answer")]) + case mh.last(h) { + option.Some(Message(role: Assistant, ..)) -> Nil + _ -> { + should.fail() + Nil + } + } +} + +pub fn length_counts_messages_test() { + let h = + mh.new() + |> mh.add_user_text("a") + |> mh.add_assistant([TextContent("b")]) + |> mh.add_user_text("c") + mh.length(h) |> should.equal(3) +} + +import gleam/option diff --git a/test/llm/message_repair_test.gleam b/test/llm/message_repair_test.gleam deleted file mode 100644 index f0b91e6..0000000 --- a/test/llm/message_repair_test.gleam +++ /dev/null @@ -1,140 +0,0 @@ -//// Message-history repair: detecting and fixing orphaned tool_use -//// ids so the Anthropic API doesn't 400 on well-formed histories. - -// Copyright (C) 2026 Seamus Brady -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
- -import gleam/list -import gleeunit/should -import llm/message_repair -import llm/types.{ - Assistant, Message, TextContent, ToolResultContent, ToolUseContent, User, -} - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -fn tool_use(id: String) -> types.ContentBlock { - ToolUseContent(id: id, name: "dummy", input_json: "{}") -} - -fn tool_result(id: String) -> types.ContentBlock { - ToolResultContent(tool_use_id: id, content: "ok", is_error: False) -} - -fn orphan_stub_matches(blocks: List(types.ContentBlock), id: String) -> Bool { - list.any(blocks, fn(b) { - case b { - ToolResultContent(tool_use_id: tid, is_error: True, ..) -> tid == id - _ -> False - } - }) -} - -// --------------------------------------------------------------------------- -// find_orphans -// --------------------------------------------------------------------------- - -pub fn empty_history_has_no_orphans_test() { - message_repair.find_orphans([]) |> should.equal([]) -} - -pub fn well_formed_history_has_no_orphans_test() { - let msgs = [ - Message(role: User, content: [TextContent(text: "hi")]), - Message(role: Assistant, content: [tool_use("t1")]), - Message(role: User, content: [tool_result("t1")]), - ] - message_repair.find_orphans(msgs) |> should.equal([]) -} - -pub fn orphan_assistant_followed_by_user_without_result_test() { - let msgs = [ - Message(role: Assistant, content: [tool_use("t1"), tool_use("t2")]), - Message(role: User, content: [tool_result("t1")]), - ] - // t1 has a matching result, t2 doesn't. 
- message_repair.find_orphans(msgs) |> should.equal(["t2"]) -} - -pub fn orphan_assistant_with_no_following_user_test() { - let msgs = [Message(role: Assistant, content: [tool_use("t1")])] - message_repair.find_orphans(msgs) |> should.equal(["t1"]) -} - -pub fn multiple_orphans_across_history_test() { - let msgs = [ - Message(role: Assistant, content: [tool_use("a1")]), - Message(role: User, content: [TextContent(text: "no result block")]), - Message(role: Assistant, content: [tool_use("b1"), tool_use("b2")]), - Message(role: User, content: [tool_result("b1")]), - ] - // a1 orphaned (no matching result in next user msg), b2 orphaned. - message_repair.find_orphans(msgs) |> should.equal(["a1", "b2"]) -} - -// --------------------------------------------------------------------------- -// repair — leaves well-formed history untouched -// --------------------------------------------------------------------------- - -pub fn repair_is_identity_when_no_orphans_test() { - let msgs = [ - Message(role: User, content: [TextContent(text: "hi")]), - Message(role: Assistant, content: [tool_use("t1")]), - Message(role: User, content: [tool_result("t1")]), - Message(role: Assistant, content: [TextContent(text: "done")]), - ] - message_repair.repair(msgs) |> should.equal(msgs) -} - -// --------------------------------------------------------------------------- -// repair — injects stubs -// --------------------------------------------------------------------------- - -pub fn repair_prepends_stub_to_following_user_test() { - let msgs = [ - Message(role: Assistant, content: [tool_use("t1"), tool_use("t2")]), - Message(role: User, content: [tool_result("t1")]), - ] - let repaired = message_repair.repair(msgs) - // The user message after the orphan assistant must now contain a - // tool_result for t2 (as the new stub) plus the original t1 result. 
- let assert [_, Message(role: User, content: user_content)] = repaired - orphan_stub_matches(user_content, "t2") |> should.equal(True) - // Original t1 result is preserved. - list.any(user_content, fn(b) { - case b { - ToolResultContent(tool_use_id: "t1", is_error: False, ..) -> True - _ -> False - } - }) - |> should.equal(True) -} - -pub fn repair_inserts_user_when_assistant_has_no_follower_test() { - let msgs = [Message(role: Assistant, content: [tool_use("t1")])] - let repaired = message_repair.repair(msgs) - // A user message must now follow the assistant with a stub for t1. - case repaired { - [Message(role: Assistant, ..), Message(role: User, content: uc)] -> { - orphan_stub_matches(uc, "t1") |> should.equal(True) - } - _ -> should.fail() - } -} - -pub fn repair_is_idempotent_test() { - let msgs = [ - Message(role: Assistant, content: [tool_use("t1"), tool_use("t2")]), - Message(role: User, content: [tool_result("t1")]), - ] - let once = message_repair.repair(msgs) - let twice = message_repair.repair(once) - once |> should.equal(twice) - message_repair.find_orphans(once) |> should.equal([]) -}