From bb1ce8320b9168874c4177d4ac7a57b77864d463 Mon Sep 17 00:00:00 2001 From: Max Novich Date: Wed, 4 Feb 2026 21:49:05 -0800 Subject: [PATCH 1/4] Add image attachment support to ACP communication - Add ImageAttachment type to Rust backend and TypeScript frontend - Update send_agent_prompt_streaming command to accept optional images parameter - Add run_acp_prompt_streaming_with_images function to AI client - Update session manager send_prompt to handle image attachments - Images are sent as base64-encoded content blocks in ACP prompts - Supports pasting and attaching images for multimodal agent interactions --- src-tauri/src/ai/client.rs | 59 +++++++++++++++++++++++++++++++++---- src-tauri/src/ai/mod.rs | 4 +-- src-tauri/src/ai/session.rs | 44 +++++++++++++++++++-------- src-tauri/src/lib.rs | 25 ++++++++++++---- src/lib/services/ai.ts | 9 ++++-- src/lib/types.ts | 8 +++++ 6 files changed, 122 insertions(+), 27 deletions(-) diff --git a/src-tauri/src/ai/client.rs b/src-tauri/src/ai/client.rs index b35806d..3bb0e03 100644 --- a/src-tauri/src/ai/client.rs +++ b/src-tauri/src/ai/client.rs @@ -558,7 +558,7 @@ pub async fn run_acp_prompt( ) -> Result { // No streaming, no events emitted — internal_session_id is unused let result = - run_acp_prompt_internal(agent, working_dir, prompt, None, None, "", true, None, None) + run_acp_prompt_internal(agent, working_dir, prompt, None, None, None, "", true, None, None) .await?; Ok(result.response) } @@ -578,6 +578,7 @@ pub async fn run_acp_prompt_raw( prompt, None, None, + None, "", false, None, @@ -603,6 +604,7 @@ pub async fn run_acp_prompt_with_session( agent, working_dir, prompt, + None, session_id, None, "", @@ -638,6 +640,38 @@ pub async fn run_acp_prompt_streaming( agent, working_dir, prompt, + None, // No images + acp_session_id, + Some(app_handle), + internal_session_id, + true, + buffer_callback, + cancellation, + ) + .await +} + +/// Run a prompt with images through ACP with streaming events emitted to frontend +/// 
+/// Same as `run_acp_prompt_streaming` but accepts optional image attachments. +/// Images are sent as ContentBlock::Image in the prompt request. +#[allow(clippy::too_many_arguments)] +pub async fn run_acp_prompt_streaming_with_images( + agent: &AcpAgent, + working_dir: &Path, + prompt: &str, + images: Option<&[crate::ImageAttachment]>, + acp_session_id: Option<&str>, + internal_session_id: &str, + app_handle: tauri::AppHandle, + buffer_callback: Option) + Send + Sync>>, + cancellation: Option>, +) -> Result { + run_acp_prompt_internal( + agent, + working_dir, + prompt, + images, acp_session_id, Some(app_handle), internal_session_id, @@ -654,6 +688,7 @@ async fn run_acp_prompt_internal( agent: &AcpAgent, working_dir: &Path, prompt: &str, + images: Option<&[crate::ImageAttachment]>, acp_session_id: Option<&str>, app_handle: Option, internal_session_id: &str, @@ -666,6 +701,7 @@ async fn run_acp_prompt_internal( let agent_args: Vec = agent.acp_args().iter().map(|s| s.to_string()).collect(); let working_dir = working_dir.to_path_buf(); let prompt = prompt.to_string(); + let images_owned: Option> = images.map(|imgs| imgs.to_vec()); let acp_session_id = acp_session_id.map(|s| s.to_string()); let internal_session_id = internal_session_id.to_string(); @@ -687,6 +723,7 @@ async fn run_acp_prompt_internal( &agent_args, &working_dir, &prompt, + images_owned.as_deref(), acp_session_id.as_deref(), app_handle, &internal_session_id, @@ -709,6 +746,7 @@ async fn run_acp_session_inner( agent_args: &[String], working_dir: &Path, prompt: &str, + images: Option<&[crate::ImageAttachment]>, existing_session_id: Option<&str>, app_handle: Option, internal_session_id: &str, @@ -845,11 +883,20 @@ async fn run_acp_session_inner( prompt.to_string() }; - // Send the prompt - let prompt_request = PromptRequest::new( - session_id.clone(), - vec![AcpContentBlock::Text(TextContent::new(full_prompt))], - ); + // Build content blocks: text prompt + optional images + let mut content_blocks = 
vec![AcpContentBlock::Text(TextContent::new(full_prompt))]; + + // Add image blocks if provided + if let Some(imgs) = images { + for img in imgs { + content_blocks.push(AcpContentBlock::Image( + agent_client_protocol::ImageContent::new(img.data.clone(), img.mime_type.clone()), + )); + } + } + + // Send the prompt with content blocks + let prompt_request = PromptRequest::new(session_id.clone(), content_blocks); let prompt_result = connection.prompt(prompt_request).await; diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs index 6c188d1..6d58f16 100644 --- a/src-tauri/src/ai/mod.rs +++ b/src-tauri/src/ai/mod.rs @@ -26,8 +26,8 @@ pub mod session; // Re-export core ACP client functionality pub use client::{ discover_acp_providers, find_acp_agent, find_acp_agent_by_id, run_acp_prompt, - run_acp_prompt_raw, run_acp_prompt_streaming, run_acp_prompt_with_session, AcpAgent, - AcpPromptResult, AcpProviderInfo, + run_acp_prompt_raw, run_acp_prompt_streaming, run_acp_prompt_streaming_with_images, + run_acp_prompt_with_session, AcpAgent, AcpPromptResult, AcpProviderInfo, }; // Re-export session manager types diff --git a/src-tauri/src/ai/session.rs b/src-tauri/src/ai/session.rs index 5c85114..297977a 100644 --- a/src-tauri/src/ai/session.rs +++ b/src-tauri/src/ai/session.rs @@ -293,7 +293,12 @@ impl SessionManager { } /// Send a prompt to a session - pub async fn send_prompt(&self, session_id: &str, prompt: String) -> Result<(), String> { + pub async fn send_prompt( + &self, + session_id: &str, + prompt: String, + images: Option>, + ) -> Result<(), String> { // Get or create live session let session_arc = self.get_or_create_live_session(session_id).await?; @@ -347,17 +352,32 @@ impl SessionManager { tokio::spawn(async move { // Run the ACP prompt with streaming - let result = client::run_acp_prompt_streaming( - &agent, - &working_dir, - &prompt, - acp_session_id.as_deref(), - &session_id_owned, - app_handle.clone(), - Some(buffer_callback), - 
Some(cancellation.clone()), - ) - .await; + let result = if let Some(ref imgs) = images { + client::run_acp_prompt_streaming_with_images( + &agent, + &working_dir, + &prompt, + Some(imgs.as_slice()), + acp_session_id.as_deref(), + &session_id_owned, + app_handle.clone(), + Some(buffer_callback), + Some(cancellation.clone()), + ) + .await + } else { + client::run_acp_prompt_streaming( + &agent, + &working_dir, + &prompt, + acp_session_id.as_deref(), + &session_id_owned, + app_handle.clone(), + Some(buffer_callback), + Some(cancellation.clone()), + ) + .await + }; // Update session and persist based on result let mut session = session_arc_clone.write().await; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 6d304ad..86e9738 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -841,6 +841,17 @@ async fn analyze_diff( ai::analysis::analyze_diff(&path, &spec, provider.as_deref()).await } +/// An image attachment for AI prompts +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct ImageAttachment { + /// Base64-encoded image data + pub data: String, + /// MIME type (e.g., "image/png", "image/jpeg"); serialized as `mimeType`, with the snake_case key `mime_type` also accepted on input because the frontend `ImageAttachment` interface declares it that way + #[serde(alias = "mime_type")] + pub mime_type: String, +} + /// Response from send_agent_prompt including session ID for continuity. #[derive(serde::Serialize)] #[serde(rename_all = "camelCase")] @@ -894,6 +904,8 @@ async fn send_agent_prompt( /// - "session-complete": Finalized transcript when done /// - "session-error": Error information if the session fails /// +/// Supports optional image attachments for multimodal prompts. +/// /// Returns the same response as send_agent_prompt for compatibility.
#[tauri::command(rename_all = "camelCase")] async fn send_agent_prompt_streaming( @@ -902,6 +914,7 @@ async fn send_agent_prompt_streaming( prompt: String, session_id: Option, provider: Option, + images: Option>, ) -> Result { let agent = if let Some(provider_id) = provider { ai::find_acp_agent_by_id(&provider_id).ok_or_else(|| { @@ -919,10 +932,11 @@ async fn send_agent_prompt_streaming( // Legacy path: no internal session ID, use ACP session ID or "legacy" as fallback let internal_id = session_id.as_deref().unwrap_or("legacy"); - let result = ai::run_acp_prompt_streaming( + let result = ai::run_acp_prompt_streaming_with_images( &agent, &path, &prompt, + images.as_deref(), session_id.as_deref(), internal_id, app_handle, @@ -981,8 +995,9 @@ async fn send_prompt( state: State<'_, Arc>, session_id: String, prompt: String, + images: Option>, ) -> Result<(), String> { - state.send_prompt(&session_id, prompt).await + state.send_prompt(&session_id, prompt, images).await } /// Update session title. 
@@ -1965,7 +1980,7 @@ async fn start_branch_session( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, None) .await { // Clean up on failure @@ -2357,7 +2372,7 @@ async fn restart_branch_session( // Send the prompt to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, None) .await { // Clean up on failure @@ -2585,7 +2600,7 @@ async fn start_branch_note( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, None) .await { // Clean up on failure diff --git a/src/lib/services/ai.ts b/src/lib/services/ai.ts index da7759b..4c8b583 100644 --- a/src/lib/services/ai.ts +++ b/src/lib/services/ai.ts @@ -7,6 +7,7 @@ import type { DiffSpec, SmartDiffAnnotation, Comment, + ImageAttachment, } from '../types'; // ============================================================================= @@ -180,8 +181,12 @@ export async function getSessionStatus(sessionId: string): Promise { - return invoke('send_prompt', { sessionId, prompt }); +export async function sendPrompt( + sessionId: string, + prompt: string, + images?: ImageAttachment[] +): Promise { + return invoke('send_prompt', { sessionId, prompt, images: images ?? 
null }); } /** diff --git a/src/lib/types.ts b/src/lib/types.ts index f5b9faf..ea6482c 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -243,6 +243,14 @@ export interface NewEdit { // AI Analysis types // ============================================================================= +/** An image attachment for multimodal prompts */ +export interface ImageAttachment { + /** Base64-encoded image data */ + data: string; + /** MIME type (e.g., "image/png", "image/jpeg") */ + mime_type: string; +} + /** A span of lines for AI annotations (0-indexed, exclusive end) */ export interface LineSpan { start: number; From 5508c0c097a04c943d71531f924a7b4313f591c2 Mon Sep 17 00:00:00 2001 From: Max Novich Date: Thu, 5 Feb 2026 17:11:37 -0800 Subject: [PATCH 2/4] feat: add image attachment support to new agent forward UI Wire up image attachment support for the new agent forward UI (konami code enabled branch workflow). This completes the image support infrastructure by: - Add image attachment UI to NewSessionModal with file picker and preview - Update startBranchSession, restartBranchSession, and startBranchNote to accept and pass image attachments - Thread images parameter through Tauri commands to session_manager.send_prompt The ACP communication layer already supported images; this change enables the new agent forward UI to utilize that capability. 
Co-Authored-By: Claude Sonnet 4.5 --- src-tauri/src/lib.rs | 9 +- src/lib/NewSessionModal.svelte | 155 ++++++++++++++++++++++++++++++++- src/lib/services/branch.ts | 15 +++- 3 files changed, 170 insertions(+), 9 deletions(-) diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 86e9738..4e04a70 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1942,6 +1942,7 @@ async fn start_branch_session( branch_id: String, user_prompt: String, agent_id: Option, + images: Option>, ) -> Result { // Get the branch to find the worktree path let branch = state @@ -1980,7 +1981,7 @@ async fn start_branch_session( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt, None) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure @@ -2336,6 +2337,7 @@ async fn restart_branch_session( session_manager: State<'_, Arc>, branch_session_id: String, full_prompt: String, + images: Option>, ) -> Result { // Get the old session to retrieve the branch ID and prompt let old_session = state @@ -2372,7 +2374,7 @@ async fn restart_branch_session( // Send the prompt to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt, None) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure @@ -2559,6 +2561,7 @@ async fn start_branch_note( title: String, description: String, agent_id: Option, + images: Option>, ) -> Result { // Get the branch to find the worktree path let branch = state @@ -2600,7 +2603,7 @@ async fn start_branch_note( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt, None) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure diff --git a/src/lib/NewSessionModal.svelte b/src/lib/NewSessionModal.svelte index 8cc82dc..ed6f6b7 100644 --- a/src/lib/NewSessionModal.svelte +++ b/src/lib/NewSessionModal.svelte @@ -8,8 
+8,9 @@ builds the full prompt with timeline context. -->