diff --git a/src-tauri/src/ai/client.rs b/src-tauri/src/ai/client.rs index b35806d..1d1091f 100644 --- a/src-tauri/src/ai/client.rs +++ b/src-tauri/src/ai/client.rs @@ -557,9 +557,19 @@ pub async fn run_acp_prompt( prompt: &str, ) -> Result { // No streaming, no events emitted — internal_session_id is unused - let result = - run_acp_prompt_internal(agent, working_dir, prompt, None, None, "", true, None, None) - .await?; + let result = run_acp_prompt_internal( + agent, + working_dir, + prompt, + None, + None, + None, + "", + true, + None, + None, + ) + .await?; Ok(result.response) } @@ -578,6 +588,7 @@ pub async fn run_acp_prompt_raw( prompt, None, None, + None, "", false, None, @@ -603,6 +614,7 @@ pub async fn run_acp_prompt_with_session( agent, working_dir, prompt, + None, session_id, None, "", @@ -638,6 +650,38 @@ pub async fn run_acp_prompt_streaming( agent, working_dir, prompt, + None, // No images + acp_session_id, + Some(app_handle), + internal_session_id, + true, + buffer_callback, + cancellation, + ) + .await +} + +/// Run a prompt with images through ACP with streaming events emitted to frontend +/// +/// Same as `run_acp_prompt_streaming` but accepts optional image attachments. +/// Images are sent as ContentBlock::Image in the prompt request. +#[allow(clippy::too_many_arguments)] +pub async fn run_acp_prompt_streaming_with_images( + agent: &AcpAgent, + working_dir: &Path, + prompt: &str, + images: Option<&[crate::ImageAttachment]>, + acp_session_id: Option<&str>, + internal_session_id: &str, + app_handle: tauri::AppHandle, + buffer_callback: Option) + Send + Sync>>, + cancellation: Option>, +) -> Result { + run_acp_prompt_internal( + agent, + working_dir, + prompt, + images, acp_session_id, Some(app_handle), internal_session_id, @@ -654,6 +698,7 @@ async fn run_acp_prompt_internal( agent: &AcpAgent, working_dir: &Path, prompt: &str, + images: Option<&[crate::ImageAttachment]>, acp_session_id: Option<&str>, app_handle: Option, internal_session_id: &str, @@ -666,6 +711,7 @@ async fn run_acp_prompt_internal( let agent_args: Vec = agent.acp_args().iter().map(|s| s.to_string()).collect(); let working_dir = working_dir.to_path_buf(); let prompt = prompt.to_string(); + let images_owned: Option> = images.map(|imgs| imgs.to_vec()); let acp_session_id = acp_session_id.map(|s| s.to_string()); let internal_session_id = internal_session_id.to_string(); @@ -687,6 +733,7 @@ async fn run_acp_prompt_internal( &agent_args, &working_dir, &prompt, + images_owned.as_deref(), acp_session_id.as_deref(), app_handle, &internal_session_id, @@ -709,6 +756,7 @@ async fn run_acp_session_inner( agent_args: &[String], working_dir: &Path, prompt: &str, + images: Option<&[crate::ImageAttachment]>, existing_session_id: Option<&str>, app_handle: Option, internal_session_id: &str, @@ -845,11 +893,20 @@ async fn run_acp_session_inner( prompt.to_string() }; - // Send the prompt - let prompt_request = PromptRequest::new( - session_id.clone(), - vec![AcpContentBlock::Text(TextContent::new(full_prompt))], - ); + // Build content blocks: text prompt + optional images + let mut content_blocks = vec![AcpContentBlock::Text(TextContent::new(full_prompt))]; + + // Add image blocks if provided + if let Some(imgs) = images { + for img in imgs { + content_blocks.push(AcpContentBlock::Image( + agent_client_protocol::ImageContent::new(img.data.clone(), img.mime_type.clone()), + )); + } + } + + // Send the prompt with content blocks + let prompt_request = PromptRequest::new(session_id.clone(), content_blocks); let prompt_result = connection.prompt(prompt_request).await; diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs index 6c188d1..6d58f16 100644 --- a/src-tauri/src/ai/mod.rs +++ b/src-tauri/src/ai/mod.rs @@ -26,8 +26,8 @@ pub mod session; // Re-export core ACP client functionality pub use client::{ discover_acp_providers, find_acp_agent, find_acp_agent_by_id, run_acp_prompt, - run_acp_prompt_raw, run_acp_prompt_streaming, run_acp_prompt_with_session, AcpAgent, - AcpPromptResult, AcpProviderInfo, + run_acp_prompt_raw, run_acp_prompt_streaming, run_acp_prompt_streaming_with_images, + run_acp_prompt_with_session, AcpAgent, AcpPromptResult, AcpProviderInfo, }; // Re-export session manager types diff --git a/src-tauri/src/ai/session.rs b/src-tauri/src/ai/session.rs index 5c85114..297977a 100644 --- a/src-tauri/src/ai/session.rs +++ b/src-tauri/src/ai/session.rs @@ -293,7 +293,12 @@ impl SessionManager { } /// Send a prompt to a session - pub async fn send_prompt(&self, session_id: &str, prompt: String) -> Result<(), String> { + pub async fn send_prompt( + &self, + session_id: &str, + prompt: String, + images: Option>, + ) -> Result<(), String> { // Get or create live session let session_arc = self.get_or_create_live_session(session_id).await?; @@ -347,17 +352,32 @@ impl SessionManager { tokio::spawn(async move { // Run the ACP prompt with streaming - let result = client::run_acp_prompt_streaming( - &agent, - &working_dir, - &prompt, - acp_session_id.as_deref(), - &session_id_owned, - app_handle.clone(), - Some(buffer_callback), - Some(cancellation.clone()), - ) - .await; + let result = if let Some(ref imgs) = images { + client::run_acp_prompt_streaming_with_images( + &agent, + &working_dir, + &prompt, + Some(imgs.as_slice()), + acp_session_id.as_deref(), + &session_id_owned, + app_handle.clone(), + Some(buffer_callback), + Some(cancellation.clone()), + ) + .await + } else { + client::run_acp_prompt_streaming( + &agent, + &working_dir, + &prompt, + acp_session_id.as_deref(), + &session_id_owned, + app_handle.clone(), + Some(buffer_callback), + Some(cancellation.clone()), + ) + .await + }; // Update session and persist based on result let mut session = session_arc_clone.write().await; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 6d304ad..6762e86 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -841,6 +841,73 @@ async fn analyze_diff( ai::analysis::analyze_diff(&path, &spec, provider.as_deref()).await } +/// Maximum size for base64-encoded image data (10MB) +const MAX_IMAGE_SIZE: usize = 10 * 1024 * 1024; + +/// Maximum number of images per request +const MAX_IMAGE_COUNT: usize = 5; + +/// Allowed MIME types for image attachments +const ALLOWED_MIME_TYPES: &[&str] = &[ + "image/png", + "image/jpeg", + "image/jpg", + "image/gif", + "image/webp", +]; + +/// An image attachment for AI prompts +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct ImageAttachment { + /// Base64-encoded image data + pub data: String, + /// MIME type (e.g., "image/png", "image/jpeg") + pub mime_type: String, +} + +impl ImageAttachment { + /// Validates the image attachment for size and format + pub fn validate(&self) -> Result<(), String> { + if self.data.len() > MAX_IMAGE_SIZE { + return Err(format!( + "Image too large: {} bytes (max {} bytes)", + self.data.len(), + MAX_IMAGE_SIZE + )); + } + + if !ALLOWED_MIME_TYPES.contains(&self.mime_type.as_str()) { + return Err(format!( + "Unsupported image format: {}. Allowed formats: {}", + self.mime_type, + ALLOWED_MIME_TYPES.join(", ") + )); + } + + Ok(()) + } +} + +/// Validates a collection of image attachments +fn validate_images(images: &Option>) -> Result<(), String> { + if let Some(imgs) = images { + if imgs.len() > MAX_IMAGE_COUNT { + return Err(format!( + "Too many images: {} (max {})", + imgs.len(), + MAX_IMAGE_COUNT + )); + } + + for (i, img) in imgs.iter().enumerate() { + img.validate() + .map_err(|e| format!("Image {}: {}", i + 1, e))?; + } + } + Ok(()) +} + /// Response from send_agent_prompt including session ID for continuity. #[derive(serde::Serialize)] #[serde(rename_all = "camelCase")] @@ -894,6 +961,8 @@ async fn send_agent_prompt( /// - "session-complete": Finalized transcript when done /// - "session-error": Error information if the session fails /// +/// Supports optional image attachments for multimodal prompts. +/// /// Returns the same response as send_agent_prompt for compatibility. #[tauri::command(rename_all = "camelCase")] async fn send_agent_prompt_streaming( @@ -902,7 +971,10 @@ async fn send_agent_prompt_streaming( prompt: String, session_id: Option, provider: Option, + images: Option>, ) -> Result { + validate_images(&images)?; + let agent = if let Some(provider_id) = provider { ai::find_acp_agent_by_id(&provider_id).ok_or_else(|| { format!( @@ -919,10 +991,11 @@ async fn send_agent_prompt_streaming( // Legacy path: no internal session ID, use ACP session ID or "legacy" as fallback let internal_id = session_id.as_deref().unwrap_or("legacy"); - let result = ai::run_acp_prompt_streaming( + let result = ai::run_acp_prompt_streaming_with_images( &agent, &path, &prompt, + images.as_deref(), session_id.as_deref(), internal_id, app_handle, @@ -981,8 +1054,10 @@ async fn send_prompt( state: State<'_, Arc>, session_id: String, prompt: String, + images: Option>, ) -> Result<(), String> { - state.send_prompt(&session_id, prompt).await + validate_images(&images)?; + state.send_prompt(&session_id, prompt, images).await } /// Update session title. @@ -1927,7 +2002,10 @@ async fn start_branch_session( branch_id: String, user_prompt: String, agent_id: Option, + images: Option>, ) -> Result { + validate_images(&images)?; + // Get the branch to find the worktree path let branch = state .get_branch(&branch_id) @@ -1965,7 +2043,7 @@ async fn start_branch_session( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure @@ -2321,7 +2399,10 @@ async fn restart_branch_session( session_manager: State<'_, Arc>, branch_session_id: String, full_prompt: String, + images: Option>, ) -> Result { + validate_images(&images)?; + // Get the old session to retrieve the branch ID and prompt let old_session = state .get_branch_session(&branch_session_id) @@ -2357,7 +2438,7 @@ async fn restart_branch_session( // Send the prompt to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure @@ -2544,7 +2625,10 @@ async fn start_branch_note( title: String, description: String, agent_id: Option, + images: Option>, ) -> Result { + validate_images(&images)?; + // Get the branch to find the worktree path let branch = state .get_branch(&branch_id) @@ -2585,7 +2669,7 @@ async fn start_branch_note( // Send the full prompt (with context) to the AI if let Err(e) = session_manager - .send_prompt(&ai_session_id, full_prompt) + .send_prompt(&ai_session_id, full_prompt, images) .await { // Clean up on failure diff --git a/src/lib/NewSessionModal.svelte b/src/lib/NewSessionModal.svelte index 8cc82dc..142e7c0 100644 --- a/src/lib/NewSessionModal.svelte +++ b/src/lib/NewSessionModal.svelte @@ -8,8 +8,16 @@ builds the full prompt with timeline context. -->