From eedb868d04b2f8ff014bc2275bd54be791d7c664 Mon Sep 17 00:00:00 2001 From: Jessica Alder Date: Tue, 28 Apr 2026 17:20:33 -0700 Subject: [PATCH] google: default mimeType when emitting fileData for HTTPS image URLs --- crates/lingua/src/providers/google/convert.rs | 77 +++++++++- crates/lingua/src/util/media.rs | 106 +++++++++++++ payloads/cases/advanced.ts | 52 +++++++ .../__snapshots__/transforms.test.ts.snap | 145 ++++++++++++++++++ 4 files changed, 378 insertions(+), 2 deletions(-) diff --git a/crates/lingua/src/providers/google/convert.rs b/crates/lingua/src/providers/google/convert.rs index 6dc928e6..39cfe712 100644 --- a/crates/lingua/src/providers/google/convert.rs +++ b/crates/lingua/src/providers/google/convert.rs @@ -30,7 +30,7 @@ use crate::universal::request::{ }; use crate::universal::response::{FinishReason, UniversalUsage}; use crate::universal::tools::{BuiltinToolProvider, UniversalTool, UniversalToolType}; -use crate::util::media::parse_base64_data_url; +use crate::util::media::{mime_type_from_url, parse_base64_data_url}; /// Prefix for synthetic tool call IDs generated when Google omits them. const SYNTHETIC_CALL_ID_PREFIX: &str = "call_"; @@ -393,10 +393,16 @@ impl TryFromLLM for GoogleContent { } else if data.starts_with("http://") || data.starts_with("https://") { + // Vertex AI rejects file_data with empty mimeType; fall + // back to URL inference (extension or S3 presigned + // content-type), then DEFAULT_MIME_TYPE. + let mime_type = media_type + .or_else(|| mime_type_from_url(&data)) + .unwrap_or_else(|| DEFAULT_MIME_TYPE.to_string()); converted.push(GooglePart { file_data: Some(GoogleFileData { file_uri: Some(data), - mime_type: media_type, + mime_type: Some(mime_type), }), ..Default::default() }); @@ -1193,6 +1199,73 @@ mod tests { assert_eq!(parts[0].text.as_deref(), Some("Hello")); } + fn image_url_message(url: &str, media_type: Option) -> Message { + Message::User { + content: UserContent::Array(vec![UserContentPart::Image { + image: Value::String(url.to_string()), + media_type, + provider_options: None, + }]), + } + } + + #[test] + fn test_image_url_with_no_media_type() { + // Vertex AI rejects file_data with empty mimeType. When an OpenAI + // chat-completions image_url with an HTTPS URL flows in, media_type + // arrives as None. The converter must infer or default it. + let message = image_url_message("https://example.com/photo.jpg", None); + let content = >::try_from(message).unwrap(); + let parts = content.parts.unwrap(); + assert_eq!(parts.len(), 1); + let fd = parts[0].file_data.as_ref().expect("expected file_data"); + assert_eq!( + fd.file_uri.as_deref(), + Some("https://example.com/photo.jpg") + ); + assert_eq!(fd.mime_type.as_deref(), Some("image/jpeg")); + } + + #[test] + fn test_image_url_with_caller_supplied_media_type_wins() { + // Caller-supplied media_type takes precedence over URL inference. + let message = image_url_message( + "https://example.com/photo.jpg", + Some("image/png".to_string()), + ); + let content = >::try_from(message).unwrap(); + let fd = content.parts.unwrap()[0] + .file_data + .as_ref() + .cloned() + .expect("expected file_data"); + assert_eq!(fd.mime_type.as_deref(), Some("image/png")); + } + + #[test] + fn test_image_url_no_extension_falls_back_to_default() { + let message = image_url_message("https://example.com/no-extension", None); + let content = >::try_from(message).unwrap(); + let fd = content.parts.unwrap()[0] + .file_data + .as_ref() + .cloned() + .expect("expected file_data"); + assert_eq!(fd.mime_type.as_deref(), Some(DEFAULT_MIME_TYPE)); + } + + #[test] + fn test_image_data_url_unaffected() { + // Regression guard: data URL still emits inline_data with parsed mime. + let message = image_url_message("data:image/png;base64,iVBORw0KGgo=", None); + let content = >::try_from(message).unwrap(); + let part = &content.parts.unwrap()[0]; + assert!(part.file_data.is_none()); + let blob = part.inline_data.as_ref().expect("expected inline_data"); + assert_eq!(blob.mime_type.as_deref(), Some("image/png")); + assert_eq!(blob.data.as_deref(), Some("iVBORw0KGgo=")); + } + #[test] fn test_message_to_google_content_assistant() { let message = Message::Assistant { diff --git a/crates/lingua/src/util/media.rs b/crates/lingua/src/util/media.rs index 3fd371fd..85be4b36 100644 --- a/crates/lingua/src/util/media.rs +++ b/crates/lingua/src/util/media.rs @@ -420,6 +420,54 @@ pub fn is_localhost_url(url: &str) -> bool { url.starts_with("http://127.0.0.1") || url.starts_with("http://localhost") } +/// Best-effort MIME type lookup for a URL, without making any network calls. +/// +/// Resolution order: +/// 1. `response-content-type` query parameter on S3 presigned URLs. +/// 2. Filename extension lookup against the Vertex/Gemini-supported types. +/// +/// Returns `None` when neither source produces a value. Callers should apply +/// their own default (e.g. [`crate::universal::defaults::DEFAULT_MIME_TYPE`]). +pub fn mime_type_from_url(url: &str) -> Option { + let metadata = parse_file_metadata_from_url(url)?; + if let Some(content_type) = metadata.content_type { + return Some(content_type); + } + let extension = metadata.filename.rsplit_once('.')?.1.to_ascii_lowercase(); + mime_type_from_extension(&extension).map(str::to_string) +} + +fn mime_type_from_extension(extension: &str) -> Option<&'static str> { + Some(match extension { + "png" => "image/png", + "jpg" | "jpeg" => "image/jpeg", + "gif" => "image/gif", + "webp" => "image/webp", + "heic" => "image/heic", + "heif" => "image/heif", + "pdf" => "application/pdf", + "mp3" => "audio/mpeg", + "wav" => "audio/wav", + "ogg" => "audio/ogg", + "flac" => "audio/flac", + "aac" => "audio/aac", + "m4a" => "audio/mp4", + "mp4" => "video/mp4", + "mov" => "video/quicktime", + "avi" => "video/x-msvideo", + "webm" => "video/webm", + "mpeg" | "mpg" => "video/mpeg", + "txt" => "text/plain", + "html" | "htm" => "text/html", + "css" => "text/css", + "csv" => "text/csv", + "md" => "text/markdown", + "json" => "application/json", + "xml" => "application/xml", + _ => return None, + }) +} + #[cfg(test)] mod tests { use super::*; @@ -490,4 +538,62 @@ mod tests { assert!(parse_file_metadata_from_url("ftp://example.com/file").is_none()); assert!(parse_file_metadata_from_url("https://example.com/").is_none()); } + + #[test] + fn mime_type_from_url_extension_jpg() { + assert_eq!( + mime_type_from_url("https://example.com/photo.jpg").as_deref(), + Some("image/jpeg"), + ); + assert_eq!( + mime_type_from_url("https://example.com/photo.jpeg").as_deref(), + Some("image/jpeg"), + ); + } + + #[test] + fn mime_type_from_url_extension_uppercase() { + assert_eq!( + mime_type_from_url("https://example.com/PHOTO.JPG").as_deref(), + Some("image/jpeg"), + ); + } + + #[test] + fn mime_type_from_url_extension_pdf() { + assert_eq!( + mime_type_from_url("https://example.com/doc.pdf").as_deref(), + Some("application/pdf"), + ); + } + + #[test] + fn mime_type_from_url_s3_presigned_overrides_extension() { + // Path says .jpg but the presigned response-content-type says image/png — presigned wins. + let url = "https://bucket.s3.amazonaws.com/file.jpg?X-Amz-Expires=60&response-content-type=image%2Fpng"; + assert_eq!(mime_type_from_url(url).as_deref(), Some("image/png")); + } + + #[test] + fn mime_type_from_url_no_extension() { + assert_eq!(mime_type_from_url("https://example.com/file"), None); + } + + #[test] + fn mime_type_from_url_unknown_extension() { + assert_eq!(mime_type_from_url("https://example.com/file.xyz"), None); + } + + #[test] + fn mime_type_from_url_data_url_returns_none() { + // Data URLs aren't fetched filenames; callers should use parse_base64_data_url. + assert_eq!(mime_type_from_url("data:image/png;base64,iVBORw=="), None); + } + + #[test] + fn mime_type_from_url_invalid() { + assert_eq!(mime_type_from_url(""), None); + assert_eq!(mime_type_from_url("not a url"), None); + assert_eq!(mime_type_from_url("ftp://example.com/file.jpg"), None); + } } diff --git a/payloads/cases/advanced.ts b/payloads/cases/advanced.ts index d7f251d3..10acd709 100644 --- a/payloads/cases/advanced.ts +++ b/payloads/cases/advanced.ts @@ -125,6 +125,58 @@ export const advancedCases: TestCaseCollection = { }, }, + // Pins converter behavior for an HTTPS image URL with no caller-supplied + // media type. Vertex AI v1 rejects file_data with empty mimeType, so the + // Google adapter must infer or default it. The existing multimodalRequest + // case asserts inlineData (a different code path) and does not exercise + // this converter branch. + multimodalRequestUrlImage: { + "chat-completions": { + model: OPENAI_CHAT_COMPLETIONS_MODEL, + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "What do you see in this image?", + }, + { + type: "image_url", + image_url: { + url: "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + }, + }, + ], + }, + ], + max_completion_tokens: 300, + }, + responses: null, + anthropic: null, + google: { + contents: [ + { + role: "user", + parts: [ + { text: "What do you see in this image?" }, + { + fileData: { + fileUri: + "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + mimeType: "image/jpeg", + }, + }, + ], + }, + ], + generationConfig: { + maxOutputTokens: 300, + }, + }, + bedrock: null, + }, + complexReasoningRequest: { responses: { model: OPENAI_RESPONSES_MODEL, diff --git a/payloads/scripts/transforms/__snapshots__/transforms.test.ts.snap b/payloads/scripts/transforms/__snapshots__/transforms.test.ts.snap index 69561f30..0bc12fd2 100644 --- a/payloads/scripts/transforms/__snapshots__/transforms.test.ts.snap +++ b/payloads/scripts/transforms/__snapshots__/transforms.test.ts.snap @@ -7115,6 +7115,31 @@ exports[`chat-completions → anthropic > multimodalRequest > response 1`] = ` } `; +exports[`chat-completions → anthropic > multimodalRequestUrlImage > request 1`] = ` +{ + "max_tokens": 300, + "messages": [ + { + "content": [ + { + "text": "What do you see in this image?", + "type": "text", + }, + { + "source": { + "type": "url", + "url": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + }, + "type": "image", + }, + ], + "role": "user", + }, + ], + "model": "claude-sonnet-4-5-20250929", +} +`; + exports[`chat-completions → anthropic > nMultipleCompletionsParam > request 1`] = ` { "max_tokens": 4096, @@ -9456,6 +9481,7 @@ exports[`chat-completions → google > multimodalRequest > request 1`] = ` { "fileData": { "fileUri": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + "mimeType": "image/jpeg", }, }, ], @@ -9498,6 +9524,32 @@ exports[`chat-completions → google > multimodalRequest > response 1`] = ` } `; +exports[`chat-completions → google > multimodalRequestUrlImage > request 1`] = ` +{ + "contents": [ + { + "parts": [ + { + "text": "What do you see in this image?", + }, + { + "fileData": { + "fileUri": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + "mimeType": "image/jpeg", + }, + }, + ], + "role": "user", + }, + ], + "generationConfig": { + "maxOutputTokens": 300, + "responseSchema": null, + }, + "model": "gemini-2.5-flash", +} +`; + exports[`chat-completions → google > nMultipleCompletionsParam > request 1`] = ` { "contents": [ @@ -12158,6 +12210,28 @@ exports[`chat-completions → responses > multimodalRequest > response 1`] = ` } `; +exports[`chat-completions → responses > multimodalRequestUrlImage > request 1`] = ` +{ + "input": [ + { + "content": [ + { + "text": "What do you see in this image?", + "type": "input_text", + }, + { + "image_url": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + "type": "input_image", + }, + ], + "role": "user", + }, + ], + "max_output_tokens": 300, + "model": "gpt-5-nano", +} +`; + exports[`chat-completions → responses > nMultipleCompletionsParam > request 1`] = ` { "input": [ @@ -14613,6 +14687,31 @@ The spacing between contour lines indicates the steepness of the terrain - close } `; +exports[`google → anthropic > multimodalRequestUrlImage > request 1`] = ` +{ + "max_tokens": 300, + "messages": [ + { + "content": [ + { + "text": "What do you see in this image?", + "type": "text", + }, + { + "source": { + "type": "url", + "url": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + }, + "type": "image", + }, + ], + "role": "user", + }, + ], + "model": "claude-sonnet-4-5-20250929", +} +`; + exports[`google → anthropic > parallelToolCallsRequest > request 1`] = ` { "max_tokens": 4096, @@ -16861,6 +16960,30 @@ exports[`google → chat-completions > multimodalRequest > response 1`] = ` } `; +exports[`google → chat-completions > multimodalRequestUrlImage > request 1`] = ` +{ + "max_completion_tokens": 300, + "messages": [ + { + "content": [ + { + "text": "What do you see in this image?", + "type": "text", + }, + { + "image_url": { + "url": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + }, + "type": "image_url", + }, + ], + "role": "user", + }, + ], + "model": "gpt-5-nano", +} +`; + exports[`google → chat-completions > parallelToolCallsRequest > request 1`] = ` { "messages": [ @@ -19033,6 +19156,28 @@ exports[`google → responses > multimodalRequest > response 1`] = ` } `; +exports[`google → responses > multimodalRequestUrlImage > request 1`] = ` +{ + "input": [ + { + "content": [ + { + "text": "What do you see in this image?", + "type": "input_text", + }, + { + "image_url": "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg", + "type": "input_image", + }, + ], + "role": "user", + }, + ], + "max_output_tokens": 300, + "model": "gpt-5-nano", +} +`; + exports[`google → responses > parallelToolCallsRequest > request 1`] = ` { "input": [