Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions crates/coverage-report/src/requests_expected_differences.json
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,54 @@
"skip": true,
"reason": "Image media_type normalization artifact"
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "ChatCompletions",
"target": "Responses",
"fields": [
{ "pattern": "messages[*].content[*].media_type", "reason": "Google-backed image URL MIME inference normalizes null media_type to image/jpeg" }
]
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "Google",
"target": "ChatCompletions",
"fields": [
{ "pattern": "messages[*].content[*].media_type", "reason": "OpenAI ChatCompletions image URLs do not preserve inferred MIME type on plain HTTPS URLs" }
]
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "*",
"target": "Anthropic",
"fields": [
{ "pattern": "messages[*].content.length", "reason": "Anthropic request path drops the image URL block for this case, leaving only the text part" }
]
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "*",
"target": "Bedrock",
"fields": [
{ "pattern": "messages[*].content.length", "reason": "Bedrock Anthropic-compatible request path drops the image URL block for this case, leaving only the text part" }
]
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "*",
"target": "Bedrock Anthropic",
"fields": [
{ "pattern": "messages[*].content.length", "reason": "Bedrock Anthropic-compatible request path drops the image URL block for this case, leaving only the text part" }
]
},
{
"testCase": "imageUrlMimeTypeFallbackParam",
"source": "*",
"target": "Vertex Anthropic",
"fields": [
{ "pattern": "messages[*].content.length", "reason": "Vertex Anthropic-compatible request path drops the image URL block for this case, leaving only the text part" }
]
},
{
"testCase": "instructionsParam",
"source": "ChatCompletions",
Expand Down
113 changes: 111 additions & 2 deletions crates/lingua/src/providers/google/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::universal::request::{
};
use crate::universal::response::{FinishReason, UniversalUsage};
use crate::universal::tools::{BuiltinToolProvider, UniversalTool, UniversalToolType};
use crate::util::media::parse_base64_data_url;
use crate::util::media::{parse_base64_data_url, parse_file_metadata_from_url};

/// Prefix for synthetic tool call IDs generated when Google omits them.
const SYNTHETIC_CALL_ID_PREFIX: &str = "call_";
Expand All @@ -55,6 +55,74 @@ fn text_part(text: String) -> GooglePart {
}
}

fn mime_type_from_url(url: &str) -> String {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this worth generalizing as a public helper in case it is needed outside of Google?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ehhh im not sure vertex has weird requirements around what mime types it accepts I'd like to keep it separate.

if let Some(metadata) = parse_file_metadata_from_url(url) {
if let Some(content_type) = metadata.content_type {
return content_type;
}

if let Some((_, extension)) = metadata.filename.rsplit_once('.') {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be worth covering audio (mp3/wav/m4a) and video (mp4/mov/webm) for Gemini?

let mime_type = if extension.eq_ignore_ascii_case("jpg")
|| extension.eq_ignore_ascii_case("jpeg")
{
Some("image/jpeg")
} else if extension.eq_ignore_ascii_case("png") {
Some("image/png")
} else if extension.eq_ignore_ascii_case("webp") {
Some("image/webp")
} else if extension.eq_ignore_ascii_case("heic") {
Some("image/heic")
} else if extension.eq_ignore_ascii_case("heif") {
Some("image/heif")
} else if extension.eq_ignore_ascii_case("pdf") {
Some("application/pdf")
} else if extension.eq_ignore_ascii_case("txt") {
Some("text/plain")
} else if extension.eq_ignore_ascii_case("flv") {
Some("video/x-flv")
} else if extension.eq_ignore_ascii_case("mov") {
Some("video/quicktime")
} else if extension.eq_ignore_ascii_case("mpeg") {
Some("video/mpeg")
} else if extension.eq_ignore_ascii_case("mpegs") {
Some("video/mpegs")
} else if extension.eq_ignore_ascii_case("mpg") {
Some("video/mpg")
} else if extension.eq_ignore_ascii_case("wmv") {
Some("video/wmv")
} else if extension.eq_ignore_ascii_case("3gp")
|| extension.eq_ignore_ascii_case("3gpp")
{
Some("video/3gpp")
} else if extension.eq_ignore_ascii_case("aac") {
Some("audio/x-aac")
} else if extension.eq_ignore_ascii_case("flac") {
Some("audio/flac")
} else if extension.eq_ignore_ascii_case("mp3") {
Some("audio/mp3")
} else if extension.eq_ignore_ascii_case("m4a") {
Some("audio/m4a")
} else if extension.eq_ignore_ascii_case("mpga") {
Some("audio/mpga")
} else if extension.eq_ignore_ascii_case("ogg") {
Some("audio/ogg")
} else if extension.eq_ignore_ascii_case("pcm") {
Some("audio/pcm")
} else if extension.eq_ignore_ascii_case("wav") {
Some("audio/wav")
} else {
None
};

if let Some(mime_type) = mime_type {
return mime_type.to_string();
}
}
}

DEFAULT_MIME_TYPE.to_string()
}

fn value_to_map(value: &Value) -> Option<Map<String, Value>> {
match value {
Value::Object(map) => Some(map.clone()),
Expand Down Expand Up @@ -419,10 +487,12 @@ impl TryFromLLM<Message> for GoogleContent {
} else if data.starts_with("http://")
|| data.starts_with("https://")
{
let mime_type =
media_type.unwrap_or_else(|| mime_type_from_url(&data));
converted.push(GooglePart {
file_data: Some(GoogleFileData {
file_uri: Some(data),
mime_type: media_type,
mime_type: Some(mime_type),
}),
..Default::default()
});
Expand Down Expand Up @@ -1310,6 +1380,45 @@ mod tests {
assert!(parts[0].inline_data.is_none());
}

#[test]
fn test_image_url_to_google_file_data_infers_mime_type_from_extension() {
    // An HTTPS image URL ending in `.jpg`, supplied without any explicit media type.
    let url = "https://example.com/image.jpg";
    let image_part = UserContentPart::Image {
        image: Value::String(String::from(url)),
        media_type: None,
        provider_options: None,
    };
    let message = Message::User {
        content: UserContent::Array(vec![image_part]),
    };

    let converted = <GoogleContent as TryFromLLM<Message>>::try_from(message).unwrap();
    assert_eq!(converted.role.as_deref(), Some("user"));

    let parts = converted.parts.unwrap();
    assert_eq!(parts.len(), 1);

    // The URL is passed through as file data and the MIME type is filled in as JPEG.
    let only_part = &parts[0];
    let file_data = only_part
        .file_data
        .as_ref()
        .expect("file_data should exist");
    assert_eq!(file_data.file_uri.as_deref(), Some(url));
    assert_eq!(file_data.mime_type.as_deref(), Some("image/jpeg"));
    assert!(only_part.inline_data.is_none());
}

#[test]
fn test_image_url_to_google_file_data_falls_back_to_default_mime_type() {
    // The URL has no file extension and no media type is supplied.
    let image_part = UserContentPart::Image {
        image: Value::String(String::from("https://example.com/image")),
        media_type: None,
        provider_options: None,
    };
    let message = Message::User {
        content: UserContent::Array(vec![image_part]),
    };

    let converted = <GoogleContent as TryFromLLM<Message>>::try_from(message).unwrap();
    let parts = converted.parts.unwrap();

    // The resulting file data is expected to carry the default MIME type.
    let mime_type = parts[0]
        .file_data
        .as_ref()
        .expect("file_data should exist")
        .mime_type
        .as_deref();
    assert_eq!(mime_type, Some(DEFAULT_MIME_TYPE));
}

#[test]
fn test_message_to_google_content_assistant() {
let message = Message::Assistant {
Expand Down
46 changes: 44 additions & 2 deletions payloads/cases/params.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ export const paramsCases: TestCaseCollection = {
},
{
type: "input_file",
file_url:
"https://www.berkshirehathaway.com/letters/2024ltr.pdf",
file_url: "https://www.berkshirehathaway.com/letters/2024ltr.pdf",
},
],
},
Expand All @@ -109,6 +108,49 @@ export const paramsCases: TestCaseCollection = {
bedrock: null,
},

// Image-URL case where the Chat Completions payload carries no MIME type for the
// image, while the Google payload for the same image states `mimeType: "image/jpeg"`.
// Only the chat-completions and google payloads are defined; the other providers are null.
imageUrlMimeTypeFallbackParam: {
"chat-completions": {
model: OPENAI_CHAT_COMPLETIONS_MODEL,
messages: [
{
role: "user",
content: [
{
type: "text",
text: "Describe this image.",
},
{
type: "image_url",
// Plain HTTPS URL ending in `.jpg`; no media type is given alongside it.
image_url: {
url: "https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg",
},
},
],
},
],
},
responses: null,
anthropic: null,
google: {
contents: [
{
role: "user",
parts: [
{ text: "Describe this image." },
{
// Same image URL as the chat-completions payload, with an explicit MIME type.
fileData: {
fileUri:
"https://t3.ftcdn.net/jpg/02/36/99/22/360_F_236992283_sNOxCVQeFLd5pdqaKGh8DRGMZy7P4XKm.jpg",
mimeType: "image/jpeg",
},
},
],
},
],
},
bedrock: null,
},

// === Text Response Configuration ===

textFormatJsonObjectParam: {
Expand Down
Loading
Loading