-
Notifications
You must be signed in to change notification settings - Fork 84
feat(gateway): Add Messages API to HTTP router #1521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e4ccb00
e18412e
72d525c
62b38b8
cf1f2c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; | |
| use serde_json::Value; | ||
| use validator::Validate; | ||
|
|
||
| use crate::{skills::MessagesSkillRef, validated::Normalizable}; | ||
| use crate::{common::GenerationRequest, skills::MessagesSkillRef, validated::Normalizable}; | ||
|
|
||
| // ============================================================================ | ||
| // Request Types | ||
|
|
@@ -105,6 +105,58 @@ impl CreateMessageRequest { | |
| } | ||
| } | ||
|
|
||
| impl GenerationRequest for CreateMessageRequest { | ||
| fn is_stream(&self) -> bool { | ||
| self.stream.unwrap_or(false) | ||
| } | ||
|
|
||
| fn get_model(&self) -> Option<&str> { | ||
| Some(&self.model) | ||
| } | ||
|
|
||
| fn extract_text_for_routing(&self) -> String { | ||
| let mut buffer = String::new(); | ||
| let mut has_content = false; | ||
|
|
||
| let push = |s: &str, has_content: &mut bool, buffer: &mut String| { | ||
| if s.is_empty() { | ||
| return; | ||
| } | ||
| if *has_content { | ||
| buffer.push(' '); | ||
| } | ||
| buffer.push_str(s); | ||
| *has_content = true; | ||
| }; | ||
|
|
||
| if let Some(system) = &self.system { | ||
| match system { | ||
| SystemContent::String(s) => push(s, &mut has_content, &mut buffer), | ||
| SystemContent::Blocks(blocks) => { | ||
| for block in blocks { | ||
| push(&block.text, &mut has_content, &mut buffer); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| for msg in &self.messages { | ||
| match &msg.content { | ||
| InputContent::String(s) => push(s, &mut has_content, &mut buffer), | ||
| InputContent::Blocks(blocks) => { | ||
| for block in blocks { | ||
| if let InputContentBlock::Text(text_block) = block { | ||
| push(&text_block.text, &mut has_content, &mut buffer); | ||
| } | ||
|
Comment on lines +148 to +150
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| } | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines +143 to +154
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The current implementation of `extract_text_for_routing` only collects `InputContentBlock::Text` blocks from each message, so text carried in `ToolResult` blocks never reaches the routing key. Suggested replacement for the message loop:
```rust
for msg in &self.messages {
    match &msg.content {
        InputContent::String(s) => push(s, &mut has_content, &mut buffer),
        InputContent::Blocks(blocks) => {
            for block in blocks {
                match block {
                    InputContentBlock::Text(text_block) => {
                        push(&text_block.text, &mut has_content, &mut buffer);
                    }
                    InputContentBlock::ToolResult(tool_result) => {
                        if tool_result.is_control_plane() {
                            continue;
                        }
                        if let Some(content) = &tool_result.content {
                            match content {
                                ToolResultContent::String(s) => {
                                    push(s, &mut has_content, &mut buffer);
                                }
                                ToolResultContent::Blocks(blocks) => {
                                    for b in blocks {
                                        if let ToolResultContentBlock::Text(t) = b {
                                            push(&t.text, &mut has_content, &mut buffer);
                                        }
                                    }
                                }
                            }
                        }
                    }
                    _ => {}
                }
            }
        }
    }
}
```
References
|
||
|
|
||
| buffer | ||
| } | ||
| } | ||
|
|
||
| impl Tool { | ||
| fn matches_tool_choice_name(&self, name: &str) -> bool { | ||
| match self { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ use openai_protocol::{ | |
| completion::CompletionRequest, | ||
| embedding::EmbeddingRequest, | ||
| generate::GenerateRequest, | ||
| messages::CreateMessageRequest, | ||
| rerank::{RerankRequest, RerankResponse, RerankResult}, | ||
| responses::ResponsesRequest, | ||
| transcription::{AudioFile, TranscriptionRequest}, | ||
|
|
@@ -1125,6 +1126,17 @@ impl RouterTrait for Router { | |
| .await | ||
| } | ||
|
|
||
| async fn route_messages( | ||
| &self, | ||
| headers: Option<&HeaderMap>, | ||
| _tenant_meta: &TenantRequestMeta, | ||
| body: &CreateMessageRequest, | ||
| model_id: &str, | ||
| ) -> Response { | ||
| self.route_typed_request(headers, body, "/v1/messages", model_id) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔴 Important: The gRPC routers already use […]. Fix: add […] |
||
| .await | ||
| } | ||
|
|
||
| async fn route_completion( | ||
| &self, | ||
| headers: Option<&HeaderMap>, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,155 @@ | ||
| //! Integration tests for the Anthropic Messages API (`/v1/messages`) | ||
| //! against the HTTP backend, which proxies to sglang's native | ||
| //! `/v1/messages` endpoint. | ||
|
|
||
| use axum::{ | ||
| body::Body, | ||
| extract::Request, | ||
| http::{header::CONTENT_TYPE, StatusCode}, | ||
| }; | ||
| use serde_json::json; | ||
| use tower::ServiceExt; | ||
|
|
||
| use crate::common::{ | ||
| mock_worker::{HealthStatus, MockWorkerConfig, WorkerType}, | ||
| AppTestContext, | ||
| }; | ||
|
|
||
| #[tokio::test] | ||
| async fn test_v1_messages_proxy_success() { | ||
| let ctx = AppTestContext::new(vec![MockWorkerConfig { | ||
| port: 18301, | ||
| worker_type: WorkerType::Regular, | ||
| health_status: HealthStatus::Healthy, | ||
| response_delay_ms: 0, | ||
| fail_rate: 0.0, | ||
| }]) | ||
| .await; | ||
|
|
||
| let app = ctx.create_app(); | ||
|
|
||
| let payload = json!({ | ||
| "model": "mock-model", | ||
| "max_tokens": 64, | ||
| "messages": [ | ||
| {"role": "user", "content": "Hello, Claude!"} | ||
| ] | ||
| }); | ||
|
|
||
| let req = Request::builder() | ||
| .method("POST") | ||
| .uri("/v1/messages") | ||
| .header(CONTENT_TYPE, "application/json") | ||
| .body(Body::from(serde_json::to_string(&payload).unwrap())) | ||
| .unwrap(); | ||
|
|
||
| let resp = app.oneshot(req).await.unwrap(); | ||
| assert_eq!(resp.status(), StatusCode::OK); | ||
|
|
||
| let body = axum::body::to_bytes(resp.into_body(), usize::MAX) | ||
| .await | ||
| .unwrap(); | ||
| let body_json: serde_json::Value = serde_json::from_slice(&body).unwrap(); | ||
|
|
||
| assert_eq!(body_json["type"], "message"); | ||
| assert_eq!(body_json["role"], "assistant"); | ||
| assert_eq!(body_json["model"], "mock-model"); | ||
| assert_eq!(body_json["stop_reason"], "end_turn"); | ||
| let content = body_json["content"].as_array().expect("content array"); | ||
| assert_eq!(content.len(), 1); | ||
| assert_eq!(content[0]["type"], "text"); | ||
| assert!(body_json["usage"]["input_tokens"].is_number()); | ||
|
|
||
| ctx.shutdown().await; | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_v1_messages_proxy_streaming() { | ||
| let ctx = AppTestContext::new(vec![MockWorkerConfig { | ||
| port: 18302, | ||
| worker_type: WorkerType::Regular, | ||
| health_status: HealthStatus::Healthy, | ||
| response_delay_ms: 0, | ||
| fail_rate: 0.0, | ||
| }]) | ||
| .await; | ||
|
|
||
| let app = ctx.create_app(); | ||
|
|
||
| let payload = json!({ | ||
| "model": "mock-model", | ||
| "max_tokens": 64, | ||
| "stream": true, | ||
| "messages": [ | ||
| {"role": "user", "content": "Stream me a haiku"} | ||
| ] | ||
| }); | ||
|
|
||
| let req = Request::builder() | ||
| .method("POST") | ||
| .uri("/v1/messages") | ||
| .header(CONTENT_TYPE, "application/json") | ||
| .body(Body::from(serde_json::to_string(&payload).unwrap())) | ||
| .unwrap(); | ||
|
|
||
| let resp = app.oneshot(req).await.unwrap(); | ||
| assert_eq!(resp.status(), StatusCode::OK); | ||
| let content_type = resp | ||
| .headers() | ||
| .get(CONTENT_TYPE) | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(""); | ||
| assert!( | ||
| content_type.contains("text/event-stream"), | ||
| "expected SSE content-type, got {content_type:?}" | ||
| ); | ||
|
|
||
| let body = axum::body::to_bytes(resp.into_body(), usize::MAX) | ||
| .await | ||
| .unwrap(); | ||
| let text = std::str::from_utf8(&body).expect("utf8"); | ||
|
|
||
| // Wire format: `event: <type>\ndata: <json>\n\n` | ||
| let event_types: Vec<&str> = text | ||
| .lines() | ||
| .filter_map(|l| l.strip_prefix("event: ")) | ||
| .collect(); | ||
|
|
||
| assert_eq!(event_types.first().copied(), Some("message_start")); | ||
| assert_eq!(event_types.last().copied(), Some("message_stop")); | ||
| assert!(event_types.contains(&"content_block_delta")); | ||
|
|
||
| ctx.shutdown().await; | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_v1_messages_proxy_propagates_upstream_error() { | ||
| let ctx = AppTestContext::new(vec![MockWorkerConfig { | ||
| port: 18303, | ||
| worker_type: WorkerType::Regular, | ||
| health_status: HealthStatus::Healthy, | ||
| response_delay_ms: 0, | ||
| fail_rate: 1.0, // always fail | ||
| }]) | ||
| .await; | ||
|
|
||
| let app = ctx.create_app(); | ||
|
|
||
| let payload = json!({ | ||
| "model": "mock-model", | ||
| "max_tokens": 16, | ||
| "messages": [{"role": "user", "content": "fail please"}] | ||
| }); | ||
|
|
||
| let req = Request::builder() | ||
| .method("POST") | ||
| .uri("/v1/messages") | ||
| .header(CONTENT_TYPE, "application/json") | ||
| .body(Body::from(serde_json::to_string(&payload).unwrap())) | ||
| .unwrap(); | ||
|
|
||
| let resp = app.oneshot(req).await.unwrap(); | ||
| assert_eq!(resp.status(), StatusCode::INTERNAL_SERVER_ERROR); | ||
|
|
||
| ctx.shutdown().await; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new `CreateMessageRequest::extract_text_for_routing` only appends `InputContentBlock::Text`, so requests whose latest user turn is a `tool_result` (common in tool-calling loops) can produce an empty or incomplete routing key even though they contain substantial text. This degrades text-based worker selection and can misroute `/v1/messages` traffic compared with chat routing, which includes tool message content; consider extracting text from `ToolResult` payloads (string and text blocks) as well.
Useful? React with 👍 / 👎.