From 5dd82dea96597a9b1f40315cc95c0d5739fbcf6c Mon Sep 17 00:00:00 2001 From: Chang Su <8605658+CatherineSue@users.noreply.github.com> Date: Sun, 24 May 2026 11:25:03 -0700 Subject: [PATCH] fix(api): reject schemeless worker URLs in POST /workers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `WorkerService::create_worker` calls `reserve_id_for_url(config.url)` before the AddWorker workflow runs, then returns a 202 with that WorkerId in the Location header. The reservation key is the submitted URL string; the workflow's `CreateWorkerStep` rewrites the URL via `normalize_url`, which prepends `http://` or `grpc://` for any input that doesn't already start with one of `http://`, `https://`, `grpc://`, or `grpcs://`. When normalization changes the string, the reservation is keyed on the bare URL while the live worker is registered under the canonical URL — two `url_to_id` entries, the bare one orphaned forever, and the WorkerId returned in the 202 points at nothing. The client polls its Location and gets 404. Validate the URL at the API boundary so the orphan can't be created through this path. Extract the existing scheme + host validation in `config::validation::ConfigValidator::validate_urls` into a free `pub(crate) fn validate_worker_url(url: &str) -> Result<(), String>`, have both the static-config validator and the new service-layer wrapper call it. Service-layer wrapper returns `WorkerServiceError::BadRequest` so the API returns 400 with a clear message instead of 202 -> 404. This does not address workflow-failure orphans (the reservation also leaks when the AddWorker workflow itself errors after a successful submission). That class needs a `release_reservation` lifecycle on the registry and is tracked as a separate follow-up. Signed-off-by: Chang Su <8605658+CatherineSue@users.noreply.github.com> --- model_gateway/src/config/validation.rs | 63 +++++++++++------------- model_gateway/src/worker/service.rs | 66 +++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 36 deletions(-) diff --git a/model_gateway/src/config/validation.rs b/model_gateway/src/config/validation.rs index 13639baac..ce168b65f 100644 --- a/model_gateway/src/config/validation.rs +++ b/model_gateway/src/config/validation.rs @@ -827,51 +827,44 @@ impl ConfigValidator { fn validate_urls(urls: &[String]) -> ConfigResult<()> { for url in urls { - if url.is_empty() { + if let Err(reason) = validate_worker_url(url) { return Err(ConfigError::InvalidValue { field: "worker_url".to_string(), value: url.clone(), - reason: "URL cannot be empty".to_string(), + reason, }); } + } + Ok(()) + } +} - // Case-insensitive scheme allow-list. Compare just the scheme - // segment so we don't allocate a lowercased copy of the full URL. - const ALLOWED_SCHEMES: &[&str] = &["http", "https", "grpc", "grpcs"]; - let scheme = url.split_once("://").map_or("", |(s, _)| s); - if !ALLOWED_SCHEMES - .iter() - .any(|allowed| scheme.eq_ignore_ascii_case(allowed)) - { - return Err(ConfigError::InvalidValue { - field: "worker_url".to_string(), - value: url.clone(), - reason: "URL must start with http://, https://, grpc://, or grpcs://" - .to_string(), - }); - } +/// Reject empty / schemeless / unparsable worker URLs so callers can wrap the +/// failure reason in whatever error type fits their layer. +pub(crate) fn validate_worker_url(url: &str) -> Result<(), String> { + if url.is_empty() { + return Err("URL cannot be empty".to_string()); + } - match ::url::Url::parse(url) { - Ok(parsed) => { - if parsed.host_str().is_none() { - return Err(ConfigError::InvalidValue { - field: "worker_url".to_string(), - value: url.clone(), - reason: "URL must have a valid host".to_string(), - }); - } - } - Err(e) => { - return Err(ConfigError::InvalidValue { - field: "worker_url".to_string(), - value: url.clone(), - reason: format!("Invalid URL format: {e}"), - }); - } + const ALLOWED_SCHEMES: &[&str] = &["http", "https", "grpc", "grpcs"]; + let scheme = url.split_once("://").map_or("", |(s, _)| s); + if !ALLOWED_SCHEMES + .iter() + .any(|allowed| scheme.eq_ignore_ascii_case(allowed)) + { + return Err("URL must start with http://, https://, grpc://, or grpcs://".to_string()); + } + + match ::url::Url::parse(url) { + Ok(parsed) => { + if parsed.host_str().is_none() { + return Err("URL must have a valid host".to_string()); } } - Ok(()) + Err(e) => return Err(format!("Invalid URL format: {e}")), } + + Ok(()) } fn validate_mebibyte_limit(field: &str, value_mb: usize) -> ConfigResult<()> { diff --git a/model_gateway/src/worker/service.rs b/model_gateway/src/worker/service.rs index 5c6a0cc53..7764f679e 100644 --- a/model_gateway/src/worker/service.rs +++ b/model_gateway/src/worker/service.rs @@ -16,7 +16,7 @@ use serde_json::json; use tracing::warn; use crate::{ - config::RouterConfig, + config::{validation::validate_worker_url, RouterConfig}, worker::{registry::WorkerId, worker::worker_to_info, WorkerRegistry}, workflow::{Job, JobQueue}, }; @@ -241,6 +241,8 @@ impl WorkerService { &self, config: WorkerSpec, ) -> Result { + validate_worker_url_request(&config.url)?; + if self.router_config.api_key.is_some() && config.api_key.is_none() { warn!( "Adding worker {} without API key while router has API key configured. \ @@ -431,3 +433,65 @@ impl WorkerService { Ok(UpdateWorkerResult { worker_id, url }) } } + +/// Wrap [`validate_worker_url`] so the API layer surfaces a 400 instead of a +/// config-layer error. +fn validate_worker_url_request(url: &str) -> Result<(), WorkerServiceError> { + validate_worker_url(url).map_err(|reason| WorkerServiceError::BadRequest { + message: format!("Worker URL '{url}' is invalid: {reason}"), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn validate_worker_url_request_accepts_all_four_schemes() { + assert!(validate_worker_url_request("http://10.0.0.5:8000").is_ok()); + assert!(validate_worker_url_request("https://10.0.0.5:8000").is_ok()); + assert!(validate_worker_url_request("grpc://10.0.0.5:8000").is_ok()); + assert!(validate_worker_url_request("grpcs://10.0.0.5:8000").is_ok()); + } + + #[test] + fn validate_worker_url_request_accepts_case_insensitive_schemes() { + assert!(validate_worker_url_request("HTTP://10.0.0.5:8000").is_ok()); + assert!(validate_worker_url_request("GrPc://10.0.0.5:8000").is_ok()); + } + + #[test] + fn validate_worker_url_request_rejects_bare_host_port_as_400() { + let err = validate_worker_url_request("10.0.0.5:8000").unwrap_err(); + assert!(matches!(err, WorkerServiceError::BadRequest { .. })); + assert_eq!(err.status_code(), StatusCode::BAD_REQUEST); + assert!(err + .to_string() + .contains("http://, https://, grpc://, or grpcs://")); + } + + #[test] + fn validate_worker_url_request_rejects_empty_as_400() { + let err = validate_worker_url_request("").unwrap_err(); + assert!(matches!(err, WorkerServiceError::BadRequest { .. })); + assert!(err.to_string().contains("empty")); + } + + #[test] + fn validate_worker_url_request_rejects_unknown_scheme() { + let err = validate_worker_url_request("ftp://10.0.0.5:8000").unwrap_err(); + assert!(matches!(err, WorkerServiceError::BadRequest { .. })); + } + + #[test] + fn validate_worker_url_request_rejects_missing_host() { + let err = validate_worker_url_request("http://").unwrap_err(); + assert!(matches!(err, WorkerServiceError::BadRequest { .. })); + } + + #[test] + fn validate_worker_url_request_rejects_unparsable_url() { + let err = validate_worker_url_request("http://[invalid").unwrap_err(); + assert!(matches!(err, WorkerServiceError::BadRequest { .. })); + } +}