Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions crates/sbproxy-ai/src/ai_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@ static AI_OUTPUT_THROUGHPUT: LazyLock<HistogramVec> = LazyLock::new(|| {
// non-success outcome back to a named provider (transport error,
// timeout, upstream 4xx/5xx, parse failure). The dashboard groups by
// `provider`; `error_kind` is intended for ad-hoc drill-downs and
// should stay low cardinality (handful of stable strings).
// should stay low cardinality (handful of stable strings). The AI
// gateway dispatch path uses the same stable categories it records on
// span `error.type`, such as `rate_limited`, `content_filter`,
// `upstream_5xx`, and `timeout`.
static AI_PROVIDER_ERRORS: LazyLock<CounterVec> = LazyLock::new(|| {
register_counter_vec!(
Opts::new(
Expand Down Expand Up @@ -619,9 +622,9 @@ pub fn record_output_throughput(provider: &str, model: &str, tokens_per_second:
/// Record a per-provider error.
///
/// `error_kind` is a short, low-cardinality label (e.g. `transport`,
/// `timeout`, `http_4xx`, `http_5xx`, `parse`). Free-form upstream
/// strings should be mapped to one of these stable buckets before
/// being passed in.
/// `timeout`, `rate_limited`, `content_filter`, `upstream_5xx`,
/// `http_4xx`, `http_5xx`, `parse`). Free-form upstream strings should
/// be mapped to one of these stable buckets before being passed in.
pub fn record_provider_error(provider: &str, error_kind: &str) {
AI_PROVIDER_ERRORS
.with_label_values(&[provider, error_kind])
Expand Down
21 changes: 20 additions & 1 deletion crates/sbproxy-ai/src/tracing_spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,16 @@ pub mod error_type {
pub const GUARDRAIL_BLOCKED: &str = "guardrail_blocked";
/// The provider returned HTTP 429 (rate limited).
pub const RATE_LIMITED: &str = "rate_limited";
/// The provider returned a 5xx server error.
/// A generic provider or transport failure that does not fit a narrower class.
pub const PROVIDER_ERROR: &str = "provider_error";
/// The provider's content filter rejected the request or response.
pub const CONTENT_FILTER: &str = "content_filter";
/// An AI spend or quota budget blocked the request before dispatch.
pub const BUDGET_EXCEEDED: &str = "budget_exceeded";
/// The provider returned a 5xx server error.
pub const UPSTREAM_5XX: &str = "upstream_5xx";
/// The upstream request or response stream timed out.
pub const TIMEOUT: &str = "timeout";
}

/// Mark an AI span as failed (WOR-1231).
Expand Down Expand Up @@ -775,6 +781,19 @@ mod tests {
let span = find_span(&spans, "ai.request");
assert_field(span, "otel.status_code", "ERROR");
assert_field(span, "error.type", "guardrail_blocked");
assert_field(span, "otel.status_message", "blocked by input guardrail");
}

/// WOR-1215: the stable AI error taxonomy includes each failure
/// category recorded on `error.type`, including the generic
/// `provider_error` fallback.
///
/// Each assertion pins the exact string value of one `error_type`
/// constant, so renaming a value fails this test instead of silently
/// changing the label.
#[test]
fn error_type_constants_cover_ai_failure_taxonomy() {
    assert_eq!(error_type::GUARDRAIL_BLOCKED, "guardrail_blocked");
    assert_eq!(error_type::RATE_LIMITED, "rate_limited");
    // Generic bucket for failures that fit no narrower class; pinned
    // alongside the specific categories so it cannot drift unnoticed.
    assert_eq!(error_type::PROVIDER_ERROR, "provider_error");
    assert_eq!(error_type::CONTENT_FILTER, "content_filter");
    assert_eq!(error_type::BUDGET_EXCEEDED, "budget_exceeded");
    assert_eq!(error_type::UPSTREAM_5XX, "upstream_5xx");
    assert_eq!(error_type::TIMEOUT, "timeout");
}

/// WOR-1228: prompt / completion content lands on the OpenInference
Expand Down
Loading
Loading