From 6e4e769e023ecc2394aef2b52d7c788c4ff198c3 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Tue, 7 Apr 2026 14:12:26 +0200 Subject: [PATCH 1/3] support OTel gen_ai semconv v1.40 --- .../openai-agents/requirements.txt | 3 +- .../zero-code-examples/openai-agents/run.py | 1 + src/agentevals/api/models.py | 3 + src/agentevals/extraction.py | 86 ++++++++++++++++- src/agentevals/streaming/ws_server.py | 46 ++++++++- src/agentevals/trace_attrs.py | 39 ++++++++ src/agentevals/trace_metrics.py | 47 +++++++-- ui/src/api/client.ts | 3 + .../components/inspector/ComparisonPanel.tsx | 11 ++- ui/src/components/inspector/InspectorView.tsx | 10 ++ .../inspector/PerformanceSection.tsx | 45 ++++++++- ui/src/components/streaming/SessionCard.tsx | 42 ++++++++ .../components/streaming/SessionMetadata.tsx | 95 +++++++++++-------- ui/src/context/TraceProvider.tsx | 3 + ui/src/lib/types.ts | 8 ++ uv.lock | 6 +- 16 files changed, 392 insertions(+), 56 deletions(-) diff --git a/examples/zero-code-examples/openai-agents/requirements.txt b/examples/zero-code-examples/openai-agents/requirements.txt index b3bc37f..7b968e9 100644 --- a/examples/zero-code-examples/openai-agents/requirements.txt +++ b/examples/zero-code-examples/openai-agents/requirements.txt @@ -1,4 +1,5 @@ -openai-agents>=0.3.3 +openai>=2.30.0 +openai-agents>=0.13.0 opentelemetry-instrumentation-openai-agents-v2>=0.1.0 opentelemetry-sdk>=1.36.0 diff --git a/examples/zero-code-examples/openai-agents/run.py b/examples/zero-code-examples/openai-agents/run.py index 6618159..ca4b0d7 100644 --- a/examples/zero-code-examples/openai-agents/run.py +++ b/examples/zero-code-examples/openai-agents/run.py @@ -56,6 +56,7 @@ def main(): print(f"OTLP endpoint: {endpoint}") os.environ.setdefault("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "span_and_event") + os.environ.setdefault("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") os.environ.setdefault( "OTEL_RESOURCE_ATTRIBUTES", diff --git a/src/agentevals/api/models.py b/src/agentevals/api/models.py index 4b3aebc..1e6a1a7 100644 --- a/src/agentevals/api/models.py +++ b/src/agentevals/api/models.py @@ -113,7 +113,10 @@ class DebugLoadData(CamelModel): class TraceConversionMetadata(CamelModel): agent_name: str | None = None + agent_id: str | None = None model: str | None = None + response_model: str | None = None + provider: str | None = None start_time: int | None = None user_input_preview: str | None = None final_output_preview: str | None = None diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index 7b0c328..1b3dbec 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -22,14 +22,26 @@ ADK_SCOPE_VALUE, ADK_TOOL_CALL_ARGS, ADK_TOOL_RESPONSE, + OTEL_ERROR_TYPE, OTEL_GENAI_INPUT_MESSAGES, OTEL_GENAI_OP, OTEL_GENAI_OUTPUT_MESSAGES, + OTEL_GENAI_PROVIDER_NAME, + OTEL_GENAI_REQUEST_MAX_TOKENS, OTEL_GENAI_REQUEST_MODEL, + OTEL_GENAI_REQUEST_TEMPERATURE, + OTEL_GENAI_RESPONSE_FINISH_REASONS, + OTEL_GENAI_RESPONSE_ID, + OTEL_GENAI_RESPONSE_MODEL, + OTEL_GENAI_SYSTEM, OTEL_GENAI_TOOL_CALL_ARGUMENTS, OTEL_GENAI_TOOL_CALL_ID, OTEL_GENAI_TOOL_CALL_RESULT, + OTEL_GENAI_TOOL_DESCRIPTION, OTEL_GENAI_TOOL_NAME, + OTEL_GENAI_TOOL_TYPE, + OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, + OTEL_GENAI_USAGE_CACHE_READ_TOKENS, OTEL_GENAI_USAGE_INPUT_TOKENS, OTEL_GENAI_USAGE_OUTPUT_TOKENS, OTEL_SCOPE, @@ -139,6 +151,68 @@ def extract_token_usage_from_attrs( return 0, 0, model +def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> dict[str, Any]: + """Extract extended model and provider metadata from span attributes. + + Returns a dict with provider info, response metadata, request parameters, + cache token usage, and error classification. Uses gen_ai.system as fallback + for provider when gen_ai.provider.name is absent (backward compat with + pre-v1.37.0 instrumentors). + """ + provider = attrs.get(OTEL_GENAI_PROVIDER_NAME) + if not provider: + provider = attrs.get(OTEL_GENAI_SYSTEM) + + finish_reasons_raw = attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS) + finish_reasons: list[str] = [] + if isinstance(finish_reasons_raw, list): + finish_reasons = [str(r) for r in finish_reasons_raw] + elif isinstance(finish_reasons_raw, str): + parsed = parse_json(finish_reasons_raw) + if isinstance(parsed, list): + finish_reasons = [str(r) for r in parsed] + elif finish_reasons_raw: + finish_reasons = [finish_reasons_raw] + + temperature = attrs.get(OTEL_GENAI_REQUEST_TEMPERATURE) + if temperature is not None: + try: + temperature = float(temperature) + except (TypeError, ValueError): + temperature = None + + max_tokens = attrs.get(OTEL_GENAI_REQUEST_MAX_TOKENS) + if max_tokens is not None: + try: + max_tokens = int(max_tokens) + except (TypeError, ValueError): + max_tokens = None + + cache_creation = attrs.get(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, 0) + cache_read = attrs.get(OTEL_GENAI_USAGE_CACHE_READ_TOKENS, 0) + try: + cache_creation = int(cache_creation) + except (TypeError, ValueError): + cache_creation = 0 + try: + cache_read = int(cache_read) + except (TypeError, ValueError): + cache_read = 0 + + return { + "request_model": attrs.get(OTEL_GENAI_REQUEST_MODEL), + "response_model": attrs.get(OTEL_GENAI_RESPONSE_MODEL), + "provider": provider, + "finish_reasons": finish_reasons, + "response_id": attrs.get(OTEL_GENAI_RESPONSE_ID), + "temperature": temperature, + "max_tokens": max_tokens, + "cache_creation_tokens": cache_creation, + "cache_read_tokens": cache_read, + "error_type": attrs.get(OTEL_ERROR_TYPE), + } + + def extract_tool_call_from_attrs( attrs: dict[str, Any], operation_name: str = "", span_id: str = "" ) -> dict[str, Any] | None: @@ -171,7 +245,17 @@ def extract_tool_call_from_attrs( if fallback_id: tool_call_id = fallback_id - return {"id": tool_call_id, "name": tool_name, "args": args} + result: dict[str, Any] = {"id": tool_call_id, "name": tool_name, "args": args} + + tool_type = attrs.get(OTEL_GENAI_TOOL_TYPE) + if tool_type: + result["type"] = tool_type + + tool_description = attrs.get(OTEL_GENAI_TOOL_DESCRIPTION) + if tool_description: + result["description"] = tool_description + + return result def parse_tool_response_content(content: Any) -> dict: diff --git a/src/agentevals/streaming/ws_server.py b/src/agentevals/streaming/ws_server.py index 5b90b86..b2d4e26 100644 --- a/src/agentevals/streaming/ws_server.py +++ b/src/agentevals/streaming/ws_server.py @@ -20,7 +20,12 @@ WSSpanReceivedEvent, ) from ..converter import convert_traces -from ..extraction import extract_token_usage_from_attrs, is_llm_span, parse_tool_response_content +from ..extraction import ( + extract_extended_model_info_from_attrs, + extract_token_usage_from_attrs, + is_llm_span, + parse_tool_response_content, +) from ..loader.base import Trace from ..loader.otlp import OtlpJsonLoader from ..trace_attrs import OTEL_GENAI_INPUT_MESSAGES, OTEL_GENAI_REQUEST_MODEL @@ -794,6 +799,14 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d models_used: set[str] = set() total_input_tokens = 0 total_output_tokens = 0 + total_cache_creation_tokens = 0 + total_cache_read_tokens = 0 + providers: set[str] = set() + response_models: set[str] = set() + finish_reasons: set[str] = set() + error_types: set[str] = set() + first_temperature: float | None = None + first_max_tokens: int | None = None llm_spans = [s for s in trace.all_spans if is_llm_span(s) or "call_llm" in s.operation_name] @@ -808,12 +821,43 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d total_input_tokens += in_toks total_output_tokens += out_toks + ext = extract_extended_model_info_from_attrs(span.tags) + if ext["provider"]: + providers.add(ext["provider"]) + if ext["response_model"]: + response_models.add(ext["response_model"]) + finish_reasons.update(ext["finish_reasons"]) + total_cache_creation_tokens += ext["cache_creation_tokens"] + total_cache_read_tokens += ext["cache_read_tokens"] + if ext["error_type"]: + error_types.add(ext["error_type"]) + if first_temperature is None and ext["temperature"] is not None: + first_temperature = ext["temperature"] + if first_max_tokens is None and ext["max_tokens"] is not None: + first_max_tokens = ext["max_tokens"] + if models_used: model_info["models"] = list(models_used) if total_input_tokens > 0: model_info["inputTokens"] = total_input_tokens if total_output_tokens > 0: model_info["outputTokens"] = total_output_tokens + if providers: + model_info["provider"] = next(iter(providers)) + if response_models: + model_info["responseModels"] = list(response_models) + if finish_reasons: + model_info["finishReasons"] = list(finish_reasons) + if total_cache_creation_tokens > 0: + model_info["cacheCreationTokens"] = total_cache_creation_tokens + if total_cache_read_tokens > 0: + model_info["cacheReadTokens"] = total_cache_read_tokens + if first_temperature is not None: + model_info["temperature"] = first_temperature + if first_max_tokens is not None: + model_info["maxTokens"] = first_max_tokens + if error_types: + model_info["errorTypes"] = list(error_types) return model_info diff --git a/src/agentevals/trace_attrs.py b/src/agentevals/trace_attrs.py index 37ea351..5aedc88 100644 --- a/src/agentevals/trace_attrs.py +++ b/src/agentevals/trace_attrs.py @@ -2,6 +2,8 @@ Single source of truth for all attribute names used across the converter, extraction, streaming, and runner modules. + +Covers OTel GenAI semantic conventions up to v1.40.0. """ # OTel scope @@ -25,6 +27,43 @@ OTEL_GENAI_TOOL_CALL_RESULT = "gen_ai.tool.call.result" OTEL_GENAI_CONVERSATION_ID = "gen_ai.conversation.id" +# Provider and response metadata (v1.37.0+) +OTEL_GENAI_PROVIDER_NAME = "gen_ai.provider.name" +OTEL_GENAI_RESPONSE_MODEL = "gen_ai.response.model" +OTEL_GENAI_RESPONSE_ID = "gen_ai.response.id" +OTEL_GENAI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons" + +# Deprecated provider attribute (pre-v1.37.0, renamed to gen_ai.provider.name) +OTEL_GENAI_SYSTEM = "gen_ai.system" + +# Agent identity (v1.31.0+) +OTEL_GENAI_AGENT_ID = "gen_ai.agent.id" +OTEL_GENAI_AGENT_DESCRIPTION = "gen_ai.agent.description" + +# Tool metadata (v1.31.0+) +OTEL_GENAI_TOOL_DESCRIPTION = "gen_ai.tool.description" +OTEL_GENAI_TOOL_TYPE = "gen_ai.tool.type" + +# Error classification +OTEL_ERROR_TYPE = "error.type" + +# Request parameters +OTEL_GENAI_REQUEST_TEMPERATURE = "gen_ai.request.temperature" +OTEL_GENAI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" +OTEL_GENAI_REQUEST_TOP_P = "gen_ai.request.top_p" +OTEL_GENAI_REQUEST_TOP_K = "gen_ai.request.top_k" + +# Cache token usage (Anthropic/OpenAI prompt caching) +OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS = "gen_ai.usage.cache_creation.input_tokens" +OTEL_GENAI_USAGE_CACHE_READ_TOKENS = "gen_ai.usage.cache_read.input_tokens" + +# System/tool definitions (opt-in, v1.37.0+) +OTEL_GENAI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" +OTEL_GENAI_TOOL_DEFINITIONS = "gen_ai.tool.definitions" + +# Output type +OTEL_GENAI_OUTPUT_TYPE = "gen_ai.output.type" + # ADK-specific custom attributes (gcp.vertex.agent.*) ADK_LLM_REQUEST = "gcp.vertex.agent.llm_request" ADK_LLM_RESPONSE = "gcp.vertex.agent.llm_response" diff --git a/src/agentevals/trace_metrics.py b/src/agentevals/trace_metrics.py index 45c208a..ef1a043 100644 --- a/src/agentevals/trace_metrics.py +++ b/src/agentevals/trace_metrics.py @@ -6,11 +6,16 @@ from .extraction import ( extract_agent_response_from_attrs, + extract_extended_model_info_from_attrs, extract_token_usage_from_attrs, extract_user_text_from_attrs, get_extractor, ) -from .trace_attrs import OTEL_GENAI_AGENT_NAME, OTEL_GENAI_REQUEST_MODEL +from .trace_attrs import ( + OTEL_GENAI_AGENT_ID, + OTEL_GENAI_AGENT_NAME, + OTEL_GENAI_REQUEST_MODEL, +) def _truncate(text: str, max_length: int = 200) -> str: @@ -41,6 +46,8 @@ def extract_performance_metrics(trace, extractor=None) -> dict[str, Any]: prompt_tokens = [] output_tokens = [] total_tokens = [] + cache_creation_tokens_total = 0 + cache_read_tokens_total = 0 if extractor is None: extractor = get_extractor(trace) @@ -64,21 +71,30 @@ def extract_performance_metrics(trace, extractor=None) -> dict[str, Any]: prompt_tokens.append(in_toks) output_tokens.append(out_toks) total_tokens.append(in_toks + out_toks) + ext = extract_extended_model_info_from_attrs(span.tags) + cache_creation_tokens_total += ext["cache_creation_tokens"] + cache_read_tokens_total += ext["cache_read_tokens"] elif role == "tool": tool_latencies.append(duration_ms) + tokens_info: dict[str, Any] = { + "total_prompt": sum(prompt_tokens) if prompt_tokens else 0, + "total_output": sum(output_tokens) if output_tokens else 0, + "total": sum(total_tokens) if total_tokens else 0, + "per_llm_call": _calc_percentiles(total_tokens) if total_tokens else {"p50": 0.0, "p95": 0.0, "p99": 0.0}, + } + if cache_creation_tokens_total > 0: + tokens_info["cache_creation_tokens"] = cache_creation_tokens_total + if cache_read_tokens_total > 0: + tokens_info["cache_read_tokens"] = cache_read_tokens_total + return { "latency": { "overall": _calc_percentiles(agent_latencies), "llm_calls": _calc_percentiles(llm_latencies), "tool_executions": _calc_percentiles(tool_latencies), }, - "tokens": { - "total_prompt": sum(prompt_tokens) if prompt_tokens else 0, - "total_output": sum(output_tokens) if output_tokens else 0, - "total": sum(total_tokens) if total_tokens else 0, - "per_llm_call": _calc_percentiles(total_tokens) if total_tokens else {"p50": 0.0, "p95": 0.0, "p99": 0.0}, - }, + "tokens": tokens_info, } @@ -86,7 +102,10 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]: """Extract agent name, model, timing, and preview text from a trace.""" metadata: dict[str, Any] = { "agent_name": None, + "agent_id": None, "model": None, + "response_model": None, + "provider": None, "start_time": None, "user_input_preview": None, "final_output_preview": None, @@ -99,12 +118,19 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]: if invocation_spans: first_inv = invocation_spans[0] metadata["agent_name"] = first_inv.get_tag(OTEL_GENAI_AGENT_NAME) + metadata["agent_id"] = first_inv.get_tag(OTEL_GENAI_AGENT_ID) metadata["start_time"] = first_inv.start_time llm_spans = extractor.find_llm_spans_in(first_inv) if llm_spans: metadata["model"] = llm_spans[0].get_tag(OTEL_GENAI_REQUEST_MODEL) + ext = extract_extended_model_info_from_attrs(llm_spans[0].tags) + if ext["response_model"]: + metadata["response_model"] = ext["response_model"] + if ext["provider"]: + metadata["provider"] = ext["provider"] + user_text = extract_user_text_from_attrs(llm_spans[0].tags) if user_text: metadata["user_input_preview"] = _truncate(user_text) @@ -123,4 +149,11 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]: metadata["model"] = model break + if not metadata["provider"]: + for span in trace.all_spans: + ext = extract_extended_model_info_from_attrs(span.tags) + if ext["provider"]: + metadata["provider"] = ext["provider"] + break + return metadata diff --git a/ui/src/api/client.ts b/ui/src/api/client.ts index 42f05fa..931d08a 100644 --- a/ui/src/api/client.ts +++ b/ui/src/api/client.ts @@ -146,8 +146,11 @@ export async function evaluateTracesStreaming( conversionWarnings: [], performanceMetrics: eventData.performanceMetrics, agentName: tm.agentName, + agentId: tm.agentId, sessionId: tm.sessionName, model: tm.model, + responseModel: tm.responseModel, + provider: tm.provider, startTime: tm.startTime, userInputPreview: tm.userInputPreview, finalOutputPreview: tm.finalOutputPreview, diff --git a/ui/src/components/inspector/ComparisonPanel.tsx b/ui/src/components/inspector/ComparisonPanel.tsx index f2dee7f..0715370 100644 --- a/ui/src/components/inspector/ComparisonPanel.tsx +++ b/ui/src/components/inspector/ComparisonPanel.tsx @@ -5,6 +5,13 @@ import type { Invocation, MetricResult, PerformanceMetrics } from '../../lib/typ import { MetricsComparisonSection } from './MetricsComparisonSection'; import { PerformanceSection } from './PerformanceSection'; +interface TraceInfo { + provider?: string; + model?: string; + responseModel?: string; + agentName?: string; +} + interface ComparisonPanelProps { actualInvocation: Invocation | null; expectedInvocation: Invocation | null; @@ -13,6 +20,7 @@ interface ComparisonPanelProps { selectedMetrics: string[]; isEvaluating: boolean; performanceMetrics?: PerformanceMetrics; + traceInfo?: TraceInfo; allActualInvocations?: Invocation[]; allExpectedInvocations?: Invocation[]; } @@ -25,6 +33,7 @@ export const ComparisonPanel: React.FC = ({ selectedMetrics, isEvaluating, performanceMetrics, + traceInfo, allActualInvocations, allExpectedInvocations, }) => { @@ -60,7 +69,7 @@ export const ComparisonPanel: React.FC = ({
{performanceMetrics && (
- +
)} diff --git a/ui/src/components/inspector/InspectorView.tsx b/ui/src/components/inspector/InspectorView.tsx index 19b6d39..3576d72 100644 --- a/ui/src/components/inspector/InspectorView.tsx +++ b/ui/src/components/inspector/InspectorView.tsx @@ -30,6 +30,10 @@ export const InspectorView: React.FC = () => { metricResults: Array.from(tableRow.metricResults.values()), conversionWarnings: tableRow.conversionWarnings, performanceMetrics: tableRow.performanceMetrics, + agentName: tableRow.agentName, + model: tableRow.model, + responseModel: tableRow.responseModel, + provider: tableRow.provider, }; } return state.results.find(r => r.traceId === state.selectedTraceId); @@ -207,6 +211,12 @@ export const InspectorView: React.FC = () => { selectedMetrics={state.selectedMetrics} isEvaluating={state.isEvaluating} performanceMetrics={traceResult.performanceMetrics} + traceInfo={{ + provider: traceResult.provider, + model: traceResult.model, + responseModel: traceResult.responseModel, + agentName: traceResult.agentName, + }} allActualInvocations={invocations} allExpectedInvocations={expectedInvocations} /> diff --git a/ui/src/components/inspector/PerformanceSection.tsx b/ui/src/components/inspector/PerformanceSection.tsx index 396707a..1a98514 100644 --- a/ui/src/components/inspector/PerformanceSection.tsx +++ b/ui/src/components/inspector/PerformanceSection.tsx @@ -2,16 +2,27 @@ import React from 'react'; import { css } from '@emotion/react'; import type { PerformanceMetrics } from '../../lib/types'; +interface TraceInfo { + provider?: string; + model?: string; + responseModel?: string; + agentName?: string; +} + interface PerformanceSectionProps { metrics: PerformanceMetrics; + traceInfo?: TraceInfo; } -export const PerformanceSection: React.FC = ({ metrics }) => { +export const PerformanceSection: React.FC = ({ metrics, traceInfo }) => { if (!metrics || !metrics.latency || !metrics.tokens) { return null; } const { latency, tokens } = metrics; + const hasCacheTokens = (tokens.cacheCreationTokens && tokens.cacheCreationTokens > 0) + || (tokens.cacheReadTokens && tokens.cacheReadTokens > 0); + const hasTraceInfo = traceInfo && (traceInfo.provider || traceInfo.responseModel); return (
@@ -19,6 +30,22 @@ export const PerformanceSection: React.FC = ({ metrics + {hasTraceInfo && ( + <> + {traceInfo.provider && ( + + + + + )} + {traceInfo.responseModel && traceInfo.responseModel !== traceInfo.model && ( + + + + + )} + + )} @@ -39,6 +66,22 @@ export const PerformanceSection: React.FC = ({ metrics + {hasCacheTokens && ( + <> + {tokens.cacheReadTokens && tokens.cacheReadTokens > 0 && ( + + + + + )} + {tokens.cacheCreationTokens && tokens.cacheCreationTokens > 0 && ( + + + + + )} + + )}
Provider{traceInfo.provider}
Response Model{traceInfo.responseModel}
Overall Latency (p99) {latency.overall.p99.toFixed(0)} msTokens per LLM Call (p99) {tokens.perLlmCall.p99.toFixed(0)}
Cache Read Tokens{tokens.cacheReadTokens.toLocaleString()}
Cache Creation Tokens{tokens.cacheCreationTokens.toLocaleString()}
diff --git a/ui/src/components/streaming/SessionCard.tsx b/ui/src/components/streaming/SessionCard.tsx index 6cfb43a..7319bcb 100644 --- a/ui/src/components/streaming/SessionCard.tsx +++ b/ui/src/components/streaming/SessionCard.tsx @@ -13,6 +13,14 @@ interface Invocation { models?: string[]; inputTokens?: number; outputTokens?: number; + provider?: string; + responseModels?: string[]; + finishReasons?: string[]; + cacheCreationTokens?: number; + cacheReadTokens?: number; + temperature?: number; + maxTokens?: number; + errorTypes?: string[]; }; } @@ -92,6 +100,11 @@ export function SessionCard({ session, isSelected, onSelect, onRemove, evaluatio session.invocations?.[0]?.modelInfo?.models?.[0] || 'Unknown'; + const providerName = session.invocations?.[0]?.modelInfo?.provider || null; + + const totalCacheCreation = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheCreationTokens || 0), 0) || 0; + const totalCacheRead = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheReadTokens || 0), 0) || 0; + return (
+ {providerName && ( + + {providerName} + + )} + {session.invocations && session.invocations.length > 0 && ( )} + {(totalCacheCreation > 0 || totalCacheRead > 0) && ( + + cache {totalCacheRead > 0 ? `${totalCacheRead.toLocaleString()} read` : ''} + {totalCacheCreation > 0 && totalCacheRead > 0 ? ' / ' : ''} + {totalCacheCreation > 0 ? `${totalCacheCreation.toLocaleString()} created` : ''} + + )} + {queueNames && queueNames.length > 0 && queueNames.map(name => ( diff --git a/ui/src/components/streaming/SessionMetadata.tsx b/ui/src/components/streaming/SessionMetadata.tsx index a55dd43..0ff9308 100644 --- a/ui/src/components/streaming/SessionMetadata.tsx +++ b/ui/src/components/streaming/SessionMetadata.tsx @@ -5,6 +5,13 @@ interface SessionMetadataProps { metadata: Record; startedAt: string; status: 'active' | 'complete'; + invocations?: Array<{ + modelInfo?: { + provider?: string; + cacheCreationTokens?: number; + cacheReadTokens?: number; + }; + }>; }; liveStats: { totalInputTokens: number; @@ -12,8 +19,34 @@ interface SessionMetadataProps { }; } +function MetadataItem({ label, children }: { label: string; children: React.ReactNode }) { + return ( +
+
+ {label} +
+
+ {children} +
+
+ ); +} + export function SessionMetadata({ session, liveStats }: SessionMetadataProps) { const totalTokens = liveStats.totalInputTokens + liveStats.totalOutputTokens; + const provider = session.invocations?.[0]?.modelInfo?.provider; + const totalCacheCreation = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheCreationTokens || 0), 0) || 0; + const totalCacheRead = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheReadTokens || 0), 0) || 0; return (
{totalTokens > 0 && ( -
-
+ + {totalTokens.toLocaleString()} + + - Tokens -
-
- {totalTokens.toLocaleString()} - - (↓{liveStats.totalInputTokens.toLocaleString()} ↑{liveStats.totalOutputTokens.toLocaleString()}) - -
-
+ (↓{liveStats.totalInputTokens.toLocaleString()} ↑{liveStats.totalOutputTokens.toLocaleString()}) + + + )} + + {(totalCacheCreation > 0 || totalCacheRead > 0) && ( + + + {totalCacheRead > 0 && `${totalCacheRead.toLocaleString()} read`} + {totalCacheCreation > 0 && totalCacheRead > 0 && ' / '} + {totalCacheCreation > 0 && `${totalCacheCreation.toLocaleString()} created`} + + )} {Object.keys(session.metadata).length > 0 && Object.entries(session.metadata).map(([key, value]) => ( -
-
- {key} -
-
- {String(value)} -
-
+ {String(value)} ))}
); diff --git a/ui/src/context/TraceProvider.tsx b/ui/src/context/TraceProvider.tsx index 2ffdd22..7f18575 100644 --- a/ui/src/context/TraceProvider.tsx +++ b/ui/src/context/TraceProvider.tsx @@ -162,8 +162,11 @@ export const TraceProvider: React.FC = ({ children }) => { sessionId: partialResult.sessionId ?? existingRow?.sessionId ?? metadata?.sessionId, status: allMetricsComplete ? 'complete' : 'loading', agentName: partialResult.agentName ?? existingRow?.agentName ?? metadata?.agentName, + agentId: partialResult.agentId ?? existingRow?.agentId, startTime: partialResult.startTime ?? existingRow?.startTime ?? metadata?.startTime, model: partialResult.model ?? existingRow?.model ?? metadata?.model, + responseModel: partialResult.responseModel ?? existingRow?.responseModel, + provider: partialResult.provider ?? existingRow?.provider, userInputPreview: partialResult.userInputPreview ?? existingRow?.userInputPreview ?? metadata?.userInputPreview, finalOutputPreview: partialResult.finalOutputPreview ?? existingRow?.finalOutputPreview ?? metadata?.finalOutputPreview, invocations: metadata?.invocations, diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index eced01f..bc9a399 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -137,6 +137,8 @@ export interface PerformanceMetrics { totalOutput: number; total: number; perLlmCall: { p50: number; p95: number; p99: number }; + cacheCreationTokens?: number; + cacheReadTokens?: number; }; } @@ -148,7 +150,10 @@ export interface TraceResult { conversionWarnings: string[]; performanceMetrics?: PerformanceMetrics; agentName?: string; + agentId?: string; model?: string; + responseModel?: string; + provider?: string; startTime?: number; userInputPreview?: string; finalOutputPreview?: string; @@ -181,8 +186,11 @@ export interface TraceTableRow { sessionId?: string; status: TraceRowStatus; agentName?: string; + agentId?: string; startTime?: number; model?: string; + responseModel?: string; + provider?: string; userInputPreview?: string; finalOutputPreview?: string; metricResults: Map; diff --git a/uv.lock b/uv.lock index 8e8ebf3..39decd3 100644 --- a/uv.lock +++ b/uv.lock @@ -2253,7 +2253,7 @@ wheels = [ [[package]] name = "openai" -version = "2.21.0" +version = "2.30.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2265,9 +2265,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/92/e5/3d197a0947a166649f566706d7a4c8f7fe38f1fa7b24c9bcffe4c7591d44/openai-2.21.0.tar.gz", hash = "sha256:81b48ce4b8bbb2cc3af02047ceb19561f7b1dc0d4e52d1de7f02abfd15aa59b7", size = 644374, upload-time = "2026-02-14T00:12:01.577Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/15/52580c8fbc16d0675d516e8749806eda679b16de1e4434ea06fb6feaa610/openai-2.30.0.tar.gz", hash = "sha256:92f7661c990bda4b22a941806c83eabe4896c3094465030dd882a71abe80c885", size = 676084, upload-time = "2026-03-25T22:08:59.96Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/56/0a89092a453bb2c676d66abee44f863e742b2110d4dbb1dbcca3f7e5fc33/openai-2.21.0-py3-none-any.whl", hash = "sha256:0bc1c775e5b1536c294eded39ee08f8407656537ccc71b1004104fe1602e267c", size = 1103065, upload-time = "2026-02-14T00:11:59.603Z" }, + { url = "https://files.pythonhosted.org/packages/2a/9e/5bfa2270f902d5b92ab7d41ce0475b8630572e71e349b2a4996d14bdda93/openai-2.30.0-py3-none-any.whl", hash = "sha256:9a5ae616888eb2748ec5e0c5b955a51592e0b201a11f4262db920f2a78c5231d", size = 1146656, upload-time = "2026-03-25T22:08:58.2Z" }, ] [[package]] From 4458cda1e0ac5b07e04c7a7ea855d5491e54dd39 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Tue, 7 Apr 2026 14:27:53 +0200 Subject: [PATCH 2/3] address feedback --- src/agentevals/streaming/ws_server.py | 19 +- tests/test_extraction.py | 206 ++++++++++++++++++ .../components/streaming/SessionMetadata.tsx | 4 + 3 files changed, 220 insertions(+), 9 deletions(-) diff --git a/src/agentevals/streaming/ws_server.py b/src/agentevals/streaming/ws_server.py index b2d4e26..0163245 100644 --- a/src/agentevals/streaming/ws_server.py +++ b/src/agentevals/streaming/ws_server.py @@ -801,7 +801,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d total_output_tokens = 0 total_cache_creation_tokens = 0 total_cache_read_tokens = 0 - providers: set[str] = set() + first_provider: str | None = None response_models: set[str] = set() finish_reasons: set[str] = set() error_types: set[str] = set() @@ -809,6 +809,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d first_max_tokens: int | None = None llm_spans = [s for s in trace.all_spans if is_llm_span(s) or "call_llm" in s.operation_name] + llm_spans.sort(key=lambda s: s.start_time) for span in llm_spans: in_toks, out_toks, model = extract_token_usage_from_attrs(span.tags) @@ -822,8 +823,8 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d total_output_tokens += out_toks ext = extract_extended_model_info_from_attrs(span.tags) - if ext["provider"]: - providers.add(ext["provider"]) + if first_provider is None and ext["provider"]: + first_provider = ext["provider"] if ext["response_model"]: response_models.add(ext["response_model"]) finish_reasons.update(ext["finish_reasons"]) @@ -837,17 +838,17 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d first_max_tokens = ext["max_tokens"] if models_used: - model_info["models"] = list(models_used) + model_info["models"] = sorted(models_used) if total_input_tokens > 0: model_info["inputTokens"] = total_input_tokens if total_output_tokens > 0: model_info["outputTokens"] = total_output_tokens - if providers: - model_info["provider"] = next(iter(providers)) + if first_provider: + model_info["provider"] = first_provider if response_models: - model_info["responseModels"] = list(response_models) + model_info["responseModels"] = sorted(response_models) if finish_reasons: - model_info["finishReasons"] = list(finish_reasons) + model_info["finishReasons"] = sorted(finish_reasons) if total_cache_creation_tokens > 0: model_info["cacheCreationTokens"] = total_cache_creation_tokens if total_cache_read_tokens > 0: @@ -857,7 +858,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d if first_max_tokens is not None: model_info["maxTokens"] = first_max_tokens if error_types: - model_info["errorTypes"] = list(error_types) + model_info["errorTypes"] = sorted(error_types) return model_info diff --git a/tests/test_extraction.py b/tests/test_extraction.py index 4c7fba9..ad1d63d 100644 --- a/tests/test_extraction.py +++ b/tests/test_extraction.py @@ -10,6 +10,7 @@ AdkExtractor, GenAIExtractor, extract_agent_response_from_attrs, + extract_extended_model_info_from_attrs, extract_token_usage_from_attrs, extract_tool_call_from_attrs, extract_user_text_from_attrs, @@ -26,14 +27,26 @@ ADK_LLM_RESPONSE, ADK_SCOPE_VALUE, ADK_TOOL_CALL_ARGS, + OTEL_ERROR_TYPE, OTEL_GENAI_AGENT_NAME, OTEL_GENAI_INPUT_MESSAGES, OTEL_GENAI_OP, OTEL_GENAI_OUTPUT_MESSAGES, + OTEL_GENAI_PROVIDER_NAME, + OTEL_GENAI_REQUEST_MAX_TOKENS, OTEL_GENAI_REQUEST_MODEL, + OTEL_GENAI_REQUEST_TEMPERATURE, + OTEL_GENAI_RESPONSE_FINISH_REASONS, + OTEL_GENAI_RESPONSE_ID, + OTEL_GENAI_RESPONSE_MODEL, + OTEL_GENAI_SYSTEM, OTEL_GENAI_TOOL_CALL_ARGUMENTS, OTEL_GENAI_TOOL_CALL_ID, + OTEL_GENAI_TOOL_DESCRIPTION, OTEL_GENAI_TOOL_NAME, + OTEL_GENAI_TOOL_TYPE, + OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, + OTEL_GENAI_USAGE_CACHE_READ_TOKENS, OTEL_GENAI_USAGE_INPUT_TOKENS, OTEL_GENAI_USAGE_OUTPUT_TOKENS, OTEL_SCOPE, @@ -522,3 +535,196 @@ def test_find_tool_spans_in(self): root = _span(op="agent_run", children=[child]) ext = GenAIExtractor() assert [s.span_id for s in ext.find_tool_spans_in(root)] == ["tool1"] + + +# --------------------------------------------------------------------------- +# extract_extended_model_info_from_attrs +# --------------------------------------------------------------------------- + + +class TestExtractExtendedModelInfo: + def test_provider_from_provider_name(self): + attrs = {OTEL_GENAI_PROVIDER_NAME: "openai"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["provider"] == "openai" + + def test_provider_fallback_to_gen_ai_system(self): + attrs = {OTEL_GENAI_SYSTEM: "anthropic"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["provider"] == "anthropic" + + def test_provider_name_takes_priority_over_system(self): + attrs = { + OTEL_GENAI_PROVIDER_NAME: "openai", + OTEL_GENAI_SYSTEM: "old_value", + } + result = extract_extended_model_info_from_attrs(attrs) + assert result["provider"] == "openai" + + def test_provider_none_when_absent(self): + result = extract_extended_model_info_from_attrs({}) + assert result["provider"] is None + + def test_response_model(self): + attrs = {OTEL_GENAI_RESPONSE_MODEL: "gpt-4o-2024-08-06"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["response_model"] == "gpt-4o-2024-08-06" + + def test_request_model(self): + attrs = {OTEL_GENAI_REQUEST_MODEL: "gpt-4o"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["request_model"] == "gpt-4o" + + def test_response_id(self): + attrs = {OTEL_GENAI_RESPONSE_ID: "chatcmpl-abc123"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["response_id"] == "chatcmpl-abc123" + + def test_finish_reasons_from_list(self): + attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: ["stop", "tool_calls"]} + result = extract_extended_model_info_from_attrs(attrs) + assert result["finish_reasons"] == ["stop", "tool_calls"] + + def test_finish_reasons_from_json_string(self): + attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: '["stop"]'} + result = extract_extended_model_info_from_attrs(attrs) + assert result["finish_reasons"] == ["stop"] + + def test_finish_reasons_from_plain_string(self): + attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: "stop"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["finish_reasons"] == ["stop"] + + def test_finish_reasons_empty_when_absent(self): + result = extract_extended_model_info_from_attrs({}) + assert result["finish_reasons"] == [] + + def test_temperature_numeric(self): + attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: 0.7} + result = extract_extended_model_info_from_attrs(attrs) + assert result["temperature"] == 0.7 + + def test_temperature_from_string(self): + attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: "0.9"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["temperature"] == 0.9 + + def test_temperature_invalid_returns_none(self): + attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: "not_a_number"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["temperature"] is None + + def test_max_tokens_numeric(self): + attrs = {OTEL_GENAI_REQUEST_MAX_TOKENS: 4096} + result = extract_extended_model_info_from_attrs(attrs) + assert result["max_tokens"] == 4096 + + def test_max_tokens_from_string(self): + attrs = {OTEL_GENAI_REQUEST_MAX_TOKENS: "2048"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["max_tokens"] == 2048 + + def test_cache_creation_tokens(self): + attrs = {OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: 1500} + result = extract_extended_model_info_from_attrs(attrs) + assert result["cache_creation_tokens"] == 1500 + + def test_cache_read_tokens(self): + attrs = {OTEL_GENAI_USAGE_CACHE_READ_TOKENS: 3000} + result = extract_extended_model_info_from_attrs(attrs) + assert result["cache_read_tokens"] == 3000 + + def test_cache_tokens_default_to_zero(self): + result = extract_extended_model_info_from_attrs({}) + assert result["cache_creation_tokens"] == 0 + assert result["cache_read_tokens"] == 0 + + def test_cache_tokens_from_string(self): + attrs = { + OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: "500", + OTEL_GENAI_USAGE_CACHE_READ_TOKENS: "1000", + } + result = extract_extended_model_info_from_attrs(attrs) + assert result["cache_creation_tokens"] == 500 + assert result["cache_read_tokens"] == 1000 + + def test_error_type(self): + attrs = {OTEL_ERROR_TYPE: "timeout"} + result = extract_extended_model_info_from_attrs(attrs) + assert result["error_type"] == "timeout" + + def test_error_type_none_when_absent(self): + result = extract_extended_model_info_from_attrs({}) + assert result["error_type"] is None + + def test_full_attribute_set(self): + attrs = { + OTEL_GENAI_PROVIDER_NAME: "anthropic", + OTEL_GENAI_REQUEST_MODEL: "claude-sonnet-4-20250514", + OTEL_GENAI_RESPONSE_MODEL: "claude-sonnet-4-20250514", + OTEL_GENAI_RESPONSE_ID: "msg_abc", + OTEL_GENAI_RESPONSE_FINISH_REASONS: ["end_turn"], + OTEL_GENAI_REQUEST_TEMPERATURE: 1.0, + OTEL_GENAI_REQUEST_MAX_TOKENS: 8192, + OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: 2000, + OTEL_GENAI_USAGE_CACHE_READ_TOKENS: 5000, + OTEL_ERROR_TYPE: None, + } + result = extract_extended_model_info_from_attrs(attrs) + assert result["provider"] == "anthropic" + assert result["request_model"] == "claude-sonnet-4-20250514" + assert result["response_model"] == "claude-sonnet-4-20250514" + assert result["response_id"] == "msg_abc" + assert result["finish_reasons"] == ["end_turn"] + assert result["temperature"] == 1.0 + assert result["max_tokens"] == 8192 + assert result["cache_creation_tokens"] == 2000 + assert result["cache_read_tokens"] == 5000 + assert result["error_type"] is None + + +# --------------------------------------------------------------------------- +# extract_tool_call_from_attrs — tool type and description +# --------------------------------------------------------------------------- + + +class TestExtractToolCallTypeAndDescription: + def test_type_and_description_present(self): + attrs = { + OTEL_GENAI_TOOL_NAME: "search", + OTEL_GENAI_TOOL_CALL_ID: "tc1", + OTEL_GENAI_TOOL_CALL_ARGUMENTS: json.dumps({"q": "test"}), + OTEL_GENAI_TOOL_TYPE: "function", + OTEL_GENAI_TOOL_DESCRIPTION: "Search the web", + } + result = extract_tool_call_from_attrs(attrs) + assert result["name"] == "search" + assert result["type"] == "function" + assert result["description"] == "Search the web" + + def test_type_without_description(self): + attrs = { + OTEL_GENAI_TOOL_NAME: "retriever", + OTEL_GENAI_TOOL_TYPE: "datastore", + } + result = extract_tool_call_from_attrs(attrs) + assert result["type"] == "datastore" + assert "description" not in result + + def test_description_without_type(self): + attrs = { + OTEL_GENAI_TOOL_NAME: "calculator", + OTEL_GENAI_TOOL_DESCRIPTION: "Performs arithmetic", + } + result = extract_tool_call_from_attrs(attrs) + assert result["description"] == "Performs arithmetic" + assert "type" not in result + + def test_absent_type_and_description(self): + attrs = { + OTEL_GENAI_TOOL_NAME: "search", + OTEL_GENAI_TOOL_CALL_ID: "tc1", + } + result = extract_tool_call_from_attrs(attrs) + assert "type" not in result + assert "description" not in result diff --git a/ui/src/components/streaming/SessionMetadata.tsx b/ui/src/components/streaming/SessionMetadata.tsx index 0ff9308..64aa073 100644 --- a/ui/src/components/streaming/SessionMetadata.tsx +++ b/ui/src/components/streaming/SessionMetadata.tsx @@ -73,6 +73,10 @@ export function SessionMetadata({ session, liveStats }: SessionMetadataProps) { )} + {provider && ( + {provider} + )} + {(totalCacheCreation > 0 || totalCacheRead > 0) && ( From aeee10620428f2bdd50f1b5a1f69ea16c909192f Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Tue, 7 Apr 2026 17:46:31 +0200 Subject: [PATCH 3/3] address review feedback --- src/agentevals/extraction.py | 97 +++++++++++++++++------------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index 1b3dbec..d78b600 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -13,7 +13,7 @@ import json import logging -from typing import Any, Protocol +from typing import Any, Protocol, TypedDict, TypeVar from .loader.base import Span, Trace from .trace_attrs import ( @@ -151,64 +151,61 @@ def extract_token_usage_from_attrs( return 0, 0, model -def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> dict[str, Any]: - """Extract extended model and provider metadata from span attributes. - - Returns a dict with provider info, response metadata, request parameters, - cache token usage, and error classification. Uses gen_ai.system as fallback - for provider when gen_ai.provider.name is absent (backward compat with - pre-v1.37.0 instrumentors). - """ - provider = attrs.get(OTEL_GENAI_PROVIDER_NAME) - if not provider: - provider = attrs.get(OTEL_GENAI_SYSTEM) - - finish_reasons_raw = attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS) - finish_reasons: list[str] = [] - if isinstance(finish_reasons_raw, list): - finish_reasons = [str(r) for r in finish_reasons_raw] - elif isinstance(finish_reasons_raw, str): - parsed = parse_json(finish_reasons_raw) - if isinstance(parsed, list): - finish_reasons = [str(r) for r in parsed] - elif finish_reasons_raw: - finish_reasons = [finish_reasons_raw] +_T = TypeVar("_T", int, float) - temperature = attrs.get(OTEL_GENAI_REQUEST_TEMPERATURE) - if temperature is not None: - try: - temperature = float(temperature) - except (TypeError, ValueError): - temperature = None - - max_tokens = attrs.get(OTEL_GENAI_REQUEST_MAX_TOKENS) - if max_tokens is not None: - try: - max_tokens = int(max_tokens) - except (TypeError, ValueError): - max_tokens = None - cache_creation = attrs.get(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, 0) - cache_read = attrs.get(OTEL_GENAI_USAGE_CACHE_READ_TOKENS, 0) +def _safe_cast(value: Any, target_type: type[_T], default: _T | None = None) -> _T | None: + """Try to cast *value* to *target_type*, returning *default* on failure.""" + if value is None: + return default try: - cache_creation = int(cache_creation) + return target_type(value) except (TypeError, ValueError): - cache_creation = 0 - try: - cache_read = int(cache_read) - except (TypeError, ValueError): - cache_read = 0 + return default + +def _parse_finish_reasons(raw: Any) -> list[str]: + """Parse finish reasons from a list, JSON string, or plain string.""" + if isinstance(raw, list): + return [str(r) for r in raw] + if isinstance(raw, str): + parsed = parse_json(raw) + if isinstance(parsed, list): + return [str(r) for r in parsed] + if raw: + return [raw] + return [] + + +class ExtendedModelInfo(TypedDict): + request_model: str | None + response_model: str | None + provider: str | None + finish_reasons: list[str] + response_id: str | None + temperature: float | None + max_tokens: int | None + cache_creation_tokens: int + cache_read_tokens: int + error_type: str | None + + +def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> ExtendedModelInfo: + """Extract extended model and provider metadata from span attributes. + + Uses gen_ai.system as fallback for provider when gen_ai.provider.name is + absent (backward compat with pre-v1.37.0 instrumentors). + """ return { "request_model": attrs.get(OTEL_GENAI_REQUEST_MODEL), "response_model": attrs.get(OTEL_GENAI_RESPONSE_MODEL), - "provider": provider, - "finish_reasons": finish_reasons, + "provider": attrs.get(OTEL_GENAI_PROVIDER_NAME) or attrs.get(OTEL_GENAI_SYSTEM), + "finish_reasons": _parse_finish_reasons(attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS)), "response_id": attrs.get(OTEL_GENAI_RESPONSE_ID), - "temperature": temperature, - "max_tokens": max_tokens, - "cache_creation_tokens": cache_creation, - "cache_read_tokens": cache_read, + "temperature": _safe_cast(attrs.get(OTEL_GENAI_REQUEST_TEMPERATURE), float), + "max_tokens": _safe_cast(attrs.get(OTEL_GENAI_REQUEST_MAX_TOKENS), int), + "cache_creation_tokens": _safe_cast(attrs.get(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS), int, 0), + "cache_read_tokens": _safe_cast(attrs.get(OTEL_GENAI_USAGE_CACHE_READ_TOKENS), int, 0), "error_type": attrs.get(OTEL_ERROR_TYPE), }