From 6e4e769e023ecc2394aef2b52d7c788c4ff198c3 Mon Sep 17 00:00:00 2001
From: krisztianfekete <git@krisztianfekete.org>
Date: Tue, 7 Apr 2026 14:12:26 +0200
Subject: [PATCH 1/3] support OTel gen_ai semconv v1.40

---
 .../openai-agents/requirements.txt            |  3 +-
 .../zero-code-examples/openai-agents/run.py   |  1 +
 src/agentevals/api/models.py                  |  3 +
 src/agentevals/extraction.py                  | 86 ++++++++++++++++-
 src/agentevals/streaming/ws_server.py         | 46 ++++++++-
 src/agentevals/trace_attrs.py                 | 39 ++++++++
 src/agentevals/trace_metrics.py               | 47 +++++++--
 ui/src/api/client.ts                          |  3 +
 .../components/inspector/ComparisonPanel.tsx  | 11 ++-
 ui/src/components/inspector/InspectorView.tsx | 10 ++
 .../inspector/PerformanceSection.tsx          | 45 ++++++++-
 ui/src/components/streaming/SessionCard.tsx   | 42 ++++++++
 .../components/streaming/SessionMetadata.tsx  | 95 +++++++++++--------
 ui/src/context/TraceProvider.tsx              |  3 +
 ui/src/lib/types.ts                           |  8 ++
 uv.lock                                       |  6 +-
 16 files changed, 392 insertions(+), 56 deletions(-)

diff --git a/examples/zero-code-examples/openai-agents/requirements.txt b/examples/zero-code-examples/openai-agents/requirements.txt
index b3bc37f..7b968e9 100644
--- a/examples/zero-code-examples/openai-agents/requirements.txt
+++ b/examples/zero-code-examples/openai-agents/requirements.txt
@@ -1,4 +1,5 @@
-openai-agents>=0.3.3
+openai>=2.30.0
+openai-agents>=0.13.0
 opentelemetry-instrumentation-openai-agents-v2>=0.1.0
 
 opentelemetry-sdk>=1.36.0
diff --git a/examples/zero-code-examples/openai-agents/run.py b/examples/zero-code-examples/openai-agents/run.py
index 6618159..ca4b0d7 100644
--- a/examples/zero-code-examples/openai-agents/run.py
+++ b/examples/zero-code-examples/openai-agents/run.py
@@ -56,6 +56,7 @@ def main():
     print(f"OTLP endpoint: {endpoint}")
 
     os.environ.setdefault("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "span_and_event")
+    os.environ.setdefault("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental")
 
     os.environ.setdefault(
         "OTEL_RESOURCE_ATTRIBUTES",
diff --git a/src/agentevals/api/models.py b/src/agentevals/api/models.py
index 4b3aebc..1e6a1a7 100644
--- a/src/agentevals/api/models.py
+++ b/src/agentevals/api/models.py
@@ -113,7 +113,10 @@ class DebugLoadData(CamelModel):
 
 class TraceConversionMetadata(CamelModel):
     agent_name: str | None = None
+    agent_id: str | None = None
     model: str | None = None
+    response_model: str | None = None
+    provider: str | None = None
     start_time: int | None = None
     user_input_preview: str | None = None
     final_output_preview: str | None = None
diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py
index 7b0c328..1b3dbec 100644
--- a/src/agentevals/extraction.py
+++ b/src/agentevals/extraction.py
@@ -22,14 +22,26 @@
     ADK_SCOPE_VALUE,
     ADK_TOOL_CALL_ARGS,
     ADK_TOOL_RESPONSE,
+    OTEL_ERROR_TYPE,
     OTEL_GENAI_INPUT_MESSAGES,
     OTEL_GENAI_OP,
     OTEL_GENAI_OUTPUT_MESSAGES,
+    OTEL_GENAI_PROVIDER_NAME,
+    OTEL_GENAI_REQUEST_MAX_TOKENS,
     OTEL_GENAI_REQUEST_MODEL,
+    OTEL_GENAI_REQUEST_TEMPERATURE,
+    OTEL_GENAI_RESPONSE_FINISH_REASONS,
+    OTEL_GENAI_RESPONSE_ID,
+    OTEL_GENAI_RESPONSE_MODEL,
+    OTEL_GENAI_SYSTEM,
     OTEL_GENAI_TOOL_CALL_ARGUMENTS,
     OTEL_GENAI_TOOL_CALL_ID,
     OTEL_GENAI_TOOL_CALL_RESULT,
+    OTEL_GENAI_TOOL_DESCRIPTION,
     OTEL_GENAI_TOOL_NAME,
+    OTEL_GENAI_TOOL_TYPE,
+    OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS,
+    OTEL_GENAI_USAGE_CACHE_READ_TOKENS,
     OTEL_GENAI_USAGE_INPUT_TOKENS,
     OTEL_GENAI_USAGE_OUTPUT_TOKENS,
     OTEL_SCOPE,
@@ -139,6 +151,68 @@ def extract_token_usage_from_attrs(
     return 0, 0, model
 
 
+def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> dict[str, Any]:
+    """Extract extended model and provider metadata from span attributes.
+
+    Uses gen_ai.system as fallback for provider when gen_ai.provider.name is
+    absent (backward compat with pre-v1.37.0 instrumentors).
+
+    Args:
+        attrs: Span attributes keyed by attribute name.
+
+    Returns:
+        A dict with the keys ``request_model``, ``response_model``,
+        ``provider``, ``finish_reasons`` (list of str, possibly empty),
+        ``response_id``, ``temperature`` (float or None), ``max_tokens``
+        (int or None), ``cache_creation_tokens`` and ``cache_read_tokens``
+        (int, 0 when missing or unparseable), and ``error_type``.
+    """
+
+    def _coerce(key: str, cast: Any, default: Any) -> Any:
+        # Missing or malformed values fall back to ``default``.
+        raw = attrs.get(key)
+        if raw is None:
+            return default
+        try:
+            return cast(raw)
+        except (TypeError, ValueError):
+            return default
+
+    # Prefer the current provider attribute, then the deprecated one.
+    provider = attrs.get(OTEL_GENAI_PROVIDER_NAME) or attrs.get(OTEL_GENAI_SYSTEM)
+
+    # Accepts a list/tuple, a JSON-encoded list, or a single bare string.
+    finish_reasons_raw = attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS)
+    finish_reasons: list[str] = []
+    if isinstance(finish_reasons_raw, (list, tuple)):
+        finish_reasons = [str(r) for r in finish_reasons_raw]
+    elif isinstance(finish_reasons_raw, str):
+        parsed = parse_json(finish_reasons_raw)
+        if isinstance(parsed, list):
+            finish_reasons = [str(r) for r in parsed]
+        elif finish_reasons_raw:
+            finish_reasons = [finish_reasons_raw]
+
+    # Request parameters stay None when absent; cache counters default to 0.
+    temperature = _coerce(OTEL_GENAI_REQUEST_TEMPERATURE, float, None)
+    max_tokens = _coerce(OTEL_GENAI_REQUEST_MAX_TOKENS, int, None)
+    cache_creation = _coerce(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, int, 0)
+    cache_read = _coerce(OTEL_GENAI_USAGE_CACHE_READ_TOKENS, int, 0)
+
+    return {
+        "request_model": attrs.get(OTEL_GENAI_REQUEST_MODEL),
+        "response_model": attrs.get(OTEL_GENAI_RESPONSE_MODEL),
+        "provider": provider,
+        "finish_reasons": finish_reasons,
+        "response_id": attrs.get(OTEL_GENAI_RESPONSE_ID),
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        "cache_creation_tokens": cache_creation,
+        "cache_read_tokens": cache_read,
+        "error_type": attrs.get(OTEL_ERROR_TYPE),
+    }
+
+
 def extract_tool_call_from_attrs(
     attrs: dict[str, Any], operation_name: str = "", span_id: str = ""
 ) -> dict[str, Any] | None:
@@ -171,7 +245,17 @@ def extract_tool_call_from_attrs(
             if fallback_id:
                 tool_call_id = fallback_id
 
-    return {"id": tool_call_id, "name": tool_name, "args": args}
+    result: dict[str, Any] = {"id": tool_call_id, "name": tool_name, "args": args}
+
+    tool_type = attrs.get(OTEL_GENAI_TOOL_TYPE)
+    if tool_type:
+        result["type"] = tool_type
+
+    tool_description = attrs.get(OTEL_GENAI_TOOL_DESCRIPTION)
+    if tool_description:
+        result["description"] = tool_description
+
+    return result
 
 
 def parse_tool_response_content(content: Any) -> dict:
diff --git a/src/agentevals/streaming/ws_server.py b/src/agentevals/streaming/ws_server.py
index 5b90b86..b2d4e26 100644
--- a/src/agentevals/streaming/ws_server.py
+++ b/src/agentevals/streaming/ws_server.py
@@ -20,7 +20,12 @@
     WSSpanReceivedEvent,
 )
 from ..converter import convert_traces
-from ..extraction import extract_token_usage_from_attrs, is_llm_span, parse_tool_response_content
+from ..extraction import (
+    extract_extended_model_info_from_attrs,
+    extract_token_usage_from_attrs,
+    is_llm_span,
+    parse_tool_response_content,
+)
 from ..loader.base import Trace
 from ..loader.otlp import OtlpJsonLoader
 from ..trace_attrs import OTEL_GENAI_INPUT_MESSAGES, OTEL_GENAI_REQUEST_MODEL
@@ -794,6 +799,14 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
         models_used: set[str] = set()
         total_input_tokens = 0
         total_output_tokens = 0
+        total_cache_creation_tokens = 0
+        total_cache_read_tokens = 0
+        providers: set[str] = set()
+        response_models: set[str] = set()
+        finish_reasons: set[str] = set()
+        error_types: set[str] = set()
+        first_temperature: float | None = None
+        first_max_tokens: int | None = None
 
         llm_spans = [s for s in trace.all_spans if is_llm_span(s) or "call_llm" in s.operation_name]
 
@@ -808,12 +821,43 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
             total_input_tokens += in_toks
             total_output_tokens += out_toks
 
+            ext = extract_extended_model_info_from_attrs(span.tags)
+            if ext["provider"]:
+                providers.add(ext["provider"])
+            if ext["response_model"]:
+                response_models.add(ext["response_model"])
+            finish_reasons.update(ext["finish_reasons"])
+            total_cache_creation_tokens += ext["cache_creation_tokens"]
+            total_cache_read_tokens += ext["cache_read_tokens"]
+            if ext["error_type"]:
+                error_types.add(ext["error_type"])
+            if first_temperature is None and ext["temperature"] is not None:
+                first_temperature = ext["temperature"]
+            if first_max_tokens is None and ext["max_tokens"] is not None:
+                first_max_tokens = ext["max_tokens"]
+
         if models_used:
             model_info["models"] = list(models_used)
         if total_input_tokens > 0:
             model_info["inputTokens"] = total_input_tokens
         if total_output_tokens > 0:
             model_info["outputTokens"] = total_output_tokens
+        if providers:
+            model_info["provider"] = min(providers, key=str)  # deterministic pick; set order is arbitrary
+        if response_models:
+            model_info["responseModels"] = list(response_models)
+        if finish_reasons:
+            model_info["finishReasons"] = list(finish_reasons)
+        if total_cache_creation_tokens > 0:
+            model_info["cacheCreationTokens"] = total_cache_creation_tokens
+        if total_cache_read_tokens > 0:
+            model_info["cacheReadTokens"] = total_cache_read_tokens
+        if first_temperature is not None:
+            model_info["temperature"] = first_temperature
+        if first_max_tokens is not None:
+            model_info["maxTokens"] = first_max_tokens
+        if error_types:
+            model_info["errorTypes"] = list(error_types)
 
         return model_info
 
diff --git a/src/agentevals/trace_attrs.py b/src/agentevals/trace_attrs.py
index 37ea351..5aedc88 100644
--- a/src/agentevals/trace_attrs.py
+++ b/src/agentevals/trace_attrs.py
@@ -2,6 +2,8 @@
 
 Single source of truth for all attribute names used across the converter,
 extraction, streaming, and runner modules.
+
+Covers OTel GenAI semantic conventions up to v1.40.0.
 """
 
 # OTel scope
@@ -25,6 +27,43 @@
 OTEL_GENAI_TOOL_CALL_RESULT = "gen_ai.tool.call.result"
 OTEL_GENAI_CONVERSATION_ID = "gen_ai.conversation.id"
 
+# Provider and response metadata (v1.37.0+)
+OTEL_GENAI_PROVIDER_NAME = "gen_ai.provider.name"
+OTEL_GENAI_RESPONSE_MODEL = "gen_ai.response.model"
+OTEL_GENAI_RESPONSE_ID = "gen_ai.response.id"
+OTEL_GENAI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
+
+# Deprecated provider attribute (pre-v1.37.0, renamed to gen_ai.provider.name)
+OTEL_GENAI_SYSTEM = "gen_ai.system"
+
+# Agent identity (v1.31.0+)
+OTEL_GENAI_AGENT_ID = "gen_ai.agent.id"
+OTEL_GENAI_AGENT_DESCRIPTION = "gen_ai.agent.description"
+
+# Tool metadata (v1.31.0+)
+OTEL_GENAI_TOOL_DESCRIPTION = "gen_ai.tool.description"
+OTEL_GENAI_TOOL_TYPE = "gen_ai.tool.type"
+
+# Error classification
+OTEL_ERROR_TYPE = "error.type"
+
+# Request parameters
+OTEL_GENAI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
+OTEL_GENAI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
+OTEL_GENAI_REQUEST_TOP_P = "gen_ai.request.top_p"
+OTEL_GENAI_REQUEST_TOP_K = "gen_ai.request.top_k"
+
+# Cache token usage (Anthropic/OpenAI prompt caching)
+OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS = "gen_ai.usage.cache_creation.input_tokens"
+OTEL_GENAI_USAGE_CACHE_READ_TOKENS = "gen_ai.usage.cache_read.input_tokens"
+
+# System/tool definitions (opt-in, v1.37.0+)
+OTEL_GENAI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
+OTEL_GENAI_TOOL_DEFINITIONS = "gen_ai.tool.definitions"
+
+# Output type
+OTEL_GENAI_OUTPUT_TYPE = "gen_ai.output.type"
+
 # ADK-specific custom attributes (gcp.vertex.agent.*)
 ADK_LLM_REQUEST = "gcp.vertex.agent.llm_request"
 ADK_LLM_RESPONSE = "gcp.vertex.agent.llm_response"
diff --git a/src/agentevals/trace_metrics.py b/src/agentevals/trace_metrics.py
index 45c208a..ef1a043 100644
--- a/src/agentevals/trace_metrics.py
+++ b/src/agentevals/trace_metrics.py
@@ -6,11 +6,16 @@
 
 from .extraction import (
     extract_agent_response_from_attrs,
+    extract_extended_model_info_from_attrs,
     extract_token_usage_from_attrs,
     extract_user_text_from_attrs,
     get_extractor,
 )
-from .trace_attrs import OTEL_GENAI_AGENT_NAME, OTEL_GENAI_REQUEST_MODEL
+from .trace_attrs import (
+    OTEL_GENAI_AGENT_ID,
+    OTEL_GENAI_AGENT_NAME,
+    OTEL_GENAI_REQUEST_MODEL,
+)
 
 
 def _truncate(text: str, max_length: int = 200) -> str:
@@ -41,6 +46,8 @@ def extract_performance_metrics(trace, extractor=None) -> dict[str, Any]:
     prompt_tokens = []
     output_tokens = []
     total_tokens = []
+    cache_creation_tokens_total = 0
+    cache_read_tokens_total = 0
 
     if extractor is None:
         extractor = get_extractor(trace)
@@ -64,21 +71,30 @@ def extract_performance_metrics(trace, extractor=None) -> dict[str, Any]:
                 prompt_tokens.append(in_toks)
                 output_tokens.append(out_toks)
                 total_tokens.append(in_toks + out_toks)
+            ext = extract_extended_model_info_from_attrs(span.tags)
+            cache_creation_tokens_total += ext["cache_creation_tokens"]
+            cache_read_tokens_total += ext["cache_read_tokens"]
         elif role == "tool":
             tool_latencies.append(duration_ms)
 
+    tokens_info: dict[str, Any] = {
+        "total_prompt": sum(prompt_tokens) if prompt_tokens else 0,
+        "total_output": sum(output_tokens) if output_tokens else 0,
+        "total": sum(total_tokens) if total_tokens else 0,
+        "per_llm_call": _calc_percentiles(total_tokens) if total_tokens else {"p50": 0.0, "p95": 0.0, "p99": 0.0},
+    }
+    if cache_creation_tokens_total > 0:
+        tokens_info["cache_creation_tokens"] = cache_creation_tokens_total
+    if cache_read_tokens_total > 0:
+        tokens_info["cache_read_tokens"] = cache_read_tokens_total
+
     return {
         "latency": {
             "overall": _calc_percentiles(agent_latencies),
             "llm_calls": _calc_percentiles(llm_latencies),
             "tool_executions": _calc_percentiles(tool_latencies),
         },
-        "tokens": {
-            "total_prompt": sum(prompt_tokens) if prompt_tokens else 0,
-            "total_output": sum(output_tokens) if output_tokens else 0,
-            "total": sum(total_tokens) if total_tokens else 0,
-            "per_llm_call": _calc_percentiles(total_tokens) if total_tokens else {"p50": 0.0, "p95": 0.0, "p99": 0.0},
-        },
+        "tokens": tokens_info,
     }
 
 
@@ -86,7 +102,10 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]:
     """Extract agent name, model, timing, and preview text from a trace."""
     metadata: dict[str, Any] = {
         "agent_name": None,
+        "agent_id": None,
         "model": None,
+        "response_model": None,
+        "provider": None,
         "start_time": None,
         "user_input_preview": None,
         "final_output_preview": None,
@@ -99,12 +118,19 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]:
     if invocation_spans:
         first_inv = invocation_spans[0]
         metadata["agent_name"] = first_inv.get_tag(OTEL_GENAI_AGENT_NAME)
+        metadata["agent_id"] = first_inv.get_tag(OTEL_GENAI_AGENT_ID)
         metadata["start_time"] = first_inv.start_time
 
         llm_spans = extractor.find_llm_spans_in(first_inv)
         if llm_spans:
             metadata["model"] = llm_spans[0].get_tag(OTEL_GENAI_REQUEST_MODEL)
 
+            ext = extract_extended_model_info_from_attrs(llm_spans[0].tags)
+            if ext["response_model"]:
+                metadata["response_model"] = ext["response_model"]
+            if ext["provider"]:
+                metadata["provider"] = ext["provider"]
+
             user_text = extract_user_text_from_attrs(llm_spans[0].tags)
             if user_text:
                 metadata["user_input_preview"] = _truncate(user_text)
@@ -123,4 +149,11 @@ def extract_trace_metadata(trace, extractor=None) -> dict[str, Any]:
                 metadata["model"] = model
                 break
 
+    if not metadata["provider"]:
+        for span in trace.all_spans:
+            ext = extract_extended_model_info_from_attrs(span.tags)
+            if ext["provider"]:
+                metadata["provider"] = ext["provider"]
+                break
+
     return metadata
diff --git a/ui/src/api/client.ts b/ui/src/api/client.ts
index 42f05fa..931d08a 100644
--- a/ui/src/api/client.ts
+++ b/ui/src/api/client.ts
@@ -146,8 +146,11 @@ export async function evaluateTracesStreaming(
               conversionWarnings: [],
               performanceMetrics: eventData.performanceMetrics,
               agentName: tm.agentName,
+              agentId: tm.agentId,
               sessionId: tm.sessionName,
               model: tm.model,
+              responseModel: tm.responseModel,
+              provider: tm.provider,
               startTime: tm.startTime,
               userInputPreview: tm.userInputPreview,
               finalOutputPreview: tm.finalOutputPreview,
diff --git a/ui/src/components/inspector/ComparisonPanel.tsx b/ui/src/components/inspector/ComparisonPanel.tsx
index f2dee7f..0715370 100644
--- a/ui/src/components/inspector/ComparisonPanel.tsx
+++ b/ui/src/components/inspector/ComparisonPanel.tsx
@@ -5,6 +5,13 @@ import type { Invocation, MetricResult, PerformanceMetrics } from '../../lib/typ
 import { MetricsComparisonSection } from './MetricsComparisonSection';
 import { PerformanceSection } from './PerformanceSection';
 
+interface TraceInfo {
+  provider?: string;
+  model?: string;
+  responseModel?: string;
+  agentName?: string;
+}
+
 interface ComparisonPanelProps {
   actualInvocation: Invocation | null;
   expectedInvocation: Invocation | null;
@@ -13,6 +20,7 @@ interface ComparisonPanelProps {
   selectedMetrics: string[];
   isEvaluating: boolean;
   performanceMetrics?: PerformanceMetrics;
+  traceInfo?: TraceInfo;
   allActualInvocations?: Invocation[];
   allExpectedInvocations?: Invocation[];
 }
@@ -25,6 +33,7 @@ export const ComparisonPanel: React.FC<ComparisonPanelProps> = ({
   selectedMetrics,
   isEvaluating,
   performanceMetrics,
+  traceInfo,
   allActualInvocations,
   allExpectedInvocations,
 }) => {
@@ -60,7 +69,7 @@ export const ComparisonPanel: React.FC<ComparisonPanelProps> = ({
       <div css={panelContentStyles}>
         {performanceMetrics && (
           <div css={performanceSectionContainerStyles}>
-            <PerformanceSection metrics={performanceMetrics} />
+            <PerformanceSection metrics={performanceMetrics} traceInfo={traceInfo} />
           </div>
         )}
 
diff --git a/ui/src/components/inspector/InspectorView.tsx b/ui/src/components/inspector/InspectorView.tsx
index 19b6d39..3576d72 100644
--- a/ui/src/components/inspector/InspectorView.tsx
+++ b/ui/src/components/inspector/InspectorView.tsx
@@ -30,6 +30,10 @@ export const InspectorView: React.FC = () => {
         metricResults: Array.from(tableRow.metricResults.values()),
         conversionWarnings: tableRow.conversionWarnings,
         performanceMetrics: tableRow.performanceMetrics,
+        agentName: tableRow.agentName,
+        model: tableRow.model,
+        responseModel: tableRow.responseModel,
+        provider: tableRow.provider,
       };
     }
     return state.results.find(r => r.traceId === state.selectedTraceId);
@@ -207,6 +211,12 @@ export const InspectorView: React.FC = () => {
       selectedMetrics={state.selectedMetrics}
       isEvaluating={state.isEvaluating}
       performanceMetrics={traceResult.performanceMetrics}
+      traceInfo={{
+        provider: traceResult.provider,
+        model: traceResult.model,
+        responseModel: traceResult.responseModel,
+        agentName: traceResult.agentName,
+      }}
       allActualInvocations={invocations}
       allExpectedInvocations={expectedInvocations}
     />
diff --git a/ui/src/components/inspector/PerformanceSection.tsx b/ui/src/components/inspector/PerformanceSection.tsx
index 396707a..1a98514 100644
--- a/ui/src/components/inspector/PerformanceSection.tsx
+++ b/ui/src/components/inspector/PerformanceSection.tsx
@@ -2,16 +2,27 @@ import React from 'react';
 import { css } from '@emotion/react';
 import type { PerformanceMetrics } from '../../lib/types';
 
+interface TraceInfo {
+  provider?: string;
+  model?: string;
+  responseModel?: string;
+  agentName?: string;
+}
+
 interface PerformanceSectionProps {
   metrics: PerformanceMetrics;
+  traceInfo?: TraceInfo;
 }
 
-export const PerformanceSection: React.FC<PerformanceSectionProps> = ({ metrics }) => {
+export const PerformanceSection: React.FC<PerformanceSectionProps> = ({ metrics, traceInfo }) => {
   if (!metrics || !metrics.latency || !metrics.tokens) {
     return null;
   }
 
   const { latency, tokens } = metrics;
+  const hasCacheTokens = (tokens.cacheCreationTokens ?? 0) > 0
+    || (tokens.cacheReadTokens ?? 0) > 0; // strict boolean, so JSX never renders a stray 0
+  const hasTraceInfo = traceInfo && (traceInfo.provider || traceInfo.responseModel);
 
   return (
     <div css={sectionStyle}>
@@ -19,6 +30,22 @@ export const PerformanceSection: React.FC<PerformanceSectionProps> = ({ metrics
 
       <table>
         <tbody>
+          {hasTraceInfo && (
+            <>
+              {traceInfo.provider && (
+                <tr>
+                  <td>Provider</td>
+                  <td>{traceInfo.provider}</td>
+                </tr>
+              )}
+              {traceInfo.responseModel && traceInfo.responseModel !== traceInfo.model && (
+                <tr>
+                  <td>Response Model</td>
+                  <td>{traceInfo.responseModel}</td>
+                </tr>
+              )}
+            </>
+          )}
           <tr>
             <td>Overall Latency (p99)</td>
             <td>{latency.overall.p99.toFixed(0)} ms</td>
@@ -39,6 +66,22 @@ export const PerformanceSection: React.FC<PerformanceSectionProps> = ({ metrics
             <td>Tokens per LLM Call (p99)</td>
             <td>{tokens.perLlmCall.p99.toFixed(0)}</td>
           </tr>
+          {hasCacheTokens && (
+            <>
+              {tokens.cacheReadTokens != null && tokens.cacheReadTokens > 0 && (
+                <tr>
+                  <td>Cache Read Tokens</td>
+                  <td>{tokens.cacheReadTokens.toLocaleString()}</td>
+                </tr>
+              )}
+              {tokens.cacheCreationTokens != null && tokens.cacheCreationTokens > 0 && (
+                <tr>
+                  <td>Cache Creation Tokens</td>
+                  <td>{tokens.cacheCreationTokens.toLocaleString()}</td>
+                </tr>
+              )}
+            </>
+          )}
         </tbody>
       </table>
     </div>
diff --git a/ui/src/components/streaming/SessionCard.tsx b/ui/src/components/streaming/SessionCard.tsx
index 6cfb43a..7319bcb 100644
--- a/ui/src/components/streaming/SessionCard.tsx
+++ b/ui/src/components/streaming/SessionCard.tsx
@@ -13,6 +13,14 @@ interface Invocation {
     models?: string[];
     inputTokens?: number;
     outputTokens?: number;
+    provider?: string;
+    responseModels?: string[];
+    finishReasons?: string[];
+    cacheCreationTokens?: number;
+    cacheReadTokens?: number;
+    temperature?: number;
+    maxTokens?: number;
+    errorTypes?: string[];
   };
 }
 
@@ -92,6 +100,11 @@ export function SessionCard({ session, isSelected, onSelect, onRemove, evaluatio
     session.invocations?.[0]?.modelInfo?.models?.[0] ||
     'Unknown';
 
+  const providerName = session.invocations?.[0]?.modelInfo?.provider || null;
+
+  const totalCacheCreation = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheCreationTokens || 0), 0) || 0;
+  const totalCacheRead = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheReadTokens || 0), 0) || 0;
+
   return (
     <div
       style={{
@@ -124,6 +137,19 @@ export function SessionCard({ session, isSelected, onSelect, onRemove, evaluatio
               {modelName}
             </span>
 
+            {providerName && (
+              <span style={{
+                fontSize: '11px',
+                fontWeight: 600,
+                color: '#3b82f6',
+                background: 'rgba(59, 130, 246, 0.1)',
+                padding: '4px 10px',
+                borderRadius: '6px',
+              }}>
+                {providerName}
+              </span>
+            )}
+
             {session.invocations && session.invocations.length > 0 && (
               <span style={{
                 fontSize: '11px',
@@ -150,6 +176,21 @@ export function SessionCard({ session, isSelected, onSelect, onRemove, evaluatio
               </span>
             )}
 
+            {(totalCacheCreation > 0 || totalCacheRead > 0) && (
+              <span style={{
+                fontSize: '11px',
+                fontWeight: 600,
+                color: '#f59e0b',
+                background: 'rgba(245, 158, 11, 0.1)',
+                padding: '4px 10px',
+                borderRadius: '6px',
+              }}>
+                cache {totalCacheRead > 0 ? `${totalCacheRead.toLocaleString()} read` : ''}
+                {totalCacheCreation > 0 && totalCacheRead > 0 ? ' / ' : ''}
+                {totalCacheCreation > 0 ? `${totalCacheCreation.toLocaleString()} created` : ''}
+              </span>
+            )}
+
             {queueNames && queueNames.length > 0 && queueNames.map(name => (
               <span key={name} style={{
                 fontSize: '11px',
@@ -408,6 +449,7 @@ export function SessionCard({ session, isSelected, onSelect, onRemove, evaluatio
             metadata: session.metadata,
             startedAt: session.startedAt,
             status: session.status,
+            invocations: session.invocations,
           }}
           liveStats={liveStats}
         />
diff --git a/ui/src/components/streaming/SessionMetadata.tsx b/ui/src/components/streaming/SessionMetadata.tsx
index a55dd43..0ff9308 100644
--- a/ui/src/components/streaming/SessionMetadata.tsx
+++ b/ui/src/components/streaming/SessionMetadata.tsx
@@ -5,6 +5,13 @@ interface SessionMetadataProps {
     metadata: Record<string, any>;
     startedAt: string;
     status: 'active' | 'complete';
+    invocations?: Array<{
+      modelInfo?: {
+        provider?: string;
+        cacheCreationTokens?: number;
+        cacheReadTokens?: number;
+      };
+    }>;
   };
   liveStats: {
     totalInputTokens: number;
@@ -12,8 +19,34 @@ interface SessionMetadataProps {
   };
 }
 
+function MetadataItem({ label, children }: { label: string; children: React.ReactNode }) {
+  return (
+    <div>
+      <div style={{
+        fontSize: '10px',
+        color: 'var(--text-tertiary)',
+        marginBottom: '4px',
+        fontWeight: 600,
+        textTransform: 'uppercase' as const,
+      }}>
+        {label}
+      </div>
+      <div style={{
+        fontSize: '14px',
+        fontWeight: 600,
+        color: 'var(--text-primary)',
+      }}>
+        {children}
+      </div>
+    </div>
+  );
+}
+
 export function SessionMetadata({ session, liveStats }: SessionMetadataProps) {
   const totalTokens = liveStats.totalInputTokens + liveStats.totalOutputTokens;
+  const provider = session.invocations?.[0]?.modelInfo?.provider;
+  const totalCacheCreation = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheCreationTokens || 0), 0) || 0;
+  const totalCacheRead = session.invocations?.reduce((sum, inv) => sum + (inv.modelInfo?.cacheReadTokens || 0), 0) || 0;
 
   return (
     <div style={{
@@ -26,52 +59,32 @@ export function SessionMetadata({ session, liveStats }: SessionMetadataProps) {
       flexWrap: 'wrap',
     }}>
       {totalTokens > 0 && (
-        <div>
-          <div style={{
-            fontSize: '10px',
+        <MetadataItem label="Tokens">
+          <span style={{ color: '#10b981' }}>
+            {totalTokens.toLocaleString()}
+          </span>
+          <span style={{
+            fontSize: '11px',
             color: 'var(--text-tertiary)',
-            marginBottom: '4px',
-            fontWeight: 600,
-            textTransform: 'uppercase' as const,
-          }}>
-            Tokens
-          </div>
-          <div style={{
-            fontSize: '14px',
-            fontWeight: 600,
-            color: '#10b981',
+            marginLeft: '6px',
           }}>
-            {totalTokens.toLocaleString()}
-            <span style={{
-              fontSize: '11px',
-              color: 'var(--text-tertiary)',
-              marginLeft: '6px',
-            }}>
-              (↓{liveStats.totalInputTokens.toLocaleString()} ↑{liveStats.totalOutputTokens.toLocaleString()})
-            </span>
-          </div>
-        </div>
+            (↓{liveStats.totalInputTokens.toLocaleString()} ↑{liveStats.totalOutputTokens.toLocaleString()})
+          </span>
+        </MetadataItem>
+      )}
+
+      {(totalCacheCreation > 0 || totalCacheRead > 0) && (
+        <MetadataItem label="Cache Tokens">
+          <span style={{ color: '#f59e0b' }}>
+            {totalCacheRead > 0 && `${totalCacheRead.toLocaleString()} read`}
+            {totalCacheCreation > 0 && totalCacheRead > 0 && ' / '}
+            {totalCacheCreation > 0 && `${totalCacheCreation.toLocaleString()} created`}
+          </span>
+        </MetadataItem>
       )}
 
       {Object.keys(session.metadata).length > 0 && Object.entries(session.metadata).map(([key, value]) => (
-        <div key={key}>
-          <div style={{
-            fontSize: '10px',
-            color: 'var(--text-tertiary)',
-            marginBottom: '4px',
-            fontWeight: 600,
-            textTransform: 'uppercase' as const,
-          }}>
-            {key}
-          </div>
-          <div style={{
-            fontSize: '14px',
-            fontWeight: 600,
-            color: 'var(--text-primary)',
-          }}>
-            {String(value)}
-          </div>
-        </div>
+        <MetadataItem key={key} label={key}>{String(value)}</MetadataItem>
       ))}
     </div>
   );
diff --git a/ui/src/context/TraceProvider.tsx b/ui/src/context/TraceProvider.tsx
index 2ffdd22..7f18575 100644
--- a/ui/src/context/TraceProvider.tsx
+++ b/ui/src/context/TraceProvider.tsx
@@ -162,8 +162,11 @@ export const TraceProvider: React.FC<TraceProviderProps> = ({ children }) => {
                   sessionId: partialResult.sessionId ?? existingRow?.sessionId ?? metadata?.sessionId,
                   status: allMetricsComplete ? 'complete' : 'loading',
                   agentName: partialResult.agentName ?? existingRow?.agentName ?? metadata?.agentName,
+                  agentId: partialResult.agentId ?? existingRow?.agentId,
                   startTime: partialResult.startTime ?? existingRow?.startTime ?? metadata?.startTime,
                   model: partialResult.model ?? existingRow?.model ?? metadata?.model,
+                  responseModel: partialResult.responseModel ?? existingRow?.responseModel,
+                  provider: partialResult.provider ?? existingRow?.provider,
                   userInputPreview: partialResult.userInputPreview ?? existingRow?.userInputPreview ?? metadata?.userInputPreview,
                   finalOutputPreview: partialResult.finalOutputPreview ?? existingRow?.finalOutputPreview ?? metadata?.finalOutputPreview,
                   invocations: metadata?.invocations,
diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts
index eced01f..bc9a399 100644
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -137,6 +137,8 @@ export interface PerformanceMetrics {
     totalOutput: number;
     total: number;
     perLlmCall: { p50: number; p95: number; p99: number };
+    cacheCreationTokens?: number;
+    cacheReadTokens?: number;
   };
 }
 
@@ -148,7 +150,10 @@ export interface TraceResult {
   conversionWarnings: string[];
   performanceMetrics?: PerformanceMetrics;
   agentName?: string;
+  agentId?: string;
   model?: string;
+  responseModel?: string;
+  provider?: string;
   startTime?: number;
   userInputPreview?: string;
   finalOutputPreview?: string;
@@ -181,8 +186,11 @@ export interface TraceTableRow {
   sessionId?: string;
   status: TraceRowStatus;
   agentName?: string;
+  agentId?: string;
   startTime?: number;
   model?: string;
+  responseModel?: string;
+  provider?: string;
   userInputPreview?: string;
   finalOutputPreview?: string;
   metricResults: Map<string, MetricResult>;
diff --git a/uv.lock b/uv.lock
index 8e8ebf3..39decd3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2253,7 +2253,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "2.21.0"
+version = "2.30.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2265,9 +2265,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/92/e5/3d197a0947a166649f566706d7a4c8f7fe38f1fa7b24c9bcffe4c7591d44/openai-2.21.0.tar.gz", hash = "sha256:81b48ce4b8bbb2cc3af02047ceb19561f7b1dc0d4e52d1de7f02abfd15aa59b7", size = 644374, upload-time = "2026-02-14T00:12:01.577Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/15/52580c8fbc16d0675d516e8749806eda679b16de1e4434ea06fb6feaa610/openai-2.30.0.tar.gz", hash = "sha256:92f7661c990bda4b22a941806c83eabe4896c3094465030dd882a71abe80c885", size = 676084, upload-time = "2026-03-25T22:08:59.96Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/cc/56/0a89092a453bb2c676d66abee44f863e742b2110d4dbb1dbcca3f7e5fc33/openai-2.21.0-py3-none-any.whl", hash = "sha256:0bc1c775e5b1536c294eded39ee08f8407656537ccc71b1004104fe1602e267c", size = 1103065, upload-time = "2026-02-14T00:11:59.603Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/9e/5bfa2270f902d5b92ab7d41ce0475b8630572e71e349b2a4996d14bdda93/openai-2.30.0-py3-none-any.whl", hash = "sha256:9a5ae616888eb2748ec5e0c5b955a51592e0b201a11f4262db920f2a78c5231d", size = 1146656, upload-time = "2026-03-25T22:08:58.2Z" },
 ]
 
 [[package]]

From 4458cda1e0ac5b07e04c7a7ea855d5491e54dd39 Mon Sep 17 00:00:00 2001
From: krisztianfekete <git@krisztianfekete.org>
Date: Tue, 7 Apr 2026 14:27:53 +0200
Subject: [PATCH 2/3] address feedback: deterministic model info ordering, extraction tests, provider in session metadata

---
 src/agentevals/streaming/ws_server.py         |  19 +-
 tests/test_extraction.py                      | 206 ++++++++++++++++++
 .../components/streaming/SessionMetadata.tsx  |   4 +
 3 files changed, 220 insertions(+), 9 deletions(-)

diff --git a/src/agentevals/streaming/ws_server.py b/src/agentevals/streaming/ws_server.py
index b2d4e26..0163245 100644
--- a/src/agentevals/streaming/ws_server.py
+++ b/src/agentevals/streaming/ws_server.py
@@ -801,7 +801,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
         total_output_tokens = 0
         total_cache_creation_tokens = 0
         total_cache_read_tokens = 0
-        providers: set[str] = set()
+        first_provider: str | None = None
         response_models: set[str] = set()
         finish_reasons: set[str] = set()
         error_types: set[str] = set()
@@ -809,6 +809,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
         first_max_tokens: int | None = None
 
         llm_spans = [s for s in trace.all_spans if is_llm_span(s) or "call_llm" in s.operation_name]
+        llm_spans.sort(key=lambda s: s.start_time)
 
         for span in llm_spans:
             in_toks, out_toks, model = extract_token_usage_from_attrs(span.tags)
@@ -822,8 +823,8 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
             total_output_tokens += out_toks
 
             ext = extract_extended_model_info_from_attrs(span.tags)
-            if ext["provider"]:
-                providers.add(ext["provider"])
+            if first_provider is None and ext["provider"]:
+                first_provider = ext["provider"]
             if ext["response_model"]:
                 response_models.add(ext["response_model"])
             finish_reasons.update(ext["finish_reasons"])
@@ -837,17 +838,17 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
                 first_max_tokens = ext["max_tokens"]
 
         if models_used:
-            model_info["models"] = list(models_used)
+            model_info["models"] = sorted(models_used)
         if total_input_tokens > 0:
             model_info["inputTokens"] = total_input_tokens
         if total_output_tokens > 0:
             model_info["outputTokens"] = total_output_tokens
-        if providers:
-            model_info["provider"] = next(iter(providers))
+        if first_provider:
+            model_info["provider"] = first_provider
         if response_models:
-            model_info["responseModels"] = list(response_models)
+            model_info["responseModels"] = sorted(response_models)
         if finish_reasons:
-            model_info["finishReasons"] = list(finish_reasons)
+            model_info["finishReasons"] = sorted(finish_reasons)
         if total_cache_creation_tokens > 0:
             model_info["cacheCreationTokens"] = total_cache_creation_tokens
         if total_cache_read_tokens > 0:
@@ -857,7 +858,7 @@ def _extract_model_info_from_trace(self, trace: Trace, invocation_idx: int) -> d
         if first_max_tokens is not None:
             model_info["maxTokens"] = first_max_tokens
         if error_types:
-            model_info["errorTypes"] = list(error_types)
+            model_info["errorTypes"] = sorted(error_types)
 
         return model_info
 
diff --git a/tests/test_extraction.py b/tests/test_extraction.py
index 4c7fba9..ad1d63d 100644
--- a/tests/test_extraction.py
+++ b/tests/test_extraction.py
@@ -10,6 +10,7 @@
     AdkExtractor,
     GenAIExtractor,
     extract_agent_response_from_attrs,
+    extract_extended_model_info_from_attrs,
     extract_token_usage_from_attrs,
     extract_tool_call_from_attrs,
     extract_user_text_from_attrs,
@@ -26,14 +27,26 @@
     ADK_LLM_RESPONSE,
     ADK_SCOPE_VALUE,
     ADK_TOOL_CALL_ARGS,
+    OTEL_ERROR_TYPE,
     OTEL_GENAI_AGENT_NAME,
     OTEL_GENAI_INPUT_MESSAGES,
     OTEL_GENAI_OP,
     OTEL_GENAI_OUTPUT_MESSAGES,
+    OTEL_GENAI_PROVIDER_NAME,
+    OTEL_GENAI_REQUEST_MAX_TOKENS,
     OTEL_GENAI_REQUEST_MODEL,
+    OTEL_GENAI_REQUEST_TEMPERATURE,
+    OTEL_GENAI_RESPONSE_FINISH_REASONS,
+    OTEL_GENAI_RESPONSE_ID,
+    OTEL_GENAI_RESPONSE_MODEL,
+    OTEL_GENAI_SYSTEM,
     OTEL_GENAI_TOOL_CALL_ARGUMENTS,
     OTEL_GENAI_TOOL_CALL_ID,
+    OTEL_GENAI_TOOL_DESCRIPTION,
     OTEL_GENAI_TOOL_NAME,
+    OTEL_GENAI_TOOL_TYPE,
+    OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS,
+    OTEL_GENAI_USAGE_CACHE_READ_TOKENS,
     OTEL_GENAI_USAGE_INPUT_TOKENS,
     OTEL_GENAI_USAGE_OUTPUT_TOKENS,
     OTEL_SCOPE,
@@ -522,3 +535,196 @@ def test_find_tool_spans_in(self):
         root = _span(op="agent_run", children=[child])
         ext = GenAIExtractor()
         assert [s.span_id for s in ext.find_tool_spans_in(root)] == ["tool1"]
+
+
+# ---------------------------------------------------------------------------
+# extract_extended_model_info_from_attrs
+# ---------------------------------------------------------------------------
+
+
+class TestExtractExtendedModelInfo:
+    def test_provider_from_provider_name(self):
+        attrs = {OTEL_GENAI_PROVIDER_NAME: "openai"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["provider"] == "openai"
+
+    def test_provider_fallback_to_gen_ai_system(self):
+        attrs = {OTEL_GENAI_SYSTEM: "anthropic"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["provider"] == "anthropic"
+
+    def test_provider_name_takes_priority_over_system(self):
+        attrs = {
+            OTEL_GENAI_PROVIDER_NAME: "openai",
+            OTEL_GENAI_SYSTEM: "old_value",
+        }
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["provider"] == "openai"
+
+    def test_provider_none_when_absent(self):
+        result = extract_extended_model_info_from_attrs({})
+        assert result["provider"] is None
+
+    def test_response_model(self):
+        attrs = {OTEL_GENAI_RESPONSE_MODEL: "gpt-4o-2024-08-06"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["response_model"] == "gpt-4o-2024-08-06"
+
+    def test_request_model(self):
+        attrs = {OTEL_GENAI_REQUEST_MODEL: "gpt-4o"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["request_model"] == "gpt-4o"
+
+    def test_response_id(self):
+        attrs = {OTEL_GENAI_RESPONSE_ID: "chatcmpl-abc123"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["response_id"] == "chatcmpl-abc123"
+
+    def test_finish_reasons_from_list(self):
+        attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: ["stop", "tool_calls"]}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["finish_reasons"] == ["stop", "tool_calls"]
+
+    def test_finish_reasons_from_json_string(self):
+        attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: '["stop"]'}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["finish_reasons"] == ["stop"]
+
+    def test_finish_reasons_from_plain_string(self):
+        attrs = {OTEL_GENAI_RESPONSE_FINISH_REASONS: "stop"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["finish_reasons"] == ["stop"]
+
+    def test_finish_reasons_empty_when_absent(self):
+        result = extract_extended_model_info_from_attrs({})
+        assert result["finish_reasons"] == []
+
+    def test_temperature_numeric(self):
+        attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: 0.7}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["temperature"] == 0.7
+
+    def test_temperature_from_string(self):
+        attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: "0.9"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["temperature"] == 0.9
+
+    def test_temperature_invalid_returns_none(self):
+        attrs = {OTEL_GENAI_REQUEST_TEMPERATURE: "not_a_number"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["temperature"] is None
+
+    def test_max_tokens_numeric(self):
+        attrs = {OTEL_GENAI_REQUEST_MAX_TOKENS: 4096}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["max_tokens"] == 4096
+
+    def test_max_tokens_from_string(self):
+        attrs = {OTEL_GENAI_REQUEST_MAX_TOKENS: "2048"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["max_tokens"] == 2048
+
+    def test_cache_creation_tokens(self):
+        attrs = {OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: 1500}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["cache_creation_tokens"] == 1500
+
+    def test_cache_read_tokens(self):
+        attrs = {OTEL_GENAI_USAGE_CACHE_READ_TOKENS: 3000}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["cache_read_tokens"] == 3000
+
+    def test_cache_tokens_default_to_zero(self):
+        result = extract_extended_model_info_from_attrs({})
+        assert result["cache_creation_tokens"] == 0
+        assert result["cache_read_tokens"] == 0
+
+    def test_cache_tokens_from_string(self):
+        attrs = {
+            OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: "500",
+            OTEL_GENAI_USAGE_CACHE_READ_TOKENS: "1000",
+        }
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["cache_creation_tokens"] == 500
+        assert result["cache_read_tokens"] == 1000
+
+    def test_error_type(self):
+        attrs = {OTEL_ERROR_TYPE: "timeout"}
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["error_type"] == "timeout"
+
+    def test_error_type_none_when_absent(self):
+        result = extract_extended_model_info_from_attrs({})
+        assert result["error_type"] is None
+
+    def test_full_attribute_set(self):
+        attrs = {
+            OTEL_GENAI_PROVIDER_NAME: "anthropic",
+            OTEL_GENAI_REQUEST_MODEL: "claude-sonnet-4-20250514",
+            OTEL_GENAI_RESPONSE_MODEL: "claude-sonnet-4-20250514",
+            OTEL_GENAI_RESPONSE_ID: "msg_abc",
+            OTEL_GENAI_RESPONSE_FINISH_REASONS: ["end_turn"],
+            OTEL_GENAI_REQUEST_TEMPERATURE: 1.0,
+            OTEL_GENAI_REQUEST_MAX_TOKENS: 8192,
+            OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS: 2000,
+            OTEL_GENAI_USAGE_CACHE_READ_TOKENS: 5000,
+            OTEL_ERROR_TYPE: None,
+        }
+        result = extract_extended_model_info_from_attrs(attrs)
+        assert result["provider"] == "anthropic"
+        assert result["request_model"] == "claude-sonnet-4-20250514"
+        assert result["response_model"] == "claude-sonnet-4-20250514"
+        assert result["response_id"] == "msg_abc"
+        assert result["finish_reasons"] == ["end_turn"]
+        assert result["temperature"] == 1.0
+        assert result["max_tokens"] == 8192
+        assert result["cache_creation_tokens"] == 2000
+        assert result["cache_read_tokens"] == 5000
+        assert result["error_type"] is None
+
+
+# ---------------------------------------------------------------------------
+# extract_tool_call_from_attrs — tool type and description
+# ---------------------------------------------------------------------------
+
+
+class TestExtractToolCallTypeAndDescription:
+    def test_type_and_description_present(self):
+        attrs = {
+            OTEL_GENAI_TOOL_NAME: "search",
+            OTEL_GENAI_TOOL_CALL_ID: "tc1",
+            OTEL_GENAI_TOOL_CALL_ARGUMENTS: json.dumps({"q": "test"}),
+            OTEL_GENAI_TOOL_TYPE: "function",
+            OTEL_GENAI_TOOL_DESCRIPTION: "Search the web",
+        }
+        result = extract_tool_call_from_attrs(attrs)
+        assert result["name"] == "search"
+        assert result["type"] == "function"
+        assert result["description"] == "Search the web"
+
+    def test_type_without_description(self):
+        attrs = {
+            OTEL_GENAI_TOOL_NAME: "retriever",
+            OTEL_GENAI_TOOL_TYPE: "datastore",
+        }
+        result = extract_tool_call_from_attrs(attrs)
+        assert result["type"] == "datastore"
+        assert "description" not in result
+
+    def test_description_without_type(self):
+        attrs = {
+            OTEL_GENAI_TOOL_NAME: "calculator",
+            OTEL_GENAI_TOOL_DESCRIPTION: "Performs arithmetic",
+        }
+        result = extract_tool_call_from_attrs(attrs)
+        assert result["description"] == "Performs arithmetic"
+        assert "type" not in result
+
+    def test_absent_type_and_description(self):
+        attrs = {
+            OTEL_GENAI_TOOL_NAME: "search",
+            OTEL_GENAI_TOOL_CALL_ID: "tc1",
+        }
+        result = extract_tool_call_from_attrs(attrs)
+        assert "type" not in result
+        assert "description" not in result
diff --git a/ui/src/components/streaming/SessionMetadata.tsx b/ui/src/components/streaming/SessionMetadata.tsx
index 0ff9308..64aa073 100644
--- a/ui/src/components/streaming/SessionMetadata.tsx
+++ b/ui/src/components/streaming/SessionMetadata.tsx
@@ -73,6 +73,10 @@ export function SessionMetadata({ session, liveStats }: SessionMetadataProps) {
         </MetadataItem>
       )}
 
+      {provider && (
+        <MetadataItem label="Provider">{provider}</MetadataItem>
+      )}
+
       {(totalCacheCreation > 0 || totalCacheRead > 0) && (
         <MetadataItem label="Cache Tokens">
           <span style={{ color: '#f59e0b' }}>

From aeee10620428f2bdd50f1b5a1f69ea16c909192f Mon Sep 17 00:00:00 2001
From: krisztianfekete <git@krisztianfekete.org>
Date: Tue, 7 Apr 2026 17:46:31 +0200
Subject: [PATCH 3/3] address review feedback: refactor extended model info extraction into typed helpers

---
 src/agentevals/extraction.py | 97 +++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 50 deletions(-)

diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py
index 1b3dbec..d78b600 100644
--- a/src/agentevals/extraction.py
+++ b/src/agentevals/extraction.py
@@ -13,7 +13,7 @@
 
 import json
 import logging
-from typing import Any, Protocol
+from typing import Any, Protocol, TypedDict, TypeVar
 
 from .loader.base import Span, Trace
 from .trace_attrs import (
@@ -151,64 +151,61 @@ def extract_token_usage_from_attrs(
     return 0, 0, model
 
 
-def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> dict[str, Any]:
-    """Extract extended model and provider metadata from span attributes.
-
-    Returns a dict with provider info, response metadata, request parameters,
-    cache token usage, and error classification. Uses gen_ai.system as fallback
-    for provider when gen_ai.provider.name is absent (backward compat with
-    pre-v1.37.0 instrumentors).
-    """
-    provider = attrs.get(OTEL_GENAI_PROVIDER_NAME)
-    if not provider:
-        provider = attrs.get(OTEL_GENAI_SYSTEM)
-
-    finish_reasons_raw = attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS)
-    finish_reasons: list[str] = []
-    if isinstance(finish_reasons_raw, list):
-        finish_reasons = [str(r) for r in finish_reasons_raw]
-    elif isinstance(finish_reasons_raw, str):
-        parsed = parse_json(finish_reasons_raw)
-        if isinstance(parsed, list):
-            finish_reasons = [str(r) for r in parsed]
-        elif finish_reasons_raw:
-            finish_reasons = [finish_reasons_raw]
+_T = TypeVar("_T", int, float)
 
-    temperature = attrs.get(OTEL_GENAI_REQUEST_TEMPERATURE)
-    if temperature is not None:
-        try:
-            temperature = float(temperature)
-        except (TypeError, ValueError):
-            temperature = None
-
-    max_tokens = attrs.get(OTEL_GENAI_REQUEST_MAX_TOKENS)
-    if max_tokens is not None:
-        try:
-            max_tokens = int(max_tokens)
-        except (TypeError, ValueError):
-            max_tokens = None
 
-    cache_creation = attrs.get(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS, 0)
-    cache_read = attrs.get(OTEL_GENAI_USAGE_CACHE_READ_TOKENS, 0)
+def _safe_cast(value: Any, target_type: type[_T], default: _T | None = None) -> _T | None:
+    """Cast *value* to *target_type*; return *default* when *value* is None or the cast fails."""
+    if value is None:
+        return default
     try:
-        cache_creation = int(cache_creation)
+        return target_type(value)
     except (TypeError, ValueError):
-        cache_creation = 0
-    try:
-        cache_read = int(cache_read)
-    except (TypeError, ValueError):
-        cache_read = 0
+        return default
+
 
+def _parse_finish_reasons(raw: Any) -> list[str]:
+    """Parse finish reasons from a list, JSON string, or plain string."""
+    if isinstance(raw, list):
+        return [str(r) for r in raw]
+    if isinstance(raw, str):
+        parsed = parse_json(raw)
+        if isinstance(parsed, list):
+            return [str(r) for r in parsed]
+        if raw:
+            return [raw]
+    return []
+
+
+class ExtendedModelInfo(TypedDict):
+    request_model: str | None
+    response_model: str | None
+    provider: str | None
+    finish_reasons: list[str]
+    response_id: str | None
+    temperature: float | None
+    max_tokens: int | None
+    cache_creation_tokens: int
+    cache_read_tokens: int
+    error_type: str | None
+
+
+def extract_extended_model_info_from_attrs(attrs: dict[str, Any]) -> ExtendedModelInfo:
+    """Extract extended model and provider metadata from span attributes.
+
+    Uses gen_ai.system as fallback for provider when gen_ai.provider.name is
+    absent (backward compat with pre-v1.37.0 instrumentors).
+    """
     return {
         "request_model": attrs.get(OTEL_GENAI_REQUEST_MODEL),
         "response_model": attrs.get(OTEL_GENAI_RESPONSE_MODEL),
-        "provider": provider,
-        "finish_reasons": finish_reasons,
+        "provider": attrs.get(OTEL_GENAI_PROVIDER_NAME) or attrs.get(OTEL_GENAI_SYSTEM),
+        "finish_reasons": _parse_finish_reasons(attrs.get(OTEL_GENAI_RESPONSE_FINISH_REASONS)),
         "response_id": attrs.get(OTEL_GENAI_RESPONSE_ID),
-        "temperature": temperature,
-        "max_tokens": max_tokens,
-        "cache_creation_tokens": cache_creation,
-        "cache_read_tokens": cache_read,
+        "temperature": _safe_cast(attrs.get(OTEL_GENAI_REQUEST_TEMPERATURE), float),
+        "max_tokens": _safe_cast(attrs.get(OTEL_GENAI_REQUEST_MAX_TOKENS), int),
+        "cache_creation_tokens": _safe_cast(attrs.get(OTEL_GENAI_USAGE_CACHE_CREATION_TOKENS), int, 0),
+        "cache_read_tokens": _safe_cast(attrs.get(OTEL_GENAI_USAGE_CACHE_READ_TOKENS), int, 0),
         "error_type": attrs.get(OTEL_ERROR_TYPE),
     }