From c1d429a6cda1e5401198ceaf9a8977c3449e4c46 Mon Sep 17 00:00:00 2001
From: Derek Xu <xzrderek@gmail.com>
Date: Tue, 10 Mar 2026 01:34:22 -0700
Subject: [PATCH 1/2] Revert "use litellm sdk (#424)"

This reverts commit acba670cde2e5c8cade00be9e7c219308fa7aab0.
---
 eval_protocol/adapters/fireworks_tracing.py |  13 +-
 eval_protocol/proxy/Dockerfile.gateway      |  18 +-
 eval_protocol/proxy/README.md               |  18 +-
 eval_protocol/proxy/config_no_cache.yaml    |   3 +-
 eval_protocol/proxy/docker-compose.yml      |  35 +++-
 eval_protocol/proxy/proxy_core/app.py       |  26 +--
 eval_protocol/proxy/proxy_core/litellm.py   | 183 +++++++++++---------
 eval_protocol/proxy/proxy_core/models.py    |   2 +-
 eval_protocol/proxy/requirements.txt        |   7 +
 pyproject.toml                              |   8 +-
 uv.lock                                     | 115 ++++--------
 11 files changed, 209 insertions(+), 219 deletions(-)
 create mode 100644 eval_protocol/proxy/requirements.txt

diff --git a/eval_protocol/adapters/fireworks_tracing.py b/eval_protocol/adapters/fireworks_tracing.py
index 4b0c9cb9..3c701ab2 100644
--- a/eval_protocol/adapters/fireworks_tracing.py
+++ b/eval_protocol/adapters/fireworks_tracing.py
@@ -46,7 +46,7 @@ def __call__(
         ...
 
 
-def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
     """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
 
     Args:
@@ -137,14 +137,9 @@ def convert_trace_dict_to_evaluation_row(
 
         observations = trace.get("observations") or []
         # We can only extract when stored in OTEL format.
-        otel_attributes = extract_otel_attributes(observations)
-        if otel_attributes:
-            # Find choices from any provider (llm.*.choices pattern)
-            choices = None
-            for key, value in otel_attributes.items():
-                if key.endswith(".choices") and isinstance(value, list):
-                    choices = value
-                    break
+        openai_response = extract_openai_response(observations)
+        if openai_response:
+            choices = openai_response.get("llm.openai.choices")
             if choices and len(choices) > 0:
                 execution_metadata.finish_reason = choices[0].get("finish_reason")
 
diff --git a/eval_protocol/proxy/Dockerfile.gateway b/eval_protocol/proxy/Dockerfile.gateway
index 7fc41f46..a9308faa 100644
--- a/eval_protocol/proxy/Dockerfile.gateway
+++ b/eval_protocol/proxy/Dockerfile.gateway
@@ -1,4 +1,4 @@
-# Metadata Extraction Gateway - Uses LiteLLM SDK directly with Langfuse OTEL
+# Metadata Extraction Gateway - Sits in front of LiteLLM
 FROM python:3.11-slim
 
 WORKDIR /app
@@ -6,20 +6,18 @@ WORKDIR /app
 # Prevent Python from buffering stdout/stderr
 ENV PYTHONUNBUFFERED=1
 
-# Copy the entire package for local install (context is repo root)
-COPY pyproject.toml /app/pyproject.toml
-COPY eval_protocol /app/eval_protocol
-COPY README.md /app/README.md
+# Copy requirements file
+COPY ./requirements.txt /app/requirements.txt
 
-# Install from local source with proxy extras
-RUN pip install --no-cache-dir ".[proxy]"
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the proxy package (local overrides for main.py, auth.py, etc.)
-COPY eval_protocol/proxy/proxy_core /app/proxy_core
+# Copy the proxy package
+COPY ./proxy_core /app/proxy_core
 
 # Expose port
 EXPOSE 4000
 
 # Run the gateway as a module
-# LANGFUSE_HOST and REDIS_HOST will be set by environment (docker-compose or Cloud Run)
+# LITELLM_URL will be set by environment (docker-compose or Cloud Run)
 CMD ["python", "-m", "proxy_core.main"]
diff --git a/eval_protocol/proxy/README.md b/eval_protocol/proxy/README.md
index 9223bba2..ffcdaf25 100644
--- a/eval_protocol/proxy/README.md
+++ b/eval_protocol/proxy/README.md
@@ -59,9 +59,9 @@ This enables distributed evaluation systems to track which LLM completions belon
    - Stores insertion IDs per rollout for completeness checking
    - Uses Redis Sets: `rollout_id -> {insertion_id_1, insertion_id_2, ...}`
 
-#### 3. **LiteLLM SDK (Direct)**
-   - Uses LiteLLM SDK directly for LLM calls (no separate proxy server needed)
-   - Integrated with Langfuse via `langfuse_otel` OpenTelemetry callback
+#### 3. **LiteLLM Backend**
+   - Standard LiteLLM proxy for routing to LLM providers
+   - Configured with Langfuse callbacks for automatic tracing
 
 ## Key Features
 
@@ -244,11 +244,12 @@ Forwards any other request to LiteLLM backend with API key injection.
 
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
+| `LITELLM_URL` | Yes | - | URL of LiteLLM backend |
 | `REDIS_HOST` | Yes | - | Redis hostname |
 | `REDIS_PORT` | No | 6379 | Redis port |
 | `REDIS_PASSWORD` | No | - | Redis password |
 | `SECRETS_PATH` | No | `proxy_core/secrets.yaml` | Path to secrets file (YAML) |
-| `LANGFUSE_HOST` | No | `https://us.cloud.langfuse.com` | Langfuse OTEL host for tracing |
+| `LANGFUSE_HOST` | No | `https://cloud.langfuse.com` | Langfuse base URL |
 | `REQUEST_TIMEOUT` | No | 300.0 | Request timeout (LLM calls) in seconds |
 | `LOG_LEVEL` | No | INFO | Logging level |
 | `PORT` | No | 4000 | Gateway port |
@@ -271,14 +272,15 @@ default_project_id: project-1
 
 ### LiteLLM Configuration
 
-The `config_no_cache.yaml` configures LiteLLM (only needed if running a standalone LiteLLM proxy):
+The `config_no_cache.yaml` configures LiteLLM:
 ```yaml
 model_list:
   - model_name: "*"
     litellm_params:
       model: "*"
 litellm_settings:
-  callbacks: ["langfuse_otel"]
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
@@ -286,11 +288,9 @@ general_settings:
 
 Key settings:
 - **Wildcard model support**: Route any model to any provider
-- **Langfuse OTEL**: OpenTelemetry-based tracing via `langfuse_otel` callback
+- **Langfuse callbacks**: Automatic tracing on success/failure
 - **Client-side credentials**: Accept API keys from request body
 
-**Note:** The proxy now uses the LiteLLM SDK directly with `langfuse_otel` integration, so a separate LiteLLM proxy server is no longer required.
-
 ## Security Considerations
 
 ### Authentication
diff --git a/eval_protocol/proxy/config_no_cache.yaml b/eval_protocol/proxy/config_no_cache.yaml
index 1d772705..7adb5a72 100644
--- a/eval_protocol/proxy/config_no_cache.yaml
+++ b/eval_protocol/proxy/config_no_cache.yaml
@@ -3,7 +3,8 @@ model_list:
     litellm_params:
       model: "*"
 litellm_settings:
-  callbacks: ["langfuse_otel"]
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
   drop_params: True
 general_settings:
   allow_client_side_credentials: true
diff --git a/eval_protocol/proxy/docker-compose.yml b/eval_protocol/proxy/docker-compose.yml
index 0983e2eb..a6058e0e 100644
--- a/eval_protocol/proxy/docker-compose.yml
+++ b/eval_protocol/proxy/docker-compose.yml
@@ -7,19 +7,41 @@ services:
     ports:
       - "6379:6379"  # Expose for debugging if needed
     networks:
-      - proxy-network
+      - litellm-network
     restart: unless-stopped
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data
 
-  # Metadata Gateway - Handles LLM calls directly via LiteLLM SDK with Langfuse OTEL
+  # LiteLLM Backend - Handles actual LLM proxying
+  litellm-backend:
+    image: litellm/litellm:v1.77.3-stable
+    platform: linux/amd64
+    container_name: litellm-backend
+    command: ["--config", "/app/config.yaml", "--port", "4000", "--host", "0.0.0.0"]
+    # To use other model providers (OpenAI, Anthropic, etc.), set their API keys in the .env file.
+    env_file:
+      - .env  # Load API keys from .env file
+    environment:
+      - LANGFUSE_PUBLIC_KEY=dummy  # Dummy public/secret keys so the Langfuse instance initializes in LiteLLM; the gateway sends the real keys with each request
+      - LANGFUSE_SECRET_KEY=dummy
+    volumes:
+      - ./config_no_cache.yaml:/app/config.yaml:ro
+    ports:
+      - "4001:4000"  # Expose on 4001 for direct access if needed
+    networks:
+      - litellm-network
+    restart: unless-stopped
+
+  # Metadata Gateway - Public-facing service that extracts metadata from URLs
   metadata-gateway:
     build:
-      context: ../..
-      dockerfile: eval_protocol/proxy/Dockerfile.gateway
+      context: .
+      dockerfile: Dockerfile.gateway
     container_name: metadata-gateway
     environment:
+      # Point to the LiteLLM backend service
+      - LITELLM_URL=http://litellm-backend:4000
       - PORT=4000
       # Redis configuration for assistant message counting
       - REDIS_HOST=redis
@@ -34,13 +56,14 @@ services:
     ports:
       - "4000:4000"  # Main public-facing port
     networks:
-      - proxy-network
+      - litellm-network
     depends_on:
+      - litellm-backend
       - redis
     restart: unless-stopped
 
 networks:
-  proxy-network:
+  litellm-network:
     driver: bridge
 
 volumes:
diff --git a/eval_protocol/proxy/proxy_core/app.py b/eval_protocol/proxy/proxy_core/app.py
index 633df539..751d5dc1 100644
--- a/eval_protocol/proxy/proxy_core/app.py
+++ b/eval_protocol/proxy/proxy_core/app.py
@@ -15,7 +15,7 @@
 
 from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
 from .auth import AuthProvider, NoAuthProvider
-from .litellm import handle_chat_completion
+from .litellm import handle_chat_completion, proxy_to_litellm
 from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
 
 # Configure logging before any other imports (so all modules inherit this config)
@@ -35,6 +35,10 @@ def build_proxy_config(
     preprocess_traces_request: Optional[TracesRequestHook] = None,
 ) -> ProxyConfig:
     """Load environment and secrets, and build ProxyConfig"""
+    # Env
+    litellm_url = os.getenv("LITELLM_URL")
+    if not litellm_url:
+        raise ValueError("LITELLM_URL environment variable must be set")
     request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
     langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
 
@@ -62,6 +66,7 @@ def build_proxy_config(
         raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
 
     return ProxyConfig(
+        litellm_url=litellm_url,
         request_timeout=request_timeout,
         langfuse_host=langfuse_host,
         langfuse_keys=langfuse_keys,
@@ -108,16 +113,6 @@ async def lifespan(app: FastAPI):
         app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
         app.state.redis = init_redis()
 
-        config = app.state.config
-        default_keys = config.langfuse_keys[config.default_project_id]
-        os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
-        os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
-        os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)
-
-        import litellm
-
-        litellm.callbacks = ["langfuse_otel"]
-
         try:
             yield
         finally:
@@ -302,4 +297,13 @@ async def pointwise_get_langfuse_trace(
     async def health():
         return {"status": "healthy", "service": "metadata-proxy"}
 
+    # Catch-all
+    @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
+    async def catch_all_proxy(
+        path: str,
+        request: Request,
+        config: ProxyConfig = Depends(get_config),
+    ):
+        return await proxy_to_litellm(config, path, request)
+
     return app
diff --git a/eval_protocol/proxy/proxy_core/litellm.py b/eval_protocol/proxy/proxy_core/litellm.py
index e55bfa0d..cdd2383b 100644
--- a/eval_protocol/proxy/proxy_core/litellm.py
+++ b/eval_protocol/proxy/proxy_core/litellm.py
@@ -1,17 +1,14 @@
 """
-LiteLLM client - handles all LLM calls directly via LiteLLM SDK with Langfuse OTEL integration.
+LiteLLM client - handles all communication with LiteLLM service.
 """
 
 import json
 import base64
+import httpx
 import logging
 from uuid6 import uuid7
 from fastapi import Request, Response, HTTPException
-from fastapi.responses import StreamingResponse
 import redis
-import openai
-from litellm import acompletion
-
 from .redis_utils import register_insertion_id
 from .models import ProxyConfig, ChatParams
 
@@ -25,12 +22,12 @@ async def handle_chat_completion(
     params: ChatParams,
 ) -> Response:
     """
-    Handle chat completion requests using LiteLLM SDK directly with Langfuse OTEL.
+    Handle chat completion requests and forward to LiteLLM.
 
     If metadata IDs (rollout_id, etc.) are provided, they'll be added as tags
     and the assistant message count will be tracked in Redis.
 
-    If encoded_base_url is provided, it will be decoded and used as api_base.
+    If encoded_base_url is provided, it will be decoded and added to the request.
     """
     body = await request.body()
     data = json.loads(body) if body else {}
@@ -53,26 +50,36 @@ async def handle_chat_completion(
     # Decode and add base_url if provided
     if encoded_base_url:
         try:
+            # Decode from URL-safe base64
             decoded_bytes = base64.urlsafe_b64decode(encoded_base_url)
-            data["base_url"] = decoded_bytes.decode("utf-8")
-            logger.debug(f"Decoded base_url: {data['base_url']}")
+            base_url = decoded_bytes.decode("utf-8")
+            data["base_url"] = base_url
+            logger.debug(f"Decoded base_url: {base_url}")
         except Exception as e:
             logger.error(f"Failed to decode base_url: {e}")
             raise HTTPException(status_code=400, detail=f"Invalid encoded_base_url: {str(e)}")
 
-    # Extract API key from Authorization header and add to data
+    # Extract API key from Authorization header and inject into request body
     auth_header = request.headers.get("authorization", "")
     if auth_header.startswith("Bearer "):
-        data["api_key"] = auth_header.replace("Bearer ", "").strip()
+        api_key = auth_header.replace("Bearer ", "").strip()
+        # Only inject API key if model is a Fireworks model
+        model = data.get("model")
+        if model and isinstance(model, str) and model.startswith("fireworks_ai"):
+            data["api_key"] = api_key
 
-    # Build metadata with tags for Langfuse
+    # If metadata IDs are provided, add them as tags
     insertion_id = None
-    metadata = data.pop("metadata", {}) or {}
-    tags = list(metadata.pop("tags", []) or [])
-
     if rollout_id is not None:
         insertion_id = str(uuid7())
-        tags.extend(
+
+        if "metadata" not in data:
+            data["metadata"] = {}
+        if "tags" not in data["metadata"]:
+            data["metadata"]["tags"] = []
+
+        # Add extracted IDs as tags
+        data["metadata"]["tags"].extend(
             [
                 f"rollout_id:{rollout_id}",
                 f"insertion_id:{insertion_id}",
@@ -83,72 +90,84 @@ async def handle_chat_completion(
             ]
         )
 
-    # Build Langfuse metadata (tags + user if present)
-    # Convert user_id (from preprocess hook) to trace_user_id for Langfuse
-    user_id = metadata.pop("user_id", None) or data.get("user")
-    litellm_metadata = {"tags": tags, **metadata}
-    if user_id:
-        litellm_metadata["trace_user_id"] = user_id
-
-    langfuse_keys = config.langfuse_keys[project_id]
-
-    # Check if streaming is requested
-    is_streaming = data.get("stream", False)
-
-    # Pop fields that we pass explicitly to avoid duplicate kwarg errors
-    request_timeout = data.pop("timeout", None) or config.request_timeout
-    data.pop("langfuse_public_key", None)
-    data.pop("langfuse_secret_key", None)
-
-    try:
-        # Make the completion call - pass all params through
-        # Note: langfuse_host is set via LANGFUSE_HOST env var at startup; OTEL doesn't support per-request host override
-        response = await acompletion(
-            **data,
-            metadata=litellm_metadata,
-            timeout=request_timeout,
-            langfuse_public_key=langfuse_keys["public_key"],
-            langfuse_secret_key=langfuse_keys["secret_key"],
+    # Add Langfuse configuration
+    data["langfuse_public_key"] = config.langfuse_keys[project_id]["public_key"]
+    data["langfuse_secret_key"] = config.langfuse_keys[project_id]["secret_key"]
+    data["langfuse_host"] = config.langfuse_host
+
+    # Forward to LiteLLM's standard /chat/completions endpoint
+    # Set longer timeout for LLM API calls (LLMs can be slow)
+    timeout = httpx.Timeout(config.request_timeout)
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        # Copy headers from original request but exclude content-length (httpx will set it correctly)
+        headers = dict(request.headers)
+        headers.pop("host", None)
+        headers.pop("content-length", None)  # Let httpx calculate the correct length
+        headers["content-type"] = "application/json"
+
+        # Forward to LiteLLM
+        litellm_url = f"{config.litellm_url}/chat/completions"
+
+        response = await client.post(
+            litellm_url,
+            json=data,  # httpx will serialize and set correct Content-Length
+            headers=headers,
+        )
+
+        # Register insertion_id in Redis only on successful response
+        if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
+            register_insertion_id(redis_client, rollout_id, insertion_id)
+
+        # Return the response
+        return Response(
+            content=response.content,
+            status_code=response.status_code,
+            headers=dict(response.headers),
+        )
+
+
+async def proxy_to_litellm(config: ProxyConfig, path: str, request: Request) -> Response:
+    """
+    Catch-all proxy: Forward any request to LiteLLM, extracting API key from Authorization header.
+    """
+    # Set longer timeout for LLM API calls (LLMs can be slow)
+    timeout = httpx.Timeout(config.request_timeout)
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        # Copy headers
+        headers = dict(request.headers)
+        headers.pop("host", None)
+        headers.pop("content-length", None)
+
+        # Get body
+        body = await request.body()
+
+        # Pass through API key from Authorization header
+        if request.method in ["POST", "PUT", "PATCH"] and body:
+            try:
+                data = json.loads(body)
+
+                auth_header = request.headers.get("authorization", "")
+                if auth_header.startswith("Bearer "):
+                    api_key = auth_header.replace("Bearer ", "").strip()
+                    data["api_key"] = api_key
+
+                # Re-serialize
+                body = json.dumps(data).encode()
+            except json.JSONDecodeError:
+                pass
+
+        # Forward to LiteLLM
+        litellm_url = f"{config.litellm_url}/{path}"
+
+        response = await client.request(
+            method=request.method,
+            url=litellm_url,
+            headers=headers,
+            content=body,
         )
 
-        if is_streaming:
-            # For streaming, return a StreamingResponse with SSE format
-            # Register insertion_id only after stream completes successfully
-            async def stream_generator():
-                async for chunk in response:  # type: ignore[union-attr]
-                    yield f"data: {chunk.model_dump_json()}\n\n"
-                yield "data: [DONE]\n\n"
-                # Stream completed successfully - now register
-                if insertion_id is not None and rollout_id is not None:
-                    register_insertion_id(redis_client, rollout_id, insertion_id)
-
-            return StreamingResponse(
-                stream_generator(),
-                media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                },
-            )
-        else:
-            # Non-streaming: register insertion_id on success
-            if insertion_id is not None and rollout_id is not None:
-                register_insertion_id(redis_client, rollout_id, insertion_id)
-
-            return Response(
-                content=response.model_dump_json(),
-                status_code=200,
-                media_type="application/json",
-            )
-
-    except HTTPException:
-        raise
-    except openai.APIError as e:
-        # Convert to HTTPException and let FastAPI handle it
-        raise HTTPException(
-            status_code=getattr(e, "status_code", 500),
-            detail=str(e),
+        return Response(
+            content=response.content,
+            status_code=response.status_code,
+            headers=dict(response.headers),
         )
-    except Exception as e:
-        logger.error(f"Unexpected error: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=str(e))
diff --git a/eval_protocol/proxy/proxy_core/models.py b/eval_protocol/proxy/proxy_core/models.py
index 062a870c..f3b5e614 100644
--- a/eval_protocol/proxy/proxy_core/models.py
+++ b/eval_protocol/proxy/proxy_core/models.py
@@ -53,6 +53,7 @@ class TracesParams(BaseModel):
 class ProxyConfig(BaseModel):
     """Configuration model for the LiteLLM Metadata Proxy"""
 
+    litellm_url: str
     request_timeout: float = 300.0
     langfuse_host: str
     langfuse_keys: Dict[str, Dict[str, str]]
@@ -72,7 +73,6 @@ class ObservationResponse(BaseModel):
     input: Optional[Any] = None
     output: Optional[Any] = None
     parent_observation_id: Optional[str] = None
-    metadata: Optional[Dict[str, Any]] = None
 
 
 class TraceResponse(BaseModel):
diff --git a/eval_protocol/proxy/requirements.txt b/eval_protocol/proxy/requirements.txt
new file mode 100644
index 00000000..15d21d0b
--- /dev/null
+++ b/eval_protocol/proxy/requirements.txt
@@ -0,0 +1,7 @@
+fastapi>=0.116.1
+uvicorn>=0.24.0
+httpx>=0.25.0
+redis>=5.0.0
+langfuse>=2.0.0
+uuid6>=2025.0.0
+PyYAML>=6.0.0
diff --git a/pyproject.toml b/pyproject.toml
index 68b88160..3025582f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ dependencies = [
     "omegaconf>=2.3.0",
     "httpx>=0.24.0",
     "anthropic>=0.59.0",
-    "litellm>=1.81.0,<1.82.0",
+    "litellm<1.75.0",
     "pytest>=6.0.0",
     "pytest-asyncio>=0.21.0",
     "peewee>=3.18.2",
@@ -146,17 +146,13 @@ langgraph = [
 langgraph_tools = [
     "langgraph>=0.6.7",
     "langchain>=0.3.0",
-    # langchain-fireworks removed: incompatible with fireworks-ai>=1.0.0
+    "langchain-fireworks>=0.3.0",
 ]
 
 proxy = [
     "redis>=5.0.0",
     "langfuse>=2.0.0",
     "uuid6>=2025.0.0",
-    "litellm>=1.81.0,<1.82.0",
-    "opentelemetry-api>=1.29.0",
-    "opentelemetry-sdk>=1.29.0",
-    "opentelemetry-exporter-otlp>=1.29.0",
 ]
 
 [project.scripts]
diff --git a/uv.lock b/uv.lock
index 0168540a..ae420524 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1251,6 +1251,7 @@ langgraph = [
 ]
 langgraph-tools = [
     { name = "langchain" },
+    { name = "langchain-fireworks" },
     { name = "langgraph" },
 ]
 langsmith = [
@@ -1264,10 +1265,6 @@ openevals = [
 ]
 proxy = [
     { name = "langfuse" },
-    { name = "litellm" },
-    { name = "opentelemetry-api" },
-    { name = "opentelemetry-exporter-otlp" },
-    { name = "opentelemetry-sdk" },
     { name = "redis" },
     { name = "uuid6" },
 ]
@@ -1329,13 +1326,13 @@ requires-dist = [
     { name = "langchain", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" },
     { name = "langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" },
     { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" },
+    { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" },
     { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" },
     { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" },
     { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" },
     { name = "langgraph", marker = "extra == 'langgraph-tools'", specifier = ">=0.6.7" },
     { name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.86" },
-    { name = "litellm", specifier = ">=1.81.0,<1.82.0" },
-    { name = "litellm", marker = "extra == 'proxy'", specifier = ">=1.81.0,<1.82.0" },
+    { name = "litellm", specifier = "<1.75.0" },
     { name = "loguru", specifier = ">=0.6.0" },
     { name = "mcp", specifier = ">=1.9.2" },
     { name = "omegaconf", specifier = ">=2.3.0" },
@@ -1343,9 +1340,6 @@ requires-dist = [
     { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" },
     { name = "openenv-core", marker = "extra == 'openenv'" },
     { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" },
-    { name = "opentelemetry-api", marker = "extra == 'proxy'", specifier = ">=1.29.0" },
-    { name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy'", specifier = ">=1.29.0" },
-    { name = "opentelemetry-sdk", marker = "extra == 'proxy'", specifier = ">=1.29.0" },
     { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" },
     { name = "peewee", specifier = ">=3.18.2" },
     { name = "peft", marker = "extra == 'trl'", specifier = ">=0.7.0" },
@@ -1577,69 +1571,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/dc/05/4958cccbe862958d862b6a15f2d10d2f5ec3c411268dcb131a433e5e7a0d/fastmcp-2.10.6-py3-none-any.whl", hash = "sha256:9782416a8848cc0f4cfcc578e5c17834da620bef8ecf4d0daabf5dd1272411a2", size = 202613, upload-time = "2025-07-19T20:02:11.47Z" },
 ]
 
-[[package]]
-name = "fastuuid"
-version = "0.14.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/ad/b2/731a6696e37cd20eed353f69a09f37a984a43c9713764ee3f7ad5f57f7f9/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:6e6243d40f6c793c3e2ee14c13769e341b90be5ef0c23c82fa6515a96145181a", size = 516760, upload-time = "2025-10-19T22:25:21.509Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/79/c73c47be2a3b8734d16e628982653517f80bbe0570e27185d91af6096507/fastuuid-0.14.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:13ec4f2c3b04271f62be2e1ce7e95ad2dd1cf97e94503a3760db739afbd48f00", size = 264748, upload-time = "2025-10-19T22:41:52.873Z" },
-    { url = "https://files.pythonhosted.org/packages/24/c5/84c1eea05977c8ba5173555b0133e3558dc628bcf868d6bf1689ff14aedc/fastuuid-0.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b2fdd48b5e4236df145a149d7125badb28e0a383372add3fbaac9a6b7a394470", size = 254537, upload-time = "2025-10-19T22:33:55.603Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/23/4e362367b7fa17dbed646922f216b9921efb486e7abe02147e4b917359f8/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f74631b8322d2780ebcf2d2d75d58045c3e9378625ec51865fe0b5620800c39d", size = 278994, upload-time = "2025-10-19T22:26:17.631Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/72/3985be633b5a428e9eaec4287ed4b873b7c4c53a9639a8b416637223c4cd/fastuuid-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83cffc144dc93eb604b87b179837f2ce2af44871a7b323f2bfed40e8acb40ba8", size = 280003, upload-time = "2025-10-19T22:23:45.415Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/6d/6ef192a6df34e2266d5c9deb39cd3eea986df650cbcfeaf171aa52a059c3/fastuuid-0.14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a771f135ab4523eb786e95493803942a5d1fc1610915f131b363f55af53b219", size = 303583, upload-time = "2025-10-19T22:26:00.756Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/11/8a2ea753c68d4fece29d5d7c6f3f903948cc6e82d1823bc9f7f7c0355db3/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4edc56b877d960b4eda2c4232f953a61490c3134da94f3c28af129fb9c62a4f6", size = 460955, upload-time = "2025-10-19T22:36:25.196Z" },
-    { url = "https://files.pythonhosted.org/packages/23/42/7a32c93b6ce12642d9a152ee4753a078f372c9ebb893bc489d838dd4afd5/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bcc96ee819c282e7c09b2eed2b9bd13084e3b749fdb2faf58c318d498df2efbe", size = 480763, upload-time = "2025-10-19T22:24:28.451Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/e9/a5f6f686b46e3ed4ed3b93770111c233baac87dd6586a411b4988018ef1d/fastuuid-0.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7a3c0bca61eacc1843ea97b288d6789fbad7400d16db24e36a66c28c268cfe3d", size = 452613, upload-time = "2025-10-19T22:25:06.827Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/c9/18abc73c9c5b7fc0e476c1733b678783b2e8a35b0be9babd423571d44e98/fastuuid-0.14.0-cp310-cp310-win32.whl", hash = "sha256:7f2f3efade4937fae4e77efae1af571902263de7b78a0aee1a1653795a093b2a", size = 155045, upload-time = "2025-10-19T22:28:32.732Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/8a/d9e33f4eb4d4f6d9f2c5c7d7e96b5cdbb535c93f3b1ad6acce97ee9d4bf8/fastuuid-0.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:ae64ba730d179f439b0736208b4c279b8bc9c089b102aec23f86512ea458c8a4", size = 156122, upload-time = "2025-10-19T22:23:15.59Z" },
-    { url = "https://files.pythonhosted.org/packages/98/f3/12481bda4e5b6d3e698fbf525df4443cc7dce746f246b86b6fcb2fba1844/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34", size = 516386, upload-time = "2025-10-19T22:42:40.176Z" },
-    { url = "https://files.pythonhosted.org/packages/59/19/2fc58a1446e4d72b655648eb0879b04e88ed6fa70d474efcf550f640f6ec/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7", size = 264569, upload-time = "2025-10-19T22:25:50.977Z" },
-    { url = "https://files.pythonhosted.org/packages/78/29/3c74756e5b02c40cfcc8b1d8b5bac4edbd532b55917a6bcc9113550e99d1/fastuuid-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1", size = 254366, upload-time = "2025-10-19T22:29:49.166Z" },
-    { url = "https://files.pythonhosted.org/packages/52/96/d761da3fccfa84f0f353ce6e3eb8b7f76b3aa21fd25e1b00a19f9c80a063/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc", size = 278978, upload-time = "2025-10-19T22:35:41.306Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/c2/f84c90167cc7765cb82b3ff7808057608b21c14a38531845d933a4637307/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8", size = 279692, upload-time = "2025-10-19T22:25:36.997Z" },
-    { url = "https://files.pythonhosted.org/packages/af/7b/4bacd03897b88c12348e7bd77943bac32ccf80ff98100598fcff74f75f2e/fastuuid-0.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7", size = 303384, upload-time = "2025-10-19T22:29:46.578Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/a2/584f2c29641df8bd810d00c1f21d408c12e9ad0c0dafdb8b7b29e5ddf787/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73", size = 460921, upload-time = "2025-10-19T22:36:42.006Z" },
-    { url = "https://files.pythonhosted.org/packages/24/68/c6b77443bb7764c760e211002c8638c0c7cce11cb584927e723215ba1398/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36", size = 480575, upload-time = "2025-10-19T22:28:18.975Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/87/93f553111b33f9bb83145be12868c3c475bf8ea87c107063d01377cc0e8e/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94", size = 452317, upload-time = "2025-10-19T22:25:32.75Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/8c/a04d486ca55b5abb7eaa65b39df8d891b7b1635b22db2163734dc273579a/fastuuid-0.14.0-cp311-cp311-win32.whl", hash = "sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24", size = 154804, upload-time = "2025-10-19T22:24:15.615Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/b2/2d40bf00820de94b9280366a122cbaa60090c8cf59e89ac3938cf5d75895/fastuuid-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa", size = 156099, upload-time = "2025-10-19T22:24:31.646Z" },
-    { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" },
-    { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" },
-    { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" },
-    { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" },
-    { url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" },
-    { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" },
-    { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/83/ae12dd39b9a39b55d7f90abb8971f1a5f3c321fd72d5aa83f90dc67fe9ed/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021", size = 510720, upload-time = "2025-10-19T22:42:34.633Z" },
-    { url = "https://files.pythonhosted.org/packages/53/b0/a4b03ff5d00f563cc7546b933c28cb3f2a07344b2aec5834e874f7d44143/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc", size = 262024, upload-time = "2025-10-19T22:30:25.482Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/6d/64aee0a0f6a58eeabadd582e55d0d7d70258ffdd01d093b30c53d668303b/fastuuid-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5", size = 251679, upload-time = "2025-10-19T22:36:14.096Z" },
-    { url = "https://files.pythonhosted.org/packages/60/f5/a7e9cda8369e4f7919d36552db9b2ae21db7915083bc6336f1b0082c8b2e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f", size = 277862, upload-time = "2025-10-19T22:36:23.302Z" },
-    { url = "https://files.pythonhosted.org/packages/f0/d3/8ce11827c783affffd5bd4d6378b28eb6cc6d2ddf41474006b8d62e7448e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87", size = 278278, upload-time = "2025-10-19T22:29:43.809Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/51/680fb6352d0bbade04036da46264a8001f74b7484e2fd1f4da9e3db1c666/fastuuid-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b", size = 301788, upload-time = "2025-10-19T22:36:06.825Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/7c/2014b5785bd8ebdab04ec857635ebd84d5ee4950186a577db9eff0fb8ff6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022", size = 459819, upload-time = "2025-10-19T22:35:31.623Z" },
-    { url = "https://files.pythonhosted.org/packages/01/d2/524d4ceeba9160e7a9bc2ea3e8f4ccf1ad78f3bde34090ca0c51f09a5e91/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995", size = 478546, upload-time = "2025-10-19T22:26:03.023Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/17/354d04951ce114bf4afc78e27a18cfbd6ee319ab1829c2d5fb5e94063ac6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab", size = 450921, upload-time = "2025-10-19T22:31:02.151Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/be/d7be8670151d16d88f15bb121c5b66cdb5ea6a0c2a362d0dcf30276ade53/fastuuid-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad", size = 154559, upload-time = "2025-10-19T22:36:36.011Z" },
-    { url = "https://files.pythonhosted.org/packages/22/1d/5573ef3624ceb7abf4a46073d3554e37191c868abc3aecd5289a72f9810a/fastuuid-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed", size = 156539, upload-time = "2025-10-19T22:33:35.898Z" },
-    { url = "https://files.pythonhosted.org/packages/16/c9/8c7660d1fe3862e3f8acabd9be7fc9ad71eb270f1c65cce9a2b7a31329ab/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad", size = 510600, upload-time = "2025-10-19T22:43:44.17Z" },
-    { url = "https://files.pythonhosted.org/packages/4c/f4/a989c82f9a90d0ad995aa957b3e572ebef163c5299823b4027986f133dfb/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b", size = 262069, upload-time = "2025-10-19T22:43:38.38Z" },
-    { url = "https://files.pythonhosted.org/packages/da/6c/a1a24f73574ac995482b1326cf7ab41301af0fabaa3e37eeb6b3df00e6e2/fastuuid-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714", size = 251543, upload-time = "2025-10-19T22:32:22.537Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/20/2a9b59185ba7a6c7b37808431477c2d739fcbdabbf63e00243e37bd6bf49/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f", size = 277798, upload-time = "2025-10-19T22:33:53.821Z" },
-    { url = "https://files.pythonhosted.org/packages/ef/33/4105ca574f6ded0af6a797d39add041bcfb468a1255fbbe82fcb6f592da2/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f", size = 278283, upload-time = "2025-10-19T22:29:02.812Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/8c/fca59f8e21c4deb013f574eae05723737ddb1d2937ce87cb2a5d20992dc3/fastuuid-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75", size = 301627, upload-time = "2025-10-19T22:35:54.985Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/e2/f78c271b909c034d429218f2798ca4e89eeda7983f4257d7865976ddbb6c/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4", size = 459778, upload-time = "2025-10-19T22:28:00.999Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/f0/5ff209d865897667a2ff3e7a572267a9ced8f7313919f6d6043aed8b1caa/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_i686.whl", hash = "sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad", size = 478605, upload-time = "2025-10-19T22:36:21.764Z" },
-    { url = "https://files.pythonhosted.org/packages/e0/c8/2ce1c78f983a2c4987ea865d9516dbdfb141a120fd3abb977ae6f02ba7ca/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8", size = 450837, upload-time = "2025-10-19T22:34:37.178Z" },
-    { url = "https://files.pythonhosted.org/packages/df/60/dad662ec9a33b4a5fe44f60699258da64172c39bd041da2994422cdc40fe/fastuuid-0.14.0-cp314-cp314-win32.whl", hash = "sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06", size = 154532, upload-time = "2025-10-19T22:35:18.217Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/f6/da4db31001e854025ffd26bc9ba0740a9cbba2c3259695f7c5834908b336/fastuuid-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a", size = 156457, upload-time = "2025-10-19T22:33:44.579Z" },
-]
-
 [[package]]
 name = "filelock"
 version = "3.18.0"
@@ -3036,6 +2967,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/da/e8/e7a090ebe37f2b071c64e81b99fb1273b3151ae932f560bb94c22f191cde/langchain_core-0.3.80-py3-none-any.whl", hash = "sha256:2141e3838d100d17dce2359f561ec0df52c526bae0de6d4f469f8026c5747456", size = 450786, upload-time = "2025-11-19T22:23:17.133Z" },
 ]
 
+[[package]]
+name = "langchain-fireworks"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "fireworks-ai" },
+    { name = "langchain-core" },
+    { name = "openai" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1d/80/78ea4a04b1170cfa7564557808fd80e4c6f812cb5655c95a0374ca79c7ac/langchain_fireworks-0.3.0.tar.gz", hash = "sha256:09db8a06cd50df07068c07c4862e87d70b0da0f7d4e1b06f062c292af61c1433", size = 20900, upload-time = "2025-04-23T14:14:32.438Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/05/68/79696d5e1573a674141a44c9c59c04629e1ba25673d64a7b03f3843ae162/langchain_fireworks-0.3.0-py3-none-any.whl", hash = "sha256:ef2ea22f8cae3e654f0e1d3eb3a60c5fcd4a914643ab324507997f89f5831166", size = 17770, upload-time = "2025-04-23T14:14:31.373Z" },
+]
+
 [[package]]
 name = "langchain-openai"
 version = "0.3.35"
@@ -3064,21 +3011,22 @@ wheels = [
 
 [[package]]
 name = "langfuse"
-version = "2.60.10"
+version = "3.2.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "anyio" },
     { name = "backoff" },
     { name = "httpx" },
-    { name = "idna" },
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-exporter-otlp" },
+    { name = "opentelemetry-sdk" },
     { name = "packaging" },
     { name = "pydantic" },
     { name = "requests" },
     { name = "wrapt" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/eb/45/77fdf53c9e9f49bb78f72eba3f992f2f3d8343e05976aabfe1fca276a640/langfuse-2.60.10.tar.gz", hash = "sha256:a26d0d927a28ee01b2d12bb5b862590b643cc4e60a28de6e2b0c2cfff5dbfc6a", size = 152648, upload-time = "2025-09-16T15:08:12.426Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/61/0d/8fc51099cf337fb3b56cb7d305074bc0223c62e1ccabf80cc6285ccf5b31/langfuse-3.2.1.tar.gz", hash = "sha256:f79b0380dfcf52c7525bb5d7f8e9d8786a6fc8b37867def047bb388930a7beb3", size = 153369, upload-time = "2025-07-16T09:50:28.434Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/76/69/08584fbd69e14398d3932a77d0c8d7e20389da3e6470210d6719afba2801/langfuse-2.60.10-py3-none-any.whl", hash = "sha256:815c6369194aa5b2a24f88eb9952f7c3fc863272c41e90642a71f3bc76f4a11f", size = 275568, upload-time = "2025-09-16T15:08:10.166Z" },
+    { url = "https://files.pythonhosted.org/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32", size = 299315, upload-time = "2025-07-16T09:50:26.582Z" },
 ]
 
 [[package]]
@@ -3166,12 +3114,11 @@ wheels = [
 
 [[package]]
 name = "litellm"
-version = "1.81.3"
+version = "1.74.9"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
     { name = "click" },
-    { name = "fastuuid" },
     { name = "httpx" },
     { name = "importlib-metadata" },
     { name = "jinja2" },
@@ -3182,9 +3129,9 @@ dependencies = [
     { name = "tiktoken" },
     { name = "tokenizers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ce/dd/d70835d5b231617761717cd5ba60342b677693093a71d5ce13ae9d254aee/litellm-1.81.3.tar.gz", hash = "sha256:a7688b429a88abfdd02f2a8c3158ebb5385689cfb7f9d4ac1473d018b2047e1b", size = 13612652, upload-time = "2026-01-25T02:45:58.888Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/5d/646bebdb4769d77e6a018b9152c9ccf17afe15d0f88974f338d3f2ee7c15/litellm-1.74.9.tar.gz", hash = "sha256:4a32eff70342e1aee4d1cbf2de2a6ed64a7c39d86345c58d4401036af018b7de", size = 9660510, upload-time = "2025-07-28T16:42:39.297Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/83/62/d3f53c665261fdd5bb2401246e005a4ea8194ad1c4d8c663318ae3d638bf/litellm-1.81.3-py3-none-any.whl", hash = "sha256:3f60fd8b727587952ad3dd18b68f5fed538d6f43d15bb0356f4c3a11bccb2b92", size = 11946995, upload-time = "2026-01-25T02:45:55.887Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" },
 ]
 
 [[package]]
@@ -4040,7 +3987,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "2.15.0"
+version = "1.109.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -4052,9 +3999,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383, upload-time = "2026-01-09T22:10:08.603Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 564133, upload-time = "2025-09-24T13:00:53.075Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" },
 ]
 
 [[package]]

From 95cf759a6d3dd6b953d3118bdd7a3faa5c6edd27 Mon Sep 17 00:00:00 2001
From: Derek Xu <xzrderek@gmail.com>
Date: Tue, 10 Mar 2026 01:34:47 -0700
Subject: [PATCH 2/2] Revert "add finish reason (#421)"

This reverts commit 1d078782822d40428e42e69de016a80016edb938.
---
 eval_protocol/adapters/fireworks_tracing.py  | 53 ++------------------
 eval_protocol/proxy/proxy_core/langfuse.py   |  1 -
 eval_protocol/reward_function.py             |  1 +
 tests/remote_server/remote_server.py         |  3 --
 tests/remote_server/test_remote_fireworks.py | 43 +---------------
 5 files changed, 5 insertions(+), 96 deletions(-)

diff --git a/eval_protocol/adapters/fireworks_tracing.py b/eval_protocol/adapters/fireworks_tracing.py
index 3c701ab2..4913e33b 100644
--- a/eval_protocol/adapters/fireworks_tracing.py
+++ b/eval_protocol/adapters/fireworks_tracing.py
@@ -8,10 +8,8 @@
 import logging
 import requests
 from datetime import datetime
-import ast
-import json
-import os
 from typing import Any, Dict, List, Optional, Protocol
+import os
 
 from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
 from .base import BaseAdapter
@@ -46,43 +44,6 @@ def __call__(
         ...
 
 
-def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
-    """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
-
-    Args:
-        observations: List of observation dictionaries from the trace
-
-    Returns:
-        Dict with all attributes parsed. Or None if not found.
-    """
-    for obs in observations:
-        if obs.get("name") == "raw_gen_ai_request" and obs.get("type") == "SPAN":
-            metadata = obs.get("metadata") or {}
-            attributes = metadata.get("attributes") or {}
-
-            result: Dict[str, Any] = {}
-
-            for key, value in attributes.items():
-                # Try to parse stringified objects (could be Python repr or JSON)
-                if isinstance(value, str) and value.startswith(("[", "{")):
-                    try:
-                        result[key] = ast.literal_eval(value)
-                    except Exception as e:
-                        logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)
-                        try:
-                            result[key] = json.loads(value)
-                        except Exception as e:
-                            logger.debug("Failed to parse %s with json.loads: %s", key, e)
-                            result[key] = value
-                else:
-                    result[key] = value
-
-            if result:
-                return result
-
-    return None
-
-
 def convert_trace_dict_to_evaluation_row(
     trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
 ) -> Optional[EvaluationRow]:
@@ -135,14 +96,6 @@ def convert_trace_dict_to_evaluation_row(
                 ):
                     break  # Break early if we've found all the metadata we need
 
-        observations = trace.get("observations") or []
-        # We can only extract when stored in OTEL format.
-        openai_response = extract_openai_response(observations)
-        if openai_response:
-            choices = openai_response.get("llm.openai.choices")
-            if choices and len(choices) > 0:
-                execution_metadata.finish_reason = choices[0].get("finish_reason")
-
         return EvaluationRow(
             messages=messages,
             tools=tools,
@@ -207,7 +160,7 @@ def extract_messages_from_trace_dict(
         # Fallback: use the last GENERATION observation which typically contains full chat history
         if not messages:
             try:
-                all_observations = trace.get("observations") or []
+                all_observations = trace.get("observations", [])
                 gens = [obs for obs in all_observations if obs.get("type") == "GENERATION"]
                 if gens:
                     gens.sort(key=lambda x: x.get("start_time", ""))
@@ -233,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
         The final generation dictionary, or None if not found
     """
     # Get all observations from the trace
-    all_observations = trace.get("observations") or []
+    all_observations = trace.get("observations", [])
 
     # Find a span with the given name that has generation children
     parent_span = None
diff --git a/eval_protocol/proxy/proxy_core/langfuse.py b/eval_protocol/proxy/proxy_core/langfuse.py
index ec0e9475..d91da681 100644
--- a/eval_protocol/proxy/proxy_core/langfuse.py
+++ b/eval_protocol/proxy/proxy_core/langfuse.py
@@ -50,7 +50,6 @@ def _serialize_trace_to_dict(trace_full: Any) -> Dict[str, Any]:
                 "input": getattr(obs, "input", None),
                 "output": getattr(obs, "output", None),
                 "parent_observation_id": getattr(obs, "parent_observation_id", None),
-                "metadata": getattr(obs, "metadata", None),
             }
             for obs in getattr(trace_full, "observations", [])
         ]
diff --git a/eval_protocol/reward_function.py b/eval_protocol/reward_function.py
index 743d3c7c..6bd11974 100644
--- a/eval_protocol/reward_function.py
+++ b/eval_protocol/reward_function.py
@@ -12,6 +12,7 @@
 from .models import EvaluateResult, MetricResult
 from .typed_interface import reward_function
 
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 T = TypeVar("T", bound=Callable[..., EvaluateResult])
diff --git a/tests/remote_server/remote_server.py b/tests/remote_server/remote_server.py
index c7655671..4ac4fd6c 100644
--- a/tests/remote_server/remote_server.py
+++ b/tests/remote_server/remote_server.py
@@ -13,9 +13,6 @@
 
 app = FastAPI()
 
-# Configure logging for the remote server (required for INFO-level logs to be emitted)
-logging.basicConfig(level=logging.INFO, format="%(name)s - %(levelname)s - %(message)s")
-
 # Attach Fireworks tracing handler to root logger
 fireworks_handler = FireworksTracingHttpHandler()
 logging.getLogger().addHandler(fireworks_handler)
diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py
index b196cb34..43da29ed 100644
--- a/tests/remote_server/test_remote_fireworks.py
+++ b/tests/remote_server/test_remote_fireworks.py
@@ -1,6 +1,5 @@
 # AUTO SERVER STARTUP: Server is automatically started and stopped by the test
 
-import logging
 import subprocess
 import socket
 import time
@@ -20,23 +19,10 @@
 ROLLOUT_IDS = set()
 
 
-class StatusLogCaptureHandler(logging.Handler):
-    """Custom handler to capture status log messages."""
-
-    def __init__(self):
-        super().__init__()
-        self.status_100_messages: List[str] = []
-
-    def emit(self, record):
-        msg = record.getMessage()  # Use getMessage(), not .message attribute
-        if "Found Fireworks log" in msg and "with status code 100" in msg:
-            self.status_100_messages.append(msg)
-
-
 @pytest.fixture(autouse=True)
 def check_rollout_coverage(monkeypatch):
     """
-    Ensure we attempted to fetch remote traces for each rollout and received status logs.
+    Ensure we attempted to fetch remote traces for each rollout.
 
     This wraps the built-in default_fireworks_output_data_loader (without making it configurable)
     and tracks rollout_ids passed through its DataLoaderConfig.
@@ -51,32 +37,9 @@ def wrapped_loader(config: DataLoaderConfig) -> DynamicDataLoader:
         return original_loader(config)
 
     monkeypatch.setattr(remote_rollout_processor_module, "default_fireworks_output_data_loader", wrapped_loader)
-
-    # Add custom handler to capture status logs
-    status_handler = StatusLogCaptureHandler()
-    status_handler.setLevel(logging.INFO)
-    rrp_logger = logging.getLogger("eval_protocol.pytest.remote_rollout_processor")
-    rrp_logger.addHandler(status_handler)
-    # Ensure the logger level allows INFO messages through
-    original_level = rrp_logger.level
-    rrp_logger.setLevel(logging.INFO)
-
     yield
-
-    # Cleanup handler and restore level
-    rrp_logger.removeHandler(status_handler)
-    rrp_logger.setLevel(original_level)
-
-    # After test completes, verify we saw status logs for all 3 rollouts
     assert len(ROLLOUT_IDS) == 3, f"Expected to see 3 rollout_ids, but only saw {ROLLOUT_IDS}"
 
-    # Check that we received "Found Fireworks log ... with status code 100" for each rollout
-    assert len(status_handler.status_100_messages) == 3, (
-        f"Expected 3 'Found Fireworks log ... with status code 100' messages, but found {len(status_handler.status_100_messages)}. "
-        f"This means the status logs from the remote server were not received. "
-        f"Messages captured: {status_handler.status_100_messages}"
-    )
-
 
 def find_available_port() -> int:
     """Find an available port on localhost"""
@@ -178,8 +141,4 @@ async def test_remote_rollout_and_fetch_fireworks(row: EvaluationRow) -> Evaluat
     assert "data_loader_type" in row.input_metadata.dataset_info
     assert "data_loader_num_rows" in row.input_metadata.dataset_info
 
-    assert row.execution_metadata.finish_reason == "stop", (
-        f"Expected finish_reason='stop', got {row.execution_metadata.finish_reason}"
-    )
-
     return row