Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion analytics/trace_comparison_rules/src/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import os
import re
from typing import Optional

import litellm
Expand All @@ -17,6 +18,14 @@
# Drop unsupported params (e.g. 'seed') silently for providers that don't accept them.
litellm.drop_params = True

# Claude 4.x deprecated the `temperature` parameter entirely.
_CLAUDE4_RE = re.compile(r"claude-(opus|sonnet|haiku)-4[-_]", re.IGNORECASE)


def _model_supports_temperature(model: str) -> bool:
return not _CLAUDE4_RE.search(model)


# Configuration constants
DEFAULT_MODEL = "Azure/gpt-4.1"

Expand Down Expand Up @@ -93,10 +102,12 @@ def analyze(
{"role": "system", "content": system_message},
{"role": "user", "content": prompt},
],
"temperature": self.temperature,
"max_tokens": (max_tokens if max_tokens is not None else self.max_tokens),
}

if _model_supports_temperature(self.model):
completion_args["temperature"] = self.temperature

Comment on lines +108 to +110

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Consider adding tests for the temperature suppression logic.

This is a critical bug fix with no automated test coverage (per PR objectives). Without tests, the fix could regress if:

  1. The regex pattern is modified incorrectly
  2. The conditional logic is refactored
  3. LiteLLM behavior changes
🧪 Minimal test skeleton

Create analytics/trace_comparison_rules/src/test_llm_client.py:

import pytest
from llm_client import _model_supports_temperature

class TestTemperatureSupport:
    """Test temperature parameter suppression for Claude 4.x models."""
    
    `@pytest.mark.parametrize`("model,expected", [
        # Claude 4.x models - should NOT support temperature
        ("aws/claude-opus-4-7", False),
        ("aws/claude-sonnet-4-20250131", False),
        ("claude-haiku-4-v1", False),
        ("anthropic.claude-opus-4-6", False),
        
        # Other models - should support temperature  
        ("Azure/gpt-4.1", True),
        ("gpt-4o-2024-08-06", True),
        ("claude-opus-3-5", True),  # Claude 3.x
        ("claude-3-opus", True),
        
        # Case insensitive
        ("AWS/CLAUDE-OPUS-4-7", False),
    ])
    def test_model_supports_temperature(self, model, expected):
        assert _model_supports_temperature(model) == expected

This test would have caught the original bug and prevents regression.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@analytics/trace_comparison_rules/src/llm_client.py` around lines 108 - 110,
Add automated tests covering the temperature suppression logic in
analytics/trace_comparison_rules/src/llm_client.py by creating a pytest file
(e.g., analytics/trace_comparison_rules/src/test_llm_client.py) that exercises
the _model_supports_temperature(model) helper; include parametrized cases for
Claude 4.x variants (aws/claude-opus-4-7, aws/claude-sonnet-4-20250131,
claude-haiku-4-v1, anthropic.claude-opus-4-6) which must return False, a set of
non-4.x models that must return True (Azure/gpt-4.1, gpt-4o-2024-08-06,
claude-opus-3-5, claude-3-opus), and a case-insensitivity check
(AWS/CLAUDE-OPUS-4-7 -> False); run pytest to ensure the
_model_supports_temperature matching regex and conditional logic do not regress.

# Add seed if provided (for reproducibility)
seed_value = seed if seed is not None else self.seed
if seed_value is not None:
Expand Down
Loading