Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion altk_evolve/llm/guidelines/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]:
Raises:
EvolveException: If the LLM call fails after 3 attempts.
"""
is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/")
supported_params = get_supported_openai_params(
model=llm_settings.guidelines_model,
custom_llm_provider=llm_settings.custom_llm_provider,
Expand All @@ -150,7 +151,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]:
model=llm_settings.guidelines_model,
custom_llm_provider=llm_settings.custom_llm_provider,
)
constrained_decoding_supported = supports_response_format and response_schema_enabled
constrained_decoding_supported = not is_groq and supports_response_format and response_schema_enabled

# Deduplicate task descriptions
task_descriptions = list(
Expand Down
3 changes: 2 additions & 1 deletion altk_evolve/llm/guidelines/guidelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult]
Returns a list with one GuidelineGenerationResult per subtask (or one for the full
trajectory when segmentation is disabled or produces fewer than 2 subtasks).
"""
is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/")
supported_params = get_supported_openai_params(
model=llm_settings.guidelines_model,
custom_llm_provider=llm_settings.custom_llm_provider,
Expand All @@ -180,7 +181,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult]
model=llm_settings.guidelines_model,
custom_llm_provider=llm_settings.custom_llm_provider,
)
constrained_decoding_supported = bool(supports_response_format and response_schema_enabled)
constrained_decoding_supported = bool(not is_groq and supports_response_format and response_schema_enabled)

trajectory_data = parse_openai_agents_trajectory(messages)
task_instruction = trajectory_data["task_instruction"]
Expand Down
30 changes: 28 additions & 2 deletions tests/unit/test_combine_guidelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@

import pytest

from altk_evolve.llm.guidelines import clustering as clustering_module
from altk_evolve.llm.guidelines.clustering import combine_cluster
from altk_evolve.schema.core import RecordedEntity
from altk_evolve.schema.exceptions import EvolveException
from altk_evolve.schema.guidelines import Guideline, ConsolidationResult
from altk_evolve.schema.guidelines import ConsolidationResult, Guideline


def _make_entity(entity_id: str, content: str, task_description: str = "do a task") -> RecordedEntity:
Expand Down Expand Up @@ -110,7 +111,9 @@ def test_combine_cluster_raises_after_max_retries(self, _mock_params, _mock_sche
@patch("altk_evolve.llm.guidelines.clustering.completion")
@patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True)
@patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"])
def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion):
def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion, monkeypatch):
monkeypatch.setattr(clustering_module.llm_settings, "guidelines_model", "gpt-4o")
monkeypatch.setattr(clustering_module.llm_settings, "custom_llm_provider", "openai")
mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1])

entities = [_make_entity("1", "Guideline A"), _make_entity("2", "Guideline B")]
Expand All @@ -121,6 +124,29 @@ def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema
_, kwargs = mock_completion.call_args
assert "response_format" in kwargs

@patch("altk_evolve.llm.guidelines.clustering.completion")
@patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True)
@patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"])
def test_combine_cluster_uses_json_prompt_for_groq_even_when_schema_is_reported(
    self,
    _mock_params,
    _mock_schema,
    mock_completion,
    monkeypatch,
):
    """Groq settings keep ``response_format`` out of the completion call.

    Both capability probes are patched to report ``response_format`` support,
    so the Groq model/provider settings are the only thing that can suppress
    it. The first message must then carry the "Output Format (JSON)" text.
    """
    settings = clustering_module.llm_settings
    monkeypatch.setattr(settings, "guidelines_model", "groq/openai/gpt-oss-120b")
    monkeypatch.setattr(settings, "custom_llm_provider", "groq")
    mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1])

    cluster = [
        _make_entity("1", "Guideline A"),
        _make_entity("2", "Guideline B"),
    ]
    combined = combine_cluster(cluster)
    assert len(combined) == 1

    # call_args is an (args, kwargs) pair; only the keyword arguments matter here.
    call_kwargs = mock_completion.call_args[1]
    assert "response_format" not in call_kwargs
    assert call_kwargs["custom_llm_provider"] == "groq"
    first_message = call_kwargs["messages"][0]
    assert "Output Format (JSON)" in first_message["content"]


# ---------------------------------------------------------------------------
# consolidate_guidelines tests
Expand Down
48 changes: 47 additions & 1 deletion tests/unit/test_guidelines.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
"""Tests for guideline generation utilities."""

import json
from unittest.mock import MagicMock, patch

import pytest

from altk_evolve.llm.guidelines.guidelines import parse_openai_agents_trajectory
from altk_evolve.llm.guidelines import guidelines as guidelines_module
from altk_evolve.llm.guidelines.guidelines import generate_guidelines, parse_openai_agents_trajectory


def _mock_completion_response(payload: dict) -> MagicMock:
    """Build a stand-in completion response whose only choice carries *payload* as JSON.

    Args:
        payload: Object serialised with ``json.dumps`` into the message content.

    Returns:
        A ``MagicMock`` whose ``choices`` list holds exactly one element; the
        JSON text is available at ``choices[0].message.content``.
    """
    choice = MagicMock()
    choice.message.content = json.dumps(payload)

    response = MagicMock()
    response.choices = [choice]
    return response


@pytest.mark.unit
Expand All @@ -23,3 +34,38 @@ def test_fallback_when_no_user_message(self):
def test_fallback_when_empty_messages(self):
result = parse_openai_agents_trajectory([])
assert result["task_instruction"] == "Task description unknown"

@patch("altk_evolve.llm.guidelines.guidelines.completion")
@patch("altk_evolve.llm.guidelines.guidelines.supports_response_schema", return_value=True)
@patch("altk_evolve.llm.guidelines.guidelines.get_supported_openai_params", return_value=["response_format"])
def test_generate_guidelines_uses_json_prompt_for_groq_even_when_schema_is_reported(
    self,
    _mock_params,
    _mock_schema,
    mock_completion,
    monkeypatch,
):
    """Groq settings keep ``response_format`` out of the completion call.

    Both capability probes are patched to report ``response_format`` support,
    so the Groq model/provider settings are the only thing that can suppress
    it. The first message must then carry the "Output Format (JSON)" text.
    """
    settings = guidelines_module.llm_settings
    monkeypatch.setattr(settings, "guidelines_model", "groq/openai/gpt-oss-120b")
    monkeypatch.setattr(settings, "custom_llm_provider", "groq")
    # Segmentation is switched off for this test.
    monkeypatch.setattr(guidelines_module.evolve_config, "segmentation_enabled", False)

    guideline = {
        "content": "Validate files before parsing",
        "rationale": "Avoids parser crashes on empty inputs",
        "category": "strategy",
        "trigger": "Before reading user-provided CSV files",
        "implementation_steps": ["Check file size", "Return an empty DataFrame for empty files"],
    }
    mock_completion.return_value = _mock_completion_response({"guidelines": [guideline]})

    trajectory = [{"role": "user", "content": "Fix CSV parsing"}]
    results = generate_guidelines(trajectory)

    first_guideline = results[0].guidelines[0]
    assert first_guideline.content == "Validate files before parsing"

    # call_args is an (args, kwargs) pair; only the keyword arguments matter here.
    call_kwargs = mock_completion.call_args[1]
    assert "response_format" not in call_kwargs
    assert call_kwargs["custom_llm_provider"] == "groq"
    first_message = call_kwargs["messages"][0]
    assert "Output Format (JSON)" in first_message["content"]
Loading