Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/google/adk/evaluation/evaluation_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ async def _generate_inferences_from_root_agent(
app_name = (
initial_session.app_name if initial_session else "EvaluationGenerator"
)
user_id = initial_session.user_id if initial_session else "test_user_id"
user_id = initial_session.user_id if initial_session else "user"
session_id = session_id if session_id else str(uuid.uuid4())

_ = await session_service.create_session(
Expand Down
2 changes: 1 addition & 1 deletion src/google/adk/evaluation/local_eval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ async def _evaluate_single_inference_result(
user_id = (
eval_case.session_input.user_id
if eval_case.session_input and eval_case.session_input.user_id
else 'test_user_id'
else 'user'
)

if eval_case.conversation_scenario is None and len(
Expand Down
38 changes: 28 additions & 10 deletions src/google/adk/evaluation/local_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,19 +247,37 @@ def list_eval_sets(self, app_name: str) -> list[str]:
Raises:
NotFoundError: If the eval directory for the app is not found.
"""
eval_set_file_path = os.path.join(self._agents_dir, app_name)
app_dir = os.path.join(self._agents_dir, app_name)
if not os.path.isdir(app_dir):
raise NotFoundError(
f"Eval directory for app `{app_name}` not found."
)
eval_sets = []
try:
for file in os.listdir(eval_set_file_path):
for file in os.listdir(app_dir):
if file.endswith(_EVAL_SET_FILE_EXTENSION):
eval_sets.append(
file.removesuffix(_EVAL_SET_FILE_EXTENSION)
)

# Warn about .evalset.json files in subdirectories that won't be
# discovered. This helps users who organize eval sets into folders
# understand why they don't appear in the UI.
for dirpath, _, filenames in os.walk(app_dir):
if dirpath == app_dir:
continue
for file in filenames:
if file.endswith(_EVAL_SET_FILE_EXTENSION):
eval_sets.append(
os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION)
rel_path = os.path.relpath(
os.path.join(dirpath, file), app_dir
)
return sorted(eval_sets)
except FileNotFoundError as e:
raise NotFoundError(
f"Eval directory for app `{app_name}` not found."
) from e
logger.warning(
"Eval set file '%s' found in subdirectory and will be"
" ignored. Move it to '%s' to make it discoverable.",
rel_path,
app_dir,
)

return sorted(eval_sets)

@override
def get_eval_case(
Expand Down
44 changes: 32 additions & 12 deletions tests/unittests/evaluation/test_local_eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from __future__ import annotations

import json
import logging
import os
import uuid

Expand Down Expand Up @@ -409,27 +410,46 @@ def test_local_eval_sets_manager_create_eval_set_already_exists(
local_eval_sets_manager.create_eval_set(app_name, eval_set_id)

def test_local_eval_sets_manager_list_eval_sets_success(
self, local_eval_sets_manager, mocker
self, tmp_path
):
app_name = "test_app"
mock_listdir_return = [
"eval_set_1.evalset.json",
"eval_set_2.evalset.json",
"not_an_eval_set.txt",
]
mocker.patch("os.listdir", return_value=mock_listdir_return)
mocker.patch("os.path.join", return_value="dummy_path")
mocker.patch("os.path.basename", side_effect=lambda x: x)
app_dir = tmp_path / app_name
app_dir.mkdir()
(app_dir / "eval_set_1.evalset.json").write_text("{}")
(app_dir / "eval_set_2.evalset.json").write_text("{}")
(app_dir / "not_an_eval_set.txt").write_text("")

local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
eval_sets = local_eval_sets_manager.list_eval_sets(app_name)

assert eval_sets == ["eval_set_1", "eval_set_2"]

def test_local_eval_sets_manager_list_eval_sets_not_found(
self, local_eval_sets_manager, mocker
def test_local_eval_sets_manager_list_eval_sets_subdirectories(
self, tmp_path, caplog
):
"""Eval sets in subdirectories should be ignored with a warning."""
app_name = "test_app"
mocker.patch("os.listdir", side_effect=FileNotFoundError)
app_dir = tmp_path / app_name
app_dir.mkdir()
(app_dir / "top_level.evalset.json").write_text("{}")
sub_dir = app_dir / "eval_sets"
sub_dir.mkdir()
(sub_dir / "nested.evalset.json").write_text("{}")

local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
with caplog.at_level(logging.WARNING):
eval_sets = local_eval_sets_manager.list_eval_sets(app_name)

# Only root-level eval sets are returned.
assert eval_sets == ["top_level"]
# A warning is logged for the subdirectory file.
assert "nested.evalset.json" in caplog.text
assert "ignored" in caplog.text

def test_local_eval_sets_manager_list_eval_sets_not_found(
self, local_eval_sets_manager
):
app_name = "nonexistent_app"

with pytest.raises(NotFoundError):
local_eval_sets_manager.list_eval_sets(app_name)
Expand Down