From e9215a20ac4ae8935664e7b27ac5191679b0ebd3 Mon Sep 17 00:00:00 2001 From: brucearctor <5032356+brucearctor@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:22:31 -0700 Subject: [PATCH] fix: align eval user_id default and warn on subdirectory evalset files Fixes two server-side issues from google/adk-python#5423: 1. Align default user_id fallback from 'test_user_id' to 'user' to match the adk-web frontend default, preventing session 404 errors when eval sets don't specify a user_id. 2. Log a warning when .evalset.json files are found in subdirectories of the agent directory. These files are not discovered by the UI and should be moved to the root agent directory. This addresses the 'fragile and undocumented' evalset discovery reported in the issue. Note on port change (Issue 3): adk web now serves on port 8000 instead of the previous 8501. This is not a bug but should be called out in migration/upgrade notes. --- .../adk/evaluation/evaluation_generator.py | 2 +- .../adk/evaluation/local_eval_service.py | 2 +- .../adk/evaluation/local_eval_sets_manager.py | 38 +++++++++++----- .../test_local_eval_sets_manager.py | 44 ++++++++++++++----- 4 files changed, 62 insertions(+), 24 deletions(-) diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py index f8fb6795aa..d51abed10a 100644 --- a/src/google/adk/evaluation/evaluation_generator.py +++ b/src/google/adk/evaluation/evaluation_generator.py @@ -209,7 +209,7 @@ async def _generate_inferences_from_root_agent( app_name = ( initial_session.app_name if initial_session else "EvaluationGenerator" ) - user_id = initial_session.user_id if initial_session else "test_user_id" + user_id = initial_session.user_id if initial_session else "user" session_id = session_id if session_id else str(uuid.uuid4()) _ = await session_service.create_session( diff --git a/src/google/adk/evaluation/local_eval_service.py b/src/google/adk/evaluation/local_eval_service.py index 2426204ca0..7413bb8bf1 100644 --- a/src/google/adk/evaluation/local_eval_service.py +++ b/src/google/adk/evaluation/local_eval_service.py @@ -265,7 +265,7 @@ async def _evaluate_single_inference_result( user_id = ( eval_case.session_input.user_id if eval_case.session_input and eval_case.session_input.user_id - else 'test_user_id' + else 'user' ) if eval_case.conversation_scenario is None and len( diff --git a/src/google/adk/evaluation/local_eval_sets_manager.py b/src/google/adk/evaluation/local_eval_sets_manager.py index 8d2290b911..77e457c408 100644 --- a/src/google/adk/evaluation/local_eval_sets_manager.py +++ b/src/google/adk/evaluation/local_eval_sets_manager.py @@ -247,19 +247,37 @@ def list_eval_sets(self, app_name: str) -> list[str]: Raises: NotFoundError: If the eval directory for the app is not found. """ - eval_set_file_path = os.path.join(self._agents_dir, app_name) + app_dir = os.path.join(self._agents_dir, app_name) + if not os.path.isdir(app_dir): + raise NotFoundError( + f"Eval directory for app `{app_name}` not found." + ) eval_sets = [] - try: - for file in os.listdir(eval_set_file_path): + for file in os.listdir(app_dir): + if file.endswith(_EVAL_SET_FILE_EXTENSION): + eval_sets.append( + file.removesuffix(_EVAL_SET_FILE_EXTENSION) + ) + + # Warn about .evalset.json files in subdirectories that won't be + # discovered. This helps users who organize eval sets into folders + # understand why they don't appear in the UI. + for dirpath, _, filenames in os.walk(app_dir): + if dirpath == app_dir: + continue + for file in filenames: if file.endswith(_EVAL_SET_FILE_EXTENSION): - eval_sets.append( - os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION) + rel_path = os.path.relpath( + os.path.join(dirpath, file), app_dir ) - return sorted(eval_sets) - except FileNotFoundError as e: - raise NotFoundError( - f"Eval directory for app `{app_name}` not found." - ) from e + logger.warning( + "Eval set file '%s' found in subdirectory and will be" + " ignored. Move it to '%s' to make it discoverable.", + rel_path, + app_dir, + ) + + return sorted(eval_sets) @override def get_eval_case( diff --git a/tests/unittests/evaluation/test_local_eval_sets_manager.py b/tests/unittests/evaluation/test_local_eval_sets_manager.py index 3450fb9338..925eb34001 100644 --- a/tests/unittests/evaluation/test_local_eval_sets_manager.py +++ b/tests/unittests/evaluation/test_local_eval_sets_manager.py @@ -15,6 +15,7 @@ from __future__ import annotations import json +import logging import os import uuid @@ -409,27 +410,46 @@ def test_local_eval_sets_manager_create_eval_set_already_exists( local_eval_sets_manager.create_eval_set(app_name, eval_set_id) def test_local_eval_sets_manager_list_eval_sets_success( - self, local_eval_sets_manager, mocker + self, tmp_path ): app_name = "test_app" - mock_listdir_return = [ - "eval_set_1.evalset.json", - "eval_set_2.evalset.json", - "not_an_eval_set.txt", - ] - mocker.patch("os.listdir", return_value=mock_listdir_return) - mocker.patch("os.path.join", return_value="dummy_path") - mocker.patch("os.path.basename", side_effect=lambda x: x) + app_dir = tmp_path / app_name + app_dir.mkdir() + (app_dir / "eval_set_1.evalset.json").write_text("{}") + (app_dir / "eval_set_2.evalset.json").write_text("{}") + (app_dir / "not_an_eval_set.txt").write_text("") + local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path)) eval_sets = local_eval_sets_manager.list_eval_sets(app_name) assert eval_sets == ["eval_set_1", "eval_set_2"] - def test_local_eval_sets_manager_list_eval_sets_not_found( - self, local_eval_sets_manager, mocker + def test_local_eval_sets_manager_list_eval_sets_subdirectories( + self, tmp_path, caplog ): + """Eval sets in subdirectories should be ignored with a warning.""" app_name = "test_app" - mocker.patch("os.listdir", side_effect=FileNotFoundError) + app_dir = tmp_path / app_name + app_dir.mkdir() + (app_dir / "top_level.evalset.json").write_text("{}") + sub_dir = app_dir / "eval_sets" + sub_dir.mkdir() + (sub_dir / "nested.evalset.json").write_text("{}") + + local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path)) + with caplog.at_level(logging.WARNING): + eval_sets = local_eval_sets_manager.list_eval_sets(app_name) + + # Only root-level eval sets are returned. + assert eval_sets == ["top_level"] + # A warning is logged for the subdirectory file. + assert "nested.evalset.json" in caplog.text + assert "ignored" in caplog.text + + def test_local_eval_sets_manager_list_eval_sets_not_found( + self, local_eval_sets_manager + ): + app_name = "nonexistent_app" with pytest.raises(NotFoundError): local_eval_sets_manager.list_eval_sets(app_name)