diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
index f8fb6795aa..d51abed10a 100644
--- a/src/google/adk/evaluation/evaluation_generator.py
+++ b/src/google/adk/evaluation/evaluation_generator.py
@@ -209,7 +209,7 @@ async def _generate_inferences_from_root_agent(
     app_name = (
         initial_session.app_name if initial_session else "EvaluationGenerator"
     )
-    user_id = initial_session.user_id if initial_session else "test_user_id"
+    user_id = initial_session.user_id if initial_session else "user"
     session_id = session_id if session_id else str(uuid.uuid4())
 
     _ = await session_service.create_session(
diff --git a/src/google/adk/evaluation/local_eval_service.py b/src/google/adk/evaluation/local_eval_service.py
index 2426204ca0..7413bb8bf1 100644
--- a/src/google/adk/evaluation/local_eval_service.py
+++ b/src/google/adk/evaluation/local_eval_service.py
@@ -265,7 +265,7 @@ async def _evaluate_single_inference_result(
     user_id = (
         eval_case.session_input.user_id
         if eval_case.session_input and eval_case.session_input.user_id
-        else 'test_user_id'
+        else 'user'
     )
 
     if eval_case.conversation_scenario is None and len(
diff --git a/src/google/adk/evaluation/local_eval_sets_manager.py b/src/google/adk/evaluation/local_eval_sets_manager.py
index 8d2290b911..77e457c408 100644
--- a/src/google/adk/evaluation/local_eval_sets_manager.py
+++ b/src/google/adk/evaluation/local_eval_sets_manager.py
@@ -247,19 +247,37 @@ def list_eval_sets(self, app_name: str) -> list[str]:
     Raises:
       NotFoundError: If the eval directory for the app is not found.
     """
-    eval_set_file_path = os.path.join(self._agents_dir, app_name)
+    app_dir = os.path.join(self._agents_dir, app_name)
+    if not os.path.isdir(app_dir):
+      raise NotFoundError(
+          f"Eval directory for app `{app_name}` not found."
+      )
     eval_sets = []
-    try:
-      for file in os.listdir(eval_set_file_path):
+    for file in os.listdir(app_dir):
+      if file.endswith(_EVAL_SET_FILE_EXTENSION):
+        eval_sets.append(
+            file.removesuffix(_EVAL_SET_FILE_EXTENSION)
+        )
+
+    # Warn about .evalset.json files in subdirectories that won't be
+    # discovered. This helps users who organize eval sets into folders
+    # understand why they don't appear in the UI.
+    for dirpath, _, filenames in os.walk(app_dir):
+      if dirpath == app_dir:
+        continue
+      for file in filenames:
         if file.endswith(_EVAL_SET_FILE_EXTENSION):
-          eval_sets.append(
-              os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION)
+          rel_path = os.path.relpath(
+              os.path.join(dirpath, file), app_dir
           )
-      return sorted(eval_sets)
-    except FileNotFoundError as e:
-      raise NotFoundError(
-          f"Eval directory for app `{app_name}` not found."
-      ) from e
+          logger.warning(
+              "Eval set file '%s' found in subdirectory and will be"
+              " ignored. Move it to '%s' to make it discoverable.",
+              rel_path,
+              app_dir,
+          )
+
+    return sorted(eval_sets)
 
   @override
   def get_eval_case(
diff --git a/tests/unittests/evaluation/test_local_eval_sets_manager.py b/tests/unittests/evaluation/test_local_eval_sets_manager.py
index 3450fb9338..925eb34001 100644
--- a/tests/unittests/evaluation/test_local_eval_sets_manager.py
+++ b/tests/unittests/evaluation/test_local_eval_sets_manager.py
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import os
 import uuid
 
@@ -409,27 +410,46 @@ def test_local_eval_sets_manager_create_eval_set_already_exists(
       local_eval_sets_manager.create_eval_set(app_name, eval_set_id)
 
   def test_local_eval_sets_manager_list_eval_sets_success(
-      self, local_eval_sets_manager, mocker
+      self, tmp_path
   ):
     app_name = "test_app"
-    mock_listdir_return = [
-        "eval_set_1.evalset.json",
-        "eval_set_2.evalset.json",
-        "not_an_eval_set.txt",
-    ]
-    mocker.patch("os.listdir", return_value=mock_listdir_return)
-    mocker.patch("os.path.join", return_value="dummy_path")
-    mocker.patch("os.path.basename", side_effect=lambda x: x)
+    app_dir = tmp_path / app_name
+    app_dir.mkdir()
+    (app_dir / "eval_set_1.evalset.json").write_text("{}")
+    (app_dir / "eval_set_2.evalset.json").write_text("{}")
+    (app_dir / "not_an_eval_set.txt").write_text("")
+    local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
 
     eval_sets = local_eval_sets_manager.list_eval_sets(app_name)
 
     assert eval_sets == ["eval_set_1", "eval_set_2"]
 
-  def test_local_eval_sets_manager_list_eval_sets_not_found(
-      self, local_eval_sets_manager, mocker
+  def test_local_eval_sets_manager_list_eval_sets_subdirectories(
+      self, tmp_path, caplog
   ):
+    """Eval sets in subdirectories should be ignored with a warning."""
     app_name = "test_app"
-    mocker.patch("os.listdir", side_effect=FileNotFoundError)
+    app_dir = tmp_path / app_name
+    app_dir.mkdir()
+    (app_dir / "top_level.evalset.json").write_text("{}")
+    sub_dir = app_dir / "eval_sets"
+    sub_dir.mkdir()
+    (sub_dir / "nested.evalset.json").write_text("{}")
+
+    local_eval_sets_manager = LocalEvalSetsManager(agents_dir=str(tmp_path))
+    with caplog.at_level(logging.WARNING):
+      eval_sets = local_eval_sets_manager.list_eval_sets(app_name)
+
+    # Only root-level eval sets are returned.
+    assert eval_sets == ["top_level"]
+    # A warning is logged for the subdirectory file.
+    assert "nested.evalset.json" in caplog.text
+    assert "ignored" in caplog.text
+
+  def test_local_eval_sets_manager_list_eval_sets_not_found(
+      self, local_eval_sets_manager
+  ):
+    app_name = "nonexistent_app"
     with pytest.raises(NotFoundError):
       local_eval_sets_manager.list_eval_sets(app_name)