Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions src/agentops/services/browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# ---------------------------------------------------------------------------

_DEFAULT_AGENTOPS_DIR = ".agentops"
_LATEST_RUN_DIR_NAME = "latest"
_RESULTS_FILENAME = "results.json"


def _resolve_workspace(directory: Path) -> Path:
Expand Down Expand Up @@ -187,6 +189,36 @@ class RunListResult:
results_dir: Path


def _has_results_file(run_dir: Path) -> bool:
    """Report whether ``run_dir`` holds a persisted results file.

    This is purely an existence check on ``run_dir / _RESULTS_FILENAME``; the
    file's contents are neither read nor validated here.
    """
    results_path = run_dir / _RESULTS_FILENAME
    return results_path.exists()


def _history_run_dirs(results_dir: Path) -> list[Path]:
    """Collect the history (non-``latest``) run directories in ``results_dir``.

    An entry qualifies when it is a directory, is not named
    ``_LATEST_RUN_DIR_NAME``, and contains a results file. Qualifying entries
    are returned in descending path order.
    """
    history: list[Path] = []
    for entry in sorted(results_dir.iterdir(), reverse=True):
        if not entry.is_dir():
            continue
        if entry.name == _LATEST_RUN_DIR_NAME:
            continue
        if _has_results_file(entry):
            history.append(entry)
    return history


def _listable_run_dirs(results_dir: Path) -> list[Path]:
    """Return run directories that should appear in ``agentops run list``.

    ``latest`` mirrors the newest run when timestamped history exists, so it is
    listed only when it is the sole run directory with persisted results.

    Args:
        results_dir: Directory expected to hold one sub-directory per run.

    Returns:
        The history run directories when at least one exists; otherwise a
        one-element list holding the ``latest`` directory if it has a results
        file; otherwise an empty list. A ``results_dir`` that is not an
        existing directory also yields an empty list.
    """
    # ``Path.iterdir`` (used by ``_history_run_dirs``) raises when the path is
    # not a directory. A workspace without a results directory simply has no
    # runs to list, so answer that here instead of relying on every caller to
    # guard the call.
    if not results_dir.is_dir():
        return []

    history_run_dirs = _history_run_dirs(results_dir)
    if history_run_dirs:
        return history_run_dirs

    latest_dir = results_dir / _LATEST_RUN_DIR_NAME
    return [latest_dir] if _has_results_file(latest_dir) else []


def list_runs(directory: Path = Path(".")) -> RunListResult:
"""List all past evaluation runs in the workspace."""
workspace = _resolve_workspace(directory)
Expand All @@ -196,16 +228,8 @@ def list_runs(directory: Path = Path(".")) -> RunListResult:
return RunListResult(runs=[], results_dir=results_dir)

summaries: list[RunSummary] = []
for run_dir in sorted(results_dir.iterdir(), reverse=True):
if not run_dir.is_dir():
continue
if run_dir.name == "latest":
continue # Skip the symlink/copy

results_file = run_dir / "results.json"
if not results_file.exists():
continue

for run_dir in _listable_run_dirs(results_dir):
results_file = run_dir / _RESULTS_FILENAME
try:
data = json.loads(results_file.read_text(encoding="utf-8"))
result = RunResult.model_validate(data)
Expand Down Expand Up @@ -270,17 +294,17 @@ def show_run(run_id: str, directory: Path = Path(".")) -> RunDetail:

run_dir = (results_dir / run_id).resolve()
if not run_dir.is_dir():
available = [
d.name
for d in sorted(results_dir.iterdir(), reverse=True)
if d.is_dir() and d.name != "latest" and (d / "results.json").exists()
]
available = (
[listable_dir.name for listable_dir in _listable_run_dirs(results_dir)]
if results_dir.is_dir()
else []
)
hint = ", ".join(available[:5]) if available else "(none)"
raise FileNotFoundError(
f"Run '{run_id}' not found in {results_dir}. Recent runs: {hint}"
)

results_file = run_dir / "results.json"
results_file = run_dir / _RESULTS_FILENAME
if not results_file.exists():
raise FileNotFoundError(f"No results.json in {run_dir}")

Expand Down
85 changes: 84 additions & 1 deletion tests/unit/test_browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ def _create_workspace(tmp_path: Path) -> Path:
ws.mkdir()
(ws / "bundles").mkdir()
(ws / "results").mkdir()
return ws


def _create_workspace_without_results(tmp_path: Path) -> Path:
    """Build a ``.agentops`` workspace with ``bundles`` but no ``results`` directory."""
    workspace = tmp_path / ".agentops"
    workspace.mkdir()
    bundles_dir = workspace / "bundles"
    bundles_dir.mkdir()
    return workspace


Expand Down Expand Up @@ -150,6 +158,11 @@ def test_empty(self, tmp_path: Path) -> None:
result = list_runs(directory=tmp_path)
assert result.runs == []

def test_missing_results_dir_returns_empty(self, tmp_path: Path) -> None:
    """A workspace lacking a results directory lists no runs."""
    _create_workspace_without_results(tmp_path)
    listing = list_runs(directory=tmp_path)
    assert listing.runs == []

def test_lists_runs(self, tmp_path: Path) -> None:
ws = _create_workspace(tmp_path)
_write_run(ws, "2026-04-07_100000", passed=True)
Expand All @@ -162,12 +175,55 @@ def test_lists_runs(self, tmp_path: Path) -> None:
assert result.runs[1].run_id == "2026-04-07_100000"
assert result.runs[1].overall_passed is True

def test_skips_latest_dir(self, tmp_path: Path) -> None:
def test_skips_latest_when_history_runs_exist(self, tmp_path: Path) -> None:
    """``latest`` is left out of the listing once timestamped runs exist."""
    workspace = _create_workspace(tmp_path)
    for run_name in ("2026-04-07_100000", "2026-04-07_110000", "latest"):
        _write_run(workspace, run_name)

    listing = list_runs(directory=tmp_path)
    listed_ids = [summary.run_id for summary in listing.runs]

    # Newest timestamped run first; no "latest" entry.
    assert listed_ids == ["2026-04-07_110000", "2026-04-07_100000"]

def test_skips_empty_latest_when_no_history_runs(self, tmp_path: Path) -> None:
    """A ``latest`` directory without a results file is not listed."""
    workspace = _create_workspace(tmp_path)
    empty_latest = workspace / "results" / "latest"
    empty_latest.mkdir()

    listing = list_runs(directory=tmp_path)

    assert listing.runs == []

def test_lists_malformed_history_run_and_skips_latest_mirror(
    self, tmp_path: Path
) -> None:
    """An unparsable history run is listed with error status; ``latest`` is skipped."""
    workspace = _create_workspace(tmp_path)
    broken_dir = workspace / "results" / "2026-04-07_100000"
    broken_dir.mkdir()
    broken_results = broken_dir / "results.json"
    broken_results.write_text("{", encoding="utf-8")  # deliberately invalid JSON
    _write_run(workspace, "latest")

    runs = list_runs(directory=tmp_path).runs

    assert len(runs) == 1
    only_run = runs[0]
    assert only_run.run_id == "2026-04-07_100000"
    assert only_run.status == "error"

def test_lists_malformed_latest_when_no_history_runs(
    self, tmp_path: Path
) -> None:
    """With no history runs, an unparsable ``latest`` is listed with error status."""
    workspace = _create_workspace(tmp_path)
    latest_dir = workspace / "results" / "latest"
    latest_dir.mkdir()
    broken_results = latest_dir / "results.json"
    broken_results.write_text("{", encoding="utf-8")  # deliberately invalid JSON

    runs = list_runs(directory=tmp_path).runs

    assert len(runs) == 1
    only_run = runs[0]
    assert only_run.run_id == "latest"
    assert only_run.status == "error"

def test_lists_latest_when_no_history_runs(self, tmp_path: Path) -> None:
    """``latest`` is listed when it is the only run directory with results."""
    workspace = _create_workspace(tmp_path)
    _write_run(workspace, "latest", passed=False)

    runs = list_runs(directory=tmp_path).runs

    assert len(runs) == 1
    sole_run = runs[0]
    assert sole_run.run_id == "latest"
    assert sole_run.overall_passed is False


class TestShowRun:
Expand All @@ -185,6 +241,25 @@ def test_not_found(self, tmp_path: Path) -> None:
_create_workspace(tmp_path)
with pytest.raises(FileNotFoundError, match="not found"):
show_run("nonexistent", directory=tmp_path)

def test_not_found_hints_latest_when_latest_is_only_listable_run(
    self, tmp_path: Path
) -> None:
    """The not-found hint names ``latest`` when it is the only listable run."""
    workspace = _create_workspace(tmp_path)
    _write_run(workspace, "latest")

    with pytest.raises(FileNotFoundError) as raised:
        show_run("nonexistent", directory=tmp_path)

    message = str(raised.value)
    assert "Recent runs: latest" in message

def test_not_found_with_missing_results_dir_has_empty_recent_hint(
    self, tmp_path: Path
) -> None:
    """Without a results directory the not-found hint reports no recent runs."""
    _create_workspace_without_results(tmp_path)

    with pytest.raises(FileNotFoundError) as raised:
        show_run("nonexistent", directory=tmp_path)

    message = str(raised.value)
    assert "Recent runs: (none)" in message


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -238,6 +313,14 @@ def test_lists_runs(self, tmp_path: Path) -> None:
assert "2026-04-07_100000" in result.stdout
assert "PASS" in result.stdout

def test_lists_latest_when_no_history_runs(self, tmp_path: Path) -> None:
    """``run list`` prints ``latest`` when it is the only run with results."""
    workspace = _create_workspace(tmp_path)
    _write_run(workspace, "latest", passed=True)

    cli_args = ["run", "list", "--dir", str(tmp_path)]
    outcome = runner.invoke(app, cli_args)

    assert outcome.exit_code == 0
    output = outcome.stdout
    assert "latest" in output
    assert "No runs found" not in output


class TestRunShowCLI:
def test_shows_run(self, tmp_path: Path) -> None:
Expand Down
Loading