From 14d521387d9f4a4ea180671a4584e91a330dbd36 Mon Sep 17 00:00:00 2001
From: Dongbumlee <donlee@microsoft.com>
Date: Tue, 7 Apr 2026 14:13:19 -0700
Subject: [PATCH] feat: implement run view command with table and entry detail
 modes

- run view <id>: per-row metrics table with scores across all evaluators
- run view <id> --entry N: drill-down with scores + threshold pass/fail
- 7 new tests (service + CLI)
- No more stubs in browse_commands.py
---
 src/agentops/cli/browse_commands.py |  76 +++++++++++++--
 src/agentops/services/browse.py     | 116 +++++++++++++++++++++++
 tests/unit/test_browse.py           | 140 +++++++++++++++++++++++++++-
 3 files changed, 322 insertions(+), 10 deletions(-)
diff --git a/src/agentops/cli/browse_commands.py b/src/agentops/cli/browse_commands.py
index c3db6139..5aa33782 100644
--- a/src/agentops/cli/browse_commands.py
+++ b/src/agentops/cli/browse_commands.py
@@ -7,8 +7,6 @@
 
 import typer
 
-from agentops.cli._planned import _planned_command
-
 run_app = typer.Typer(help="Run history and inspection commands.")
 bundle_app = typer.Typer(help="Bundle browsing commands.")
 
@@ -170,12 +168,76 @@ def cmd_run_show(
 
 @run_app.command("view")
 def cmd_run_view(
-    run_id: str,
+    run_id: str = typer.Argument(help="Run ID (timestamp folder name or 'latest')."),
     entry: Annotated[
         int | None,
-        typer.Option("--entry", help="Optional row/entry index for deep inspection."),
+        typer.Option("--entry", help="Show detail for a specific row index."),
     ] = None,
+    directory: Path = typer.Option(
+        Path("."),
+        "--dir",
+        help="Workspace directory.",
+    ),
 ) -> None:
-    """Deep-inspect run details (planned)."""
-    _ = run_id, entry
-    _planned_command("agentops run view <id> [--entry N]")
+    """View per-row metrics for an evaluation run."""
+    from agentops.services.browse import view_run
+
+    try:
+        result = view_run(run_id=run_id, directory=directory, entry=entry)
+    except (FileNotFoundError, ValueError) as exc:
+        typer.echo(f"Error: {exc}", err=True)
+        raise typer.Exit(code=1) from exc
+
+    status = "PASS" if result.overall_passed else "FAIL"
+    typer.echo(
+        f"Run: {result.run_id} ({status})  "
+        f"bundle={result.bundle_name}  dataset={result.dataset_name}"
+    )
+
+    if entry is not None:
+        # Detail view: view_run returned only the requested row (or raised above)
+        row = result.rows[0]
+        row_status = "PASS" if row.passed_all else "FAIL"
+        typer.echo(f"\nRow {row.row_index}: {row_status}")
+        typer.echo("")
+        typer.echo("Scores:")
+        for name, value in row.scores.items():
+            typer.echo(f"  {name:<40} {value:.4f}")
+        if row.threshold_results:
+            typer.echo("")
+            typer.echo("Thresholds:")
+            for t in row.threshold_results:
+                mark = "PASS" if t["passed"] else "FAIL"
+                typer.echo(
+                    f"  {t['evaluator']:<40} {t['criteria']} {t['expected']:<10} "
+                    f"actual={t['actual']:<10} {mark}"
+                )
+    else:
+        # Table view: one line per row, one column per metric
+        if not result.rows:
+            typer.echo("\nNo per-row metrics available.")
+            return
+
+        # Column order follows evaluator_names; samples_evaluated is not shown
+        metric_names = [n for n in result.evaluator_names if n != "samples_evaluated"]
+
+        # Header: ">10" pads short names but lets longer names widen the column
+        typer.echo("")
+        header = f"{'Row':>4}  {'Pass':4}"
+        for name in metric_names:
+            short = name.replace("Evaluator", "")
+            header += f"  {short:>10}"
+        typer.echo(header)
+        typer.echo("-" * len(header))
+
+        # Rows: every cell uses its header's width so columns stay aligned
+        for row in result.rows:
+            row_status = "PASS" if row.passed_all else "FAIL"
+            line = f"{row.row_index:>4}  {row_status:4}"
+            for name in metric_names:
+                # Same width rule as the header: at least 10, wider for long names
+                width = max(10, len(name.replace("Evaluator", "")))
+                val = row.scores.get(name)
+                cell = "—" if val is None else f"{val:.2f}"
+                line += f"  {cell:>{width}}"
+            typer.echo(line)
diff --git a/src/agentops/services/browse.py b/src/agentops/services/browse.py
index 37e0506a..7c607ed2 100644
--- a/src/agentops/services/browse.py
+++ b/src/agentops/services/browse.py
@@ -314,3 +314,119 @@ def show_run(run_id: str, directory: Path = Path(".")) -> RunDetail:
         report_path=report_path,
         foundry_url=foundry_url,
     )
+
+
+# ---------------------------------------------------------------------------
+# Run view (row-level detail)
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class RowView:
+    """Scores and threshold outcomes for a single row of a run."""
+
+    row_index: int  # row_index value read from results.json
+    passed_all: bool  # row-level pass flag from item_evaluations
+    scores: Dict[str, float]  # metric name -> metric value
+    threshold_results: List[Dict[str, Any]]  # one dict per threshold check
+
+
+@dataclass(frozen=True)
+class RunViewResult:
+    """Run header fields plus the per-row views built by ``view_run``."""
+
+    run_id: str  # exactly as passed to view_run
+    bundle_name: str
+    dataset_name: str
+    overall_passed: bool  # taken from the run's summary
+    rows: List[RowView]  # ascending row_index; a single row when filtered
+    evaluator_names: List[str]  # metric names in first-seen order
+
+
+def view_run(
+    run_id: str,
+    directory: Path = Path("."),
+    entry: Optional[int] = None,
+) -> RunViewResult:
+    """Load ``results.json`` for a run and build per-row score/threshold views.
+
+    If ``entry`` is given, only that row is returned (ValueError if absent).
+    Raises FileNotFoundError if the run directory or results.json is missing.
+    """
+    workspace = resolve_workspace(directory)
+    results_dir = workspace / "results"
+    run_dir = (results_dir / run_id).resolve()
+    if not run_dir.is_dir():
+        # A missing results folder just means "no runs yet", not a raw OSError.
+        candidates = results_dir.iterdir() if results_dir.is_dir() else ()
+        available = [
+            d.name
+            for d in sorted(candidates, reverse=True)
+            if d.is_dir() and d.name != "latest" and (d / "results.json").exists()
+        ]
+        hint = ", ".join(available[:5]) if available else "(none)"
+        raise FileNotFoundError(
+            f"Run '{run_id}' not found in {results_dir}. Recent runs: {hint}"
+        )
+
+    results_file = run_dir / "results.json"
+    if not results_file.exists():
+        raise FileNotFoundError(f"No results.json in {run_dir}")
+
+    data = json.loads(results_file.read_text(encoding="utf-8"))
+    result = RunResult.model_validate(data)
+
+    # Per-row scores; the dict doubles as an ordered set of metric names
+    row_scores: Dict[int, Dict[str, float]] = {}
+    evaluator_names_set: dict[str, None] = {}
+    for row in result.row_metrics:
+        scores = {}
+        for m in row.metrics:
+            scores[m.name] = m.value
+            evaluator_names_set[m.name] = None
+        row_scores[row.row_index] = scores
+
+    # Per-row threshold outcomes and row-level pass flag
+    row_thresholds: Dict[int, List[Dict[str, Any]]] = {}
+    row_passed: Dict[int, bool] = {}
+    for item in result.item_evaluations:
+        row_passed[item.row_index] = item.passed_all
+        row_thresholds[item.row_index] = [
+            {
+                "evaluator": t.evaluator,
+                "criteria": t.criteria,
+                "expected": t.expected,
+                "actual": t.actual,
+                "passed": t.passed,
+            }
+            for t in item.thresholds
+        ]
+
+    # A row is listed if it has metrics or an evaluation record
+    all_row_indices = sorted(set(row_scores.keys()) | set(row_passed.keys()))
+    if entry is not None:
+        if entry not in all_row_indices:
+            raise ValueError(
+                f"Entry {entry} not found. Available rows: "
+                + ", ".join(str(i) for i in all_row_indices)
+            )
+        all_row_indices = [entry]
+
+    rows: List[RowView] = [
+        RowView(
+            row_index=idx,
+            passed_all=row_passed.get(idx, True),  # no evaluation record -> PASS
+            scores=row_scores.get(idx, {}),
+            threshold_results=row_thresholds.get(idx, []),
+        )
+        for idx in all_row_indices
+    ]
+
+    return RunViewResult(
+        run_id=run_id,
+        bundle_name=result.bundle.name,
+        dataset_name=result.dataset.name,
+        overall_passed=result.summary.overall_passed,
+        rows=rows,
+        evaluator_names=list(evaluator_names_set.keys()),
+    )
diff --git a/tests/unit/test_browse.py b/tests/unit/test_browse.py
index 077426f6..9d742964 100644
--- a/tests/unit/test_browse.py
+++ b/tests/unit/test_browse.py
@@ -14,6 +14,7 @@
     list_runs,
     show_bundle,
     show_run,
+    view_run,
 )
 from agentops.utils.yaml import save_yaml
 
@@ -65,10 +66,51 @@ def _write_run(ws: Path, run_id: str, *, passed: bool = True) -> Path:
             {"name": "CoherenceEvaluator", "value": 4.5},
             {"name": "samples_evaluated", "value": 3.0},
         ],
-        "row_metrics": [],
+        "row_metrics": [
+            {
+                "row_index": 1,
+                "metrics": [
+                    {"name": "CoherenceEvaluator", "value": 5.0},
+                    {"name": "RelevanceEvaluator", "value": 4.0},
+                ],
+            },
+            {
+                "row_index": 2,
+                "metrics": [
+                    {"name": "CoherenceEvaluator", "value": 4.0},
+                    {"name": "RelevanceEvaluator", "value": 5.0},
+                ],
+            },
+        ],
         "item_evaluations": [
-            {"row_index": 1, "passed_all": True, "thresholds": []},
-            {"row_index": 2, "passed_all": passed, "thresholds": []},
+            {
+                "row_index": 1,
+                "passed_all": True,
+                "thresholds": [
+                    {
+                        "row_index": 1,
+                        "evaluator": "CoherenceEvaluator",
+                        "criteria": ">=",
+                        "expected": "3.000000",
+                        "actual": "5.000000",
+                        "passed": True,
+                    }
+                ],
+            },
+            {
+                "row_index": 2,
+                "passed_all": passed,
+                "thresholds": [
+                    {
+                        "row_index": 2,
+                        "evaluator": "CoherenceEvaluator",
+                        "criteria": ">=",
+                        "expected": "3.000000",
+                        "actual": "4.000000",
+                        "passed": passed,
+                    }
+                ],
+            },
         ],
         "thresholds": [
             {
@@ -257,3 +299,95 @@ def test_not_found(self, tmp_path: Path) -> None:
         )
         assert result.exit_code == 1
         assert "not found" in (result.stdout + result.stderr)
+
+
+# ---------------------------------------------------------------------------
+# view_run service tests
+# ---------------------------------------------------------------------------
+
+
+class TestViewRun:
+    def test_table_view(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000", passed=True)
+        result = view_run("2026-04-07_100000", directory=tmp_path)
+        assert result.run_id == "2026-04-07_100000"
+        assert len(result.rows) == 2  # fixture writes row_index 1 and 2
+        assert result.rows[0].row_index == 1  # rows are sorted by row_index
+        assert result.rows[0].scores["CoherenceEvaluator"] == 5.0
+        assert result.rows[1].scores["RelevanceEvaluator"] == 5.0
+
+    def test_entry_filter(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        result = view_run("2026-04-07_100000", directory=tmp_path, entry=2)
+        assert len(result.rows) == 1  # entry= narrows the result to one row
+        assert result.rows[0].row_index == 2
+
+    def test_entry_not_found(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        with pytest.raises(ValueError, match="Entry 99 not found"):
+            view_run("2026-04-07_100000", directory=tmp_path, entry=99)
+
+    def test_entry_has_thresholds(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        result = view_run("2026-04-07_100000", directory=tmp_path, entry=1)
+        row = result.rows[0]
+        assert len(row.threshold_results) == 1  # fixture: one threshold per row
+        assert row.threshold_results[0]["evaluator"] == "CoherenceEvaluator"
+        assert row.threshold_results[0]["passed"] is True
+
+
+# ---------------------------------------------------------------------------
+# run view CLI tests
+# ---------------------------------------------------------------------------
+
+
+class TestRunViewCLI:
+    def test_table_view(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        result = runner.invoke(
+            app, ["run", "view", "2026-04-07_100000", "--dir", str(tmp_path)]
+        )
+        assert result.exit_code == 0
+        assert "Coherence" in result.stdout  # header drops the "Evaluator" suffix
+        assert "Relevance" in result.stdout
+
+    def test_entry_view(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        result = runner.invoke(
+            app,
+            [
+                "run",
+                "view",
+                "2026-04-07_100000",
+                "--entry",
+                "1",
+                "--dir",
+                str(tmp_path),
+            ],
+        )
+        assert result.exit_code == 0
+        assert "Row 1: PASS" in result.stdout  # detail view heading
+        assert "CoherenceEvaluator" in result.stdout  # full name in detail view
+
+    def test_entry_not_found(self, tmp_path: Path) -> None:
+        ws = _create_workspace(tmp_path)
+        _write_run(ws, "2026-04-07_100000")
+        result = runner.invoke(
+            app,
+            [
+                "run",
+                "view",
+                "2026-04-07_100000",
+                "--entry",
+                "99",
+                "--dir",
+                str(tmp_path),
+            ],
+        )
+        assert result.exit_code == 1  # ValueError from view_run -> typer.Exit(1)