From f887f65ca2e8962144c8acd36a8b1dbdf0b83877 Mon Sep 17 00:00:00 2001 From: Dongbumlee Date: Tue, 7 Apr 2026 11:57:36 -0700 Subject: [PATCH 1/3] feat: implement bundle list/show and run list/show commands - Add services/browse.py with list_bundles, show_bundle, list_runs, show_run - Replace planned stubs with working implementations in cli/app.py - bundle list: shows all bundles with evaluators and threshold count - bundle show: displays full bundle detail (evaluators, thresholds, metadata) - run list: shows all past runs with status, bundle, dataset, duration - run show: displays full run detail (metrics, thresholds, items, Foundry URL) - Add 16 unit tests (service + CLI) in test_browse.py - All commands are read-only, no side effects, no Azure API calls --- src/agentops/cli/app.py | 145 ++++++++++++-- src/agentops/services/browse.py | 332 ++++++++++++++++++++++++++++++++ tests/unit/test_browse.py | 259 +++++++++++++++++++++++++ 3 files changed, 724 insertions(+), 12 deletions(-) create mode 100644 src/agentops/services/browse.py create mode 100644 tests/unit/test_browse.py diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py index a9f9e7bd..d23b4d64 100644 --- a/src/agentops/cli/app.py +++ b/src/agentops/cli/app.py @@ -311,15 +311,80 @@ def cmd_report_export() -> None: @run_app.command("list") -def cmd_run_list() -> None: - """List past evaluation runs (planned).""" - _planned_command("agentops run list") +def cmd_run_list( + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """List past evaluation runs.""" + from agentops.services.browse import list_runs + + try: + result = list_runs(directory=directory) + except FileNotFoundError as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + if not result.runs: + typer.echo(f"No runs found in {result.results_dir}") + return + + typer.echo(f"Runs in {result.results_dir}:\n") + for run in result.runs: + status = "PASS" if run.overall_passed else "FAIL" + typer.echo( + f" {run.run_id} {status:<4} " + f"bundle={run.bundle_name} dataset={run.dataset_name} " + f"duration={run.duration_seconds:.1f}s" + ) @run_app.command("show") -def cmd_run_show() -> None: - """Show summary of a past run (planned).""" - _planned_command("agentops run show") +def cmd_run_show( + run_id: str = typer.Argument(help="Run ID (timestamp folder name or 'latest')."), + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """Show summary of a past evaluation run.""" + from agentops.services.browse import show_run + + try: + detail = show_run(run_id=run_id, directory=directory) + except (FileNotFoundError, ValueError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + status = "PASS" if detail.overall_passed else "FAIL" + typer.echo(f"Run: {detail.run_id}") + typer.echo(f"Status: {status}") + typer.echo(f"Bundle: {detail.bundle_name}") + typer.echo(f"Dataset: {detail.dataset_name}") + typer.echo(f"Backend: {detail.backend}") + typer.echo(f"Started: {detail.started_at}") + typer.echo(f"Duration: {detail.duration_seconds:.1f}s") + typer.echo(f"Items: {detail.items_passed}/{detail.items_total} passed") + typer.echo("") + typer.echo("Metrics:") + for m in detail.metrics: + typer.echo(f" {m['name']:<40} {m['value']:.4f}") + if detail.thresholds: + typer.echo("") + typer.echo("Thresholds:") + for t in detail.thresholds: + mark = "PASS" if t["passed"] else "FAIL" + typer.echo( + f" {t['evaluator']:<40} {t['criteria']} {t['expected']:<10} " + f"actual={t['actual']:<10} {mark}" + ) + if detail.foundry_url: + typer.echo(f"\nFoundry portal: {detail.foundry_url}") + if detail.report_path: + typer.echo(f"Report: {detail.report_path}") @run_app.command("view") @@ -336,15 +401,71 @@ def cmd_run_view( @bundle_app.command("list") -def cmd_bundle_list() -> None: - """List available bundles (planned).""" - _planned_command("agentops bundle list") +def cmd_bundle_list( + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """List available evaluation bundles.""" + from agentops.services.browse import list_bundles + + try: + result = list_bundles(directory=directory) + except FileNotFoundError as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + if not result.bundles: + typer.echo(f"No bundles found in {result.bundles_dir}") + return + + typer.echo(f"Bundles in {result.bundles_dir}:\n") + for b in result.bundles: + evals = ", ".join(b.evaluators) if b.evaluators else "(none)" + typer.echo(f" {b.name}") + if b.description: + typer.echo(f" {b.description}") + typer.echo(f" evaluators: {evals}") + typer.echo(f" thresholds: {b.thresholds}") + typer.echo("") @bundle_app.command("show") -def cmd_bundle_show() -> None: - """Show bundle details (planned).""" - _planned_command("agentops bundle show") +def cmd_bundle_show( + bundle_name: str = typer.Argument(help="Bundle name or filename (without .yaml)."), + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """Show details of an evaluation bundle.""" + from agentops.services.browse import show_bundle + + try: + detail = show_bundle(bundle_name=bundle_name, directory=directory) + except (FileNotFoundError, ValueError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + typer.echo(f"Bundle: {detail.name}") + typer.echo(f"Path: {detail.path}") + if detail.description: + typer.echo(f"Description: {detail.description}") + if detail.metadata: + typer.echo(f"Metadata: {detail.metadata}") + typer.echo("") + typer.echo("Evaluators:") + for e in detail.evaluators: + status = "enabled" if e["enabled"] else "disabled" + typer.echo(f" {e['name']} (source={e['source']}, {status})") + typer.echo("") + typer.echo("Thresholds:") + for t in detail.thresholds: + value = t["value"] if t["value"] is not None else "" + typer.echo(f" {t['evaluator']} {t['criteria']} {value}") @dataset_app.command("validate") diff --git a/src/agentops/services/browse.py b/src/agentops/services/browse.py new file mode 100644 index 00000000..93f777d9 --- /dev/null +++ b/src/agentops/services/browse.py @@ -0,0 +1,332 @@ +"""Browse services for listing and inspecting bundles and runs.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional + +from agentops.core.config_loader import load_bundle_config +from agentops.core.models import RunResult + + +# --------------------------------------------------------------------------- +# Workspace resolution +# --------------------------------------------------------------------------- + +_DEFAULT_AGENTOPS_DIR = ".agentops" + + +def _resolve_workspace(directory: Path) -> Path: + """Resolve the .agentops workspace directory.""" + workspace = (directory / _DEFAULT_AGENTOPS_DIR).resolve() + if not workspace.is_dir(): + raise FileNotFoundError( + f"No .agentops workspace found at {workspace}. Run 'agentops init' first." + ) + return workspace + + +# --------------------------------------------------------------------------- +# Bundle browsing +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class BundleSummary: + """Summary info for a single bundle.""" + + name: str + path: Path + description: str + evaluators: List[str] + thresholds: int + + +@dataclass(frozen=True) +class BundleListResult: + """Result of listing bundles.""" + + bundles: List[BundleSummary] + bundles_dir: Path + + +def list_bundles(directory: Path = Path(".")) -> BundleListResult: + """List all bundle YAML files in the workspace.""" + workspace = _resolve_workspace(directory) + bundles_dir = workspace / "bundles" + + if not bundles_dir.is_dir(): + return BundleListResult(bundles=[], bundles_dir=bundles_dir) + + summaries: List[BundleSummary] = [] + for yaml_file in sorted(bundles_dir.glob("*.yaml")): + try: + bundle = load_bundle_config(yaml_file) + enabled = [e.name for e in bundle.evaluators if e.enabled] + summaries.append( + BundleSummary( + name=bundle.name, + path=yaml_file, + description=bundle.description or "", + evaluators=enabled, + thresholds=len(bundle.thresholds), + ) + ) + except Exception: # noqa: BLE001 + # Skip malformed bundles — still list them with minimal info + summaries.append( + BundleSummary( + name=yaml_file.stem, + path=yaml_file, + description="(error loading bundle)", + evaluators=[], + thresholds=0, + ) + ) + + return BundleListResult(bundles=summaries, bundles_dir=bundles_dir) + + +@dataclass(frozen=True) +class BundleDetail: + """Full detail of a single bundle.""" + + name: str + path: Path + description: str + evaluators: List[Dict[str, Any]] + thresholds: List[Dict[str, Any]] + metadata: Dict[str, Any] + + +def show_bundle(bundle_name: str, directory: Path = Path(".")) -> BundleDetail: + """Load and return full details of a bundle by name.""" + workspace = _resolve_workspace(directory) + bundles_dir = workspace / "bundles" + + # Try exact filename first, then search by bundle name + candidates = [ + bundles_dir / f"{bundle_name}.yaml", + bundles_dir / f"{bundle_name}", + ] + + bundle_path: Optional[Path] = None + for candidate in candidates: + if candidate.is_file(): + bundle_path = candidate + break + + # Search by bundle name field if not found by filename + if bundle_path is None and bundles_dir.is_dir(): + for yaml_file in bundles_dir.glob("*.yaml"): + try: + bundle = load_bundle_config(yaml_file) + if bundle.name == bundle_name: + bundle_path = yaml_file + break + except Exception: # noqa: BLE001 + continue + + if bundle_path is None: + raise FileNotFoundError( + f"Bundle '{bundle_name}' not found in {bundles_dir}. " + f"Available bundles: {', '.join(f.stem for f in bundles_dir.glob('*.yaml'))}" + ) + + bundle = load_bundle_config(bundle_path) + return BundleDetail( + name=bundle.name, + path=bundle_path, + description=bundle.description or "", + evaluators=[ + { + "name": e.name, + "source": e.source, + "enabled": e.enabled, + } + for e in bundle.evaluators + ], + thresholds=[ + { + "evaluator": t.evaluator, + "criteria": t.criteria, + "value": t.value, + } + for t in bundle.thresholds + ], + metadata=bundle.metadata, + ) + + +# --------------------------------------------------------------------------- +# Run browsing +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class RunSummary: + """Summary info for a single past run.""" + + run_id: str + path: Path + bundle_name: str + dataset_name: str + status: str + started_at: str + duration_seconds: float + metrics_count: int + overall_passed: bool + + +@dataclass(frozen=True) +class RunListResult: + """Result of listing runs.""" + + runs: List[RunSummary] + results_dir: Path + + +def list_runs(directory: Path = Path(".")) -> RunListResult: + """List all past evaluation runs in the workspace.""" + workspace = _resolve_workspace(directory) + results_dir = workspace / "results" + + if not results_dir.is_dir(): + return RunListResult(runs=[], results_dir=results_dir) + + summaries: List[RunSummary] = [] + for run_dir in sorted(results_dir.iterdir(), reverse=True): + if not run_dir.is_dir(): + continue + if run_dir.name == "latest": + continue # Skip the symlink/copy + + results_file = run_dir / "results.json" + if not results_file.exists(): + continue + + try: + data = json.loads(results_file.read_text(encoding="utf-8")) + result = RunResult.model_validate(data) + summaries.append( + RunSummary( + run_id=run_dir.name, + path=run_dir, + bundle_name=result.bundle.name, + dataset_name=result.dataset.name, + status=result.status, + started_at=result.execution.started_at, + duration_seconds=result.execution.duration_seconds, + metrics_count=len(result.metrics), + overall_passed=result.summary.overall_passed, + ) + ) + except Exception: # noqa: BLE001 + # Include the run with minimal info if results.json is malformed + summaries.append( + RunSummary( + run_id=run_dir.name, + path=run_dir, + bundle_name="(error)", + dataset_name="(error)", + status="error", + started_at="", + duration_seconds=0, + metrics_count=0, + overall_passed=False, + ) + ) + + return RunListResult(runs=summaries, results_dir=results_dir) + + +@dataclass(frozen=True) +class RunDetail: + """Full detail of a single past run.""" + + run_id: str + path: Path + bundle_name: str + dataset_name: str + status: str + backend: str + started_at: str + finished_at: str + duration_seconds: float + overall_passed: bool + metrics: List[Dict[str, Any]] + thresholds: List[Dict[str, Any]] + items_total: int + items_passed: int + report_path: Optional[Path] + foundry_url: Optional[str] + + +def show_run(run_id: str, directory: Path = Path(".")) -> RunDetail: + """Load and return full details of a past run.""" + workspace = _resolve_workspace(directory) + results_dir = workspace / "results" + + run_dir = (results_dir / run_id).resolve() + if not run_dir.is_dir(): + available = [ + d.name + for d in sorted(results_dir.iterdir(), reverse=True) + if d.is_dir() and d.name != "latest" and (d / "results.json").exists() + ] + hint = ", ".join(available[:5]) if available else "(none)" + raise FileNotFoundError( + f"Run '{run_id}' not found in {results_dir}. Recent runs: {hint}" + ) + + results_file = run_dir / "results.json" + if not results_file.exists(): + raise FileNotFoundError(f"No results.json in {run_dir}") + + data = json.loads(results_file.read_text(encoding="utf-8")) + result = RunResult.model_validate(data) + + report_path = run_dir / "report.md" + if not report_path.exists(): + report_path = None + + foundry_url = None + if result.artifacts and result.artifacts.foundry_eval_studio_url: + foundry_url = result.artifacts.foundry_eval_studio_url + + items_total = result.summary.thresholds_count + items_passed = result.summary.thresholds_passed + # Use item_evaluations for more accurate counts + if result.item_evaluations: + items_total = len(result.item_evaluations) + items_passed = sum(1 for i in result.item_evaluations if i.passed_all) + + return RunDetail( + run_id=run_id, + path=run_dir, + bundle_name=result.bundle.name, + dataset_name=result.dataset.name, + status=result.status, + backend=result.execution.backend, + started_at=result.execution.started_at, + finished_at=result.execution.finished_at, + duration_seconds=result.execution.duration_seconds, + overall_passed=result.summary.overall_passed, + metrics=[{"name": m.name, "value": m.value} for m in result.metrics], + thresholds=[ + { + "evaluator": t.evaluator, + "criteria": t.criteria, + "expected": t.expected, + "actual": t.actual, + "passed": t.passed, + } + for t in result.thresholds + ], + items_total=items_total, + items_passed=items_passed, + report_path=report_path, + foundry_url=foundry_url, + ) diff --git a/tests/unit/test_browse.py b/tests/unit/test_browse.py new file mode 100644 index 00000000..077426f6 --- /dev/null +++ b/tests/unit/test_browse.py @@ -0,0 +1,259 @@ +"""Tests for browse services (bundle list/show, run list/show).""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from agentops.cli.app import app +from agentops.services.browse import ( + list_bundles, + list_runs, + show_bundle, + show_run, +) +from agentops.utils.yaml import save_yaml + +runner = CliRunner() + + +def _create_workspace(tmp_path: Path) -> Path: + """Create a minimal .agentops workspace.""" + ws = tmp_path / ".agentops" + ws.mkdir() + (ws / "bundles").mkdir() + (ws / "results").mkdir() + return ws + + +def _write_bundle(ws: Path, name: str, evaluators: list, thresholds: list) -> Path: + bundle_path = ws / "bundles" / f"{name}.yaml" + save_yaml( + bundle_path, + { + "version": 1, + "name": name, + "description": f"Test bundle {name}", + "evaluators": evaluators, + "thresholds": thresholds, + "metadata": {"category": "test"}, + }, + ) + return bundle_path + + +def _write_run(ws: Path, run_id: str, *, passed: bool = True) -> Path: + run_dir = ws / "results" / run_id + run_dir.mkdir(parents=True) + results = { + "version": 1, + "status": "completed", + "bundle": {"name": "test_bundle", "path": "bundles/test.yaml"}, + "dataset": {"name": "test_dataset", "path": "datasets/test.yaml"}, + "execution": { + "backend": "foundry", + "command": "test", + "started_at": "2026-04-07T10:00:00Z", + "finished_at": "2026-04-07T10:01:00Z", + "duration_seconds": 60.0, + "exit_code": 0, + }, + "metrics": [ + {"name": "CoherenceEvaluator", "value": 4.5}, + {"name": "samples_evaluated", "value": 3.0}, + ], + "row_metrics": [], + "item_evaluations": [ + {"row_index": 1, "passed_all": True, "thresholds": []}, + {"row_index": 2, "passed_all": passed, "thresholds": []}, + ], + "thresholds": [ + { + "evaluator": "CoherenceEvaluator", + "criteria": ">=", + "expected": "3.000000", + "actual": "2/2 items", + "passed": passed, + } + ], + "summary": { + "metrics_count": 2, + "thresholds_count": 1, + "thresholds_passed": 1 if passed else 0, + "thresholds_failed": 0 if passed else 1, + "overall_passed": passed, + }, + } + (run_dir / "results.json").write_text( + json.dumps(results, indent=2), encoding="utf-8" + ) + (run_dir / "report.md").write_text("# Report", encoding="utf-8") + return run_dir + + +# --------------------------------------------------------------------------- +# Service tests +# --------------------------------------------------------------------------- + + +class TestListBundles: + def test_empty_workspace(self, tmp_path: Path) -> None: + _create_workspace(tmp_path) + result = list_bundles(directory=tmp_path) + assert result.bundles == [] + + def test_lists_bundles(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_bundle( + ws, + "baseline", + [{"name": "CoherenceEvaluator", "source": "foundry", "enabled": True}], + [{"evaluator": "CoherenceEvaluator", "criteria": ">=", "value": 3}], + ) + result = list_bundles(directory=tmp_path) + assert len(result.bundles) == 1 + assert result.bundles[0].name == "baseline" + assert result.bundles[0].evaluators == ["CoherenceEvaluator"] + assert result.bundles[0].thresholds == 1 + + def test_no_workspace_raises(self, tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError, match="No .agentops workspace"): + list_bundles(directory=tmp_path) + + +class TestShowBundle: + def test_by_name(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_bundle( + ws, + "my_bundle", + [{"name": "FluencyEvaluator", "source": "foundry", "enabled": True}], + [{"evaluator": "FluencyEvaluator", "criteria": ">=", "value": 4}], + ) + detail = show_bundle("my_bundle", directory=tmp_path) + assert detail.name == "my_bundle" + assert len(detail.evaluators) == 1 + assert detail.evaluators[0]["name"] == "FluencyEvaluator" + + def test_not_found(self, tmp_path: Path) -> None: + _create_workspace(tmp_path) + with pytest.raises(FileNotFoundError, match="not found"): + show_bundle("nonexistent", directory=tmp_path) + + +class TestListRuns: + def test_empty(self, tmp_path: Path) -> None: + _create_workspace(tmp_path) + result = list_runs(directory=tmp_path) + assert result.runs == [] + + def test_lists_runs(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_run(ws, "2026-04-07_100000", passed=True) + _write_run(ws, "2026-04-07_110000", passed=False) + result = list_runs(directory=tmp_path) + assert len(result.runs) == 2 + # Sorted reverse (newest first) + assert result.runs[0].run_id == "2026-04-07_110000" + assert result.runs[0].overall_passed is False + assert result.runs[1].run_id == "2026-04-07_100000" + assert result.runs[1].overall_passed is True + + def test_skips_latest_dir(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_run(ws, "2026-04-07_100000") + (ws / "results" / "latest").mkdir() + result = list_runs(directory=tmp_path) + assert len(result.runs) == 1 + + +class TestShowRun: + def test_shows_run(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_run(ws, "2026-04-07_100000", passed=True) + detail = show_run("2026-04-07_100000", directory=tmp_path) + assert detail.run_id == "2026-04-07_100000" + assert detail.bundle_name == "test_bundle" + assert detail.overall_passed is True + assert detail.items_total == 2 + assert detail.items_passed == 2 + + def test_not_found(self, tmp_path: Path) -> None: + _create_workspace(tmp_path) + with pytest.raises(FileNotFoundError, match="not found"): + show_run("nonexistent", directory=tmp_path) + + +# --------------------------------------------------------------------------- +# CLI tests +# --------------------------------------------------------------------------- + + +class TestBundleListCLI: + def test_lists_bundles(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_bundle( + ws, + "baseline", + [{"name": "CoherenceEvaluator", "source": "foundry", "enabled": True}], + [{"evaluator": "CoherenceEvaluator", "criteria": ">=", "value": 3}], + ) + result = runner.invoke(app, ["bundle", "list", "--dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "baseline" in result.stdout + assert "CoherenceEvaluator" in result.stdout + + def test_no_workspace(self, tmp_path: Path) -> None: + result = runner.invoke(app, ["bundle", "list", "--dir", str(tmp_path)]) + assert result.exit_code == 1 + assert "No .agentops workspace" in (result.stdout + result.stderr) + + +class TestBundleShowCLI: + def test_shows_bundle(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_bundle( + ws, + "my_bundle", + [{"name": "FluencyEvaluator", "source": "foundry", "enabled": True}], + [{"evaluator": "FluencyEvaluator", "criteria": ">=", "value": 4}], + ) + result = runner.invoke( + app, ["bundle", "show", "my_bundle", "--dir", str(tmp_path)] + ) + assert result.exit_code == 0 + assert "my_bundle" in result.stdout + assert "FluencyEvaluator" in result.stdout + + +class TestRunListCLI: + def test_lists_runs(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_run(ws, "2026-04-07_100000", passed=True) + result = runner.invoke(app, ["run", "list", "--dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "2026-04-07_100000" in result.stdout + assert "PASS" in result.stdout + + +class TestRunShowCLI: + def test_shows_run(self, tmp_path: Path) -> None: + ws = _create_workspace(tmp_path) + _write_run(ws, "2026-04-07_100000") + result = runner.invoke( + app, ["run", "show", "2026-04-07_100000", "--dir", str(tmp_path)] + ) + assert result.exit_code == 0 + assert "test_bundle" in result.stdout + assert "CoherenceEvaluator" in result.stdout + + def test_not_found(self, tmp_path: Path) -> None: + _create_workspace(tmp_path) + result = runner.invoke( + app, ["run", "show", "nonexistent", "--dir", str(tmp_path)] + ) + assert result.exit_code == 1 + assert "not found" in (result.stdout + result.stderr) From 2d4a52c04a836867e6b63c0710167c0dba6d5ea4 Mon Sep 17 00:00:00 2001 From: Dongbumlee Date: Tue, 7 Apr 2026 12:13:54 -0700 Subject: [PATCH 2/3] refactor: split CLI into command modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split app.py (487 lines) into focused command modules: - app.py (114 lines) — root app, global callback, init, sub-app registration - eval_commands.py (108 lines) — eval run, eval compare - report_commands.py (66 lines) — report, report show/export stubs - browse_commands.py (152 lines) — bundle list/show, run list/show/view - config_commands.py (56 lines) — config cicd, config validate/show stubs - planned.py (57 lines) — dataset, monitor, trace, model, agent stubs - _planned.py (12 lines) — shared planned command helper No behavior changes. All 96 tests pass. --- src/agentops/cli/_planned.py | 16 + src/agentops/cli/app.py | 518 ++-------------------------- src/agentops/cli/browse_commands.py | 181 ++++++++++ src/agentops/cli/config_commands.py | 71 ++++ src/agentops/cli/eval_commands.py | 129 +++++++ src/agentops/cli/planned.py | 87 +++++ src/agentops/cli/report_commands.py | 83 +++++ 7 files changed, 605 insertions(+), 480 deletions(-) create mode 100644 src/agentops/cli/_planned.py create mode 100644 src/agentops/cli/browse_commands.py create mode 100644 src/agentops/cli/config_commands.py create mode 100644 src/agentops/cli/eval_commands.py create mode 100644 src/agentops/cli/planned.py create mode 100644 src/agentops/cli/report_commands.py diff --git a/src/agentops/cli/_planned.py b/src/agentops/cli/_planned.py new file mode 100644 index 00000000..f593d7c8 --- /dev/null +++ b/src/agentops/cli/_planned.py @@ -0,0 +1,16 @@ +"""Shared helper for planned (stub) commands.""" + +from __future__ import annotations + +import typer + + +def _planned_command(command_name: str) -> None: + """Print a message and exit with code 1 for unimplemented commands.""" + typer.echo( + "This command is planned but not implemented in this release:\n" + f" {command_name}\n" + "Please use the currently available commands" + " (`init`, `eval run`, `eval compare`, `report`, `config cicd`) for now." + ) + raise typer.Exit(code=1) diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py index d23b4d64..09bd54a8 100644 --- a/src/agentops/cli/app.py +++ b/src/agentops/cli/app.py @@ -1,3 +1,17 @@ +"""AgentOps CLI — main application and sub-command registration. + +This module creates the root Typer app, registers sub-command groups +from their respective modules, and defines the global callback (logging, +version) and the ``init`` command. + +Command modules: + eval_commands — eval run, eval compare + report_commands — report, report show, report export + browse_commands — bundle list/show, run list/show/view + config_commands — config cicd, config validate, config show + planned — dataset, monitor, trace, model, agent (stubs) +""" + from __future__ import annotations from pathlib import Path @@ -5,32 +19,34 @@ import typer -from agentops.services.initializer import initialize_workspace -from agentops.services.reporting import generate_report_from_results -from agentops.services.runner import run_evaluation from agentops.utils.logging import get_logger, setup_logging +# --------------------------------------------------------------------------- +# Import sub-command apps from their modules +# --------------------------------------------------------------------------- +from agentops.cli.eval_commands import eval_app +from agentops.cli.report_commands import report_app +from agentops.cli.browse_commands import bundle_app, run_app +from agentops.cli.config_commands import config_app +from agentops.cli.planned import ( + agent_app, + dataset_app, + model_app, + monitor_app, + trace_app, +) + +# --------------------------------------------------------------------------- +# Root app +# --------------------------------------------------------------------------- + app = typer.Typer( name="agentops", help="AgentOps — standardized evaluation workflows for AI projects.", add_completion=False, ) -eval_app = typer.Typer( - help=( - "Evaluation sub-commands. " - "Use `agentops eval run --help` to see run options like " - "`--config` (`-c`) and `--output` (`-o`)." - ) -) -run_app = typer.Typer(help="Run history and inspection commands.") -bundle_app = typer.Typer(help="Bundle browsing commands.") -dataset_app = typer.Typer(help="Dataset utility commands.") -config_app = typer.Typer(help="Configuration utility commands.") -report_app = typer.Typer(help="Reporting commands.", invoke_without_command=True) -monitor_app = typer.Typer(help="Monitoring setup and operations.") -trace_app = typer.Typer(help="Tracing commands.") -model_app = typer.Typer(help="Model discovery commands.") -agent_app = typer.Typer(help="Agent discovery commands.") + +# Register sub-command groups app.add_typer(eval_app, name="eval") app.add_typer(run_app, name="run") app.add_typer(bundle_app, name="bundle") @@ -43,16 +59,6 @@ app.add_typer(agent_app, name="agent") log = get_logger(__name__) -DEFAULT_REPORT_INPUT = Path(".agentops/results/latest/results.json") - - -def _planned_command(command_name: str) -> None: - typer.echo( - "This command is planned but not implemented in this release:\n" - f" {command_name}\n" - "Please use the currently available commands (`init`, `eval run`, `report`) for now." - ) - raise typer.Exit(code=1) # --------------------------------------------------------------------------- @@ -88,7 +94,7 @@ def _main( # --------------------------------------------------------------------------- -# agentops init +# agentops init (top-level command, lives here) # --------------------------------------------------------------------------- @@ -105,6 +111,8 @@ def cmd_init( ), ) -> None: """Initialise an AgentOps workspace (creates .agentops/config.yaml).""" + from agentops.services.initializer import initialize_workspace + log.debug("cmd_init called force=%s dir=%s", force, directory) try: result = initialize_workspace(directory=directory, force=force) @@ -129,455 +137,5 @@ def cmd_init( typer.echo(f" - skipped {skipped}") -# --------------------------------------------------------------------------- -# agentops eval run -# --------------------------------------------------------------------------- - - -@eval_app.command("run") -def cmd_eval_run( - config: Annotated[ - Path | None, - typer.Option( - "--config", - "-c", - help="Path to run.yaml (default: .agentops/run.yaml).", - ), - ] = None, - output: Annotated[ - Path | None, - typer.Option("--output", "-o", help="Output directory for results."), - ] = None, - report_format: Annotated[ - str, typer.Option("--format", "-f", help="Report format: md, html, or all.") - ] = "md", -) -> None: - """Run an evaluation defined in a run.yaml file.""" - if report_format not in ("md", "html", "all"): - typer.echo("Error: --format must be md, html, or all.", err=True) - raise typer.Exit(code=1) - - log.debug( - "cmd_eval_run called config=%s output=%s format=%s", - config, - output, - report_format, - ) - try: - run_result = run_evaluation( - config_path=config, output_override=output, report_format=report_format - ) - except Exception as exc: - typer.echo(f"Error: evaluation failed: {exc}", err=True) - raise typer.Exit(code=1) from exc - - typer.echo(f"Evaluation output directory: {run_result.output_dir}") - typer.echo(f"results.json: {run_result.results_path}") - typer.echo(f"report: {run_result.report_path}") - - if run_result.exit_code == 2: - typer.echo("Threshold status: FAILED") - raise typer.Exit(code=2) - - typer.echo("Threshold status: PASSED") - - -@eval_app.command("compare") -def cmd_eval_compare( - runs: Annotated[ - str, - typer.Option( - "--runs", help="Comma-separated run ids (example: ID1,ID2 or ID1,ID2,ID3)." - ), - ], - output: Annotated[ - Path | None, - typer.Option("--output", "-o", help="Output directory for comparison results."), - ] = None, - report_format: Annotated[ - str, typer.Option("--format", "-f", help="Report format: md, html, or all.") - ] = "md", -) -> None: - """Compare two or more past evaluation runs.""" - from agentops.services.comparison import run_comparison - - if report_format not in ("md", "html", "all"): - typer.echo("Error: --format must be md, html, or all.", err=True) - raise typer.Exit(code=1) - - parts = [p.strip() for p in runs.split(",")] - if len(parts) < 2: - typer.echo( - "Error: --runs must contain at least two comma-separated run ids.", err=True - ) - raise typer.Exit(code=1) - - log.debug( - "cmd_eval_compare called runs=%s output=%s format=%s", - parts, - output, - report_format, - ) - try: - result = run_comparison( - run_ids=parts, - output_dir=output, - report_format=report_format, - ) - except Exception as exc: - typer.echo(f"Error: comparison failed: {exc}", err=True) - raise typer.Exit(code=1) from exc - - typer.echo(f"comparison.json: {result.comparison_json_path}") - if result.comparison_md_path: - typer.echo(f"comparison.md: {result.comparison_md_path}") - if result.comparison_html_path: - typer.echo(f"comparison.html: {result.comparison_html_path}") - - if result.has_regressions: - typer.echo("Comparison verdict: REGRESSIONS DETECTED") - raise typer.Exit(code=2) - - typer.echo("Comparison verdict: NO REGRESSIONS") - - -# --------------------------------------------------------------------------- -# agentops report -# --------------------------------------------------------------------------- - - -@report_app.callback(invoke_without_command=True) -def cmd_report( - ctx: typer.Context, - results_in: Annotated[ - Path | None, - typer.Option( - "--in", - help=( - "Path to results.json. " - "If omitted, uses .agentops/results/latest/results.json" - ), - ), - ] = None, - report_out: Annotated[ - Path | None, - typer.Option("--out", help="Output path for report."), - ] = None, - report_format: Annotated[ - str, typer.Option("--format", "-f", help="Report format: md, html, or all.") - ] = "md", -) -> None: - """Regenerate report from a results.json file.""" - if ctx.invoked_subcommand is not None: - return - - if report_format not in ("md", "html", "all"): - typer.echo("Error: --format must be md, html, or all.", err=True) - raise typer.Exit(code=1) - - resolved_results_in = results_in or DEFAULT_REPORT_INPUT - log.debug( - "cmd_report called in=%s out=%s format=%s", - resolved_results_in, - report_out, - report_format, - ) - try: - report_result = generate_report_from_results( - results_path=resolved_results_in, - output_path=report_out, - report_format=report_format, - ) - except Exception as exc: - typer.echo(f"Error: report generation failed: {exc}", err=True) - raise typer.Exit(code=1) from exc - - typer.echo(f"Loaded results: {report_result.input_results_path}") - typer.echo(f"Generated report: {report_result.output_report_path}") - if report_result.html_report_path: - typer.echo(f"Generated report: {report_result.html_report_path}") - - -@report_app.command("show") -def cmd_report_show() -> None: - """View reports in table format (planned).""" - _planned_command("agentops report show") - - -@report_app.command("export") -def cmd_report_export() -> None: - """Export reports in JSON/Markdown/CSV formats (planned).""" - _planned_command("agentops report export") - - -@run_app.command("list") -def cmd_run_list( - directory: Path = typer.Option( - Path("."), - "--dir", - help="Workspace directory.", - ), -) -> None: - """List past evaluation runs.""" - from agentops.services.browse import list_runs - - try: - result = list_runs(directory=directory) - except FileNotFoundError as exc: - typer.echo(f"Error: {exc}", err=True) - raise typer.Exit(code=1) from exc - - if not result.runs: - typer.echo(f"No runs found in {result.results_dir}") - return - - typer.echo(f"Runs in {result.results_dir}:\n") - for run in result.runs: - status = "PASS" if run.overall_passed else "FAIL" - typer.echo( - f" {run.run_id} {status:<4} " - f"bundle={run.bundle_name} dataset={run.dataset_name} " - f"duration={run.duration_seconds:.1f}s" - ) - - -@run_app.command("show") -def cmd_run_show( - run_id: str = typer.Argument(help="Run ID (timestamp folder name or 'latest')."), - directory: Path = typer.Option( - Path("."), - "--dir", - help="Workspace directory.", - ), -) -> None: - """Show summary of a past evaluation run.""" - from agentops.services.browse import show_run - - try: - detail = show_run(run_id=run_id, directory=directory) - except (FileNotFoundError, ValueError) as exc: - typer.echo(f"Error: {exc}", err=True) - raise typer.Exit(code=1) from exc - - status = "PASS" if detail.overall_passed else "FAIL" - typer.echo(f"Run: {detail.run_id}") - typer.echo(f"Status: {status}") - typer.echo(f"Bundle: {detail.bundle_name}") - typer.echo(f"Dataset: {detail.dataset_name}") - typer.echo(f"Backend: {detail.backend}") - typer.echo(f"Started: {detail.started_at}") - typer.echo(f"Duration: {detail.duration_seconds:.1f}s") - typer.echo(f"Items: {detail.items_passed}/{detail.items_total} passed") - typer.echo("") - typer.echo("Metrics:") - for m in detail.metrics: - typer.echo(f" {m['name']:<40} {m['value']:.4f}") - if detail.thresholds: - typer.echo("") - typer.echo("Thresholds:") - for t in detail.thresholds: - mark = "PASS" if t["passed"] else "FAIL" - typer.echo( - f" {t['evaluator']:<40} {t['criteria']} {t['expected']:<10} " - f"actual={t['actual']:<10} {mark}" - ) - if detail.foundry_url: - typer.echo(f"\nFoundry portal: {detail.foundry_url}") - if detail.report_path: - typer.echo(f"Report: {detail.report_path}") - - -@run_app.command("view") -def cmd_run_view( - run_id: str, - entry: Annotated[ - int | None, - typer.Option("--entry", help="Optional row/entry index for deep inspection."), - ] = None, -) -> None: - """Deep-inspect run details (planned).""" - _ = run_id, entry - _planned_command("agentops run view [--entry N]") - - -@bundle_app.command("list") -def cmd_bundle_list( - directory: Path = typer.Option( - Path("."), - "--dir", - help="Workspace directory.", - ), -) -> None: - """List available evaluation bundles.""" - from agentops.services.browse import list_bundles - - try: - result = list_bundles(directory=directory) - except FileNotFoundError as exc: - typer.echo(f"Error: {exc}", err=True) - raise typer.Exit(code=1) from exc - - if not result.bundles: - typer.echo(f"No bundles found in {result.bundles_dir}") - return - - typer.echo(f"Bundles in {result.bundles_dir}:\n") - for b in result.bundles: - evals = ", ".join(b.evaluators) if b.evaluators else "(none)" - typer.echo(f" {b.name}") - if b.description: - typer.echo(f" {b.description}") - typer.echo(f" evaluators: {evals}") - typer.echo(f" thresholds: {b.thresholds}") - typer.echo("") - - -@bundle_app.command("show") -def cmd_bundle_show( - bundle_name: str = typer.Argument(help="Bundle name or filename (without .yaml)."), - directory: Path = typer.Option( - Path("."), - "--dir", - help="Workspace directory.", - ), -) -> None: - """Show details of an evaluation bundle.""" - from agentops.services.browse import show_bundle - - try: - detail = show_bundle(bundle_name=bundle_name, directory=directory) - except (FileNotFoundError, ValueError) as exc: - typer.echo(f"Error: {exc}", err=True) - raise typer.Exit(code=1) from exc - - typer.echo(f"Bundle: {detail.name}") - typer.echo(f"Path: {detail.path}") - if detail.description: - typer.echo(f"Description: {detail.description}") - if detail.metadata: - typer.echo(f"Metadata: {detail.metadata}") - typer.echo("") - typer.echo("Evaluators:") - for e in detail.evaluators: - status = "enabled" if e["enabled"] else "disabled" - typer.echo(f" {e['name']} (source={e['source']}, {status})") - typer.echo("") - typer.echo("Thresholds:") - for t in detail.thresholds: - value = t["value"] if t["value"] is not None else "" - typer.echo(f" {t['evaluator']} {t['criteria']} {value}") - - -@dataset_app.command("validate") -def cmd_dataset_validate() -> None: - """Validate dataset files (planned).""" - _planned_command("agentops dataset validate") - - -@dataset_app.command("describe") -def cmd_dataset_describe() -> None: - """Describe dataset schema and shape (planned).""" - _planned_command("agentops dataset describe") - - -@dataset_app.command("import") -def cmd_dataset_import() -> None: - """Import external datasets (planned).""" - _planned_command("agentops dataset import") - - -@config_app.command("validate") -def cmd_config_validate() -> None: - """Validate configuration files (planned).""" - _planned_command("agentops config validate") - - -@config_app.command("show") -def cmd_config_show() -> None: - """Show merged runtime config (planned).""" - _planned_command("agentops config show") - - -@config_app.command("cicd") -def cmd_config_cicd( - force: bool = typer.Option( - False, "--force", help="Overwrite existing workflow file." - ), - directory: Path = typer.Option( - Path("."), - "--dir", - help="Target repository root directory.", - ), -) -> None: - """Generate a GitHub Actions workflow for AgentOps evaluation.""" - from agentops.services.cicd import generate_cicd_workflow - - log.debug("cmd_config_cicd called force=%s dir=%s", force, directory) - try: - result = generate_cicd_workflow(directory=directory, force=force) - except Exception as exc: - typer.echo(f"Error: failed to generate CI/CD workflow: {exc}", err=True) - raise typer.Exit(code=1) from exc - - for created in result.created_files: - typer.echo(f" + created {created}") - for overwritten in result.overwritten_files: - typer.echo(f" ~ overwritten {overwritten}") - for skipped in result.skipped_files: - typer.echo(f" - skipped {skipped} (use --force to overwrite)") - - if result.created_files or result.overwritten_files: - typer.echo("") - typer.echo("Next steps:") - typer.echo( - " 1. Set GitHub repository variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID" - ) - typer.echo( - " 2. Set GitHub repository secret: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT" - ) - typer.echo( - " 3. Configure Azure Workload Identity Federation (see docs/ci-github-actions.md)" - ) - typer.echo(" 4. Commit and push the workflow file") - elif result.skipped_files: - typer.echo("No files written. Use --force to overwrite existing workflow.") - - -@trace_app.command("init") -def cmd_trace_init() -> None: - """Set up tracing integration (planned).""" - _planned_command("agentops trace init") - - -@monitor_app.command("setup") -def cmd_monitor_setup() -> None: - """Set up monitoring resources (planned).""" - _planned_command("agentops monitor setup") - - -@monitor_app.command("dashboard") -def cmd_monitor_dashboard() -> None: - """Show monitoring dashboard setup instructions (planned).""" - _planned_command("agentops monitor dashboard") - - -@monitor_app.command("alert") -def cmd_monitor_alert() -> None: - """Configure monitoring alerts (planned).""" - _planned_command("agentops monitor alert") - - -@model_app.command("list") -def cmd_model_list() -> None: - """List chat-capable models in Foundry project (planned).""" - _planned_command("agentops model list") - - -@agent_app.command("list") -def cmd_agent_list() -> None: - """List agents in Foundry project (planned).""" - _planned_command("agentops agent list") - - def main() -> None: app() diff --git a/src/agentops/cli/browse_commands.py b/src/agentops/cli/browse_commands.py new file mode 100644 index 00000000..c3db6139 --- /dev/null +++ b/src/agentops/cli/browse_commands.py @@ -0,0 +1,181 @@ +"""Browse sub-commands: bundle list/show, run list/show/view.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Annotated + +import typer + +from agentops.cli._planned import _planned_command + +run_app = typer.Typer(help="Run history and inspection commands.") +bundle_app = typer.Typer(help="Bundle browsing commands.") + + +# --------------------------------------------------------------------------- +# bundle list / show +# --------------------------------------------------------------------------- + + +@bundle_app.command("list") +def cmd_bundle_list( + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """List available evaluation bundles.""" + from agentops.services.browse import list_bundles + + try: + result = list_bundles(directory=directory) + except FileNotFoundError as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + if not result.bundles: + typer.echo(f"No bundles found in {result.bundles_dir}") + return + + typer.echo(f"Bundles in {result.bundles_dir}:\n") + for b in result.bundles: + evals = ", ".join(b.evaluators) if b.evaluators else "(none)" + typer.echo(f" {b.name}") + if b.description: + typer.echo(f" {b.description}") + typer.echo(f" evaluators: {evals}") + typer.echo(f" thresholds: {b.thresholds}") + typer.echo("") + + +@bundle_app.command("show") +def cmd_bundle_show( + bundle_name: str = typer.Argument(help="Bundle name or filename (without .yaml)."), + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """Show details of an evaluation bundle.""" + from agentops.services.browse import show_bundle + + try: + detail = show_bundle(bundle_name=bundle_name, directory=directory) + except (FileNotFoundError, ValueError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + typer.echo(f"Bundle: {detail.name}") + typer.echo(f"Path: {detail.path}") + if detail.description: + typer.echo(f"Description: {detail.description}") + if detail.metadata: + typer.echo(f"Metadata: {detail.metadata}") + typer.echo("") + typer.echo("Evaluators:") + for e in detail.evaluators: + status = "enabled" if e["enabled"] else "disabled" + typer.echo(f" {e['name']} (source={e['source']}, {status})") + typer.echo("") + typer.echo("Thresholds:") + for t in detail.thresholds: + value = t["value"] if t["value"] is not None else "" + typer.echo(f" {t['evaluator']} {t['criteria']} {value}") + + +# --------------------------------------------------------------------------- +# run list / show / view +# --------------------------------------------------------------------------- + + +@run_app.command("list") +def cmd_run_list( + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """List past evaluation runs.""" + from agentops.services.browse import list_runs + + try: + result = list_runs(directory=directory) + except FileNotFoundError as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + if not result.runs: + typer.echo(f"No runs found in {result.results_dir}") + return + + typer.echo(f"Runs in {result.results_dir}:\n") + for run in result.runs: + status = "PASS" if run.overall_passed else "FAIL" + typer.echo( + f" {run.run_id} {status:<4} " + f"bundle={run.bundle_name} dataset={run.dataset_name} " + f"duration={run.duration_seconds:.1f}s" + ) + + +@run_app.command("show") +def cmd_run_show( + run_id: str = typer.Argument(help="Run ID (timestamp folder name or 'latest')."), + directory: Path = typer.Option( + Path("."), + "--dir", + help="Workspace directory.", + ), +) -> None: + """Show summary of a past evaluation run.""" + from agentops.services.browse import show_run + + try: + detail = show_run(run_id=run_id, directory=directory) + except (FileNotFoundError, ValueError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + + status = "PASS" if detail.overall_passed else "FAIL" + typer.echo(f"Run: {detail.run_id}") + typer.echo(f"Status: {status}") + typer.echo(f"Bundle: {detail.bundle_name}") + typer.echo(f"Dataset: {detail.dataset_name}") + typer.echo(f"Backend: {detail.backend}") + typer.echo(f"Started: {detail.started_at}") + typer.echo(f"Duration: {detail.duration_seconds:.1f}s") + typer.echo(f"Items: {detail.items_passed}/{detail.items_total} passed") + typer.echo("") + typer.echo("Metrics:") + for m in detail.metrics: + typer.echo(f" {m['name']:<40} {m['value']:.4f}") + if detail.thresholds: + typer.echo("") + typer.echo("Thresholds:") + for t in detail.thresholds: + mark = "PASS" if t["passed"] else "FAIL" + typer.echo( + f" {t['evaluator']:<40} {t['criteria']} {t['expected']:<10} " + f"actual={t['actual']:<10} {mark}" + ) + if detail.foundry_url: + typer.echo(f"\nFoundry portal: {detail.foundry_url}") + if detail.report_path: + typer.echo(f"Report: {detail.report_path}") + + +@run_app.command("view") +def cmd_run_view( + run_id: str, + entry: Annotated[ + int | None, + typer.Option("--entry", help="Optional row/entry index for deep inspection."), + ] = None, +) -> None: + """Deep-inspect run details (planned).""" + _ = run_id, entry + _planned_command("agentops run view [--entry N]") diff --git a/src/agentops/cli/config_commands.py b/src/agentops/cli/config_commands.py new file mode 100644 index 00000000..f435b444 --- /dev/null +++ b/src/agentops/cli/config_commands.py @@ -0,0 +1,71 @@ +"""Config sub-commands: config validate, config show, config cicd.""" + +from __future__ import annotations + +from pathlib import Path + +import typer + +from agentops.cli._planned import _planned_command +from agentops.utils.logging import get_logger + +log = get_logger(__name__) + +config_app = typer.Typer(help="Configuration utility commands.") + + +@config_app.command("validate") +def cmd_config_validate() -> None: + """Validate configuration files (planned).""" + _planned_command("agentops config validate") + + +@config_app.command("show") +def cmd_config_show() -> None: + """Show merged runtime config (planned).""" + _planned_command("agentops config show") + + +@config_app.command("cicd") +def cmd_config_cicd( + force: bool = typer.Option( + False, "--force", help="Overwrite existing workflow file." + ), + directory: Path = typer.Option( + Path("."), + "--dir", + help="Target repository root directory.", + ), +) -> None: + """Generate a GitHub Actions workflow for AgentOps evaluation.""" + from agentops.services.cicd import generate_cicd_workflow + + log.debug("cmd_config_cicd called force=%s dir=%s", force, directory) + try: + result = generate_cicd_workflow(directory=directory, force=force) + except Exception as exc: + typer.echo(f"Error: failed to generate CI/CD workflow: {exc}", err=True) + raise typer.Exit(code=1) from exc + + for created in result.created_files: + typer.echo(f" + created {created}") + for overwritten in result.overwritten_files: + typer.echo(f" ~ overwritten {overwritten}") + for skipped in result.skipped_files: + typer.echo(f" - skipped {skipped} (use --force to overwrite)") + + if result.created_files or result.overwritten_files: + typer.echo("") + typer.echo("Next steps:") + typer.echo( + " 1. Set GitHub repository variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID" + ) + typer.echo( + " 2. Set GitHub repository secret: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT" + ) + typer.echo( + " 3. Configure Azure Workload Identity Federation (see docs/ci-github-actions.md)" + ) + typer.echo(" 4. Commit and push the workflow file") + elif result.skipped_files: + typer.echo("No files written. Use --force to overwrite existing workflow.") diff --git a/src/agentops/cli/eval_commands.py b/src/agentops/cli/eval_commands.py new file mode 100644 index 00000000..efb10c73 --- /dev/null +++ b/src/agentops/cli/eval_commands.py @@ -0,0 +1,129 @@ +"""Evaluation sub-commands: eval run, eval compare.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Annotated + +import typer + +from agentops.utils.logging import get_logger + +log = get_logger(__name__) + +eval_app = typer.Typer( + help=( + "Evaluation sub-commands. " + "Use `agentops eval run --help` to see run options like " + "`--config` (`-c`) and `--output` (`-o`)." + ) +) + + +@eval_app.command("run") +def cmd_eval_run( + config: Annotated[ + Path | None, + typer.Option( + "--config", + "-c", + help="Path to run.yaml (default: .agentops/run.yaml).", + ), + ] = None, + output: Annotated[ + Path | None, + typer.Option("--output", "-o", help="Output directory for results."), + ] = None, + report_format: Annotated[ + str, typer.Option("--format", "-f", help="Report format: md, html, or all.") + ] = "md", +) -> None: + """Run an evaluation defined in a run.yaml file.""" + from agentops.services.runner import run_evaluation + + if report_format not in ("md", "html", "all"): + typer.echo("Error: --format must be md, html, or all.", err=True) + raise typer.Exit(code=1) + + log.debug( + "cmd_eval_run called config=%s output=%s format=%s", + config, + output, + report_format, + ) + try: + run_result = run_evaluation( + config_path=config, output_override=output, report_format=report_format + ) + except Exception as exc: + typer.echo(f"Error: evaluation failed: {exc}", err=True) + raise typer.Exit(code=1) from exc + + typer.echo(f"Evaluation output directory: {run_result.output_dir}") + typer.echo(f"results.json: {run_result.results_path}") + typer.echo(f"report: {run_result.report_path}") + + if run_result.exit_code == 2: + typer.echo("Threshold status: FAILED") + raise typer.Exit(code=2) + + typer.echo("Threshold status: PASSED") + + +@eval_app.command("compare") +def cmd_eval_compare( + runs: Annotated[ + str, + typer.Option( + "--runs", help="Comma-separated run ids (example: ID1,ID2 or ID1,ID2,ID3)." + ), + ], + output: Annotated[ + Path | None, + typer.Option("--output", "-o", help="Output directory for comparison results."), + ] = None, + report_format: Annotated[ + str, typer.Option("--format", "-f", help="Report format: md, html, or all.") + ] = "md", +) -> None: + """Compare two or more past evaluation runs.""" + from agentops.services.comparison import run_comparison + + if report_format not in ("md", "html", "all"): + typer.echo("Error: --format must be md, html, or all.", err=True) + raise typer.Exit(code=1) + + parts = [p.strip() for p in runs.split(",")] + if len(parts) < 2: + typer.echo( + "Error: --runs must contain at least two comma-separated run ids.", err=True + ) + raise typer.Exit(code=1) + + log.debug( + "cmd_eval_compare called runs=%s output=%s format=%s", + parts, + output, + report_format, + ) + try: + result = run_comparison( + run_ids=parts, + output_dir=output, + report_format=report_format, + ) + except Exception as exc: + typer.echo(f"Error: comparison failed: {exc}", err=True) + raise typer.Exit(code=1) from exc + + typer.echo(f"comparison.json: {result.comparison_json_path}") + if result.comparison_md_path: + typer.echo(f"comparison.md: {result.comparison_md_path}") + if result.comparison_html_path: + typer.echo(f"comparison.html: {result.comparison_html_path}") + + if result.has_regressions: + typer.echo("Comparison verdict: REGRESSIONS DETECTED") + raise typer.Exit(code=2) + + typer.echo("Comparison verdict: NO REGRESSIONS") diff --git a/src/agentops/cli/planned.py b/src/agentops/cli/planned.py new file mode 100644 index 00000000..fd9e3ab5 --- /dev/null +++ b/src/agentops/cli/planned.py @@ -0,0 +1,87 @@ +"""Planned stub commands: dataset, monitor, trace, model, agent.""" + +from __future__ import annotations + +import typer + +from agentops.cli._planned import _planned_command + +dataset_app = typer.Typer(help="Dataset utility commands.") +monitor_app = typer.Typer(help="Monitoring setup and operations.") +trace_app = typer.Typer(help="Tracing commands.") +model_app = typer.Typer(help="Model discovery commands.") +agent_app = typer.Typer(help="Agent discovery commands.") + + +# --------------------------------------------------------------------------- +# dataset +# --------------------------------------------------------------------------- + + +@dataset_app.command("validate") +def cmd_dataset_validate() -> None: + """Validate dataset files (planned).""" + _planned_command("agentops dataset validate") + + +@dataset_app.command("describe") +def cmd_dataset_describe() -> None: + """Describe dataset schema and shape (planned).""" + _planned_command("agentops dataset describe") + + +@dataset_app.command("import") +def cmd_dataset_import() -> None: + """Import external datasets (planned).""" + _planned_command("agentops dataset import") + + +# --------------------------------------------------------------------------- +# monitor +# --------------------------------------------------------------------------- + + +@monitor_app.command("setup") +def cmd_monitor_setup() -> None: + """Set up monitoring resources (planned).""" + _planned_command("agentops monitor setup") + + +@monitor_app.command("dashboard") +def cmd_monitor_dashboard() -> None: + """Show monitoring dashboard setup instructions (planned).""" + _planned_command("agentops monitor dashboard") + + +@monitor_app.command("alert") +def cmd_monitor_alert() -> None: + """Configure monitoring alerts (planned).""" + _planned_command("agentops monitor alert") + + +# --------------------------------------------------------------------------- +# trace +# --------------------------------------------------------------------------- + + +@trace_app.command("init") +def cmd_trace_init() -> None: + """Set up tracing integration (planned).""" + _planned_command("agentops trace init") + + +# --------------------------------------------------------------------------- +# model / agent +# --------------------------------------------------------------------------- + + +@model_app.command("list") +def cmd_model_list() -> None: + """List chat-capable models in Foundry project (planned).""" + _planned_command("agentops model list") + + +@agent_app.command("list") +def cmd_agent_list() -> None: + """List agents in Foundry project (planned).""" + _planned_command("agentops agent list") diff --git a/src/agentops/cli/report_commands.py b/src/agentops/cli/report_commands.py new file mode 100644 index 00000000..93c4ac3c --- /dev/null +++ b/src/agentops/cli/report_commands.py @@ -0,0 +1,83 @@ +"""Report sub-commands: report, report show, report export.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Annotated + +import typer + +from agentops.cli._planned import _planned_command +from agentops.utils.logging import get_logger + +log = get_logger(__name__) + +DEFAULT_REPORT_INPUT = Path(".agentops/results/latest/results.json") + +report_app = typer.Typer(help="Reporting commands.", invoke_without_command=True) + + +@report_app.callback(invoke_without_command=True) +def cmd_report( + ctx: typer.Context, + results_in: Annotated[ + Path | None, + typer.Option( + "--in", + help=( + "Path to results.json. " + "If omitted, uses .agentops/results/latest/results.json" + ), + ), + ] = None, + report_out: Annotated[ + Path | None, + typer.Option("--out", help="Output path for report."), + ] = None, + report_format: Annotated[ + str, typer.Option("--format", "-f", help="Report format: md, html, or all.") + ] = "md", +) -> None: + """Regenerate report from a results.json file.""" + from agentops.services.reporting import generate_report_from_results + + if ctx.invoked_subcommand is not None: + return + + if report_format not in ("md", "html", "all"): + typer.echo("Error: --format must be md, html, or all.", err=True) + raise typer.Exit(code=1) + + resolved_results_in = results_in or DEFAULT_REPORT_INPUT + log.debug( + "cmd_report called in=%s out=%s format=%s", + resolved_results_in, + report_out, + report_format, + ) + try: + report_result = generate_report_from_results( + results_path=resolved_results_in, + output_path=report_out, + report_format=report_format, + ) + except Exception as exc: + typer.echo(f"Error: report generation failed: {exc}", err=True) + raise typer.Exit(code=1) from exc + + typer.echo(f"Loaded results: {report_result.input_results_path}") + typer.echo(f"Generated report: {report_result.output_report_path}") + if report_result.html_report_path: + typer.echo(f"Generated report: {report_result.html_report_path}") + + +@report_app.command("show") +def cmd_report_show() -> None: + """View reports in table format (planned).""" + _planned_command("agentops report show") + + +@report_app.command("export") +def cmd_report_export() -> None: + """Export reports in JSON/Markdown/CSV formats (planned).""" + _planned_command("agentops report export") From 6017f3a0629dea12e413e750fee1ce6f8f653fb3 Mon Sep 17 00:00:00 2001 From: Dongbumlee Date: Tue, 7 Apr 2026 12:16:33 -0700 Subject: [PATCH 3/3] refactor: remove planned.py, move stubs to their command files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move dataset stubs to dataset_commands.py (ready for Tier 2 implementation) - Inline monitor/trace/model/agent stubs in app.py (1-2 commands each) - Delete planned.py — no more catch-all stub file --- src/agentops/cli/app.py | 58 ++++++++++++++++--- src/agentops/cli/dataset_commands.py | 27 +++++++++ src/agentops/cli/planned.py | 87 ---------------------------- 3 files changed, 76 insertions(+), 96 deletions(-) create mode 100644 src/agentops/cli/dataset_commands.py delete mode 100644 src/agentops/cli/planned.py diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py index 09bd54a8..e3eb453a 100644 --- a/src/agentops/cli/app.py +++ b/src/agentops/cli/app.py @@ -24,17 +24,57 @@ # --------------------------------------------------------------------------- # Import sub-command apps from their modules # --------------------------------------------------------------------------- -from agentops.cli.eval_commands import eval_app -from agentops.cli.report_commands import report_app +from agentops.cli._planned import _planned_command from agentops.cli.browse_commands import bundle_app, run_app from agentops.cli.config_commands import config_app -from agentops.cli.planned import ( - agent_app, - dataset_app, - model_app, - monitor_app, - trace_app, -) +from agentops.cli.dataset_commands import dataset_app +from agentops.cli.eval_commands import eval_app +from agentops.cli.report_commands import report_app + +# --------------------------------------------------------------------------- +# Stub sub-apps for future command groups (1-2 commands each) +# --------------------------------------------------------------------------- +monitor_app = typer.Typer(help="Monitoring setup and operations.") +trace_app = typer.Typer(help="Tracing commands.") +model_app = typer.Typer(help="Model discovery commands.") +agent_app = typer.Typer(help="Agent discovery commands.") + + +@monitor_app.command("setup") +def cmd_monitor_setup() -> None: + """Set up monitoring resources (planned).""" + _planned_command("agentops monitor setup") + + +@monitor_app.command("dashboard") +def cmd_monitor_dashboard() -> None: + """Show monitoring dashboard setup instructions (planned).""" + _planned_command("agentops monitor dashboard") + + +@monitor_app.command("alert") +def cmd_monitor_alert() -> None: + """Configure monitoring alerts (planned).""" + _planned_command("agentops monitor alert") + + +@trace_app.command("init") +def cmd_trace_init() -> None: + """Set up tracing integration (planned).""" + _planned_command("agentops trace init") + + +@model_app.command("list") +def cmd_model_list() -> None: + """List chat-capable models in Foundry project (planned).""" + _planned_command("agentops model list") + + +@agent_app.command("list") +def cmd_agent_list() -> None: + """List agents in Foundry project (planned).""" + _planned_command("agentops agent list") + # --------------------------------------------------------------------------- # Root app diff --git a/src/agentops/cli/dataset_commands.py b/src/agentops/cli/dataset_commands.py new file mode 100644 index 00000000..c768963c --- /dev/null +++ b/src/agentops/cli/dataset_commands.py @@ -0,0 +1,27 @@ +"""Dataset sub-commands: dataset validate, dataset describe, dataset import.""" + +from __future__ import annotations + +import typer + +from agentops.cli._planned import _planned_command + +dataset_app = typer.Typer(help="Dataset utility commands.") + + +@dataset_app.command("validate") +def cmd_dataset_validate() -> None: + """Validate dataset files (planned).""" + _planned_command("agentops dataset validate") + + +@dataset_app.command("describe") +def cmd_dataset_describe() -> None: + """Describe dataset schema and shape (planned).""" + _planned_command("agentops dataset describe") + + +@dataset_app.command("import") +def cmd_dataset_import() -> None: + """Import external datasets (planned).""" + _planned_command("agentops dataset import") diff --git a/src/agentops/cli/planned.py b/src/agentops/cli/planned.py deleted file mode 100644 index fd9e3ab5..00000000 --- a/src/agentops/cli/planned.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Planned stub commands: dataset, monitor, trace, model, agent.""" - -from __future__ import annotations - -import typer - -from agentops.cli._planned import _planned_command - -dataset_app = typer.Typer(help="Dataset utility commands.") -monitor_app = typer.Typer(help="Monitoring setup and operations.") -trace_app = typer.Typer(help="Tracing commands.") -model_app = typer.Typer(help="Model discovery commands.") -agent_app = typer.Typer(help="Agent discovery commands.") - - -# --------------------------------------------------------------------------- -# dataset -# --------------------------------------------------------------------------- - - -@dataset_app.command("validate") -def cmd_dataset_validate() -> None: - """Validate dataset files (planned).""" - _planned_command("agentops dataset validate") - - -@dataset_app.command("describe") -def cmd_dataset_describe() -> None: - """Describe dataset schema and shape (planned).""" - _planned_command("agentops dataset describe") - - -@dataset_app.command("import") -def cmd_dataset_import() -> None: - """Import external datasets (planned).""" - _planned_command("agentops dataset import") - - -# --------------------------------------------------------------------------- -# monitor -# --------------------------------------------------------------------------- - - -@monitor_app.command("setup") -def cmd_monitor_setup() -> None: - """Set up monitoring resources (planned).""" - _planned_command("agentops monitor setup") - - -@monitor_app.command("dashboard") -def cmd_monitor_dashboard() -> None: - """Show monitoring dashboard setup instructions (planned).""" - _planned_command("agentops monitor dashboard") - - -@monitor_app.command("alert") -def cmd_monitor_alert() -> None: - """Configure monitoring alerts (planned).""" - _planned_command("agentops monitor alert") - - -# --------------------------------------------------------------------------- -# trace -# --------------------------------------------------------------------------- - - -@trace_app.command("init") -def cmd_trace_init() -> None: - """Set up tracing integration (planned).""" - _planned_command("agentops trace init") - - -# --------------------------------------------------------------------------- -# model / agent -# --------------------------------------------------------------------------- - - -@model_app.command("list") -def cmd_model_list() -> None: - """List chat-capable models in Foundry project (planned).""" - _planned_command("agentops model list") - - -@agent_app.command("list") -def cmd_agent_list() -> None: - """List agents in Foundry project (planned).""" - _planned_command("agentops agent list")