From 6dcf222395b0f06fe1309f3df13b56579d0799fc Mon Sep 17 00:00:00 2001
From: "David E. Bernal Neira" <dbernaln@purdue.edu>
Date: Sun, 10 May 2026 10:39:45 -0400
Subject: [PATCH] Add single-case benchmark runner (#17)

---
 benchmarks/README.md          |  20 ++
 benchmarks/adapters.py        |  16 +-
 benchmarks/run_single_case.py | 332 ++++++++++++++++++++++++++++++++++
 tests/test_benchmarks.py      | 186 ++++++++++++++++++-
 4 files changed, 544 insertions(+), 10 deletions(-)
 create mode 100644 benchmarks/run_single_case.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 489769e..81219a3 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -44,9 +44,29 @@ Notebook cells produce:
 - Scatter plots and histograms
 - Summary interpretation
 
+### Debug One Case
+Use `run_single_case.py` to reproduce one SciPy baseline and one Pyomo run
+without writing JSONL or figure artifacts:
+
+```bash
+python benchmarks/run_single_case.py \
+  --task both --scenario baseline \
+  --set product.A1=20 \
+  --set ht.KC=4e-4 \
+  --method fd \
+  --n-elements 24 \
+  --tsh-ramp 40 \
+  --pch-ramp 0.05 \
+  --tee
+```
+
+The runner prints compact SciPy and Pyomo summaries with objective time, solver
+status, termination condition, trajectory size, and validation metrics.
+
 ## Core Modules
 
 - **`grid_cli.py`** — CLI for N-dimensional grid generation (replaces legacy `run_grid*.py`)
+- **`run_single_case.py`** — CLI for one-case SciPy/Pyomo debug runs without generated artifacts
 - **`adapters.py`** — Normalized scipy/Pyomo runners with discretization metadata
 - **`scenarios.py`** — Scenario definitions (vial, product, ht, eq_cap, nVial)
 - **`schema.py`** — Versioned record serialization (v2: trajectories + hashing)
diff --git a/benchmarks/adapters.py b/benchmarks/adapters.py
index 576d6d8..fdebdb4 100644
--- a/benchmarks/adapters.py
+++ b/benchmarks/adapters.py
@@ -16,9 +16,8 @@
 from typing import Any
 
 import numpy as np
-from lyopronto import opt_Pch, opt_Pch_Tsh, opt_Tsh
-
 from benchmarks.validate import compare_trajectories
+from lyopronto import opt_Pch, opt_Pch_Tsh, opt_Tsh
 
 DRYNESS_TARGET = 98.9  # Percentage (0-100)
 ACCEPTABLE_PYOMO_TERMINATIONS = {"optimal", "locallyoptimal"}
@@ -245,15 +244,11 @@ def ipopt_replay_adapter(
             "residual_tol": residual_tol,
             "max_constraint_residual": meta.get("max_constraint_residual"),
             "residuals": meta.get("residuals"),
-            "max_scipy_trajectory_residual": meta.get(
-                "max_scipy_trajectory_residual"
-            ),
+            "max_scipy_trajectory_residual": meta.get("max_scipy_trajectory_residual"),
             "scipy_trajectory_residuals": meta.get("scipy_trajectory_residuals"),
             "max_scipy_mesh_residual": meta.get("max_scipy_mesh_residual"),
             "scipy_mesh_residuals": meta.get("scipy_mesh_residuals"),
-            "max_replay_solution_residual": meta.get(
-                "max_replay_solution_residual"
-            ),
+            "max_replay_solution_residual": meta.get("max_replay_solution_residual"),
             "replay_solution_residuals": meta.get("replay_solution_residuals"),
             "trajectory_comparison": comparison,
         },
@@ -282,6 +277,7 @@ def pyomo_adapter(
     use_secant_ramp_constraints: bool = True,  # Add secant slope constraints for collocation
     solver_cpu_time: float | None = None,
     solver_wall_time: float | None = None,
+    tee: bool = False,
 ) -> dict[str, Any]:
     """Run Pyomo optimizer counterpart for specified task with discretization controls.
 
@@ -301,6 +297,8 @@ def pyomo_adapter(
         When using collocation with ramp constraints, add explicit secant slope constraints.
         Default True. If False, only polynomial derivative constraints are used, which may
         allow numerical derivatives between mesh points to exceed the ramp limit (by ~15%).
+    tee : bool
+        Show solver output for interactive debugging.
     """
     pyomo_opt = _load_pyomo_optimizers()
 
@@ -343,7 +341,7 @@ def pyomo_adapter(
             treat_n_elements_as_effective=treat_eff,
             warmstart_scipy=warmstart,
             return_metadata=True,
-            tee=False,
+            tee=tee,
             use_secant_ramp_constraints=use_secant_ramp_constraints,
             solver_cpu_time=solver_cpu_time,
             solver_wall_time=solver_wall_time,
diff --git a/benchmarks/run_single_case.py b/benchmarks/run_single_case.py
new file mode 100644
index 0000000..041933e
--- /dev/null
+++ b/benchmarks/run_single_case.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2026, SECQUOIA
+
+"""Run one benchmark case with SciPy and Pyomo summaries.
+
+This debug runner reuses the benchmark adapters and validation helpers without
+writing JSONL records or image artifacts. It is intended for reproducing one
+case before adding it to a larger grid run.
+"""
+
+from __future__ import annotations
+
+import argparse
+import ast
+import copy
+import sys
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+if __package__ in {None, ""}:
+    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from benchmarks.adapters import pyomo_adapter, scipy_adapter
+from benchmarks.grid_cli import (
+    metrics_failed,
+    pyomo_metric_kwargs,
+    scipy_metric_kwargs,
+    set_nested,
+)
+from benchmarks.scenarios import SCENARIOS
+from benchmarks.validate import compute_residuals
+
+
+def _parse_scalar(raw: str) -> Any:
+    """Parse one CLI value while preserving strings when numeric parsing fails."""
+    lowered = raw.strip().lower()
+    if lowered in {"true", "false"}:
+        return lowered == "true"
+    if lowered in {"none", "null"}:
+        return None
+    try:
+        return ast.literal_eval(raw)
+    except (SyntaxError, ValueError):
+        try:
+            return float("1" + raw) if lowered.startswith("e") else float(raw)
+        except ValueError:
+            return raw
+
+
+def parse_override(spec: str) -> tuple[str, Any]:
+    """Parse a single PATH=VALUE override."""
+    if "=" not in spec:
+        raise argparse.ArgumentTypeError(
+            f"Invalid override '{spec}'. Expected PATH=VALUE."
+        )
+    path, raw_value = spec.split("=", 1)
+    path = path.strip()
+    if not path:
+        raise argparse.ArgumentTypeError(
+            f"Invalid override '{spec}'. Expected non-empty PATH."
+        )
+    return path, _parse_scalar(raw_value.strip())
+
+
+def build_case(scenario_name: str, overrides: list[tuple[str, Any]]) -> dict[str, Any]:
+    """Return a scenario copy with single-case overrides applied."""
+    scenario = copy.deepcopy(SCENARIOS[scenario_name])
+    for path, value in overrides:
+        set_nested(scenario, path, value)
+    return scenario
+
+
+def _trajectory_size(result: dict[str, Any]) -> str:
+    traj = result.get("trajectory")
+    shape = getattr(traj, "shape", ())
+    if len(shape) == 2:
+        return f"{shape[0]} points x {shape[1]} columns"
+    if len(shape) == 1:
+        return f"{shape[0]} points"
+    return "0 points"
+
+
+def _fmt(value: Any, digits: int = 4) -> str:
+    if value is None:
+        return "n/a"
+    if isinstance(value, float):
+        return f"{value:.{digits}g}"
+    return str(value)
+
+
+def _compute_metrics(
+    result: dict[str, Any], metric_kwargs: dict[str, Any]
+) -> dict[str, Any]:
+    traj = result.get("trajectory")
+    if traj is None or getattr(traj, "size", 0) == 0:
+        return compute_residuals(np.array([]), **metric_kwargs)
+    return compute_residuals(traj, **metric_kwargs)
+
+
+def _metric_items(metrics: dict[str, Any]) -> list[tuple[str, Any]]:
+    keys = [
+        "final_percent_dried",
+        "dryness_target_met",
+        "product_temp_ok",
+        "max_product_temp_violation_C",
+        "tsh_ramp_ok",
+        "max_tsh_ramp_C_per_hr",
+        "max_tsh_ramp_violation_C_per_hr",
+        "pch_ramp_ok",
+        "max_pch_ramp_Torr_per_hr",
+        "max_pch_ramp_violation_Torr_per_hr",
+    ]
+    return [(key, metrics.get(key)) for key in keys if key in metrics]
+
+
+def print_summary(name: str, result: dict[str, Any], metrics: dict[str, Any]) -> None:
+    """Print a compact solver and validation summary."""
+    solver = result.get("solver") or {}
+    print(f"\n{name} summary")
+    print(f"  success: {result.get('success')}")
+    print(f"  objective_time_hr: {_fmt(result.get('objective_time_hr'))}")
+    print(f"  wall_time_s: {_fmt(result.get('wall_time_s'))}")
+    print(f"  solver_status: {_fmt(solver.get('status'))}")
+    print(f"  termination_condition: {_fmt(solver.get('termination_condition'))}")
+    print(f"  trajectory_size: {_trajectory_size(result)}")
+    if result.get("message"):
+        print(f"  message: {result['message']}")
+    print("  validation_metrics:")
+    for key, value in _metric_items(metrics):
+        print(f"    {key}: {_fmt(value)}")
+
+
+def run_case(args: argparse.Namespace) -> int:
+    try:
+        overrides = [parse_override(spec) for spec in args.overrides]
+    except argparse.ArgumentTypeError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 2
+    scenario = build_case(args.scenario, overrides)
+
+    vial = scenario["vial"]
+    product = scenario["product"]
+    ht = scenario["ht"]
+    eq_cap = scenario["eq_cap"]
+    n_vial = scenario.get("nVial", 400)
+
+    print("Single-case benchmark")
+    print(f"  task: {args.task}")
+    print(f"  scenario: {args.scenario}")
+    print(f"  method: {args.method}")
+    print(f"  n_elements: {args.n_elements}")
+    if args.method == "colloc":
+        print(f"  n_collocation: {args.n_collocation}")
+        print(f"  effective_nfe: {not args.raw_colloc}")
+    print(f"  warmstart: {args.warmstart}")
+    print(f"  tee: {args.tee}")
+    print(f"  tsh_ramp: {_fmt(args.tsh_ramp)}")
+    print(f"  pch_ramp: {_fmt(args.pch_ramp)}")
+    if overrides:
+        print("  overrides:")
+        for path, value in overrides:
+            print(f"    {path}: {_fmt(value)}")
+
+    scipy_result = scipy_adapter(
+        args.task,
+        vial,
+        product,
+        ht,
+        eq_cap,
+        n_vial,
+        scenario,
+        dt=args.dt,
+    )
+    scipy_metrics = _compute_metrics(
+        scipy_result,
+        scipy_metric_kwargs(args, product),
+    )
+    print_summary("SciPy", scipy_result, scipy_metrics)
+
+    try:
+        pyomo_result = pyomo_adapter(
+            args.task,
+            vial,
+            product,
+            ht,
+            eq_cap,
+            n_vial,
+            scenario,
+            dt=args.dt,
+            warmstart=args.warmstart,
+            method=args.method,
+            n_elements=args.n_elements,
+            n_collocation=args.n_collocation,
+            effective_nfe=(not args.raw_colloc),
+            tsh_ramp_rate=args.tsh_ramp,
+            pch_ramp_rate=args.pch_ramp,
+            use_secant_ramp_constraints=(not args.no_secant_constraints),
+            solver_cpu_time=args.solver_timeout,
+            solver_wall_time=args.solver_wall_time,
+            tee=args.tee,
+        )
+    except RuntimeError as exc:
+        print("\nPyomo summary")
+        print("  success: False")
+        print(f"  message: {exc}", file=sys.stderr)
+        return 1
+
+    pyomo_metrics = _compute_metrics(
+        pyomo_result,
+        pyomo_metric_kwargs(args, product),
+    )
+    print_summary("Pyomo", pyomo_result, pyomo_metrics)
+
+    failed = (
+        (not scipy_result["success"])
+        or (not pyomo_result["success"])
+        or metrics_failed(scipy_metrics)
+        or metrics_failed(pyomo_metrics)
+    )
+    return 1 if failed else 0
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="run_single_case",
+        description="Run one SciPy/Pyomo benchmark case without writing artifacts.",
+    )
+    parser.add_argument(
+        "--task",
+        required=True,
+        choices=["Tsh", "Pch", "both"],
+        help="Optimization task variant.",
+    )
+    parser.add_argument(
+        "--scenario",
+        default="baseline",
+        choices=sorted(SCENARIOS),
+        help="Scenario name from benchmarks.scenarios.",
+    )
+    parser.add_argument(
+        "--set",
+        "--override",
+        dest="overrides",
+        action="append",
+        default=[],
+        metavar="PATH=VALUE",
+        help="Single parameter override using dotted scenario paths; repeatable.",
+    )
+    parser.add_argument(
+        "--method",
+        default="fd",
+        choices=["fd", "colloc"],
+        help="Pyomo discretization method.",
+    )
+    parser.add_argument(
+        "--n-elements",
+        type=int,
+        default=24,
+        help="Number of finite elements.",
+    )
+    parser.add_argument(
+        "--n-collocation",
+        type=int,
+        default=3,
+        help="Collocation points per element.",
+    )
+    parser.add_argument(
+        "--raw-colloc",
+        action="store_true",
+        help="Disable effective-nfe parity reporting for collocation.",
+    )
+    parser.add_argument(
+        "--warmstart",
+        action="store_true",
+        help="Enable scipy warmstart for the Pyomo solve.",
+    )
+    parser.add_argument(
+        "--dt",
+        type=float,
+        default=0.01,
+        help="Time step for scipy baseline trajectory.",
+    )
+    parser.add_argument(
+        "--tsh-ramp",
+        type=float,
+        default=None,
+        help="Max shelf temperature ramp rate [C/hr] for Pyomo and validation.",
+    )
+    parser.add_argument(
+        "--pch-ramp",
+        type=float,
+        default=None,
+        help="Max chamber pressure ramp rate [Torr/hr] for Pyomo and validation.",
+    )
+    parser.add_argument(
+        "--no-secant-constraints",
+        action="store_true",
+        help="Disable explicit secant slope constraints for collocation ramp rates.",
+    )
+    parser.add_argument(
+        "--solver-timeout",
+        type=float,
+        default=None,
+        help="Optional IPOPT CPU-time limit in seconds.",
+    )
+    parser.add_argument(
+        "--solver-wall-time",
+        type=float,
+        default=None,
+        help="Optional IPOPT wall-clock time limit in seconds when supported.",
+    )
+    parser.add_argument(
+        "--tee",
+        "--solver-verbose",
+        dest="tee",
+        action="store_true",
+        help="Show solver output from Pyomo/IPOPT.",
+    )
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    return run_case(args)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
index 21421e0..744850f 100644
--- a/tests/test_benchmarks.py
+++ b/tests/test_benchmarks.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import pytest
-from benchmarks import adapters
+from benchmarks import adapters, run_single_case
 from benchmarks.grid_cli import (
     build_parser,
     metrics_failed,
@@ -44,6 +44,188 @@ def test_grid_cli_script_entrypoint_help(repo_root, tmp_path):
     assert "Benchmark generation CLI" in result.stdout
 
 
+def test_single_case_script_entrypoint_help(repo_root, tmp_path):
+    env = os.environ.copy()
+    env["MPLCONFIGDIR"] = str(tmp_path / "mpl")
+
+    result = subprocess.run(
+        [sys.executable, "benchmarks/run_single_case.py", "--help"],
+        cwd=repo_root,
+        env=env,
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "Run one SciPy/Pyomo benchmark case" in result.stdout
+    assert "--set" in result.stdout
+    assert "--tee" in result.stdout
+
+
+def test_single_case_parser_accepts_debug_options():
+    parser = run_single_case.build_parser()
+
+    args = parser.parse_args(
+        [
+            "--task",
+            "both",
+            "--scenario",
+            "baseline",
+            "--set",
+            "product.A1=20",
+            "--set",
+            "ht.KC=4e-4",
+            "--method",
+            "colloc",
+            "--n-elements",
+            "8",
+            "--n-collocation",
+            "2",
+            "--raw-colloc",
+            "--warmstart",
+            "--tsh-ramp",
+            "40",
+            "--pch-ramp",
+            "0.05",
+            "--no-secant-constraints",
+            "--solver-timeout",
+            "12.5",
+            "--solver-wall-time",
+            "30",
+            "--tee",
+        ]
+    )
+
+    assert args.task == "both"
+    assert args.overrides == ["product.A1=20", "ht.KC=4e-4"]
+    assert args.method == "colloc"
+    assert args.raw_colloc is True
+    assert args.warmstart is True
+    assert args.tsh_ramp == 40.0
+    assert args.pch_ramp == 0.05
+    assert args.no_secant_constraints is True
+    assert args.solver_timeout == 12.5
+    assert args.solver_wall_time == 30.0
+    assert args.tee is True
+
+
+def test_single_case_runner_uses_current_adapters(monkeypatch, capsys):
+    fake_traj = np.array(
+        [
+            [0.0, -30.0, -25.0, -20.0, 100.0, 0.2, 0.0],
+            [1.0, -29.0, -25.0, -15.0, 150.0, 0.1, 99.0],
+        ]
+    )
+    seen = {}
+
+    def fake_scipy_adapter(task, vial, product, ht, eq_cap, nVial, scenario, dt):
+        seen["scipy"] = {
+            "task": task,
+            "product_A1": product["A1"],
+            "ht_KC": ht["KC"],
+            "nVial": nVial,
+            "dt": dt,
+        }
+        return {
+            "trajectory": fake_traj,
+            "success": True,
+            "message": "scipy ok",
+            "wall_time_s": 0.01,
+            "objective_time_hr": 1.0,
+            "solver": {"status": "n/a", "termination_condition": "n/a"},
+        }
+
+    def fake_pyomo_adapter(
+        task,
+        vial,
+        product,
+        ht,
+        eq_cap,
+        nVial,
+        scenario,
+        **kwargs,
+    ):
+        seen["pyomo"] = {
+            "task": task,
+            "product_A1": product["A1"],
+            "ht_KC": ht["KC"],
+            "nVial": nVial,
+            **kwargs,
+        }
+        return {
+            "trajectory": fake_traj,
+            "success": True,
+            "message": "pyomo ok",
+            "wall_time_s": 0.02,
+            "objective_time_hr": 1.0,
+            "solver": {"status": "ok", "termination_condition": "optimal"},
+            "warmstart_used": kwargs["warmstart"],
+            "discretization": {"method": kwargs["method"]},
+        }
+
+    monkeypatch.setattr(run_single_case, "scipy_adapter", fake_scipy_adapter)
+    monkeypatch.setattr(run_single_case, "pyomo_adapter", fake_pyomo_adapter)
+
+    exit_code = run_single_case.main(
+        [
+            "--task",
+            "both",
+            "--scenario",
+            "baseline",
+            "--set",
+            "product.A1=20",
+            "--set",
+            "ht.KC=4e-4",
+            "--method",
+            "colloc",
+            "--n-elements",
+            "8",
+            "--n-collocation",
+            "2",
+            "--raw-colloc",
+            "--warmstart",
+            "--tsh-ramp",
+            "40",
+            "--pch-ramp",
+            "0.05",
+            "--no-secant-constraints",
+            "--solver-timeout",
+            "12.5",
+            "--solver-wall-time",
+            "30",
+            "--tee",
+        ]
+    )
+
+    output = capsys.readouterr().out
+
+    assert exit_code == 0
+    assert seen["scipy"] == {
+        "task": "both",
+        "product_A1": 20,
+        "ht_KC": 4e-4,
+        "nVial": 400,
+        "dt": 0.01,
+    }
+    assert seen["pyomo"]["method"] == "colloc"
+    assert seen["pyomo"]["n_elements"] == 8
+    assert seen["pyomo"]["n_collocation"] == 2
+    assert seen["pyomo"]["effective_nfe"] is False
+    assert seen["pyomo"]["warmstart"] is True
+    assert seen["pyomo"]["tsh_ramp_rate"] == 40.0
+    assert seen["pyomo"]["pch_ramp_rate"] == 0.05
+    assert seen["pyomo"]["use_secant_ramp_constraints"] is False
+    assert seen["pyomo"]["solver_cpu_time"] == 12.5
+    assert seen["pyomo"]["solver_wall_time"] == 30.0
+    assert seen["pyomo"]["tee"] is True
+    assert "SciPy summary" in output
+    assert "Pyomo summary" in output
+    assert "objective_time_hr: 1" in output
+    assert "termination_condition: optimal" in output
+    assert "trajectory_size: 2 points x 7 columns" in output
+
+
 def test_benchmark_results_tracks_only_policy_files(repo_root):
     result = subprocess.run(
         ["git", "ls-files", "benchmarks/results"],
@@ -350,10 +532,12 @@ def fake_optimizer(*args, **kwargs):
         baseline,
         solver_cpu_time=12.5,
         solver_wall_time=30.0,
+        tee=True,
     )
 
     assert seen["solver_cpu_time"] == 12.5
     assert seen["solver_wall_time"] == 30.0
+    assert seen["tee"] is True
     assert result["solver"]["max_cpu_time_s"] == 12.5
     assert result["solver"]["max_wall_time_s"] == 30.0
     assert result["solver"]["timeout_options"] == {