diff --git a/assayer/cli/main.py b/assayer/cli/main.py index abe2db6..9cb91c1 100644 --- a/assayer/cli/main.py +++ b/assayer/cli/main.py @@ -61,6 +61,7 @@ def cli() -> None: @click.option( "--judge-criteria", default=None, help="Comma-separated evaluation criteria." ) +@click.option("--timeout", type=float, default=30.0, help="Per-model timeout in seconds (default: 30).") def run( prompt: str | None, models: str, @@ -73,6 +74,7 @@ def run( score: bool, judge: str | None, judge_criteria: str | None, + timeout: float, ) -> None: if prompt_file: with open(prompt_file) as f: @@ -113,6 +115,7 @@ def run( system=system, temperature=temperature, max_tokens=max_tokens, + timeout=timeout, ) ) similarity = compute_similarity(results) if score else None diff --git a/assayer/exporter.py b/assayer/exporter.py index a3395ff..c1eef6f 100644 --- a/assayer/exporter.py +++ b/assayer/exporter.py @@ -4,6 +4,16 @@ from assayer.models import ModelResult +_FIELDS = [ + "model", + "output", + "tokens_input", + "tokens_output", + "latency_seconds", + "cost_usd", + "error", +] + def _to_dict(result: ModelResult) -> dict: return { @@ -23,9 +33,10 @@ def export(results: list[ModelResult], path: str) -> None: if dest.suffix.lower() == ".csv": with dest.open("w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=list(records[0].keys())) + writer = csv.DictWriter(f, fieldnames=_FIELDS) writer.writeheader() - writer.writerows(records) + if records: + writer.writerows(records) else: dest.write_text( json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8" diff --git a/assayer/runner.py b/assayer/runner.py index d052199..1d28cd5 100644 --- a/assayer/runner.py +++ b/assayer/runner.py @@ -8,9 +8,6 @@ from assayer.providers.openai import OpenAIProvider -_TIMEOUT = 30.0 - - def _make_provider(model: str) -> BaseProvider: if model.startswith("ollama/"): return OllamaProvider(model) @@ -27,6 +24,7 @@ async def _run_one( system: str | None, temperature: float | None, max_tokens: int | None, + timeout: float = 30.0, ) -> ModelResult: provider = _make_provider(model) try: @@ -37,7 +35,7 @@ async def _run_one( temperature=temperature, max_tokens=max_tokens, ), - timeout=_TIMEOUT, + timeout=timeout, ) except TimeoutError: return ModelResult( @@ -45,9 +43,9 @@ async def _run_one( output="", tokens_input=0, tokens_output=0, - latency_seconds=30.0, + latency_seconds=timeout, cost_usd=0.0, - error="Request timed out after 30 seconds", + error=f"Request timed out after {timeout} seconds", ) @@ -57,9 +55,10 @@ async def run_all( system: str | None = None, temperature: float | None = None, max_tokens: int | None = None, + timeout: float = 30.0, ) -> list[ModelResult]: tasks = [ - _run_one(model, prompt, system, temperature, max_tokens) + _run_one(model, prompt, system, temperature, max_tokens, timeout) for model in models ] return list(await asyncio.gather(*tasks)) diff --git a/tests/test_exporter.py b/tests/test_exporter.py index e82f48b..ca539b0 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -109,3 +109,26 @@ def test_export_unknown_extension_writes_json(tmp_path): data = json.loads(path.read_text(encoding="utf-8")) assert len(data) == 2 + +def test_export_csv_empty_results_does_not_crash(tmp_path): + path = tmp_path / "results.csv" + export([], str(path)) # should not raise + + +def test_export_csv_empty_results_has_header(tmp_path): + path = tmp_path / "results.csv" + export([], str(path)) + + with path.open(encoding="utf-8") as f: + reader = csv.DictReader(f) + assert set(reader.fieldnames) == _EXPECTED_FIELDS + rows = list(reader) + assert rows == [] + + +def test_export_json_empty_results(tmp_path): + path = tmp_path / "results.json" + export([], str(path)) + + data = json.loads(path.read_text(encoding="utf-8")) + assert data == [] diff --git a/tests/test_runner.py b/tests/test_runner.py index cb0ffa9..5332467 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -116,9 +116,8 @@ async def _slow_run(self, prompt, **kwargs): ) monkeypatch.setattr(OpenAIProvider, "run", _slow_run) - monkeypatch.setattr("assayer.runner._TIMEOUT", 0.05) - result = await _run_one("gpt-4o-mini", "test", None, None, None) + result = await _run_one("gpt-4o-mini", "test", None, None, None, timeout=0.05) assert result.error is not None assert "timed out" in result.error