From 4001edc224dde0dfcfb55b77464746fce9929ed2 Mon Sep 17 00:00:00 2001 From: vuxuan2098-droid <> Date: Tue, 19 May 2026 01:16:00 +0700 Subject: [PATCH] fix: resolve IndexError in exporter and add timeout flag to run command Fixes #8: exporter.py now uses a fixed _FIELDS constant instead of deriving field names from records[0].keys(), preventing IndexError when results list is empty. Added guard so CSV writer skips rows when there are no records. Fixes #9: added --timeout option to the run CLI command (default 30s). Timeout is now passed through run_all() and _run_one() instead of using a hardcoded module-level _TIMEOUT constant. Updated tests: added 3 new tests for empty-results export, updated test_run_one_records_timeout to pass timeout directly. --- assayer/cli/main.py | 3 +++ assayer/exporter.py | 15 +++++++++++++-- assayer/runner.py | 13 ++++++------- tests/test_exporter.py | 23 +++++++++++++++++++++++ tests/test_runner.py | 3 +-- 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/assayer/cli/main.py b/assayer/cli/main.py index abe2db6..9cb91c1 100644 --- a/assayer/cli/main.py +++ b/assayer/cli/main.py @@ -61,6 +61,7 @@ def cli() -> None: @click.option( "--judge-criteria", default=None, help="Comma-separated evaluation criteria." ) +@click.option("--timeout", type=float, default=30.0, help="Per-model timeout in seconds (default: 30).") def run( prompt: str | None, models: str, @@ -73,6 +74,7 @@ def run( score: bool, judge: str | None, judge_criteria: str | None, + timeout: float, ) -> None: if prompt_file: with open(prompt_file) as f: @@ -113,6 +115,7 @@ def run( system=system, temperature=temperature, max_tokens=max_tokens, + timeout=timeout, ) ) similarity = compute_similarity(results) if score else None diff --git a/assayer/exporter.py b/assayer/exporter.py index a3395ff..c1eef6f 100644 --- a/assayer/exporter.py +++ b/assayer/exporter.py @@ -4,6 +4,16 @@ from assayer.models import ModelResult +_FIELDS = [ + "model", + "output", + "tokens_input", + "tokens_output", + "latency_seconds", + "cost_usd", + "error", +] + def _to_dict(result: ModelResult) -> dict: return { @@ -23,9 +33,10 @@ def export(results: list[ModelResult], path: str) -> None: if dest.suffix.lower() == ".csv": with dest.open("w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=list(records[0].keys())) + writer = csv.DictWriter(f, fieldnames=_FIELDS) writer.writeheader() - writer.writerows(records) + if records: + writer.writerows(records) else: dest.write_text( json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8" diff --git a/assayer/runner.py b/assayer/runner.py index d052199..1d28cd5 100644 --- a/assayer/runner.py +++ b/assayer/runner.py @@ -8,9 +8,6 @@ from assayer.providers.openai import OpenAIProvider -_TIMEOUT = 30.0 - - def _make_provider(model: str) -> BaseProvider: if model.startswith("ollama/"): return OllamaProvider(model) @@ -27,6 +24,7 @@ async def _run_one( system: str | None, temperature: float | None, max_tokens: int | None, + timeout: float = 30.0, ) -> ModelResult: provider = _make_provider(model) try: @@ -37,7 +35,7 @@ async def _run_one( temperature=temperature, max_tokens=max_tokens, ), - timeout=_TIMEOUT, + timeout=timeout, ) except TimeoutError: return ModelResult( @@ -45,9 +43,9 @@ async def _run_one( output="", tokens_input=0, tokens_output=0, - latency_seconds=30.0, + latency_seconds=timeout, cost_usd=0.0, - error="Request timed out after 30 seconds", + error=f"Request timed out after {timeout} seconds", ) @@ -57,9 +55,10 @@ async def run_all( system: str | None = None, temperature: float | None = None, max_tokens: int | None = None, + timeout: float = 30.0, ) -> list[ModelResult]: tasks = [ - _run_one(model, prompt, system, temperature, max_tokens) + _run_one(model, prompt, system, temperature, max_tokens, timeout) for model in models ] return list(await asyncio.gather(*tasks)) diff --git a/tests/test_exporter.py b/tests/test_exporter.py index e82f48b..ca539b0 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -109,3 +109,26 @@ def test_export_unknown_extension_writes_json(tmp_path): data = json.loads(path.read_text(encoding="utf-8")) assert len(data) == 2 + +def test_export_csv_empty_results_does_not_crash(tmp_path): + path = tmp_path / "results.csv" + export([], str(path)) # should not raise + + +def test_export_csv_empty_results_has_header(tmp_path): + path = tmp_path / "results.csv" + export([], str(path)) + + with path.open(encoding="utf-8") as f: + reader = csv.DictReader(f) + assert set(reader.fieldnames) == _EXPECTED_FIELDS + rows = list(reader) + assert rows == [] + + +def test_export_json_empty_results(tmp_path): + path = tmp_path / "results.json" + export([], str(path)) + + data = json.loads(path.read_text(encoding="utf-8")) + assert data == [] diff --git a/tests/test_runner.py b/tests/test_runner.py index cb0ffa9..5332467 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -116,9 +116,8 @@ async def _slow_run(self, prompt, **kwargs): ) monkeypatch.setattr(OpenAIProvider, "run", _slow_run) - monkeypatch.setattr("assayer.runner._TIMEOUT", 0.05) - result = await _run_one("gpt-4o-mini", "test", None, None, None) + result = await _run_one("gpt-4o-mini", "test", None, None, None, timeout=0.05) assert result.error is not None assert "timed out" in result.error