Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions assayer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def cli() -> None:
@click.option(
"--judge-criteria", default=None, help="Comma-separated evaluation criteria."
)
@click.option("--timeout", type=float, default=30.0, help="Per-model timeout in seconds (default: 30).")
def run(
prompt: str | None,
models: str,
Expand All @@ -73,6 +74,7 @@ def run(
score: bool,
judge: str | None,
judge_criteria: str | None,
timeout: float,
) -> None:
if prompt_file:
with open(prompt_file) as f:
Expand Down Expand Up @@ -113,6 +115,7 @@ def run(
system=system,
temperature=temperature,
max_tokens=max_tokens,
timeout=timeout,
)
)
similarity = compute_similarity(results) if score else None
Expand Down
15 changes: 13 additions & 2 deletions assayer/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@

from assayer.models import ModelResult

_FIELDS = [
"model",
"output",
"tokens_input",
"tokens_output",
"latency_seconds",
"cost_usd",
"error",
]


def _to_dict(result: ModelResult) -> dict:
return {
Expand All @@ -23,9 +33,10 @@ def export(results: list[ModelResult], path: str) -> None:

if dest.suffix.lower() == ".csv":
with dest.open("w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=list(records[0].keys()))
writer = csv.DictWriter(f, fieldnames=_FIELDS)
writer.writeheader()
writer.writerows(records)
if records:
writer.writerows(records)
else:
dest.write_text(
json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8"
Expand Down
13 changes: 6 additions & 7 deletions assayer/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
from assayer.providers.openai import OpenAIProvider


_TIMEOUT = 30.0


def _make_provider(model: str) -> BaseProvider:
if model.startswith("ollama/"):
return OllamaProvider(model)
Expand All @@ -27,6 +24,7 @@ async def _run_one(
system: str | None,
temperature: float | None,
max_tokens: int | None,
timeout: float = 30.0,
) -> ModelResult:
provider = _make_provider(model)
try:
Expand All @@ -37,17 +35,17 @@ async def _run_one(
temperature=temperature,
max_tokens=max_tokens,
),
timeout=_TIMEOUT,
timeout=timeout,
)
except TimeoutError:
return ModelResult(
model=model,
output="",
tokens_input=0,
tokens_output=0,
latency_seconds=30.0,
latency_seconds=timeout,
cost_usd=0.0,
error="Request timed out after 30 seconds",
error=f"Request timed out after {timeout} seconds",
)


Expand All @@ -57,9 +55,10 @@ async def run_all(
system: str | None = None,
temperature: float | None = None,
max_tokens: int | None = None,
timeout: float = 30.0,
) -> list[ModelResult]:
tasks = [
_run_one(model, prompt, system, temperature, max_tokens)
_run_one(model, prompt, system, temperature, max_tokens, timeout)
for model in models
]
return list(await asyncio.gather(*tasks))
23 changes: 23 additions & 0 deletions tests/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,26 @@ def test_export_unknown_extension_writes_json(tmp_path):

data = json.loads(path.read_text(encoding="utf-8"))
assert len(data) == 2

def test_export_csv_empty_results_does_not_crash(tmp_path):
    """Exporting an empty result list to a ``.csv`` path must not raise."""
    target = tmp_path / "results.csv"
    # No assertion needed: the test passes as long as this call returns.
    export([], str(target))


def test_export_csv_empty_results_has_header(tmp_path):
    """An empty CSV export still contains the header row and no data rows.

    ``_EXPECTED_FIELDS`` is not visible in this hunk; it is expected to be a
    set of column names defined elsewhere in this test module.
    """
    path = tmp_path / "results.csv"
    export([], str(path))

    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires for file objects handed to a reader.
    with path.open(encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        assert set(reader.fieldnames) == _EXPECTED_FIELDS
        rows = list(reader)
    assert rows == []


def test_export_json_empty_results(tmp_path):
    """Exporting an empty result list to a ``.json`` path writes ``[]``."""
    target = tmp_path / "results.json"
    export([], str(target))

    raw_text = target.read_text(encoding="utf-8")
    assert json.loads(raw_text) == []
3 changes: 1 addition & 2 deletions tests/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,8 @@ async def _slow_run(self, prompt, **kwargs):
)

monkeypatch.setattr(OpenAIProvider, "run", _slow_run)
monkeypatch.setattr("assayer.runner._TIMEOUT", 0.05)

result = await _run_one("gpt-4o-mini", "test", None, None, None)
result = await _run_one("gpt-4o-mini", "test", None, None, None, timeout=0.05)

assert result.error is not None
assert "timed out" in result.error
Loading