diff --git a/eval_protocol/benchmarks/test_aime25.py b/eval_protocol/benchmarks/test_aime25.py index 6eb785a7..5e1d14e0 100644 --- a/eval_protocol/benchmarks/test_aime25.py +++ b/eval_protocol/benchmarks/test_aime25.py @@ -91,7 +91,6 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]: aggregation_method="mean", passed_threshold=0.8, num_runs=8, - max_dataset_rows=2, max_concurrent_rollouts=4, mode="pointwise", )