From d95ed3b636c1e2cc088b697fe02ed8de5f623899 Mon Sep 17 00:00:00 2001
From: Ryan Hatter <25823361+RNHTTR@users.noreply.github.com>
Date: Fri, 23 Jan 2026 21:41:40 -0500
Subject: [PATCH 1/2] Fix bug in preprocess_fn codepath

When passing a preprocess_fn to the `@evaluation_test` decorator, it fails with the following error:

```
            if preprocess_fn:
>               if not data_loaders:
                       ^^^^^^^^^^^^
E               UnboundLocalError: cannot access local variable 'data_loaders' where it is not associated with a value

.venv/lib/python3.12/site-packages/eval_protocol/pytest/evaluation_test.py:350: UnboundLocalError
```

Python 3.12
eval-protocol==0.3.0
---
 eval_protocol/pytest/evaluation_test.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
index 2256b35f..57816bbd 100644
--- a/eval_protocol/pytest/evaluation_test.py
+++ b/eval_protocol/pytest/evaluation_test.py
@@ -351,12 +351,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                     apply preprocess_fn.
                     """
                     if preprocess_fn:
-                        if not data_loaders:
-                            data = preprocess_fn(data)
-                        else:
-                            raise ValueError(
-                                "preprocess_fn should not be used with data_loaders. Pass preprocess_fn to data_loaders instead."
-                            )
+                        data = preprocess_fn(data)
 
                     for row in data:
                         # generate a stable row_id for each row

From 6bebd55ef8bb808572fa05ed729ec2ab87daf220 Mon Sep 17 00:00:00 2001
From: Benny Chen <youfychenbc5000@gmail.com>
Date: Sun, 8 Feb 2026 21:29:48 -0800
Subject: [PATCH 2/2] Avoid double preprocess with data loaders and add tests

---
 eval_protocol/pytest/evaluation_test.py       | 12 +--
 .../pytest/test_preprocess_fn_data_loaders.py | 92 +++++++++++++++++++
 2 files changed, 98 insertions(+), 6 deletions(-)
 create mode 100644 tests/pytest/test_preprocess_fn_data_loaders.py

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
index 57816bbd..e70f8794 100644
--- a/eval_protocol/pytest/evaluation_test.py
+++ b/eval_protocol/pytest/evaluation_test.py
@@ -307,6 +307,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                 try:
                     # Handle dataset loading
                     data: list[EvaluationRow] = []
+                    data_loader_rows_preprocessed = False
                     # Track all rows processed in the current run for error logging
                     processed_rows_in_run: list[EvaluationRow] = []
                     if "data_loaders" in kwargs and kwargs["data_loaders"] is not None:
@@ -318,6 +319,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                             results = data_loader.load()
                             for result in results:
                                 data.extend(result.rows)
+                                data_loader_rows_preprocessed = data_loader_rows_preprocessed or result.preprocessed
                         # Apply max_dataset_rows limit to data from data loaders
                         if max_dataset_rows is not None:
                             data = data[:max_dataset_rows]
@@ -345,13 +347,11 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                     if filtered_row_ids is not None:
                         data = [row for row in data if row.input_metadata.row_id in filtered_row_ids]
 
-                    """
-                    data_loaders handles preprocess_fn internally so we want
-                    to specially handle data_loaders here so we don't double
-                    apply preprocess_fn.
-                    """
                     if preprocess_fn:
-                        data = preprocess_fn(data)
+                        # If data loaders already applied preprocessing, skip the decorator-level
+                        # preprocess_fn to avoid running the same transform twice.
+                        if not data_loader_rows_preprocessed:
+                            data = preprocess_fn(data)
 
                     for row in data:
                         # generate a stable row_id for each row
diff --git a/tests/pytest/test_preprocess_fn_data_loaders.py b/tests/pytest/test_preprocess_fn_data_loaders.py
new file mode 100644
index 00000000..60b91ebc
--- /dev/null
+++ b/tests/pytest/test_preprocess_fn_data_loaders.py
@@ -0,0 +1,92 @@
+from eval_protocol.data_loader import DynamicDataLoader
+from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
+from eval_protocol.models import EvaluateResult, EvaluationRow, Message
+from eval_protocol.pytest import evaluation_test
+from eval_protocol.pytest.default_no_op_rollout_processor import NoOpRolloutProcessor
+
+
+class InMemoryLogger(DatasetLogger):
+    def log(self, row: EvaluationRow) -> None:
+        return None
+
+    def read(self) -> list[EvaluationRow]:
+        return []
+
+
+class StopAfterPreprocess(Exception):
+    pass
+
+
+class StopAfterPreprocessRolloutProcessor(NoOpRolloutProcessor):
+    def setup(self) -> None:
+        raise StopAfterPreprocess("Stop after preprocessing for focused test assertions")
+
+
+def _build_rows() -> list[EvaluationRow]:
+    return [
+        EvaluationRow(
+            messages=[
+                Message(role="user", content="question"),
+                Message(role="assistant", content="answer"),
+            ]
+        )
+    ]
+
+
+async def test_preprocess_fn_runs_with_data_loader_without_loader_preprocess():
+    call_count = {"decorator_preprocess": 0}
+
+    def decorator_preprocess(rows: list[EvaluationRow]) -> list[EvaluationRow]:
+        call_count["decorator_preprocess"] += 1
+        return rows
+
+    data_loader = DynamicDataLoader(generators=[_build_rows])
+
+    @evaluation_test(
+        data_loaders=data_loader,
+        preprocess_fn=decorator_preprocess,
+        rollout_processor=StopAfterPreprocessRolloutProcessor(),
+        logger=InMemoryLogger(),
+    )
+    def eval_fn(row: EvaluationRow) -> EvaluationRow:
+        row.evaluation_result = EvaluateResult(score=1.0, reason="ok")
+        return row
+
+    try:
+        await eval_fn(data_loaders=data_loader)
+    except StopAfterPreprocess:
+        pass
+
+    assert call_count["decorator_preprocess"] == 1
+
+
+async def test_preprocess_fn_not_double_applied_when_data_loader_preprocess_exists():
+    call_count = {"loader_preprocess": 0, "decorator_preprocess": 0}
+
+    def loader_preprocess(rows: list[EvaluationRow]) -> list[EvaluationRow]:
+        call_count["loader_preprocess"] += 1
+        return rows
+
+    def decorator_preprocess(rows: list[EvaluationRow]) -> list[EvaluationRow]:
+        call_count["decorator_preprocess"] += 1
+        return rows
+
+    data_loader = DynamicDataLoader(generators=[_build_rows], preprocess_fn=loader_preprocess)
+
+    @evaluation_test(
+        data_loaders=data_loader,
+        preprocess_fn=decorator_preprocess,
+        rollout_processor=StopAfterPreprocessRolloutProcessor(),
+        logger=InMemoryLogger(),
+    )
+    def eval_fn(row: EvaluationRow) -> EvaluationRow:
+        row.evaluation_result = EvaluateResult(score=1.0, reason="ok")
+        return row
+
+    try:
+        await eval_fn(data_loaders=data_loader)
+    except StopAfterPreprocess:
+        pass
+
+    assert call_count["loader_preprocess"] == 1
+    assert call_count["decorator_preprocess"] == 0