tenzir · mavam · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -1,7 +1,4 @@
 {
-  "env": {
-    "CLAUDE_CODE_TASK_LIST_ID": "tenzir-test"
-  },
   "extraKnownMarketplaces": {
     "tenzir": {
       "source": {

diff --git a/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md b/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md
@@ -0,0 +1,19 @@
+---
+title: Add pre-compare transforms for non-deterministic output
+type: feature
+authors:
+  - mavam
+  - claude
+created: 2026-01-30T20:46:00.000000Z
+---
+
+The test framework now supports pre-compare transforms that normalize output before comparison with baselines. This helps handle tests with non-deterministic output like unordered result sets from hash-based aggregations or parallel operations.
+
+Configure the `pre-compare` option in `test.yaml` or per-test frontmatter to apply transforms to both actual output and baselines before comparison:
+
+```yaml
+# Sort output lines for comparison (baseline stays unchanged)
+pre-compare: sort
+```
+
+The `sort` transform sorts output lines lexicographically, making it easy to handle unordered results. Transforms only affect comparison—baseline files remain untransformed on disk, and `--update` continues to store original output.
diff --git a/example-project/tests/pre-compare-sort.sh b/example-project/tests/pre-compare-sort.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# pre-compare: sort
+
+# Demonstrate the pre-compare transform for handling non-deterministic output.
+# This test prints lines in a fixed but unsorted order to stand in for such
+# output; the sort transform makes comparison against the sorted baseline pass.
+
+echo "zebra"
+echo "alpha"
+echo "charlie"
+echo "bravo"
diff --git a/example-project/tests/pre-compare-sort.txt b/example-project/tests/pre-compare-sort.txt
@@ -0,0 +1,4 @@
+alpha
+bravo
+charlie
+zebra
diff --git a/src/tenzir_test/run.py b/src/tenzir_test/run.py
@@ -1201,6 +1201,7 @@ def _default_test_config() -> TestConfig:
         "retry": 1,
         "suite": None,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -1209,13 +1210,17 @@ def _canonical_config_key(key: str) -> str:
         return "fixtures"
     if key in {"package_dirs", "package-dirs"}:
         return "package_dirs"
+    if key in {"pre_compare", "pre-compare"}:
+        return "pre_compare"
     return key
 
 
 ConfigOrigin = Literal["directory", "frontmatter"]
 
 
-def _raise_config_error(location: Path | str, message: str, line_number: int | None = None) -> None:
+def _raise_config_error(
+    location: Path | str, message: str, line_number: int | None = None
+) -> typing.NoReturn:
     base = str(location)
     if line_number is not None:
         base = f"{base}:{line_number}"
@@ -1246,7 +1251,6 @@ def _normalize_fixtures_value(
             f"Invalid value for 'fixtures', expected string or list, got '{value}'",
             line_number,
         )
-        return tuple()
 
     fixtures: list[str] = []
     for entry in raw:
@@ -1307,7 +1311,6 @@ def _normalize_inputs_value(
         f"Invalid value for 'inputs', expected string, got '{value}'",
         line_number,
     )
-    return None
 
 
 def _normalize_package_dirs_value(
@@ -1324,7 +1327,6 @@ def _normalize_package_dirs_value(
             f"Invalid value for 'package-dirs', expected list of strings, got '{value}'",
             line_number,
         )
-        return tuple()
     base_dir = _extract_location_path(location).parent
     normalized: list[str] = []
     for entry in value:
@@ -1334,15 +1336,13 @@ def _normalize_package_dirs_value(
                 f"Invalid package-dirs entry '{entry}', expected string",
                 line_number,
             )
-            continue
         raw = os.fspath(entry).strip()
         if not raw:
             _raise_config_error(
                 location,
                 "Invalid package-dirs entry: must be non-empty string",
                 line_number,
             )
-            continue
         path = Path(raw)
         if not path.is_absolute():
             path = base_dir / path
@@ -1354,6 +1354,58 @@ def _normalize_package_dirs_value(
     return tuple(normalized)
 
 
+def _normalize_pre_compare_value(
+    value: typing.Any,
+    *,
+    location: Path | str,
+    line_number: int | None = None,
+) -> tuple[str, ...]:  # validated transform names, in configured order
+    entries: typing.Any  # candidate names; each one is validated in the loop below
+    if isinstance(value, list):
+        entries = value
+    elif isinstance(value, str):
+        try:
+            parsed = yaml.safe_load(value)  # a string may hold an inline YAML list, e.g. "[sort]"
+        except yaml.YAMLError:
+            parsed = None  # unparsable YAML falls back to treating the string as one name
+        if isinstance(parsed, list):
+            entries = parsed
+        else:
+            entries = [value]  # plain scalar: a single transform name
+    else:  # neither list nor string; _raise_config_error is annotated NoReturn
+        _raise_config_error(
+            location,
+            f"Invalid value for 'pre-compare', expected string or list, got '{value}'",
+            line_number,
+        )
+
+    transforms: list[str] = []
+    valid_names = set(_TRANSFORMS.keys())  # names must match a registered transform
+    for entry in entries:
+        if not isinstance(entry, str):
+            _raise_config_error(
+                location,
+                f"Invalid pre-compare entry '{entry}', expected string",
+                line_number,
+            )
+        name = entry.strip()  # surrounding whitespace is ignored
+        if not name:
+            _raise_config_error(
+                location,
+                "Pre-compare transform names must be non-empty strings",
+                line_number,
+            )
+        if name not in valid_names:
+            valid_list = ", ".join(sorted(valid_names))
+            _raise_config_error(
+                location,
+                f"Unknown pre-compare transform '{name}', valid transforms: {valid_list}",
+                line_number,
+            )
+        transforms.append(name)
+    return tuple(transforms)
+
+
 def _assign_config_option(
     config: TestConfig,
     key: str,
@@ -1373,6 +1425,7 @@ def _assign_config_option(
         "inputs",
         "retry",
         "package_dirs",
+        "pre_compare",
     }
     if origin == "directory":
         valid_keys.add("suite")
@@ -1419,7 +1472,6 @@ def _assign_config_option(
             f"Invalid value for '{canonical}', expected 'true' or 'false', got '{value}'",
             line_number,
         )
-        return
 
     if canonical == "timeout":
         if isinstance(value, int):
@@ -1432,7 +1484,6 @@ def _assign_config_option(
                 f"Invalid value for 'timeout', expected integer, got '{value}'",
                 line_number,
             )
-            return
         if timeout_value <= 0:
             _raise_config_error(
                 location,
@@ -1488,7 +1539,6 @@ def _assign_config_option(
                 f"Invalid value for 'retry', expected integer, got '{value}'",
                 line_number,
             )
-            return
         if retry_value <= 0:
             _raise_config_error(
                 location,
@@ -1498,6 +1548,11 @@ def _assign_config_option(
         config[canonical] = retry_value
         return
 
+    if canonical == "pre_compare":
+        transforms = _normalize_pre_compare_value(value, location=location, line_number=line_number)
+        config[canonical] = transforms
+        return
+
     if canonical == "runner":
         if not isinstance(value, str):
             _raise_config_error(
@@ -2798,6 +2853,43 @@ def _format_lines_changed(total: int) -> str:
     return f"{_BLOCK_INDENT}└ {total} {line} changed"
 
 
+def _transform_sort(output: bytes) -> bytes:
+    """Sort output lines in byte-wise lexicographic order.
+
+    Splits on LF (0x0A) only and stays in bytes, so CR, form feeds, U+2028,
+    and undecodable bytes remain part of their line instead of being treated
+    as extra line breaks. A single trailing newline is preserved.
+    """
+    if not output:
+        return output
+    # Detach one final newline so it does not sort as an extra empty line.
+    has_trailing_newline = output.endswith(b"\n")
+    body = output[:-1] if has_trailing_newline else output
+    result = b"\n".join(sorted(body.split(b"\n")))
+    if has_trailing_newline:
+        result += b"\n"
+    return result
+
+
+# Transforms are intentionally simple and hardcoded rather than using the plugin
+# architecture (like runners and fixtures). Rationale:
+# - Transforms are core comparison utilities, not user-extensible features
+# - Currently only one transform exists; extensibility can be added if needed
+# - Pre-compare transforms are rarely customized per-project compared to runners
+# - If custom transforms become necessary, this can be refactored to use a
+#   registry pattern similar to runners/__init__.py
+_TRANSFORMS: dict[str, typing.Callable[[bytes], bytes]] = {
+    "sort": _transform_sort,
+}
+
+
+def apply_pre_compare(output: bytes, transforms: tuple[str, ...]) -> bytes:
+    """Apply the named _TRANSFORMS entries to output in order and return the result."""
+    for name in transforms:
+        output = _TRANSFORMS[name](output)  # unknown names raise KeyError here
+    return output
+
+
 def print_diff(expected: bytes, actual: bytes, path: Path) -> None:
     if should_suppress_failure_output():
         return
@@ -3041,12 +3133,15 @@ def run_simple_test(
             return False
         log_comparison(test, ref_path, mode="comparing")
         expected = ref_path.read_bytes()
-        if expected != output:
+        pre_compare = cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
+        expected_transformed = apply_pre_compare(expected, pre_compare)
+        output_transformed = apply_pre_compare(output, pre_compare)
+        if expected_transformed != output_transformed:
             if interrupt_requested():
                 report_interrupted_test(test)
             else:
                 report_failure(test, "")
-                print_diff(expected, output, ref_path)
+                print_diff(expected_transformed, output_transformed, ref_path)
             return False
     success(test)
     return True

diff --git a/src/tenzir_test/runners/custom_python_fixture_runner.py b/src/tenzir_test/runners/custom_python_fixture_runner.py
@@ -135,12 +135,17 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                         return False
                     run_mod.log_comparison(test, ref_path, mode="comparing")
                     expected = ref_path.read_bytes()
-                    if expected != output:
+                    pre_compare = typing.cast(
+                        tuple[str, ...], test_config.get("pre_compare", tuple())
+                    )
+                    expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+                    output_transformed = run_mod.apply_pre_compare(output, pre_compare)
+                    if expected_transformed != output_transformed:
                         if run_mod.interrupt_requested():
                             run_mod.report_interrupted_test(test)
                         else:
                             run_mod.report_failure(test, "")
-                            run_mod.print_diff(expected, output, ref_path)
+                            run_mod.print_diff(expected_transformed, output_transformed, ref_path)
                         return False
             finally:
                 fixture_api.pop_context(context_token)

diff --git a/src/tenzir_test/runners/diff_runner.py b/src/tenzir_test/runners/diff_runner.py
@@ -68,6 +68,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
                     raise RuntimeError("TENZIR_BINARY must be configured for diff runners")
                 base_cmd: list[str] = [*binary, *config_args]
 
+                coverage_dir = ""
                 if coverage:
                     coverage_dir = env.get(
                         "CMAKE_COVERAGE_OUTPUT_DIRECTORY",
@@ -111,6 +112,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
         root_bytes = str(run_mod.ROOT).encode() + b"/"
         unoptimized_stdout = unoptimized.stdout.replace(root_bytes, b"")
         optimized_stdout = optimized.stdout.replace(root_bytes, b"")
+        # Generate diff without transforms first
         diff_chunks = list(
             difflib.diff_bytes(
                 difflib.unified_diff,
@@ -130,12 +132,15 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
             ref_path.write_bytes(diff_bytes)
         else:
             expected = ref_path.read_bytes()
-            if diff_bytes != expected:
+            pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
+            expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(diff_bytes, pre_compare)
+            if actual_transformed != expected_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected, diff_bytes, ref_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, ref_path)
                 return False
         run_mod.success(test)
         return True

diff --git a/src/tenzir_test/runners/shell_runner.py b/src/tenzir_test/runners/shell_runner.py
@@ -125,6 +125,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
             run_mod.success(test)
             return True
 
+        pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
         if combined_bytes:
             if not stdout_path.exists():
                 run_mod.report_failure(
@@ -134,22 +135,28 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                 return False
             run_mod.log_comparison(test, stdout_path, mode="comparing")
             expected_stdout = stdout_path.read_bytes()
-            if expected_stdout != combined_bytes:
+            expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(combined_bytes, pre_compare)
+            if expected_transformed != actual_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, combined_bytes, stdout_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
                 return False
         elif stdout_path.exists():
             expected_stdout = stdout_path.read_bytes()
+            # Check if original baseline is empty before transformation
             if expected_stdout not in {b"", b"\n"}:
-                if run_mod.interrupt_requested():
-                    run_mod.report_interrupted_test(test)
-                else:
-                    run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, b"", stdout_path)
-                return False
+                expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+                actual_transformed = run_mod.apply_pre_compare(b"", pre_compare)
+                if expected_transformed != actual_transformed:
+                    if run_mod.interrupt_requested():
+                        run_mod.report_interrupted_test(test)
+                    else:
+                        run_mod.report_failure(test, "")
+                        run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
+                    return False
 
         run_mod.success(test)
         return True
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    alpha
+    bravo
+    charlie
+    zebra