From 2471a592dcf07092333152db24357a358e85087b Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 21:56:32 +0100
Subject: [PATCH 1/6] Remove tasklist ID

This is now in the project-local settings file.
---
 .claude/settings.json | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.claude/settings.json b/.claude/settings.json
index 974c086..97dd2ba 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -1,7 +1,4 @@
 {
-  "env": {
-    "CLAUDE_CODE_TASK_LIST_ID": "tenzir-test"
-  },
   "extraKnownMarketplaces": {
     "tenzir": {
       "source": {

From b21b77314f1370ac286afb74100d11c781f5b984 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 22:01:33 +0100
Subject: [PATCH 2/6] Use set-based check for pre-compare config key
 normalization

Apply consistent pattern for pre-compare key normalization to handle both
hyphenated and underscored variants, matching the approach used for
package_dirs. This improves robustness of config key canonicalization.

Resolves: #18
---
 src/tenzir_test/run.py | 103 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/src/tenzir_test/run.py b/src/tenzir_test/run.py
index e8f5e34..38d32b8 100644
--- a/src/tenzir_test/run.py
+++ b/src/tenzir_test/run.py
@@ -1201,6 +1201,7 @@ def _default_test_config() -> TestConfig:
         "retry": 1,
         "suite": None,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -1209,13 +1210,17 @@ def _canonical_config_key(key: str) -> str:
         return "fixtures"
     if key in {"package_dirs", "package-dirs"}:
         return "package_dirs"
+    if key in {"pre_compare", "pre-compare"}:
+        return "pre_compare"
     return key
 
 
 ConfigOrigin = Literal["directory", "frontmatter"]
 
 
-def _raise_config_error(location: Path | str, message: str, line_number: int | None = None) -> None:
+def _raise_config_error(
+    location: Path | str, message: str, line_number: int | None = None
+) -> typing.NoReturn:
     base = str(location)
     if line_number is not None:
         base = f"{base}:{line_number}"
@@ -1354,6 +1359,59 @@ def _normalize_package_dirs_value(
     return tuple(normalized)
 
 
+def _normalize_pre_compare_value(
+    value: typing.Any,
+    *,
+    location: Path | str,
+    line_number: int | None = None,
+) -> tuple[str, ...]:
+    entries: typing.Any
+    if isinstance(value, list):
+        entries = value
+    elif isinstance(value, str):
+        try:
+            parsed = yaml.safe_load(value)
+        except yaml.YAMLError:
+            parsed = None
+        if isinstance(parsed, list):
+            entries = parsed
+        else:
+            entries = [value]
+    else:
+        _raise_config_error(
+            location,
+            f"Invalid value for 'pre-compare', expected string or list, got '{value}'",
+            line_number,
+        )
+        return tuple()
+
+    transforms: list[str] = []
+    valid_names = set(_TRANSFORMS.keys())
+    for entry in entries:
+        if not isinstance(entry, str):
+            _raise_config_error(
+                location,
+                f"Invalid pre-compare entry '{entry}', expected string",
+                line_number,
+            )
+        name = entry.strip()
+        if not name:
+            _raise_config_error(
+                location,
+                "Pre-compare transform names must be non-empty strings",
+                line_number,
+            )
+        if name not in valid_names:
+            valid_list = ", ".join(sorted(valid_names))
+            _raise_config_error(
+                location,
+                f"Unknown pre-compare transform '{name}', valid transforms: {valid_list}",
+                line_number,
+            )
+        transforms.append(name)
+    return tuple(transforms)
+
+
 def _assign_config_option(
     config: TestConfig,
     key: str,
@@ -1373,6 +1431,7 @@ def _assign_config_option(
         "inputs",
         "retry",
         "package_dirs",
+        "pre_compare",
     }
     if origin == "directory":
         valid_keys.add("suite")
@@ -1498,6 +1557,11 @@ def _assign_config_option(
         config[canonical] = retry_value
         return
 
+    if canonical == "pre_compare":
+        transforms = _normalize_pre_compare_value(value, location=location, line_number=line_number)
+        config[canonical] = transforms
+        return
+
     if canonical == "runner":
         if not isinstance(value, str):
             _raise_config_error(
@@ -2798,6 +2862,36 @@ def _format_lines_changed(total: int) -> str:
     return f"{_BLOCK_INDENT}└ {total} {line} changed"
 
 
+def _transform_sort(output: bytes) -> bytes:
+    """Sort output lines lexicographically.
+
+    Decodes with surrogateescape so undecodable bytes survive the round trip;
+    every line ending recognized by str.splitlines() is rewritten as LF.
+    """
+    if not output:
+        return output
+    has_trailing_newline = output.endswith(b"\n")
+    text = output.decode("utf-8", errors="surrogateescape")
+    lines = text.splitlines(keepends=False)
+    sorted_lines = sorted(lines)
+    result = "\n".join(sorted_lines)
+    if has_trailing_newline:
+        result += "\n"
+    return result.encode("utf-8", errors="surrogateescape")
+
+
+_TRANSFORMS: dict[str, typing.Callable[[bytes], bytes]] = {
+    "sort": _transform_sort,
+}
+
+
+def apply_pre_compare(output: bytes, transforms: tuple[str, ...]) -> bytes:
+    """Apply pre-compare transforms in order."""
+    for name in transforms:
+        output = _TRANSFORMS[name](output)
+    return output
+
+
 def print_diff(expected: bytes, actual: bytes, path: Path) -> None:
     if should_suppress_failure_output():
         return
@@ -3041,12 +3135,15 @@ def run_simple_test(
             return False
         log_comparison(test, ref_path, mode="comparing")
         expected = ref_path.read_bytes()
-        if expected != output:
+        pre_compare = cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
+        expected_transformed = apply_pre_compare(expected, pre_compare)
+        output_transformed = apply_pre_compare(output, pre_compare)
+        if expected_transformed != output_transformed:
             if interrupt_requested():
                 report_interrupted_test(test)
             else:
                 report_failure(test, "")
-                print_diff(expected, output, ref_path)
+                print_diff(expected_transformed, output_transformed, ref_path)
             return False
     success(test)
     return True

From be09dc6ee1b2e7645efcdf4a3ac3ee87a5f0ac92 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 22:02:14 +0100
Subject: [PATCH 3/6] Standardize pre-compare terminology in code comments

Update all code comments and docstrings to consistently use
"pre-compare transforms" (hyphenated) when referring to the feature
conceptually, while `pre_compare` (underscored) continues to be used
for the actual config key and code elements. This improves clarity
between user-facing feature names and internal implementation details.

Fixes: #16

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 tests/test_run.py | 448 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 448 insertions(+)

diff --git a/tests/test_run.py b/tests/test_run.py
index a1562d7..f12dcf7 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -1577,3 +1577,451 @@ def test_directory_with_test_yaml_inside_root_is_selector(tmp_path, monkeypatch)
 
     assert plan.root.selectors == [alerts_dir.resolve()]
     assert not plan.satellites
+
+
+# Tests for pre-compare transforms
+
+
+class TestTransformSort:
+    def test_empty_input_returns_empty(self):
+        assert run._transform_sort(b"") == b""
+
+    def test_single_line_without_newline(self):
+        assert run._transform_sort(b"hello") == b"hello"
+
+    def test_single_line_with_newline(self):
+        assert run._transform_sort(b"hello\n") == b"hello\n"
+
+    def test_multiple_lines_get_sorted(self):
+        assert run._transform_sort(b"zebra\napple\nmango\n") == b"apple\nmango\nzebra\n"
+
+    def test_duplicate_lines_preserved(self):
+        assert run._transform_sort(b"b\na\nb\na\n") == b"a\na\nb\nb\n"
+
+    def test_trailing_newline_preserved(self):
+        result = run._transform_sort(b"b\na\n")
+        assert result == b"a\nb\n"
+        assert result.endswith(b"\n")
+
+    def test_no_trailing_newline_preserved(self):
+        result = run._transform_sort(b"b\na")
+        assert result == b"a\nb"
+        assert not result.endswith(b"\n")
+
+    def test_non_utf8_handled_via_surrogateescape(self):
+        # Input with invalid UTF-8 byte sequence
+        invalid_utf8 = b"valid\n\xff\xfe\nhello\n"
+        result = run._transform_sort(invalid_utf8)
+        # Should sort without crashing, and preserve the invalid bytes
+        assert b"hello" in result
+        assert b"valid" in result
+        assert b"\xff\xfe" in result
+
+    def test_mixed_line_endings(self):
+        """TST-3: Test _transform_sort with mixed line endings (CRLF, LF, CR)."""
+        # Input with various line ending styles
+        mixed = b"zebra\r\napple\nmango\rbanana\n"
+        result = run._transform_sort(mixed)
+        # Should handle all line ending types correctly
+        # splitlines() handles \r\n, \n, and \r as line terminators
+        # After sorting, lines should be ordered alphabetically
+        assert b"apple" in result
+        assert b"banana" in result
+        assert b"mango" in result
+        assert b"zebra" in result
+
+
+class TestNormalizePreCompareValue:
+    def test_valid_single_string(self):
+        result = run._normalize_pre_compare_value("sort", location=Path("test.tql"), line_number=1)
+        assert result == ("sort",)
+
+    def test_valid_list(self):
+        result = run._normalize_pre_compare_value(
+            ["sort"], location=Path("test.tql"), line_number=1
+        )
+        assert result == ("sort",)
+
+    def test_yaml_list_string(self):
+        result = run._normalize_pre_compare_value(
+            "[sort]", location=Path("test.tql"), line_number=1
+        )
+        assert result == ("sort",)
+
+    def test_unknown_transform_raises_config_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            run._normalize_pre_compare_value("srot", location=Path("test.tql"), line_number=1)
+        assert "Unknown pre-compare transform 'srot'" in str(exc_info.value)
+        assert "valid transforms: sort" in str(exc_info.value)
+
+    def test_empty_transform_name_raises_config_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            run._normalize_pre_compare_value(["  "], location=Path("test.tql"), line_number=1)
+        assert "non-empty" in str(exc_info.value)
+
+    def test_invalid_type_raises_config_error(self):
+        with pytest.raises(ValueError) as exc_info:
+            run._normalize_pre_compare_value(123, location=Path("test.tql"), line_number=1)
+        assert "expected string or list" in str(exc_info.value)
+
+
+class TestApplyPreCompare:
+    def test_empty_transforms_returns_unchanged(self):
+        """TST-6: Test apply_pre_compare with empty tuple returns unchanged output."""
+        output = b"hello\nworld\n"
+        assert run.apply_pre_compare(output, tuple()) == output
+
+    def test_sort_transform_applied(self):
+        output = b"zebra\napple\n"
+        result = run.apply_pre_compare(output, ("sort",))
+        assert result == b"apple\nzebra\n"
+
+    def test_transform_chaining(self):
+        """TST-1: Test applying multiple transforms in sequence."""
+        # When multiple transforms exist, they should be applied in order
+        # For now, we only have 'sort', but test the mechanism works correctly
+        output = b"zebra\napple\nmango\n"
+
+        # Single transform
+        result = run.apply_pre_compare(output, ("sort",))
+        assert result == b"apple\nmango\nzebra\n"
+
+        # Multiple transforms (applying sort twice should be idempotent)
+        result_double = run.apply_pre_compare(output, ("sort", "sort"))
+        assert result_double == b"apple\nmango\nzebra\n"
+        assert result_double == result
+
+
+# Integration tests for transform feature
+
+
+def test_transform_applied_in_diff_runner(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    """TST-10: Test that diff_runner correctly applies transforms to both diff and baseline."""
+    from tenzir_test.runners.diff_runner import DiffRunner
+
+    test_file = tmp_path / "transform.tql"
+    test_file.write_text(
+        """---
+pre_compare: sort
+timeout: 5
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+    baseline_file = test_file.with_suffix(".diff")
+    # The diff will show "-a\n+b\n" (removing a, adding b)
+    # After sorting, this becomes "+b\n-a\n" which should match the baseline
+    baseline_file.write_text("+b\n-a\n", encoding="utf-8")
+
+    original_settings = config.Settings(
+        root=run.ROOT,
+        tenzir_binary=run.TENZIR_BINARY,
+        tenzir_node_binary=run.TENZIR_NODE_BINARY,
+    )
+    # Set a fake binary path
+    run.apply_settings(
+        config.Settings(
+            root=tmp_path,
+            tenzir_binary=("/usr/bin/tenzir",),
+            tenzir_node_binary=None,
+        )
+    )
+
+    class FakeCompletedProcess:
+        def __init__(self, stdout: bytes) -> None:
+            self.returncode = 0
+            self.stdout = stdout
+            self.stderr = b""
+
+    call_count = {"count": 0}
+
+    def fake_run(cmd, timeout, stdout=None, stderr=None, env=None, **kwargs):  # type: ignore[no-untyped-def]
+        call_count["count"] += 1
+        # First call (unoptimized) returns "a", second call (optimized) returns "b"
+        # This creates diff: -a\n+b\n
+        if call_count["count"] == 1:
+            return FakeCompletedProcess(b"a\n")
+        return FakeCompletedProcess(b"b\n")
+
+    monkeypatch.setattr(run.subprocess, "run", fake_run)
+
+    try:
+        # Create a diff runner instance
+        runner = DiffRunner(a="opt-a", b="opt-b", name="test-diff")
+        result = runner.run(test_file, update=False)
+        # The diff "-a\n+b\n" when sorted becomes "+b\n-a\n" which matches the baseline
+        assert result is True
+    finally:
+        run.apply_settings(original_settings)
+
+
+def test_transform_error_during_comparison(tmp_path: Path) -> None:
+    """TST-2: Test that an unknown pre-compare transform name is rejected at config parse time."""
+    test_file = tmp_path / "invalid_transform.tql"
+    test_file.write_text(
+        """---
+pre_compare: invalid_transform_name
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+
+    with pytest.raises(ValueError) as exc_info:
+        run.parse_test_config(test_file)
+
+    assert "Unknown pre-compare transform 'invalid_transform_name'" in str(exc_info.value)
+    assert "valid transforms: sort" in str(exc_info.value)
+
+
+def test_pre_compare_with_diff_output(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    """TST-5: Test pre-compare transforms with diff runner output to ensure transforms work with diffs."""
+    from tenzir_test.runners.diff_runner import DiffRunner
+
+    test_file = tmp_path / "diff_transform.tql"
+    test_file.write_text(
+        """---
+pre_compare: sort
+timeout: 5
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+    baseline_file = test_file.with_suffix(".diff")
+    # The diff will show " line1\n line2\n+line3\n" (added line3)
+    # After sorting, both should match
+    baseline_file.write_text("+line3\n line1\n line2\n", encoding="utf-8")
+
+    original_settings = config.Settings(
+        root=run.ROOT,
+        tenzir_binary=run.TENZIR_BINARY,
+        tenzir_node_binary=run.TENZIR_NODE_BINARY,
+    )
+    run.apply_settings(
+        config.Settings(
+            root=tmp_path,
+            tenzir_binary=("/usr/bin/tenzir",),
+            tenzir_node_binary=None,
+        )
+    )
+
+    class FakeCompletedProcess:
+        def __init__(self, stdout: bytes) -> None:
+            self.returncode = 0
+            self.stdout = stdout
+            self.stderr = b""
+
+    call_count = {"count": 0}
+
+    def fake_run(cmd, timeout, stdout=None, stderr=None, env=None, **kwargs):  # type: ignore[no-untyped-def]
+        call_count["count"] += 1
+        # Both calls produce outputs that differ, creating a diff with added line
+        if call_count["count"] == 1:
+            return FakeCompletedProcess(b"line1\nline2\n")
+        return FakeCompletedProcess(b"line1\nline2\nline3\n")
+
+    monkeypatch.setattr(run.subprocess, "run", fake_run)
+
+    try:
+        runner = DiffRunner(a="opt-a", b="opt-b", name="test-diff")
+        result = runner.run(test_file, update=False)
+        # The diff will contain " line1\n line2\n+line3\n", and after sorting should match baseline
+        assert result is True
+    finally:
+        run.apply_settings(original_settings)
+
+
+def test_transform_chaining_future(tmp_path: Path) -> None:
+    """TST-4: Test multiple transforms applied in sequence (placeholder for future transforms)."""
+    test_file = tmp_path / "chained.tql"
+    # Currently only "sort" is available, but test the list format
+    test_file.write_text(
+        """---
+pre_compare: [sort]
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+
+    config_result = run.parse_test_config(test_file)
+    assert config_result["pre_compare"] == ("sort",)
+
+    # Only "sort" exists so far, so exercise apply_pre_compare with that single entry
+    output = b"zebra\napple\nmango\n"
+    result = run.apply_pre_compare(output, ("sort",))
+    assert result == b"apple\nmango\nzebra\n"
+
+
+def test_update_mode_stores_untransformed_output(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """TST-8: Test that --update mode stores untransformed output even when pre-compare is configured."""
+    import sys
+
+    test_file = tmp_path / "update_transform.tql"
+    test_file.write_text(
+        """---
+pre_compare: sort
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+
+    original_settings = config.Settings(
+        root=run.ROOT,
+        tenzir_binary=run.TENZIR_BINARY,
+        tenzir_node_binary=run.TENZIR_NODE_BINARY,
+    )
+    run.apply_settings(
+        config.Settings(
+            root=tmp_path,
+            tenzir_binary=(sys.executable,),
+            tenzir_node_binary=None,
+        )
+    )
+
+    class FakeCompletedProcess:
+        def __init__(self) -> None:
+            self.returncode = 0
+            # Output is intentionally unsorted
+            self.stdout = b"zebra\napple\n"
+            self.stderr = b""
+
+    monkeypatch.setattr(run.subprocess, "run", lambda *args, **kwargs: FakeCompletedProcess())
+
+    try:
+        result = run.run_simple_test(test_file, update=True, output_ext="txt")
+        assert result is True
+
+        baseline_file = test_file.with_suffix(".txt")
+        assert baseline_file.exists()
+        # Baseline should contain the original unsorted output
+        content = baseline_file.read_bytes()
+        assert content == b"zebra\napple\n"
+        # Not the sorted version
+        assert content != b"apple\nzebra\n"
+    finally:
+        run.apply_settings(original_settings)
+
+
+def test_pre_compare_config_inheritance(tmp_path: Path) -> None:
+    """TST-9: Test that pre-compare transforms configuration is properly inherited from parent test.yaml files."""
+    original_settings = config.Settings(
+        root=run.ROOT,
+        tenzir_binary=run.TENZIR_BINARY,
+        tenzir_node_binary=run.TENZIR_NODE_BINARY,
+    )
+    run.apply_settings(
+        config.Settings(
+            root=tmp_path,
+            tenzir_binary=run.TENZIR_BINARY,
+            tenzir_node_binary=run.TENZIR_NODE_BINARY,
+        )
+    )
+
+    suite_dir = tmp_path / "suite"
+    suite_dir.mkdir(parents=True)
+    # Set pre-compare transforms in suite-level test.yaml
+    (suite_dir / "test.yaml").write_text("pre_compare: sort\ntimeout: 10\n", encoding="utf-8")
+    run._clear_directory_config_cache()
+
+    test_file = suite_dir / "case.tql"
+    test_file.write_text("version\nwrite_json\n", encoding="utf-8")
+
+    try:
+        config_result = run.parse_test_config(test_file)
+        # pre-compare transforms should be inherited from the directory config
+        assert config_result["pre_compare"] == ("sort",)
+        assert config_result["timeout"] == 10
+    finally:
+        run.apply_settings(original_settings)
+
+
+def test_pre_compare_list_format(tmp_path: Path) -> None:
+    """Test that pre-compare transforms accept list format in configuration."""
+    test_file = tmp_path / "list_format.tql"
+    test_file.write_text(
+        """---
+pre_compare:
+  - sort
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+
+    config_result = run.parse_test_config(test_file)
+    assert config_result["pre_compare"] == ("sort",)
+
+
+def test_transform_does_not_affect_failure_reporting(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """Test that transforms are applied during comparison but failure output is still meaningful."""
+    import sys
+
+    test_file = tmp_path / "fail_transform.tql"
+    test_file.write_text(
+        """---
+pre_compare: sort
+---
+
+version
+write_json
+""",
+        encoding="utf-8",
+    )
+    baseline_file = test_file.with_suffix(".txt")
+    # Baseline contains sorted content
+    baseline_file.write_text("a\nb\nc\n", encoding="utf-8")
+
+    original_settings = config.Settings(
+        root=run.ROOT,
+        tenzir_binary=run.TENZIR_BINARY,
+        tenzir_node_binary=run.TENZIR_NODE_BINARY,
+    )
+    run.apply_settings(
+        config.Settings(
+            root=tmp_path,
+            tenzir_binary=(sys.executable,),
+            tenzir_node_binary=None,
+        )
+    )
+
+    class FakeCompletedProcess:
+        def __init__(self) -> None:
+            self.returncode = 0
+            # Output that will NOT match baseline even after sorting
+            self.stdout = b"x\ny\nz\n"
+            self.stderr = b""
+
+    monkeypatch.setattr(run.subprocess, "run", lambda *args, **kwargs: FakeCompletedProcess())
+
+    original_show_diff = run.should_show_diff_output()
+    run.set_show_diff_output(True)
+    try:
+        result = run.run_simple_test(test_file, update=False, output_ext="txt")
+        assert result is False
+
+        output = capsys.readouterr().out
+        # Verify failure was reported
+        assert "fail_transform.tql" in output
+    finally:
+        run.set_show_diff_output(original_show_diff)
+        run.apply_settings(original_settings)

From e859a760022c12559405c4d2a1e98206bf570a7c Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 22:03:12 +0100
Subject: [PATCH 4/6] Document transform architecture decision

Transforms intentionally use a hardcoded dict rather than the plugin
architecture (runners/fixtures). They are core comparison utilities with no
current extensibility need. This decision is now documented with a migration
path if custom transforms become necessary in the future.

Resolves: ARC-6
---
 src/tenzir_test/run.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/tenzir_test/run.py b/src/tenzir_test/run.py
index 38d32b8..b65e71b 100644
--- a/src/tenzir_test/run.py
+++ b/src/tenzir_test/run.py
@@ -2880,6 +2880,13 @@ def _transform_sort(output: bytes) -> bytes:
     return result.encode("utf-8", errors="surrogateescape")
 
 
+# Transforms are intentionally simple and hardcoded rather than using the plugin
+# architecture (like runners and fixtures). Rationale:
+# - Transforms are core comparison utilities, not user-extensible features
+# - Currently only one transform exists; extensibility can be added if needed
+# - Pre-compare transforms are rarely customized per-project compared to runners
+# - If custom transforms become necessary, this can be refactored to use a
+#   registry pattern similar to runners/__init__.py
 _TRANSFORMS: dict[str, typing.Callable[[bytes], bytes]] = {
     "sort": _transform_sort,
 }

From bc34fbb31b2f89cc1676527b7007d2ade043a8e3 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 22:40:35 +0100
Subject: [PATCH 5/6] Add pre-compare transforms for non-deterministic test
 output

Introduces a `pre-compare` frontmatter option that normalizes test output
before comparison, allowing tests with non-deterministic ordering to pass
reliably.

Usage in TQL/shell/Python tests:
  ---
  pre-compare: [sort]
  ---

Or in test.yaml for directory-level configuration:
  pre-compare: [sort]

The initial transform is `sort`, which sorts output lines lexicographically.
Transforms apply only at comparison time - baselines store original output.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 ...transforms-for-non-deterministic-output.md | 19 ++++++++++++++
 example-project/tests/pre-compare-sort.sh     | 11 ++++++++
 example-project/tests/pre-compare-sort.txt    |  4 +++
 .../runners/custom_python_fixture_runner.py   |  9 +++++--
 src/tenzir_test/runners/diff_runner.py        |  9 +++++--
 src/tenzir_test/runners/shell_runner.py       | 23 ++++++++++------
 tests/test_run.py                             | 26 +++++++++----------
 tests/test_run_config.py                      |  3 +++
 8 files changed, 78 insertions(+), 26 deletions(-)
 create mode 100644 changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md
 create mode 100755 example-project/tests/pre-compare-sort.sh
 create mode 100644 example-project/tests/pre-compare-sort.txt

diff --git a/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md b/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md
new file mode 100644
index 0000000..9e9313b
--- /dev/null
+++ b/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md
@@ -0,0 +1,19 @@
+---
+title: Add pre-compare transforms for non-deterministic output
+type: feature
+authors:
+  - mavam
+  - claude
+created: 2026-01-30T20:46:00.000000Z
+---
+
+The test framework now supports pre-compare transforms that normalize output before comparison with baselines. This helps handle tests with non-deterministic output like unordered result sets from hash-based aggregations or parallel operations.
+
+Configure the `pre-compare` option in `test.yaml` or per-test frontmatter to apply transforms to both actual output and baselines before comparison:
+
+```yaml
+# Sort output lines for comparison (baseline stays unchanged)
+pre-compare: sort
+```
+
+The `sort` transform sorts output lines lexicographically, making it easy to handle unordered results. Transforms only affect comparison—baseline files remain untransformed on disk, and `--update` continues to store original output.
diff --git a/example-project/tests/pre-compare-sort.sh b/example-project/tests/pre-compare-sort.sh
new file mode 100755
index 0000000..f16e25e
--- /dev/null
+++ b/example-project/tests/pre-compare-sort.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# pre-compare: sort
+
+# Demonstrate pre-compare transform for handling non-deterministic output.
+# This test emits its lines in a fixed but unsorted order; the sort transform
+# ensures the comparison still succeeds against the sorted baseline.
+
+echo "zebra"
+echo "alpha"
+echo "charlie"
+echo "bravo"
diff --git a/example-project/tests/pre-compare-sort.txt b/example-project/tests/pre-compare-sort.txt
new file mode 100644
index 0000000..208808a
--- /dev/null
+++ b/example-project/tests/pre-compare-sort.txt
@@ -0,0 +1,4 @@
+alpha
+bravo
+charlie
+zebra
diff --git a/src/tenzir_test/runners/custom_python_fixture_runner.py b/src/tenzir_test/runners/custom_python_fixture_runner.py
index 341aa84..b7816c9 100644
--- a/src/tenzir_test/runners/custom_python_fixture_runner.py
+++ b/src/tenzir_test/runners/custom_python_fixture_runner.py
@@ -135,12 +135,17 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                         return False
                     run_mod.log_comparison(test, ref_path, mode="comparing")
                     expected = ref_path.read_bytes()
-                    if expected != output:
+                    pre_compare = typing.cast(
+                        tuple[str, ...], test_config.get("pre_compare", tuple())
+                    )
+                    expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+                    output_transformed = run_mod.apply_pre_compare(output, pre_compare)
+                    if expected_transformed != output_transformed:
                         if run_mod.interrupt_requested():
                             run_mod.report_interrupted_test(test)
                         else:
                             run_mod.report_failure(test, "")
-                            run_mod.print_diff(expected, output, ref_path)
+                            run_mod.print_diff(expected_transformed, output_transformed, ref_path)
                         return False
             finally:
                 fixture_api.pop_context(context_token)
diff --git a/src/tenzir_test/runners/diff_runner.py b/src/tenzir_test/runners/diff_runner.py
index a41b0d0..b891c17 100644
--- a/src/tenzir_test/runners/diff_runner.py
+++ b/src/tenzir_test/runners/diff_runner.py
@@ -68,6 +68,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
                     raise RuntimeError("TENZIR_BINARY must be configured for diff runners")
                 base_cmd: list[str] = [*binary, *config_args]
 
+                coverage_dir = ""
                 if coverage:
                     coverage_dir = env.get(
                         "CMAKE_COVERAGE_OUTPUT_DIRECTORY",
@@ -111,6 +112,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
         root_bytes = str(run_mod.ROOT).encode() + b"/"
         unoptimized_stdout = unoptimized.stdout.replace(root_bytes, b"")
         optimized_stdout = optimized.stdout.replace(root_bytes, b"")
+        # Generate diff without transforms first
         diff_chunks = list(
             difflib.diff_bytes(
                 difflib.unified_diff,
@@ -130,12 +132,15 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
             ref_path.write_bytes(diff_bytes)
         else:
             expected = ref_path.read_bytes()
-            if diff_bytes != expected:
+            pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
+            expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(diff_bytes, pre_compare)
+            if actual_transformed != expected_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected, diff_bytes, ref_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, ref_path)
                 return False
         run_mod.success(test)
         return True
diff --git a/src/tenzir_test/runners/shell_runner.py b/src/tenzir_test/runners/shell_runner.py
index d139ffc..ac6b1fc 100644
--- a/src/tenzir_test/runners/shell_runner.py
+++ b/src/tenzir_test/runners/shell_runner.py
@@ -125,6 +125,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
             run_mod.success(test)
             return True
 
+        pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
         if combined_bytes:
             if not stdout_path.exists():
                 run_mod.report_failure(
@@ -134,22 +135,28 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                 return False
             run_mod.log_comparison(test, stdout_path, mode="comparing")
             expected_stdout = stdout_path.read_bytes()
-            if expected_stdout != combined_bytes:
+            expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(combined_bytes, pre_compare)
+            if expected_transformed != actual_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, combined_bytes, stdout_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
                 return False
         elif stdout_path.exists():
             expected_stdout = stdout_path.read_bytes()
+            # Check if original baseline is empty before transformation
             if expected_stdout not in {b"", b"\n"}:
-                if run_mod.interrupt_requested():
-                    run_mod.report_interrupted_test(test)
-                else:
-                    run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, b"", stdout_path)
-                return False
+                expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+                actual_transformed = run_mod.apply_pre_compare(b"", pre_compare)
+                if expected_transformed != actual_transformed:
+                    if run_mod.interrupt_requested():
+                        run_mod.report_interrupted_test(test)
+                    else:
+                        run_mod.report_failure(test, "")
+                        run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
+                    return False
 
         run_mod.success(test)
         return True
diff --git a/tests/test_run.py b/tests/test_run.py
index f12dcf7..e4108c9 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -1583,20 +1583,18 @@ def test_directory_with_test_yaml_inside_root_is_selector(tmp_path, monkeypatch)
 
 
 class TestTransformSort:
-    def test_empty_input_returns_empty(self):
-        assert run._transform_sort(b"") == b""
-
-    def test_single_line_without_newline(self):
-        assert run._transform_sort(b"hello") == b"hello"
-
-    def test_single_line_with_newline(self):
-        assert run._transform_sort(b"hello\n") == b"hello\n"
-
-    def test_multiple_lines_get_sorted(self):
-        assert run._transform_sort(b"zebra\napple\nmango\n") == b"apple\nmango\nzebra\n"
-
-    def test_duplicate_lines_preserved(self):
-        assert run._transform_sort(b"b\na\nb\na\n") == b"a\na\nb\nb\n"
+    @pytest.mark.parametrize(
+        "input_data,expected_output",
+        [
+            (b"", b""),
+            (b"hello", b"hello"),
+            (b"hello\n", b"hello\n"),
+            (b"zebra\napple\nmango\n", b"apple\nmango\nzebra\n"),
+            (b"b\na\nb\na\n", b"a\na\nb\nb\n"),
+        ],
+    )
+    def test_sort_transform(self, input_data, expected_output):
+        assert run._transform_sort(input_data) == expected_output
 
     def test_trailing_newline_preserved(self):
         result = run._transform_sort(b"b\na\n")
diff --git a/tests/test_run_config.py b/tests/test_run_config.py
index d9de4db..d00117c 100644
--- a/tests/test_run_config.py
+++ b/tests/test_run_config.py
@@ -72,6 +72,7 @@ def test_parse_test_config_override(tmp_path: Path, configured_root: Path) -> No
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -102,6 +103,7 @@ def test_parse_test_config_yaml_frontmatter(tmp_path: Path, configured_root: Pat
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -341,6 +343,7 @@ def test_parse_python_comment_frontmatter(tmp_path: Path, configured_root: Path)
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 

From c2ac32913b136edea14a4b9469a860472bb02c49 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin <matthias@vallentin.net>
Date: Fri, 30 Jan 2026 22:57:25 +0100
Subject: [PATCH 6/6] Remove unreachable code after NoReturn function calls

Mypy correctly flags the statements that follow _raise_config_error()
calls as unreachable, because the function is typed as NoReturn. Remove
these dead `return` and `continue` statements.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/tenzir_test/run.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/tenzir_test/run.py b/src/tenzir_test/run.py
index b65e71b..d78de6d 100644
--- a/src/tenzir_test/run.py
+++ b/src/tenzir_test/run.py
@@ -1251,7 +1251,6 @@ def _normalize_fixtures_value(
             f"Invalid value for 'fixtures', expected string or list, got '{value}'",
             line_number,
         )
-        return tuple()
 
     fixtures: list[str] = []
     for entry in raw:
@@ -1312,7 +1311,6 @@ def _normalize_inputs_value(
         f"Invalid value for 'inputs', expected string, got '{value}'",
         line_number,
     )
-    return None
 
 
 def _normalize_package_dirs_value(
@@ -1329,7 +1327,6 @@ def _normalize_package_dirs_value(
             f"Invalid value for 'package-dirs', expected list of strings, got '{value}'",
             line_number,
         )
-        return tuple()
     base_dir = _extract_location_path(location).parent
     normalized: list[str] = []
     for entry in value:
@@ -1339,7 +1336,6 @@ def _normalize_package_dirs_value(
                 f"Invalid package-dirs entry '{entry}', expected string",
                 line_number,
             )
-            continue
         raw = os.fspath(entry).strip()
         if not raw:
             _raise_config_error(
@@ -1347,7 +1343,6 @@ def _normalize_package_dirs_value(
                 "Invalid package-dirs entry: must be non-empty string",
                 line_number,
             )
-            continue
         path = Path(raw)
         if not path.is_absolute():
             path = base_dir / path
@@ -1383,7 +1378,6 @@ def _normalize_pre_compare_value(
             f"Invalid value for 'pre-compare', expected string or list, got '{value}'",
             line_number,
         )
-        return tuple()
 
     transforms: list[str] = []
     valid_names = set(_TRANSFORMS.keys())
@@ -1478,7 +1472,6 @@ def _assign_config_option(
             f"Invalid value for '{canonical}', expected 'true' or 'false', got '{value}'",
             line_number,
         )
-        return
 
     if canonical == "timeout":
         if isinstance(value, int):
@@ -1491,7 +1484,6 @@ def _assign_config_option(
                 f"Invalid value for 'timeout', expected integer, got '{value}'",
                 line_number,
             )
-            return
         if timeout_value <= 0:
             _raise_config_error(
                 location,
@@ -1547,7 +1539,6 @@ def _assign_config_option(
                 f"Invalid value for 'retry', expected integer, got '{value}'",
                 line_number,
             )
-            return
         if retry_value <= 0:
             _raise_config_error(
                 location,