test(auto-eval): cover legacy from_dict payload without ef_cqs_strict

ychan · claude · ychan · commit 2501f1eace6e · 2026-04-06T21:16:45.000+02:00
Address review comment #4 (Minor) on PR #764 Sub-project A. The backward-compat behavior — old ledger/graveyard JSON written before this PR must reload with ef_cqs_strict defaulted to 0.0 — was correct in code (via the valid_fields filter on both CompanyCQS.from_dict and CQSResult.from_dict) but not pinned by a regression test. Adding the explicit test closes that gap, so a future refactor that accidentally requires the field can't silently break re-reads of pre-Sub-A artifacts. The test asserts on the CQSResult top level AND the nested CompanyCQS — both dataclasses need the tolerant-load behavior for checkpoint files to round-trip cleanly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/tests/xbrl/standardization/test_scoring_integrity.py b/tests/xbrl/standardization/test_scoring_integrity.py
@@ -420,6 +420,58 @@ def test_serialization_roundtrip_preserves_ef_cqs_strict(self):
         assert restored.ef_cqs_strict == pytest.approx(0.87)
         assert restored.company_scores["AAPL"].ef_cqs_strict == pytest.approx(0.87)
 
+    def test_from_dict_tolerates_legacy_payload_without_ef_cqs_strict(self):
+        """Ensure ``from_dict`` defaults a missing ``ef_cqs_strict`` to 0.0.
+
+        Ledger/graveyard JSON persisted before the field existed has no such
+        key. Loading it must not raise; otherwise re-reading old run artifacts
+        (escalation-reports, auto_eval_checkpoint, etc.) would break.
+        """
+        from edgar.xbrl.standardization.tools.auto_eval import CQSResult
+
+        # Minimal legacy payload: no ``ef_cqs_strict`` key at the top level
+        # or in the nested ``company_scores`` entry.
+        legacy_payload = {
+            "pass_rate": 0.9,
+            "mean_variance": 1.0,
+            "coverage_rate": 0.95,
+            "golden_master_rate": 0.8,
+            "regression_rate": 0.0,
+            "cqs": 0.85,
+            "companies_evaluated": 1,
+            "total_metrics": 37,
+            "total_mapped": 35,
+            "total_valid": 33,
+            "total_regressions": 0,
+            "ef_cqs": 0.93,
+            # ef_cqs_strict intentionally absent
+            "company_scores": {
+                "AAPL": {
+                    "ticker": "AAPL",
+                    "pass_rate": 0.9,
+                    "mean_variance": 1.0,
+                    "coverage_rate": 0.95,
+                    "golden_master_rate": 0.8,
+                    "regression_count": 0,
+                    "metrics_total": 37,
+                    "metrics_mapped": 35,
+                    "metrics_valid": 33,
+                    "metrics_excluded": 2,
+                    "cqs": 0.85,
+                    "ef_cqs": 0.93,
+                    # ef_cqs_strict intentionally absent
+                },
+            },
+        }
+
+        # Must not raise; the absent field is expected to fall back to 0.0.
+        restored = CQSResult.from_dict(legacy_payload)
+        assert restored.ef_cqs_strict == 0.0
+        assert restored.company_scores["AAPL"].ef_cqs_strict == 0.0
+        # Fields that were present in the payload must survive the load.
+        assert restored.ef_cqs == pytest.approx(0.93)
+        assert restored.company_scores["AAPL"].ef_cqs == pytest.approx(0.93)
+
 
 class TestCQSResultAggregation:
     """Test that aggregate CQSResult includes scoring integrity fields."""