Skip to content

Commit 2501f1e

Browse files
ychan and claude committed
test(auto-eval): cover legacy from_dict payload without ef_cqs_strict
Address review comment #4 (Minor) on PR #764 Sub-project A. The backward-compat behavior — old ledger/graveyard JSON written before this PR must reload with ef_cqs_strict defaulted to 0.0 — was correct in code (via the valid_fields filter on both CompanyCQS.from_dict and CQSResult.from_dict) but not pinned by a regression test. Adding the explicit test closes that gap so a future refactor that accidentally requires the field can't silently break re-reads of pre-Sub-A artifacts. The test asserts on the CQSResult top level AND the nested CompanyCQS — both dataclasses need the tolerant-load behavior for checkpoint files to round-trip cleanly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2f206cc commit 2501f1e

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

tests/xbrl/standardization/test_scoring_integrity.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,58 @@ def test_serialization_roundtrip_preserves_ef_cqs_strict(self):
420420
assert restored.ef_cqs_strict == pytest.approx(0.87)
421421
assert restored.company_scores["AAPL"].ef_cqs_strict == pytest.approx(0.87)
422422

423+
def test_from_dict_tolerates_legacy_payload_without_ef_cqs_strict(self):
    """Legacy JSON lacking ``ef_cqs_strict`` must still load via ``from_dict``.

    Ledger/graveyard payloads persisted before this PR carry no
    ``ef_cqs_strict`` key, neither on the top-level result nor on the
    nested per-company entry. Loading such a payload must yield 0.0 for
    the missing field rather than raise KeyError, so that older run
    artifacts (escalation-reports, auto_eval_checkpoint, etc.) remain
    readable.
    """
    from edgar.xbrl.standardization.tools.auto_eval import CQSResult

    # Per-company entry in the pre-Sub-project-A shape: all required
    # fields are present, and ef_cqs_strict is deliberately left out.
    legacy_company = {
        "ticker": "AAPL",
        "pass_rate": 0.9,
        "mean_variance": 1.0,
        "coverage_rate": 0.95,
        "golden_master_rate": 0.8,
        "regression_count": 0,
        "metrics_total": 37,
        "metrics_mapped": 35,
        "metrics_valid": 33,
        "metrics_excluded": 2,
        "cqs": 0.85,
        "ef_cqs": 0.93,
    }

    # Top-level result in the same legacy shape, again with no
    # ef_cqs_strict key.
    legacy_payload = {
        "pass_rate": 0.9,
        "mean_variance": 1.0,
        "coverage_rate": 0.95,
        "golden_master_rate": 0.8,
        "regression_rate": 0.0,
        "cqs": 0.85,
        "companies_evaluated": 1,
        "total_metrics": 37,
        "total_mapped": 35,
        "total_valid": 33,
        "total_regressions": 0,
        "ef_cqs": 0.93,
        "company_scores": {"AAPL": legacy_company},
    }

    # Loading must succeed even though the field is missing.
    restored = CQSResult.from_dict(legacy_payload)
    restored_company = restored.company_scores["AAPL"]

    # The absent field falls back to 0.0 at both levels.
    assert restored.ef_cqs_strict == 0.0
    assert restored_company.ef_cqs_strict == 0.0

    # Fields that did exist in the legacy payload come back unchanged.
    assert restored.ef_cqs == pytest.approx(0.93)
    assert restored_company.ef_cqs == pytest.approx(0.93)
474+
423475

424476
class TestCQSResultAggregation:
425477
"""Test that aggregate CQSResult includes scoring integrity fields."""

0 commit comments

Comments
 (0)