diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md
new file mode 100644
index 0000000..f5197ba
--- /dev/null
+++ b/IMPLEMENTATION_PLAN.md
@@ -0,0 +1,643 @@
+# Test History & Testplan Implementation Plan
+
+Based on `TEST_HISTORY_DESIGN.md`.
+
+---
+
+## Scope
+
+This plan covers the full implementation, testing, and documentation of:
+
+1. **Binary history store** (Parts 1–6): efficient per-test-run records inside the NCDB ZIP
+2. **Coverage-per-test fixes** (Part 4): stable `run_id`, contrib data loss bug fix
+3. **Testplan embedding** (Parts 9–10): `testplan.json` ZIP member, closure computation
+4. **Reports** (Parts 9.4, 11): regression summary, stage gate, delta, trend, CI export
+5. **Competitive parity additions** (Part 11.3): waivers, contribution ranking, safety traceability
+
+---
+
+## Phase 1 — Binary History Store
+
+### 1.1 New module: `src/ucis/ncdb/test_registry.py`
+
+Implements `TestRegistry` class:
+
+- **Struct layout** (`magic=0x54535452`, `version=1`, `next_run_id`, `num_names`, `num_seeds`, offset tables, two string heaps)
+- `assign_run_id() -> int` — atomic increment of `next_run_id`
+- `lookup_name_id(name: str) -> int` — binary search on sorted name heap; assign if absent
+- `lookup_seed_id(seed: str) -> int` — same for seed heap
+- `name_for_id(name_id: int) -> str` — O(1) offset-table access
+- `seed_for_id(seed_id: int) -> str`
+- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> TestRegistry`
+
+Invariants:
+- Name heap kept sorted; insertion preserves sort order (re-builds heap on insert)
+- Seeds stored as decimal string for integers, verbatim for complex strings
+- `next_run_id` never decreases; survives ZIP rewrite and merge
+
+### 1.2 New module: `src/ucis/ncdb/test_stats.py`
+
+Implements `TestStatsTable` with one 64-byte `TestStatsEntry` per test (indexed by `name_id`):
+
+Fields per entry (matching design §3.4):
+`total_runs`, `pass_count`, `fail_count`, `error_count`, `first_ts`, `last_ts`,
+`last_green_ts`, `transition_count`, `streak` (i16), `last_status`, `_pad`,
+`flake_score` (f32), `fail_rate` (f32), `mean_cpu_time` (f32), `m2_cpu_time` (f32),
+`cusum_value` (f32), `cusum_ref_mean` (f32), `grade_score` (f32), `total_seeds_seen` (u16),
+`_reserved[6]`
+
+Methods:
+- `update(name_id, status, ts, cpu_time=None)` — Welford online update + CUSUM step (k=0.5, h=4.0)
+- `get(name_id) -> TestStatsEntry`
+- `top_flaky(n=20) -> list[TestStatsEntry]` — sort by `flake_score DESC`
+- `top_failing(n=20, flake_threshold=0.1) -> list[TestStatsEntry]`
+- `serialize() -> bytes` / `@classmethod deserialize(data: bytes, num_entries: int) -> TestStatsTable`
+
+CUSUM update rule (on each run):
+```
+x = 1.0 if FAIL else 0.0
+S = max(0, S + x - (cusum_ref_mean + 0.5))
+if S > 4.0: record change-point, reset S = 0
+```
+
+### 1.3 New module: `src/ucis/ncdb/history_buckets.py`
+
+Implements `HistoryBucket` for reading and writing `history/NNNNNN.bin` files.
+
+Write path:
+- `BucketWriter` accumulates records in memory, sorted by `(name_id, ts)`
+- `seal() -> bytes` — produce compressed bucket bytes
+- Sealed once the calendar day rolls over or record count reaches 10,000 (fixed threshold)
+
+Read path:
+- `BucketReader(data: bytes)` — decompress and parse header, name index, seed dict, columns
+- `records_for_name(name_id: int) -> list[BucketRecord]` — binary search name index → O(log N)
+- `all_records() -> Iterable[BucketRecord]`
+
+`BucketRecord` fields: `name_id`, `seed_idx` (mapped to `seed_id` via local dict), `ts`, `status`, `flags`
+
+`status_flags` byte layout:
+- bits `[7:4]`: status (0=OK, 1=FAIL, 2=ERROR, 3=FATAL, 4=COMPILE)
+- bits `[3:0]`: flags (bit0=seed_is_hash, bit1=is_rerun, bit2=has_coverage, bit3=was_squashed)
+
+Compression tiering:
+- Current-day (mutable) bucket: `ZIP_DEFLATE, level=1`
+- Sealed (past-day, immutable) buckets: `ZIP_LZMA` if `liblzma` is available, else `ZIP_DEFLATE, level=9` (automatic fallback — no error raised)
+- At close: sealed buckets are copied verbatim (no decompress/recompress cycle) — critical for write performance
+
+Varint encoding for `ts_deltas`: use existing `src/ucis/ncdb/varint.py`
+
+### 1.4 New module: `src/ucis/ncdb/bucket_index.py`
+
+Implements `BucketIndex` for `history/bucket_index.bin` (magic `0x42494458`).
+
+One 28-byte entry per bucket (seven `u32` fields):
+`bucket_seq (u32)`, `ts_start (u32)`, `ts_end (u32)`, `num_records (u32)`,
+`fail_count (u32)`, `min_name_id (u32)`, `max_name_id (u32)`
+
+Methods:
+- `add_bucket(seq, ts_start, ts_end, num_records, fail_count, min_name_id, max_name_id)`
+- `buckets_in_range(ts_from, ts_to) -> list[BucketIndexEntry]`
+- `buckets_for_name(name_id, ts_from=None, ts_to=None) -> list[BucketIndexEntry]`
+  — filters by `min_name_id ≤ name_id ≤ max_name_id`
+- `pass_rate_series() -> list[(ts_start, pass_rate)]` — from `fail_count`/`num_records` per bucket
+- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> BucketIndex`
+
+### 1.5 New module: `src/ucis/ncdb/contrib_index.py`
+
+Implements `ContribIndex` for `contrib_index.bin` (magic `0x43494458`).
+
+Header: `magic`, `version`, `merge_policy (u8)`, `squash_watermark (u32)`, `num_active (u32)`
+
+One 8-byte entry per active contrib: `run_id (u32)`, `name_id (u16)`, `status (u8)`, `flags (u8)`
+
+Methods:
+- `add_entry(run_id, name_id, status, flags)`
+- `passing_run_ids(policy=POLICY_PASS_ONLY) -> list[int]`
+  — applies the merge policy filter given by one of the `POLICY_*` constants below (all / pass_only / pass_first_attempt (`POLICY_EXCLUDE_ERROR_RERUN`) / strict)
+- `set_squash_watermark(run_id: int)`
+- `remove_entries_up_to(run_id: int)` — after squash
+- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> ContribIndex`
+
+Merge policy constants:
+```python
+POLICY_ALL = 0
+POLICY_PASS_ONLY = 1
+POLICY_EXCLUDE_ERROR_RERUN = 2
+POLICY_STRICT = 3
+```
+
+### 1.6 New module: `src/ucis/ncdb/squash_log.py`
+
+Implements `SquashLog` for `squash_log.bin` (append-only, 24 bytes/entry).
+
+Entry fields: `ts (u32)`, `policy (u8)`, `_pad[3]`, `from_run (u32)`, `to_run (u32)`,
+`num_runs (u32)`, `pass_runs (u32)`
+
+Methods:
+- `append(ts, policy, from_run, to_run, num_runs, pass_runs)`
+- `entries() -> list[SquashLogEntry]`
+- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> SquashLog`
+
+### 1.7 Modifications to existing files
+
+#### `src/ucis/ncdb/constants.py`
+- Set `NCDB_VERSION = "2.0"` (bumped from `"1.0"`)
+- Add new member name constants:
+  ```python
+  MEMBER_TEST_REGISTRY   = "test_registry.bin"
+  MEMBER_TEST_STATS      = "test_stats.bin"
+  MEMBER_BUCKET_INDEX    = "history/bucket_index.bin"
+  MEMBER_CONTRIB_INDEX   = "contrib_index.bin"
+  MEMBER_SQUASH_LOG      = "squash_log.bin"
+  MEMBER_TESTPLAN        = "testplan.json"
+  MEMBER_WAIVERS         = "waivers.json"
+  ```
+- Add status constants:
+  ```python
+  HIST_STATUS_OK      = 0
+  HIST_STATUS_FAIL    = 1
+  HIST_STATUS_ERROR   = 2
+  HIST_STATUS_FATAL   = 3
+  HIST_STATUS_COMPILE = 4
+  ```
+
+#### `src/ucis/ncdb/manifest.py`
+- Add `history_format: str` field — `"v1"` (JSON only) or `"v2"` (binary + JSON for MERGE nodes)
+- Backward-compat: default to `"v1"` when reading old manifests without this field
+- Auto-upgrade to `"v2"` the first time `add_test_run()` is called on any database; no explicit opt-in required
+
+#### `src/ucis/ncdb/ncdb_ucis.py` (`NcdbUCIS`)
+- Add lazy-load fields and public API for binary history:
+  ```python
+  _test_registry: Optional[TestRegistry]
+  _test_stats: Optional[TestStatsTable]
+  _bucket_index: Optional[BucketIndex]
+  _contrib_index: Optional[ContribIndex]
+  _squash_log: Optional[SquashLog]
+  _history_v2_dirty: bool
+  ```
+- New public methods:
+  - `add_test_run(name, seed, status, ts=None, cpu_time=None, has_coverage=False, is_rerun=False) -> int`
+    — assigns `run_id`, updates registry, stats, current bucket, optionally adds contrib entry
+  - `query_test_history(name, ts_from=None, ts_to=None) -> list[BucketRecord]`
+    — uses registry → bucket index → targeted bucket reads
+  - `get_test_stats(name) -> Optional[TestStatsEntry]`
+  - `top_flaky_tests(n=20) -> list[TestStatsEntry]`
+  - `top_failing_tests(n=20) -> list[TestStatsEntry]`
+  - `squash_coverage(policy=POLICY_PASS_ONLY)` — implements §4.5 squash operation
+- Backward-compat: if `manifest.history_format == "v1"`, only `history.json` is used
+  for TEST nodes (existing behavior unchanged)
+
+#### `src/ucis/ncdb/ncdb_writer.py`
+- Write all new binary members when `history_format == "v2"`:
+  - `test_registry.bin`, `test_stats.bin`, `contrib_index.bin`, `squash_log.bin`
+  - `history/bucket_index.bin`
+  - Current-day bucket with `ZIP_DEFLATE, level=1`
+  - Sealed buckets: copy verbatim compressed bytes (no re-decompression)
+- Write `testplan.json` if set
+- Write `waivers.json` if set
+- Write `manifest.json` with updated `history_format` and `NCDB_VERSION`
+
+#### `src/ucis/ncdb/ncdb_reader.py`
+- Read `history_format` from manifest; if `"v2"`, load binary members
+- Fall back to `history.json` for MERGE nodes in all versions
+- Attach `_testplan` and `_waivers` attributes to returned db object if present
+
+#### `src/ucis/ncdb/ncdb_merger.py` — **Critical bug fix**
+
+Fix `_merge_same_schema()` contrib data loss:
+
+1. **Assign `run_id` offsets** so each source's run IDs are disjoint:
+   ```python
+   offset_B = max(run_id_in_A) + 1
+   offset_C = offset_B + max(run_id_in_B) + 1   # offsets are cumulative across sources
+   ```
+2. **Copy and rename contrib files**: `contrib/{src_run_id}.bin` → `contrib/{src_run_id + offset}.bin`
+3. **Merge `contrib_index.bin` entries** from all sources (adjust `run_id`, re-sort)
+4. **Merge `test_registry.bin`** — unify name_ids across sources (remap as needed)
+5. **Merge `test_stats.bin`** — sum counts, recompute derived fields
+6. **Merge bucket files** — copy all sealed buckets, reconcile name_ids if registries differed
+7. **Merge `bucket_index.bin`** — concatenate entries, re-sort by `ts_start`
+8. **Append `squash_log.bin`** entries from all sources (no run_id adjustment needed)
+9. **Sum `counts.bin`** arrays for the merged output (existing behavior, kept)
+
+Also add `_merge_testplans(sources) -> Optional[bytes]` implementing §10.8 strategy.
+
+---
+
+## Phase 2 — Testplan Embedding
+
+### 2.1 New module: `src/ucis/ncdb/testplan.py`
+
+Implements the data model (exactly as specified in §10.2):
+
+- `CovergroupEntry(name, desc)`
+- `Testpoint(name, stage, desc, tests, tags, na, source_template)`
+  - Optional `requirements: list[RequirementLink]` field for ALM traceability (§11.3.6)
+- `RequirementLink(system, project, item_id, url)`
+- `Testplan(format_version, source_file, import_timestamp, testpoints, covergroups)`
+  - Lazy indices: `_tp_by_name`, `_tp_by_test` (built once on first query)
+  - `getTestpoint(name)`, `testpointForTest(test_name)` (3-strategy match: exact / seed-strip / wildcard)
+  - `testpointsForStage(stage)`, `stages()`
+  - `to_dict()`, `serialize() -> bytes`, `from_dict()`, `from_bytes()`, `load(path)`, `save(path)`
+- Module-level helpers: `get_testplan(db)`, `set_testplan(db, tp)`
+
+### 2.2 New module: `src/ucis/ncdb/testplan_closure.py`
+
+Closure computation (§10.9) and covergroup joining (§10.10):
+
+- `TPStatus` enum: `CLOSED`, `PARTIAL`, `FAILING`, `NOT_RUN`, `NA`, `UNIMPLEMENTED`
+- `TestpointResult(testpoint, status, matched_tests, pass_count, fail_count)`
+- `compute_closure(testplan, db, waivers=None) -> list[TestpointResult]`
+  - Optional `waivers` argument filters waived bins from coverage percentage
+- `stage_gate_status(results, stage, testplan, require_flake_score_below=None, require_coverage_pct=None) -> dict`
+  - When `require_flake_score_below` is set, gate fails if covering tests have `flake_score` above threshold
+- `find_covergroup_scopes(db, cg_name) -> list`
+- `build_covergroup_index(db) -> dict[str, list]`
+
+Competitive parity additions (§11.3.1, §11.3.2):
+- `compute_contribution(db) -> list[TestContribution]`
+  — iterates `contrib/*.bin`, computes unique bins per test; returns ranked list
+- `compute_minimum_test_set(db, target_coverage=0.95) -> MinimumTestSet`
+  — greedy set-cover approximation over contrib vectors; returns included/excluded test lists + CPU savings estimate
+
+### 2.3 New module: `src/ucis/ncdb/testplan_hjson.py`
+
+OpenTitan Hjson import (§10.11):
+
+- `import_hjson(hjson_path, substitutions=None) -> Testplan`
+  — parses Hjson, expands `{key}` wildcards (cartesian product for list values), handles `tests: ["N/A"]`
+- `_expand_tests(test_list, subs) -> list[str]`
+- `_expand_template(template, subs) -> list[str]`
+
+Falls back to `json` if `hjson` package is not installed (handles JSON-subset .hjson files).
+`hjson` is added as a regular (non-optional) dependency in `setup.py` and `ivpm.yaml`
+(both `default` and `default-dev` dependency groups).
+
+### 2.4 NcdbUCIS testplan API (§10.4)
+
+Add to `NcdbUCIS`:
+```python
+_loaded_testplan: bool
+_testplan: Optional[Testplan]
+_testplan_dirty: bool
+
+def getTestplan() -> Optional[Testplan]
+def setTestplan(tp: Testplan) -> None
+def _ensure_testplan() -> None
+```
+
+### 2.5 New module: `src/ucis/ncdb/waivers.py` (§11.3.3)
+
+- `Waiver(id, scope_pattern, bin_pattern, rationale, approver, approved_at, expires_at, status)`
+- `WaiverSet.load(path_or_bytes)`, `WaiverSet.save(path)`, `WaiverSet.matches_scope(scope_path, bin_name)`
+- `NcdbUCIS.getWaivers()` / `setWaivers(ws)` analogous to testplan
+- `WaiverSet.matches_scope()` performs pattern matching only; expiry enforcement is the caller's responsibility
+
+---
+
+## Phase 3 — Reports
+
+All report functions live in a new module `src/ucis/ncdb/reports.py` (or split into
+`testplan_reports.py` for testplan-oriented reports and `history_reports.py` for trend reports)
+unless otherwise noted.
+
+**Output convention**: every report function returns a structured dataclass (e.g.
+`ClosureSummary`, `StageGateResult`) AND provides a companion `format_*(result) -> str`
+function that renders the dataclass to human-readable text. A `to_json()` method on each
+result dataclass enables machine-readable output as a first step. CLI commands call the
+formatter; tests assert against the structured data.
+
+### P0 Reports (essential for v1)
+
+| ID | Function | Inputs | Output |
+|----|----------|--------|--------|
+| A  | `report_testpoint_closure(results)` | `list[TestpointResult]` | formatted table + stage roll-up |
+| B  | `report_stage_gate(results, stage, testplan)` | `list[TestpointResult]`, stage name, testplan | go/no-go summary with critical path |
+| C  | `report_coverage_per_testpoint(results, db, testplan)` | testplan + scopes | testpoint × covergroup × pct table |
+| D  | `report_regression_delta(results_new, results_old)` | two closure result lists | newly-closed, newly-failing, coverage delta |
+
+### P1 Reports
+
+| ID | Function | Inputs | Output |
+|----|----------|--------|--------|
+| E  | `report_stage_progression(db, testplan)` | merged NCDB with history | stage closure % over time (ASCII art or data) |
+| F  | `report_testpoint_reliability(results, db)` | closure results + test_stats | flake score per testpoint |
+| G  | `report_unexercised_covergroups(db, testplan)` | UCIS scopes + testplan | zero-hit covergroups list |
+| I  | `report_coverage_contribution(db)` | contrib/*.bin | per-test unique bin contribution table |
+
+### P2 Reports (future)
+
+| ID | Function | Inputs | Output |
+|----|----------|--------|--------|
+| H  | `report_test_budget(testplan, db)` | test_stats CPU mean + testplan | CPU hours by stage |
+| J  | `report_minimum_test_set(db, target)` | contrib + target | minimum test set with savings estimate |
+| K  | `report_closure_forecast(db)` | history coverage series | timeline prediction with CI |
+| L  | `report_safety_matrix(results, waivers, path)` | traceability + waivers | CSV/text safety matrix |
+| M  | `report_seed_reliability(db, test_name)` | history buckets | seed range heat-map |
+
+### CI/CD Export (§11.3.5)
+
+New module: `src/ucis/ncdb/testplan_export.py`
+
+- `export_junit_xml(results, output_path)` — testpoints as JUnit `<testcase>` elements
+- `export_github_annotations(results)` — writes `::error::` / `::warning::` lines to stdout
+- `export_summary_markdown(results, history_db=None)` — GitHub Actions Job Summary markdown
+
+---
+
+## Phase 4 — CLI Integration
+
+Add new sub-commands to the existing `pyucis` CLI (wherever it lives):
+
+- `pyucis history query <cdb> <test_name> [--days N]`
+- `pyucis history stats <cdb> [--top-flaky N] [--top-failing N]`
+- `pyucis testplan import <cdb> <hjson_path> [--subs key=val ...]`
+- `pyucis testplan closure <cdb> [--testplan path] [--stage V2]`
+- `pyucis testplan export-junit <cdb> [--testplan path] -o output.xml`
+- `pyucis squash <cdb> [--policy pass_only]`
+- `pyucis merge <out.cdb> <in1.cdb> [<in2.cdb> ...]`
+
+---
+
+## Testing Strategy
+
+### Unit Tests — Binary Formats
+
+**File**: `tests/unit/ncdb/test_test_registry.py`
+- `test_assign_run_id_increments` — monotonic, survives roundtrip
+- `test_lookup_name_id_new` — new name assigned correctly
+- `test_lookup_name_id_existing` — same name returns same ID
+- `test_name_heap_sorted` — binary search correctness
+- `test_seed_id_roundtrip` — seed stored and retrieved verbatim
+- `test_serialize_deserialize_empty` — empty registry roundtrip
+- `test_serialize_deserialize_1000_names` — large registry roundtrip
+
+**File**: `tests/unit/ncdb/test_test_stats.py`
+- `test_update_pass` — pass_count increments, last_ts updates
+- `test_update_fail` — fail_count, transition_count, streak update
+- `test_welford_mean` — cpu_time mean converges on known series
+- `test_welford_stddev` — M2 accumulator → stddev correct
+- `test_flake_score_alternating` — alternating pass/fail → score ≈ 1.0
+- `test_flake_score_stable` — all-pass → score = 0.0
+- `test_cusum_change_point` — sustained failures → CUSUM exceeds h=4.0
+- `test_grade_score_range` — [0, 1] always
+- `test_serialize_deserialize` — full table roundtrip
+
+**File**: `tests/unit/ncdb/test_history_buckets.py`
+- `test_write_read_single_record` — one record, roundtrip
+- `test_name_index_binary_search` — lookup for specific name_id O(log N)
+- `test_seed_dict_compression` — seed_idx maps to correct global seed_id
+- `test_ts_delta_encoding` — varint deltas decode to correct timestamps
+- `test_status_flags_pack_unpack` — nibble-packed byte round-trips all values
+- `test_seal_deflate` — sealed bucket compresses, decompresses correctly
+- `test_seal_lzma` — LZMA tier works
+- `test_10k_records_size` — 10K records bucket ≤ design projection (~5 KB compressed)
+- `test_records_for_name_not_present` — returns empty list
+
+**File**: `tests/unit/ncdb/test_bucket_index.py`
+- `test_add_and_query_range` — date range filter
+- `test_buckets_for_name` — name_id range filter
+- `test_pass_rate_series` — fail_count/num_records computation
+- `test_serialize_deserialize_empty`
+- `test_serialize_deserialize_3650_entries` — 10-year index ≈ 100 KB (3650 entries × 7 `u32` fields = 102,200 bytes, plus header)
+
+**File**: `tests/unit/ncdb/test_contrib_index.py`
+- `test_passing_run_ids_pass_only` — POLICY_PASS_ONLY filter
+- `test_passing_run_ids_strict` — is_rerun + first_attempt filtering
+- `test_squash_watermark_update`
+- `test_remove_entries_after_squash`
+- `test_serialize_deserialize`
+
+**File**: `tests/unit/ncdb/test_squash_log.py`
+- `test_append_one_entry`
+- `test_append_multiple_entries` — all entries preserved
+- `test_serialize_deserialize`
+
+### Unit Tests — Testplan
+
+**File**: `tests/unit/ncdb/test_testplan.py`
+- `test_testpointForTest_exact`
+- `test_testpointForTest_seed_strip` — `uart_smoke_12345` → `uart_smoke`
+- `test_testpointForTest_wildcard` — `foo_*` matches `foo_bar`
+- `test_testpointsForStage`
+- `test_stages_ordered` — V1 < V2 < V2S < V3
+- `test_serialize_deserialize_roundtrip`
+- `test_load_save_standalone` — Mode B file write/read
+- `test_na_testpoint` — `na=True` serializes/deserializes correctly
+
+**File**: `tests/unit/ncdb/test_testplan_closure.py`
+- `test_compute_closure_all_closed`
+- `test_compute_closure_partial`
+- `test_compute_closure_not_run` — test not in DB
+- `test_compute_closure_na` — N/A testpoint → TPStatus.NA
+- `test_compute_closure_unimplemented` — empty tests list
+- `test_stage_gate_pass` — all V1+V2 testpoints closed
+- `test_stage_gate_fail` — one gap in V2
+- `test_stage_gate_requires_flake_score` — flake gate integration
+- `test_find_covergroup_scopes` — DFS finds matching covergroup scope
+
+**File**: `tests/unit/ncdb/test_testplan_hjson.py`
+- `test_import_simple_hjson` — basic parse
+- `test_import_wildcard_expansion` — `{name}{intf}` expands to list
+- `test_import_na_testpoint` — `tests: ["N/A"]` → `na=True, tests=[]`
+- `test_import_fallback_no_hjson_package` — works with stdlib json for valid JSON subset
+
+### Unit Tests — Merger Fix
+
+**File**: `tests/unit/ncdb/test_merger.py` (extend existing)
+- `test_merge_preserves_contrib_data` — merge two DBs with contrib/*.bin; output has both
+- `test_merge_run_id_renumbering` — no run_id collisions after merge
+- `test_merge_testplan_same` — both inputs have same testplan → copied to output
+- `test_merge_testplan_different_source_file` — warning emitted, no testplan in output
+- `test_merge_testplan_newer_timestamp_wins`
+
+### Integration Tests
+
+**File**: `tests/integration/test_history_workflow.py`
+- `test_write_and_query_7_days` — write 7 days × 100 tests; query last 7 days; check record count
+- `test_cold_start_load_200kb` — measure total I/O on open (registry + stats + contrib_index + bucket_index)
+- `test_add_test_run_updates_stats` — `add_test_run()` → `get_test_stats()` reflects update
+- `test_squash_operation` — end-to-end: write runs → squash → verify contrib files removed + squash_log entry
+- `test_backward_compat_v1_db` — open existing v1 CDB; all v1 reads still work; no v2 members written unless `add_test_run()` is called (which auto-upgrades the manifest to `"v2"`)
+- `test_pass_only_merge_filter` — failing runs excluded from coverage after squash
+
+**File**: `tests/integration/test_testplan_workflow.py`
+- `test_embed_testplan_and_retrieve` — write CDB with testplan; reopen; `getTestplan()` returns correct data
+- `test_standalone_testplan_mode_b` — `Testplan.load()` + `compute_closure()` without opening a CDB
+- `test_regression_delta` — two CDB snapshots → `report_regression_delta()` returns newly-closed/failing
+- `test_closure_report_stage_gate` — end-to-end: import hjson → embed → compute_closure → stage_gate_status
+
+**File**: `tests/integration/test_ci_export.py`
+- `test_export_junit_xml` — valid JUnit XML produced; testpoint names appear as test cases
+- `test_export_github_annotations` — `::error::` lines produced for FAILING testpoints
+
+### Performance Test (manual / benchmark only, not in CI)
+
+**File**: `tests/integration/test_history_performance.py` (marked `@pytest.mark.slow`)
+- `bench_write_1m_records` — 1M test run records written; bucket files ≤ design projections
+- `bench_query_single_test_7_days` — query for one test over 7 days in < 100 ms
+- `bench_top_flaky_no_bucket_io` — `top_flaky_tests()` involves zero bucket file reads
+
+---
+
+## Documentation
+
+### Docstrings
+
+All new public functions and classes must have Google-style docstrings covering:
+- One-line summary
+- Args (with types)
+- Returns
+- Raises (if any)
+- Example snippet for non-obvious usage
+
+### `doc/source/working-with-coverage/test-history.rst` (new file)
+
+Added to the `working-with-coverage/index.rst` toctree.
+
+Section outline:
+1. **Overview** — why binary history, size comparison table
+2. **ZIP Members** — table of all new members with purpose
+3. **Reading and Writing History** — `add_test_run()`, `query_test_history()`, `get_test_stats()`
+4. **Squash Operation** — when to squash, policy options, what changes
+5. **Backward Compatibility** — v1/v2 flag, old files remain readable
+
+### `doc/source/working-with-coverage/testplan.rst` (new file)
+
+Added to the `working-with-coverage/index.rst` toctree.
+
+Section outline:
+1. **Overview** — testplan concepts, two storage modes (A/B)
+2. **Data Model** — `Testplan`, `Testpoint`, `CovergroupEntry` with field descriptions
+3. **Embedding a Testplan (Mode A)** — `setTestplan()`, `getTestplan()`, write/read cycle
+4. **Standalone Testplan (Mode B)** — `Testplan.load()`, `Testplan.save()`, when to use each mode
+5. **OpenTitan Hjson Import** — `import_hjson()` with wildcard substitution examples
+6. **Closure Computation** — `compute_closure()`, `stage_gate_status()`, `TPStatus` values
+7. **Coverage Per Testpoint** — `build_covergroup_index()`, Report C
+8. **Waiver Management** — `WaiverSet` API, `waivers.json` schema
+9. **CI/CD Export** — JUnit XML, GitHub Annotations, Summary Markdown
+10. **Usage Examples** — full worked example from hjson → closure → JUnit XML
+
+### `doc/source/reference/formats/ncdb-format.rst` (extend existing)
+
+Add a new section to the existing NCDB format reference covering the v2 binary history
+members: `test_registry.bin`, `test_stats.bin`, `history/NNNNNN.bin`, `history/bucket_index.bin`,
+`contrib_index.bin`, `squash_log.bin`. Each member gets a field table and encoding notes.
+No new file needed — this is an extension of the existing format reference.
+
+### `README.md` update
+
+Add a "Test History & Testplan" section (after the existing format descriptions) pointing to
+the published docs and listing key capabilities:
+- Binary history store for thousands of regressions
+- Per-test flake score, CUSUM change-point detection
+- Testplan embedding and closure computation
+- Stage gate readiness, confidence-weighted closure
+- CI/CD export (JUnit, GitHub Actions)
+
+---
+
+## File Inventory
+
+### New files
+
+| File | Phase | Notes |
+|------|-------|-------|
+| `src/ucis/ncdb/test_registry.py` | 1 | |
+| `src/ucis/ncdb/test_stats.py` | 1 | |
+| `src/ucis/ncdb/history_buckets.py` | 1 | |
+| `src/ucis/ncdb/bucket_index.py` | 1 | |
+| `src/ucis/ncdb/contrib_index.py` | 1 | |
+| `src/ucis/ncdb/squash_log.py` | 1 | |
+| `src/ucis/ncdb/testplan.py` | 2 | |
+| `src/ucis/ncdb/testplan_closure.py` | 2 | |
+| `src/ucis/ncdb/testplan_hjson.py` | 2 | |
+| `src/ucis/ncdb/waivers.py` | 2 | |
+| `src/ucis/ncdb/testplan_export.py` | 3 | |
+| `src/ucis/ncdb/reports.py` | 3 | |
+| `tests/unit/ncdb/test_test_registry.py` | 1 | |
+| `tests/unit/ncdb/test_test_stats.py` | 1 | |
+| `tests/unit/ncdb/test_history_buckets.py` | 1 | |
+| `tests/unit/ncdb/test_bucket_index.py` | 1 | |
+| `tests/unit/ncdb/test_contrib_index.py` | 1 | |
+| `tests/unit/ncdb/test_squash_log.py` | 1 | |
+| `tests/unit/ncdb/test_testplan.py` | 2 | |
+| `tests/unit/ncdb/test_testplan_closure.py` | 2 | |
+| `tests/unit/ncdb/test_testplan_hjson.py` | 2 | |
+| `tests/integration/test_history_workflow.py` | 1 | |
+| `tests/integration/test_testplan_workflow.py` | 2 | |
+| `tests/integration/test_ci_export.py` | 3 | |
+| `doc/source/working-with-coverage/test-history.rst` | 1 | Add to `working-with-coverage/index.rst` toctree |
+| `doc/source/working-with-coverage/testplan.rst` | 2 | Add to `working-with-coverage/index.rst` toctree |
+
+### Modified files
+
+| File | Phase | Change |
+|------|-------|--------|
+| `src/ucis/ncdb/constants.py` | 1 | New member constants, version bump, status constants |
+| `src/ucis/ncdb/manifest.py` | 1 | `history_format` field |
+| `src/ucis/ncdb/ncdb_ucis.py` | 1+2 | Binary history API, testplan API, waivers API |
+| `src/ucis/ncdb/ncdb_writer.py` | 1+2 | Write new members, compression tiering |
+| `src/ucis/ncdb/ncdb_reader.py` | 1+2 | Read new members, backward compat |
+| `src/ucis/ncdb/ncdb_merger.py` | 1+2 | Fix contrib loss bug, merge testplan, merge stats |
+| `tests/unit/ncdb/test_merger.py` | 1 | Extend with contrib + testplan merge tests |
+| `doc/source/reference/formats/ncdb-format.rst` | 1 | New section for v2 binary history members (field tables) |
+| `doc/source/working-with-coverage/index.rst` | 1+2 | Add `test-history` and `testplan` to toctree |
+| `README.md` | 3 | New section on history + testplan features |
+
+---
+
+## Implementation Order
+
+Within each phase, implement in dependency order:
+
+**Phase 1 sequence:**
+1. `constants.py` additions
+2. `varint.py` — verify that the existing varint implementation is sufficient (read/review only)
+3. `test_registry.py` + unit tests
+4. `test_stats.py` + unit tests
+5. `history_buckets.py` + unit tests
+6. `bucket_index.py` + unit tests
+7. `contrib_index.py` + unit tests
+8. `squash_log.py` + unit tests
+9. `manifest.py` update
+10. `ncdb_ucis.py` Phase 1 additions
+11. `ncdb_writer.py` Phase 1 additions
+12. `ncdb_reader.py` Phase 1 additions
+13. `ncdb_merger.py` bug fix + Phase 1 additions
+14. Integration tests
+15. `doc/source/working-with-coverage/test-history.rst` + update `ncdb-format.rst` + update `working-with-coverage/index.rst`
+
+**Phase 2 sequence:**
+1. `testplan.py` + unit tests
+2. `testplan_hjson.py` + unit tests
+3. `testplan_closure.py` + unit tests
+4. `waivers.py`
+5. `ncdb_ucis.py` Phase 2 additions
+6. `ncdb_writer.py` / `ncdb_reader.py` / `ncdb_merger.py` Phase 2 additions
+7. Integration tests
+8. `doc/source/working-with-coverage/testplan.rst` + update `working-with-coverage/index.rst`
+
+**Phase 3 sequence:**
+1. P0 reports (A, B, C, D)
+2. P1 reports (E, F, G, I)
+3. `testplan_export.py` (JUnit, GitHub, markdown)
+4. CLI additions (Phase 4 — CLI Integration)
+5. `README.md` update
+6. P2 reports (H, J, K, L, M) — as time allows
+
+---
+
+## Design Decisions (Resolved)
+
+| # | Question | Decision |
+|---|----------|----------|
+| 1 | NCDB_VERSION / v2 opt-in | **Auto-migrate**: calling `add_test_run()` automatically upgrades the manifest to `history_format = "v2"`. No explicit flag needed. Existing v1 databases remain fully readable. |
+| 2 | Bucket seal threshold | **Fixed at 10,000 records**. Not configurable for now; revisit if real-world workloads require tuning. |
+| 3 | LZMA dependency | **Graceful fallback**: attempt `ZIP_LZMA`; if `liblzma` is unavailable, silently use `ZIP_DEFLATE, level=9`. No error raised, no user action required. |
+| 4 | `hjson` package | **Hard dependency**: add `hjson` to `setup.py` install_requires and to `ivpm.yaml` in both `default` and `default-dev` groups. |
+| 5 | Report output format | **Both structured and text**: each report function returns a typed dataclass with a `to_json()` method; a companion `format_*()` function renders it to human-readable text. CLI calls the formatter; tests assert on the dataclass. |
+| 6 | Waiver expiry enforcement | **Caller's responsibility**: `WaiverSet.matches_scope()` checks scope/bin pattern only. Callers filter on `expires_at` as needed. |
+
diff --git a/README.md b/README.md
index dd3dca8..3fec05c 100644
--- a/README.md
+++ b/README.md
@@ -258,6 +258,77 @@ NcdbMerger().merge(["run1.cdb", "run2.cdb"], "merged.cdb")
 | `strings.bin` | Deduplicated string table |
 | `history.json` | Test/merge history nodes |
 | `sources.json` | Source file references |
+| `v2/test_registry.bin` | Per-test name and seed registry (v2 history) |
+| `v2/test_stats.bin` | Welford pass/fail/flake statistics per test (v2 history) |
+| `v2/bucket_index.bin` | Time-bucketed history index for fast date-range queries |
+| `v2/history/*.bin` | Compressed per-bucket test run records |
+| `testplan.json` | Embedded testplan with testpoints and stage assignments |
+| `waivers.json` | Glob-pattern coverage waivers with expiry and approver |
+
+### V2 Binary Test History
+
+NCDB `v2` history stores per-test run records in time-bucketed binary files,
+enabling queries over millions of runs without loading full data.  Key APIs:
+
+```python
+from ucis.ncdb.ncdb_ucis import NcdbUCIS
+
+db = NcdbUCIS("coverage.cdb")
+
+# Record a test run
+db.add_test_run("uart_smoke", seed=12345, status=0)  # status 0 = pass
+
+# Query recent history for a test
+records = db.query_test_history("uart_smoke", ts_from=..., ts_to=...)
+
+# Get aggregate stats (flake score, CPU mean, CUSUM)
+stats = db.get_test_stats("uart_smoke")
+print(f"flake={stats.flake_score:.3f}")
+
+# Top flaky and failing tests
+print(db.top_flaky_tests(n=10))
+print(db.top_failing_tests(n=10))
+```
+
+CLI equivalents:
+
+```bash
+# Show recent history for a test
+pyucis history query coverage.cdb uart_smoke --from 2025-01-01
+
+# Show top 10 flaky tests
+pyucis history stats coverage.cdb --top-flaky 10
+```
+
+### Testplan Embedding
+
+```python
+from ucis.ncdb.testplan_hjson import import_hjson
+from ucis.ncdb.testplan_closure import compute_closure
+from ucis.ncdb.reports import report_testpoint_closure, format_testpoint_closure
+
+# Import an OpenTitan-style Hjson testplan
+plan = import_hjson("uart.hjson")
+db.setTestplan(plan)
+
+# Compute closure and format a report
+results = compute_closure(plan, db)
+summary = report_testpoint_closure(results)
+print(format_testpoint_closure(summary))
+```
+
+CLI equivalents:
+
+```bash
+# Embed a testplan
+pyucis testplan import coverage.cdb uart.hjson
+
+# Compute closure with V2 stage gate
+pyucis testplan closure coverage.cdb --stage V2
+
+# Export JUnit XML for CI dashboard
+pyucis testplan export-junit coverage.cdb --out closure_results.xml
+```
 
 ## Documentation
 
diff --git a/TEST_HISTORY_DESIGN.md b/TEST_HISTORY_DESIGN.md
new file mode 100644
index 0000000..c3a017f
--- /dev/null
+++ b/TEST_HISTORY_DESIGN.md
@@ -0,0 +1,2261 @@
+# NCDB Test History Design
+
+## Background and Motivation
+
+UCIS coverage databases are typically used as snapshots — one file per regression, periodically
+merged and squashed. But the underlying NCDB ZIP format can also serve as a long-term store for
+test pass/fail history over thousands of runs and millions of test executions, provided the
+representation is efficient enough.
+
+This document covers:
+1. Analysis of why `history.json` is unsuitable at scale
+2. A complete binary format design for efficient test history storage inside NCDB ZIP files
+3. Metrics that can be extracted from history data
+4. How coverage-per-test and merge policy interact with the design
+
+---
+
+## Part 1: Why `history.json` Does Not Scale
+
+### Current Format
+
+Each test run is stored as a JSON object with ~23 fields in a single monolithic array in
+`history.json`. Typical size: ~200–500 bytes per entry before compression.
+
+### Problems at Scale
+
+| Problem | Root Cause |
+|---|---|
+| ~200–500 bytes/entry uncompressed | 23 JSON field keys repeated every record |
+| Full array parse to read anything | No structure within the ZIP entry |
+| No time-based filtering | Single monolithic member |
+| Full ZIP rewrite to append any data | ZIP format limitation — mitigable but not eliminated |
+| No aggregate statistics | Must scan everything to find noisy tests |
+| Merge discards `contrib/*.bin` | Bug in `_merge_same_schema`: only copies strings/scope/counts/history/sources |
+
+### Size Comparison
+
+| Scenario | `history.json`-in-ZIP | Proposed binary buckets |
+|---|---|---|
+| 1K tests × 1K runs | ~75 MB | ~5 MB |
+| 1K tests × 1M runs | ~75 GB | ~5 GB |
+| Read to query 1 test over 7 days | Decompress all | 7 × ~5–10 KB |
+
+---
+
+## Part 2: New ZIP Members
+
+Six new kinds of ZIP entry are added alongside the existing members. Existing members (`history.json`,
+`contrib/*.bin`, `counts.bin`, etc.) are retained for backward compatibility and coverage data.
+
+```
+test_registry.bin          ← global: test name ↔ stable integer ID + seed registry
+test_stats.bin             ← global: per-test aggregate metrics (flake score, CUSUM, etc.)
+history/NNNNNN.bin         ← one ZIP entry per bounded bucket of test run records
+history/bucket_index.bin   ← index: maps bucket number → date range, record count, name range
+contrib_index.bin          ← index: run_id → status, for efficient pass-only merge
+squash_log.bin             ← audit trail: each squash operation recorded permanently
+```
+
+`history.json` continues to store MERGE nodes (small, infrequent). TEST nodes move to the
+binary bucket files.
+
+---
+
+## Part 3: Binary Format Specifications
+
+### 3.1 `test_registry.bin`
+
+Stores each unique test base name and seed exactly once. Assigned stable integer IDs that
+persist across ZIP rewrites and merges. Also holds the global `run_id` counter.
+
+```
+magic:              u32 = 0x54535452   # 'TSTR'
+version:            u8  = 1
+next_run_id:        u32                # monotonically increasing, never decreases
+num_names:          u32
+num_seeds:          u32
+
+# Fixed-size offset table (O(1) access by name_id):
+name_string_offsets: u32[num_names]   # byte offset into string heap
+seed_string_offsets: u32[num_seeds]   # byte offset into seed heap
+
+# String heaps (null-terminated UTF-8):
+name_heap:          bytes
+seed_heap:          bytes
+```
+
+- Names are stored sorted → binary search gives O(log N) name → name_id lookup
+- Seeds with integer values are stored as their decimal string representation
+- Seeds that are complex strings (e.g. tool-specific) stored verbatim
+- 1000 names × ~30 bytes avg = ~30 KB total (trivially small, load once at open)
+
+### 3.2 `history/NNNNNN.bin` — Bounded Bucket Files
+
+Buckets are bounded by record count (~10K records max), not strictly by date. The bucket
+sequence number is zero-padded 6 digits. This keeps individual buckets small and decompressible
+independently.
+
+#### Layout: Columnar, Not Row-Oriented
+
+Records are sorted by `(name_id, ts)` within each bucket. The name_id column is eliminated
+from per-record storage by using a name index (which doubles as perfect run-length encoding).
+
+```
+Header:
+  magic:          u32 = 0x48445942   # 'HDYB'
+  version:        u8  = 1
+  num_records:    u32
+  num_names:      u16                # unique name_ids in this bucket
+  ts_base:        u32                # unix timestamp of the first record
+
+Name index (sorted by name_id — enables O(log N) lookup, eliminates name_id column):
+  entries[num_names]:
+    name_id:      u32
+    start_row:    u32               # first record index for this name
+    count:        u16               # number of records for this name
+
+Seed dictionary (local to this bucket, enables 1-byte seed references):
+  num_seeds:      u16
+  seed_ids:       u32[num_seeds]    # global seed_id from test_registry.bin
+
+Columns (independent arrays, each compresses optimally under DEFLATE/LZMA):
+  seeds[]:        u8[num_records]   # index into seed dictionary (1 byte vs 4)
+  ts_deltas[]:    varint[num_records]  # seconds since ts_base, delta per name group
+  status_flags[]: u8[num_records]   # nibble-packed: high nibble=status, low nibble=flags
+```
+
+The `status_flags` byte packs two fields:
+```
+  bits [7:4]  status:  0=OK  1=FAIL  2=ERROR  3=FATAL  4=COMPILE  (3 bits used)
+  bits [3:0]  flags:   bit0=seed_is_hash  bit1=is_rerun
+                       bit2=has_coverage  bit3=was_squashed
+```
+
+CPU time is intentionally omitted from per-record storage — it is maintained as mean and
+variance in `test_stats.bin` via Welford's online algorithm, which is sufficient for all
+metrics described in Part 5.
+
+#### Per-Record Cost (Revised)
+
+| Field | Old row design | New columnar design |
+|---|---|---|
+| name_id | 4 bytes | **0** (implicit from name index) |
+| seed | 4 bytes | **1** (local dict index, u8) |
+| timestamp | 4 bytes | **~1.5** (varint delta) |
+| status + flags | 2 bytes | **1** (nibble-packed) |
+| cpu_time (f16) | 2 bytes | **0** (moved to test_stats.bin) |
+| padding | 1 byte | **0** |
+| **Total** | **~16 bytes** | **~3.5 bytes avg** |
+
+Before DEFLATE. Columnar layout with homogeneous columns achieves 5–8× DEFLATE compression
+on typical regression data (compared to ~3× for interleaved row layout). Effective storage:
+~0.5–0.7 bytes per test run record.
+
+#### Compression Tiers
+
+- **Current day's bucket** (may be rewritten): `ZIP_DEFLATED, compresslevel=1` — fast write
+- **Sealed buckets** (day has passed, immutable): `ZIP_LZMA` or `ZIP_DEFLATED, compresslevel=9`
+- **`test_stats.bin`, `test_registry.bin`** (read on every open): `ZIP_DEFLATED, compresslevel=1`
+
+### 3.3 `history/bucket_index.bin`
+
+Maps bucket sequence numbers to date ranges and provides aggregate counts for fast
+regression-trend queries without opening individual bucket files.
+
+```
+magic:        u32 = 0x42494458   # 'BIDX'
+version:      u8
+num_buckets:  u32
+
+entries[num_buckets]:              # sorted by bucket_seq
+  bucket_seq:   u32               # matches NNNNNN in filename
+  ts_start:     u32               # unix timestamp of first record
+  ts_end:       u32               # unix timestamp of last record
+  num_records:  u32
+  fail_count:   u32               # enables pass-rate-over-time without opening bucket
+  min_name_id:  u32               # range bounds for fast skip
+  max_name_id:  u32
+```
+
+28 bytes/entry (seven `u32` fields). For 3650 days (10 years) at ~10K records/bucket:
+- ~3650 buckets × 28 bytes ≈ **100 KB** for the complete 10-year index
+- The `fail_count` field enables regression pass-rate trend plots from the index alone
+
+### 3.4 `test_stats.bin`
+
+One fixed-size 64-byte record per unique test, indexed by `name_id`. Load entire file at open
+time (1000 tests × 64 bytes = 64 KB). Enables all aggregate queries without touching buckets.
+
+All fields maintained incrementally — O(1) update per new test run.
+
+```
+magic:            u32 = 0x54535441   # 'TSTA'
+version:          u8
+num_tests:        u32
+
+entries[num_tests]:         # indexed by name_id (O(1) access)
+  total_runs:        u32
+  pass_count:        u32
+  fail_count:        u32
+  error_count:       u32
+  first_ts:          u32    # unix timestamp of first ever run
+  last_ts:           u32    # unix timestamp of most recent run
+  last_green_ts:     u32    # unix timestamp of last passing run
+  transition_count:  u32    # consecutive status changes (for flake_score)
+  streak:            i16    # current streak: positive=passes, negative=fails
+  last_status:       u8     # status of most recent run
+  _pad:              u8
+  flake_score:       f32    # transition_count / max(total_runs-1, 1)  ∈ [0,1]
+  fail_rate:         f32    # fail_count / total_runs                  ∈ [0,1]
+  mean_cpu_time:     f32    # Welford online mean (seconds)
+  m2_cpu_time:       f32    # Welford M2 accumulator → stddev = sqrt(M2/N)
+  cusum_value:       f32    # running CUSUM statistic for change detection
+  cusum_ref_mean:    f32    # μ₀ used for CUSUM (set at baseline period)
+  grade_score:       f32    # composite effectiveness score [0,1]
+  total_seeds_seen:  u16    # unique seeds ever run for this test
+  _reserved:         u8[6]
+```
+
+Key derived values:
+- `stddev_cpu_time = sqrt(m2_cpu_time / total_runs)` — no bucket scan needed
+- `days_since_last_pass = (now - last_green_ts) / 86400`
+- `streak < -5` → definitively broken (not just flaky)
+- `abs(streak) < 3 AND flake_score > 0.3` → likely flaky
+
+### 3.5 `contrib_index.bin` — Pass-Only Merge Support
+
+This is the pivotal addition for coverage-per-test efficiency. Every test run that produced
+coverage data has an entry here. Status is cached so pass-only merge decisions require no
+bucket scanning.
+
+```
+magic:             u32 = 0x43494458   # 'CIDX'
+version:           u8
+merge_policy:      u8    # 0=all_tests  1=pass_only  2=exclude_error_and_rerun
+squash_watermark:  u32   # highest run_id already baked into counts.bin
+num_active:        u32   # contrib files present (not yet squashed)
+
+entries[num_active]:     # sorted by run_id
+  run_id:    u32
+  name_id:   u16         # cached for display without hitting bucket
+  status:    u8          # cached — avoids opening bucket for merge decision
+  flags:     u8          # bit0=is_rerun  bit1=first_attempt_passed
+```
+
+8 bytes/entry. Pass-only merge:
+```python
+passing = [e.run_id for e in contrib_index.entries if e.status == OK]
+counts = sum(load_contrib(f"contrib/{run_id}.bin") for run_id in passing)
+```
+
+### 3.6 `squash_log.bin` — Coverage Provenance Audit Trail
+
+Append-only log. Survives squash operations permanently. Answers "was my counts.bin built from
+passing tests only?" even years after the fact.
+
+```
+magic:   u32
+version: u8
+num_squashes: u32
+
+entries[num_squashes]:
+  ts:         u32   # unix timestamp of squash
+  policy:     u8    # 0=all  1=pass_only  2=exclude_error_and_rerun
+  _pad:       u8[3]
+  from_run:   u32   # first run_id included in squash
+  to_run:     u32   # new squash_watermark after this operation
+  num_runs:   u32   # total runs included
+  pass_runs:  u32   # passing runs included in counts.bin contribution
+```
+
+24 bytes/squash event.
+
+---
+
+## Part 4: Coverage-Per-Test Interaction
+
+### 4.1 Stable `run_id` Replaces Positional `history_idx`
+
+**Current bug**: `contrib/{history_idx}.bin` uses position in `history.json` as key. After a
+merge of two sources (each with `contrib/0.bin`, `contrib/1.bin`, ...), filenames collide and
+the merger silently drops all contrib data.
+
+**Fix**: each test run is assigned a globally unique `run_id` (u32) from the counter in
+`test_registry.bin` at write time. Contrib files become `contrib/{run_id}.bin`. The run_id is
+stable across ZIP rewrites, merges, and squash operations.
+
+### 4.2 Coverage Watermark Model
+
+At any point in time, total coverage is:
+
+```
+total_coverage = counts.bin                              (squashed base)
+              + Σ contrib/{run_id}.bin                   (active delta)
+                for run_id in contrib_index.entries
+                where merge_policy_filter(entry.status, entry.flags)
+```
+
+`squash_watermark` in `contrib_index.bin` defines the boundary. Run IDs ≤ watermark are baked
+into `counts.bin`; run IDs > watermark have their contrib files present.
+
+### 4.3 Merge Policy Options
+
+The `flags` byte in `contrib_index.bin` entries enables four distinct merge policies without
+re-scanning bucket files:
+
+| Policy | Filter |
+|---|---|
+| All tests | no filter |
+| Pass only (any attempt) | `status == OK` |
+| Pass on first attempt only | `status == OK AND NOT is_rerun` |
+| Strict (exclude flaky contributions) | `status == OK AND NOT (is_rerun AND NOT first_attempt_passed)` |
+
+The last policy ("strict") excludes coverage from tests that only pass on retry — coverage
+that cannot be reliably reproduced and may indicate environmental flakiness rather than real
+design behavior.
+
+### 4.4 Fixed Same-Schema Fast Merge Path
+
+`NcdbMerger._merge_same_schema()` currently discards all `contrib/*.bin` data. It must be
+updated to:
+
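+1. Assign run_id offsets to each source (offsets are cumulative so renumbered ranges never overlap):
+   - `offset_A = 0`
+   - `offset_B = offset_A + max(run_id in source_A) + 1`
+   - `offset_C = offset_B + max(run_id in source_B) + 1`, etc.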
+
+2. Copy and rename contrib files: `contrib/{source_run_id}.bin` → `contrib/{source_run_id + offset}.bin`
+
+3. Merge `contrib_index.bin` entries from all sources (adjust run_ids by offset, re-sort)
+
+4. Append `squash_log.bin` entries from all sources (no run_id adjustment needed)
+
+5. Sum counts arrays for the merged `counts.bin`
+
+This changes the fast path from O(bins) to O(bins + total_contrib_data). For large merges,
+squash sources first (bake their contribs into counts.bin) before merging — which is the
+correct operational model for a coverage closure flow anyway.
+
+### 4.5 Squash Operation
+
+When squashing coverage:
+
+1. Read `contrib_index.bin` for active entries (run_ids > squash_watermark)
+2. Apply merge policy filter
+3. Sum selected `contrib/{run_id}.bin` files into `counts.bin`
+4. Delete the contrib files for squashed run_ids
+5. Update `squash_watermark` in `contrib_index.bin`
+6. Remove squashed entries from `contrib_index.bin`
+7. Append a record to `squash_log.bin`
+8. Mark `was_squashed=1` in the corresponding bucket record flags
+
+Test history bucket records are **never modified** during squash (only the `was_squashed` flag
+is set). Bucket files themselves are immutable once sealed.
+
+---
+
+## Part 5: Metrics Extractable from History Data
+
+### 5.1 Instantaneous Metrics (from `test_stats.bin` only — no bucket scan)
+
+All O(1) or O(N_tests) with a single file read:
+
+- **Flake score**: `transition_count / max(total_runs-1, 1)` ∈ [0,1]
+  - 0.0 = completely stable; 1.0 = alternates every single run
+  - Distinguishes noisy from broken (a broken test has `flake_score ≈ 0` despite `fail_rate ≈ 1`)
+- **Fail rate**: `fail_count / total_runs`
+- **Current streak**: `streak` field — negative = consecutive failures, positive = consecutive passes
+- **Days since last pass**: `(now - last_green_ts) / 86400`
+- **CPU time mean and stddev**: from Welford fields (no raw data needed)
+- **Silent death**: `last_ts` is stale despite test being in the suite
+- **Test re-introduction**: `first_ts` is recent for a known-old test name
+- **Top N flakiest tests**: sort by `flake_score DESC` — no bucket scan
+- **Top N consistently failing tests**: filter `fail_rate > threshold AND flake_score < 0.1`
+- **Composite test grade**: `(1 - fail_rate) × (1 - flake_score) × (1 / mean_cpu_time_normalized)`
+- **CPU time regression**: `mean_cpu_time` trending up week-over-week (compare saved baselines)
+
+### 5.2 Trend Metrics (from `bucket_index.bin` only — no bucket decompression)
+
+From the 28-byte per-bucket index entries:
+
+- **Regression pass rate over time**: `(num_records - fail_count) / num_records` per bucket
+- **Run volume per day**: `num_records` per bucket → detect farm capacity changes
+- **Failure spike detection**: buckets where `fail_count / num_records > threshold`
+
+### 5.3 Historical Detail Metrics (from bucket files — targeted reads)
+
+For a specific test X in a date range:
+1. Get `name_id` from `test_registry.bin`
+2. Use `bucket_index.bin` to find buckets where `min_name_id ≤ name_id ≤ max_name_id`
+3. For each candidate bucket, binary-search the name index → O(log N_unique_tests)
+4. Extract only the records for that test
+
+Metrics enabled:
+- **Pass/fail history timeline**: full status over time for one test
+- **Fail streak history**: detect multiple distinct failure episodes
+- **Seed-correlated failures**: group by `seed_id`, compute `fail_count / total` per seed
+  - Seeds with 100% failure rate = deterministic RTL bug masquerading as random failure
+- **Seed diversity**: entropy over seed→status distribution; low entropy = poor randomization
+- **Rerun effectiveness**: `P(pass | is_rerun AND prior_status == FAIL)` — infrastructure flakiness signal
+
+### 5.4 Cross-Test Pattern Metrics (from bucket files — multi-test scan)
+
+Reading a single bucket (one day or one regression):
+
+- **Killer seeds**: seeds where `count(failing_tests) > threshold` in one bucket
+  - `GROUP BY seed_id → set of failing name_ids → find recurring clusters`
+  - Indicates a systemic RTL issue (deadlock, resource contention at a specific init value)
+- **Failure co-occurrence**: `P(test_B fails | test_A fails in same bucket)`
+  - High co-occurrence → tests hit same RTL block → redundancy or common bug
+- **Cascade detection**: temporal causality — does failing test A precede failing test B?
+- **Redundant test candidates**: pairs with `correlation(status_A, status_B) > 0.95`
+  - Both always pass and fail together; one adds no value
+
+### 5.5 CUSUM Change-Point Detection
+
+The `cusum_value` and `cusum_ref_mean` in `test_stats.bin` implement an incremental CUSUM
+(Cumulative Sum) control chart for detecting when a test's pass/fail behavior changed. This
+is the algorithm used by Atlassian's "Flakinator" and Google's flaky-test detection systems.
+
+Update rule on each new run (O(1)):
+```python
+k = 0.5  # allowance parameter
+h = 4.0  # decision threshold (tune to desired sensitivity)
+x = 1.0 if status == FAIL else 0.0
+S = max(0, S + x - (cusum_ref_mean + k))
+if S > h:
+    # change point detected — record timestamp, reset S
+    S = 0.0
+```
+
+When a change point is detected, the timestamp is recorded so you can correlate with RTL
+commits: "test X started failing consistently on 2026-03-01."
+
+### 5.6 EDA-Specific: Seed Analytics
+
+Unique to hardware verification — no software CI tool provides this:
+
+- **Valuable seed ranking**: seeds that historically expose the most failures first
+  - Re-run high-value seeds more frequently; Springer "Seed Selector" paper shows 42%+ speedup
+- **Seed fatigue**: `fail_count_per_seed` approaching zero with recency weighting
+  - Seeds that never fail anymore are candidates for replacement
+- **Seed coverage diversity**: entropy of the seed→status distribution per test
+  - Low entropy = seeds are not actually exploring different design states
+
+---
+
+## Part 6: Read/Write Strategy Summary
+
+### Opening the Database (cold start)
+
+1. Read `test_registry.bin` (~30 KB) → in-memory name↔id dict
+2. Read `test_stats.bin` (~64 KB for 1000 tests) → all aggregate metrics immediately available
+3. Read `contrib_index.bin` → pass-only merge table available
+4. Read `history/bucket_index.bin` (~90 KB for 10 years) → full time index available
+
+Total cold-start I/O: ~200 KB. All aggregate queries answerable immediately.
+
+### Writing a New Test Run
+
+1. Assign `run_id` (increment counter in `test_registry.bin`)
+2. Look up or assign `name_id` and `seed_id`
+3. Append record to current day's bucket (in memory; written at close)
+4. Update `test_stats.bin` entry: O(1) Welford + CUSUM update
+5. If coverage: add entry to `contrib_index.bin`
+
+### Writing at Close (full ZIP rewrite)
+
+1. Copy all sealed bucket files verbatim (read compressed bytes, write without re-compression)
+2. Write current day's bucket (new or updated)
+3. Write updated `bucket_index.bin`, `test_registry.bin`, `test_stats.bin`, `contrib_index.bin`
+4. Write `squash_log.bin` (unchanged if no squash happened)
+5. Write all existing `contrib/*.bin` files (only active ones — not squashed)
+
+### Query: "All runs of test X, last 7 days"
+
+1. Get `name_id` for X (from in-memory registry)
+2. Scan `bucket_index.bin` for buckets where `ts_end ≥ 7_days_ago AND min_name_id ≤ name_id ≤ max_name_id`
+3. For each candidate bucket (~7): decompress, binary-search name index, extract records
+4. Total I/O: ~7 × 5–10 KB = **35–70 KB** regardless of total history size
+
+### Query: "Top 20 flakiest tests"
+
+1. Scan `test_stats.bin` (already loaded)
+2. Sort by `flake_score DESC`, take top 20
+3. Map `name_id → name` via registry
+4. **Zero bucket I/O**
+
+---
+
+## Part 7: Files to Create/Modify for Implementation
+
+### New Files
+
+| File | Purpose |
+|---|---|
+| `src/ucis/ncdb/test_registry.py` | Serialize/deserialize `test_registry.bin`; assign run_ids, name_ids, seed_ids |
+| `src/ucis/ncdb/test_stats.py` | Serialize/deserialize `test_stats.bin`; Welford + CUSUM incremental update |
+| `src/ucis/ncdb/history_buckets.py` | Write/read columnar bucket files; name index; seed dict |
+| `src/ucis/ncdb/bucket_index.py` | Write/read `history/bucket_index.bin` |
+| `src/ucis/ncdb/contrib_index.py` | Write/read `contrib_index.bin`; pass-only filter enumeration |
+| `src/ucis/ncdb/squash_log.py` | Write/read `squash_log.bin`; squash operation implementation |
+
+### Modified Files
+
+| File | Change |
+|---|---|
+| `src/ucis/ncdb/constants.py` | Add new member name constants; bump `NCDB_VERSION` to `"2.0"` |
+| `src/ucis/ncdb/ncdb_writer.py` | Write new members; compression tiering; sealed-bucket copy optimization |
+| `src/ucis/ncdb/ncdb_reader.py` | Read new members; fall back to `history.json` for MERGE nodes |
+| `src/ucis/ncdb/ncdb_merger.py` | Fix contrib data loss bug; run_id renumbering; contrib_index merge |
+| `src/ucis/ncdb/manifest.py` | Add `history_format` field to distinguish v1 (JSON) from v2 (binary) |
+
+### Backward Compatibility
+
+- `manifest.json` gains a `history_format` field: `"v1"` (JSON only) or `"v2"` (binary + JSON for MERGE nodes)
+- Reader checks `history_format` and falls back to `history.json` for old files
+- Old files without binary bucket members are fully readable; new features simply unavailable
+- `history.json` continues to be written for MERGE nodes in all versions
+
+---
+
+## Part 8: Size Projections
+
+At 1K tests × 10 runs/test/day = 10K records/day:
+
+| Component | Size/day (raw) | Compressed (LZMA) | 10-year total |
+|---|---|---|---|
+| Bucket files | ~35 KB | ~5 KB | ~18 MB |
+| bucket_index.bin | 28 bytes/bucket | — | ~100 KB |
+| test_stats.bin | 64 KB (static) | ~20 KB | 20 KB |
+| test_registry.bin | ~30 KB (static) | ~10 KB | 10 KB |
+| contrib_index.bin | ~8 bytes/run | — | ~3 MB (for 400K active) |
+| squash_log.bin | 24 bytes/squash | — | ~24 KB (1K squashes) |
+| **Total** | | | **~21 MB** |
+
+Compare to `history.json`: **~75 GB** for the same data. Approximately **3500× more
+space-efficient**.
+
+---
+
+## Part 9: Testplan Integration — Mapping Issues and End-of-Regression Reports
+
+### 9.1 OpenTitan Testplan Format Summary
+
+The OpenTitan `testplanner` tool uses Hjson files with two top-level collections:
+
+- **`testpoints`**: each has `name`, `stage` (V1/V2/V2S/V3), `desc`, `tests` (list of written
+  test names), and optional `tags`.
+- **`covergroups`**: each has `name` and `desc`, declaring the functional coverage groups
+  expected to be exercised.
+
+Testplans support `import_testplans` for shared plans with wildcard substitution (e.g.
+`{name}_csr_hw_reset` expands per DUT). Setting `tests: ["N/A"]` marks a testpoint as
+intentionally not mapped to simulation results.
+
+A more detailed analysis is in `TESTPLAN_ANALYSIS_REPORT.md`.
+
+---
+
+### 9.2 Mapping Issues: OpenTitan Testplan Format → UCIS
+
+#### Issue 1: No Native Testplan/Testpoint Hierarchy in UCIS
+
+UCIS defines exactly two history node types (`UCIS_HISTORYNODE_TEST` and
+`UCIS_HISTORYNODE_MERGE`). There is no `UCIS_HISTORYNODE_TESTPLAN` or testpoint scope type.
+The UCIS LRM glossary references a "verification plan hierarchy" but this concept is not
+realized in the standard API — it amounts to using UCIS tags to link coverage scopes back to
+an external plan. The testplan must therefore be stored **outside** UCIS (as an Hjson/JSON
+sidecar or in a dedicated ZIP member) and joined to UCIS data at query time.
+
+#### Issue 2: Verification Stage Has No UCIS Equivalent
+
+OpenTitan's `stage` field (`V1`/`V2`/`V2S`/`V3`) encodes the verification lifecycle milestone
+a testpoint targets. UCIS has no such concept. `UCIS_INT_TEST_COMPULSORY` is the closest
+analog — a boolean "must run" flag on individual test records — but it does not convey staged
+milestone semantics and applies to tests, not testpoints.
+
+Stage data must be stored in the testplan database (ZIP member or sidecar) and treated as an
+external grouping key when producing reports.
+
+#### Issue 3: Test-Name Matching is Implicit and Fragile
+
+The binding between a testplan `tests` list entry (e.g. `"uart_smoke"`) and a UCIS history
+node is by string match: the testplan test name must equal the logical name of the
+`UCIS_HISTORYNODE_TEST` node (`UCIS_STR_TEST_NAME`). This convention is not enforced by UCIS.
+
+Failure modes:
+- The UCIS test name includes a seed suffix (`uart_smoke_12345`) while the testplan uses the
+  bare name.
+- Tool-specific prefixes or path components are added to the UCIS logical name.
+- After squash, individual test history nodes may be absent from a merged database; only
+  aggregate coverage remains.
+
+**Recommended approach**: normalize test names by stripping known suffixes (seed, run index,
+timestamp) at UCIS write time, or store the canonical testplan name as a user-defined
+attribute `testplan:name` on the history node.
+
+#### Issue 4: M:N Testpoint-to-Test Mapping
+
+A single testpoint can map to multiple written tests; conversely, one written test can satisfy
+multiple testpoints (the testplanner tool does not enforce 1:1 mapping). UCIS history nodes
+are flat — there is no grouping structure to express "this test runs for this testpoint."
+
+This mapping must be maintained in the testplan database and resolved at report time, not
+inside UCIS.
+
+#### Issue 5: Tag Semantics Mismatch
+
+UCIS tags are plain strings with no associated value, intended as a grouping construct (e.g.
+linking coverage scopes to plan items via a shared tag name). OpenTitan testplan tags carry
+richer meaning: platform (`verilator`, `fpga_cw310`), mode (`gls`, `pa`, `rom`), or lifecycle
+(`vector`). These are filter dimensions used to select which testpoints appear in a run.
+
+UCIS tags cannot represent this; they would need to be stored as user-defined attributes on
+the history nodes (`testplan:tag:gls = true`) or kept entirely in the testplan sidecar.
+
+#### Issue 6: `tests: ["N/A"]` Has No UCIS Equivalent
+
+A testpoint with `tests: ["N/A"]` is defined in the plan but intentionally has no simulation
+coverage. UCIS has no concept of a "planned but unverifiable" entry. The testplan layer must
+track `N/A` testpoints and exclude them from closure calculations, not from UCIS.
+
+#### Issue 7: Covergroup Name Correlation Relies on Naming Convention
+
+Testplan `covergroups` entries list the functional coverage groups expected to be exercised
+by testpoints. In UCIS, covergroups are scope nodes in the design hierarchy. Matching a
+testplan covergroup name (`timer_cg`) to a UCIS scope requires an agreed naming convention
+(the UCIS scope name equals the SV covergroup name suffixed with `_cg`). No cross-file
+uniqueness guarantee exists when designs are large or when multiple DUTs share covergroup
+names.
+
+#### Issue 8: Wildcard Expansion is Ephemeral
+
+Imported testplans use substitution wildcards (e.g. `{name}{intf}_csr_hw_reset`). After
+parsing, expanded test names exist only in memory; the unexpanded template is what's stored
+in the Hjson file. UCIS has no awareness of this expansion. Any database that stores testplan
+data must store the **post-expansion** test name list alongside the source template, so that
+a query tool can reconstruct both the human-readable template and the runnable test names.
+
+#### Issue 9: Merged Database Loses Individual Test Records
+
+After a UCIS merge + squash, individual `UCIS_HISTORYNODE_TEST` nodes for squashed tests may
+be absent from the merged database — only aggregate `counts.bin` coverage survives. This
+means testpoint-level pass/fail status cannot be reconstructed from the merged UCIS database
+alone. This is precisely why the binary history store (Parts 2–6 of this document) is
+needed: it preserves per-test status even after coverage squash.
+
+---
+
+### 9.3 Testplan Storage Modes
+
+Two storage modes are supported. They use the same JSON schema and the same
+`Testplan` data model; the choice affects only where the file lives.
+
+#### Mode A — Embedded (testplan stored inside the NCDB ZIP)
+
+Add a `testplan.json` member to the NCDB ZIP file:
+
+```
+testplan.json   ← testplan snapshot stored with the database at import time
+```
+
+Best when the testplan is stable for the duration of the regression and should
+travel with the coverage database (the most common case).
+
+#### Mode B — Standalone (testplan kept as a separate file)
+
+The testplan is maintained as a standalone `testplan.json` file on disk (or in a
+source-control tree) and is **not** embedded in the NCDB. At analysis time the
+user points tooling at both files:
+
+```
+uart_testplan.json   ← standalone testplan snapshot
+regression.cdb       ← NCDB with no embedded testplan
+```
+
+This mode is preferred when:
+- The testplan file is version-controlled separately from the regression database
+  (e.g. testplan lives in the RTL repo; CDB is produced by CI and stored in
+  artifact storage).
+- You want to perform ad-hoc cross-analysis between an existing NCDB and a
+  testplan that was never embedded (e.g. retro-fitting plan coverage onto legacy
+  databases).
+- Different testplan revisions must be compared against the same NCDB without
+  re-generating the database.
+
+`compute_closure()`, `stage_gate_status()`, and all report generators accept a
+`Testplan` object regardless of whether it was loaded from an embedded ZIP member
+or a standalone file — the API is identical in both modes.
+
+---
+
+Contents of `testplan.json` (stored post-expansion, with all imports resolved):
+
+```json
+{
+  "format_version": 1,
+  "source_file": "hw/ip/uart/data/uart_testplan.hjson",
+  "import_timestamp": "2026-03-05T19:00:00Z",
+  "testpoints": [
+    {
+      "name": "smoke",
+      "stage": "V1",
+      "desc": "Basic smoke test ...",
+      "tests": ["uart_smoke"],
+      "tags": [],
+      "na": false
+    },
+    {
+      "name": "csr",
+      "stage": "V1",
+      "desc": "CSR tests ...",
+      "tests": ["uart_csr_hw_reset", "uart_jtag_csr_hw_reset"],
+      "tags": ["csr"],
+      "na": false,
+      "source_template": "{name}{intf}_csr_hw_reset"
+    }
+  ],
+  "covergroups": [
+    { "name": "timer_cg", "desc": "Cover timer inputs ..." }
+  ]
+}
+```
+
+This snapshot approach means:
+- The testplan in force when data was collected is always available alongside the data.
+- Plan evolution (adding/removing testpoints, stage changes) is tracked naturally as part of
+  the database's commit history.
+- The post-expansion `tests` list is the authoritative source for UCIS test-name matching.
+
+In Mode B (standalone), the same JSON schema is used; the file simply lives outside the ZIP.
+Use `Testplan.load(path)` to read it and pass the resulting object directly to
+`compute_closure()` alongside any NCDB opened with `NcdbUCIS`.
+
+---
+
+### 9.4 End-of-Regression Reports
+
+Given the combination of:
+- **Testplan** (`testplan.json`): testpoints, stages, test lists, covergroups
+- **Latest regression UCIS** (current NCDB): per-test pass/fail, per-covergroup coverage
+- **Historical UCIS** (merged NCDB with binary history): trends, flake scores, stage
+  progression over time
+
+the following report types offer high value:
+
+#### Report A: Testpoint Closure Summary (per-regression)
+
+For each testpoint, derive a status from the union of its mapped tests' UCIS results:
+
+| Status | Condition |
+|---|---|
+| `CLOSED` | All mapped tests passed in this regression |
+| `PARTIAL` | At least one mapped test passed, at least one failed |
+| `FAILING` | All mapped tests ran and failed |
+| `NOT RUN` | No mapped test appears in this regression's history nodes |
+| `N/A` | Testpoint is marked `na: true` |
+| `UNIMPLEMENTED` | `tests` list is empty |
+
+Roll up by `stage` to show:
+
+```
+Stage V1: 12/12 closed  (100%)
+Stage V2: 17/24 closed   (71%)  ← 5 FAILING, 2 NOT RUN
+Stage V2S: 3/6  closed   (50%)
+Stage V3:  0/4  closed    (0%)  ← all UNIMPLEMENTED
+```
+
+#### Report B: Stage Gate Readiness
+
+Gate condition: all testpoints at stage S and all stages below S must be CLOSED (or N/A)
+before a milestone sign-off. Report signals go/no-go per stage:
+
+```
+V1 GATE: ✅ PASS (12/12 closed)
+V2 GATE: ❌ FAIL — 7 gaps remaining
+V2S GATE: ❌ FAIL — requires V2 first
+V3 GATE: ❌ FAIL — requires V2S first
+```
+
+Optionally, show the "critical path" — the failing testpoints that block the earliest gate.
+
+#### Report C: Coverage Closure per Testpoint
+
+For testpoints that declare associated covergroups, report UCIS coverage percentages:
+
+| Testpoint | Stage | Coverage Groups | Coverage % | Status |
+|---|---|---|---|---|
+| `smoke` | V1 | `timer_cg` | 100% | CLOSED |
+| `modes` | V2 | `modes_cg`, `key_cg` | 73% / 45% | GAP |
+| `errors` | V2 | `err_cg` | 0% | NOT STARTED |
+
+This links the test-pass view (Report A) with coverage closure: a test can pass without
+achieving functional coverage goals if the covergroup is under-constrained.
+
+#### Report D: Regression Delta (Latest vs Previous)
+
+Compare testpoint status between the current regression and the immediately prior regression
+(or any named baseline stored in the history buckets):
+
+- **Newly CLOSED**: testpoints that passed this run but failed last run → progress
+- **Newly FAILING**: testpoints that passed before but fail now → regressions requiring triage
+- **Coverage delta**: covergroups that crossed a goal threshold (e.g. 90% → 100%) or regressed
+
+This is the primary report for the engineer reviewing a nightly regression: focus on what
+changed, not the static state.
+
+#### Report E: Historical Stage Progression
+
+Using the merged historical NCDB and the versioned testplan:
+
+- Plot testpoint closure rate over time for each stage (V1, V2, V2S, V3)
+- Mark the date when each stage gate was first fully closed
+- Identify periods of regression (closure rate dropped)
+
+```
+V1 Closure over time:
+100% ┤                               ╭──────────────── V1 milestone (2026-01-15)
+ 80% ┤                    ╭──────────╯
+ 60% ┤         ╭──────────╯
+ 40% ┤╭────────╯
+  0% ┼──────────────────────────────────────────────────────►
+     Jan 1          Jan 8          Jan 15         Jan 22
+```
+
+#### Report F: Testpoint Reliability (History-Augmented)
+
+For testpoints whose tests have poor historical reliability (high `flake_score` or low
+`grade_score` from `test_stats.bin`), flag that closure claims are less trustworthy:
+
+| Testpoint | Stage | Tests | Flake Score | Closure Confidence |
+|---|---|---|---|---|
+| `smoke` | V1 | `uart_smoke` | 0.02 | HIGH |
+| `timeout` | V2 | `uart_timeout` | 0.41 | LOW ⚠️ |
+
+A testpoint is considered "confidently closed" only when its tests consistently pass across
+multiple seeds and runs (low flake, long green streak). A single passing run with
+`flake_score > 0.3` should not be counted as closure.
+
+#### Report G: Unexercised Coverage Groups
+
+From the testplan `covergroups` list, identify:
+
+- Covergroups with zero UCIS hits in the latest regression: unreached design states
+- Covergroups with hits below the SV `at_least` goal: partially covered
+- Covergroups not present in the UCIS database at all: testbench may not yet implement them
+
+#### Report H: Test Budget by Stage
+
+From CPU time data in `test_stats.bin` (mean CPU time × runs):
+
+| Stage | Testpoints | Tests | Est. CPU Hours | % of Total Budget |
+|---|---|---|---|---|
+| V1 | 12 | 8 | 14 h | 12% |
+| V2 | 24 | 31 | 87 h | 76% |
+| V2S | 6 | 9 | 14 h | 12% |
+| V3 | 4 | 0 | — | — |
+
+This identifies which verification stages dominate simulation cost and allows informed
+decisions about regression time allocation or test pruning.
+
+---
+
+### 9.5 Implementation Notes
+
+- **Testplan data is stored in the NCDB ZIP** as `testplan.json`. It is read-only after
+  import; a new import (with updated testplan) creates a new snapshot. The previous snapshot
+  is retained for delta comparison.
+
+- **The UCIS join key** is `test_name` (testplan `tests` list entry) ↔ UCIS logical history
+  node name (`UCIS_STR_TEST_NAME`). If exact matching fails, a fallback that strips a trailing
+  seed suffix (`_\d+$`) from the UCIS test name is applied. Failures to match are reported as
+  `NOT RUN`.
+
+- **Stage data is not in UCIS**: stage-gated reports are produced by the report layer joining
+  `testplan.json` data with UCIS query results. No UCIS schema changes are required.
+
+- **Covergroup matching** uses the testplan covergroup `name` field matched against UCIS
+  scope `name` within the DUT instance hierarchy. Ambiguous matches (same name in multiple
+  instances) are resolved by DUT-level scope path if available.
+
+- **Historical data sourcing**: Report E (Stage Progression) requires the merged NCDB with
+  bucket history. The report framework should detect whether the binary history store (Part 2)
+  is present and fall back to `history.json` for databases that have not been upgraded.
+
+- **Standalone testplan mode**: `compute_closure()` and all report generators accept a
+  `Testplan` loaded from a standalone file (`Testplan.load(path)`) in exactly the same way
+  they accept one retrieved from an embedded ZIP member (`db.getTestplan()`). No API
+  difference exists between the two modes. This enables cross-analysis workflows such as
+  applying a new testplan revision against an already-built NCDB, or retroactively mapping
+  a testplan against legacy databases that pre-date testplan embedding.
+
+---
+
+## Part 10: PyUCIS-Native Testplan Embedding in NCDB
+
+### 10.1 Design Principles
+
+The embedding follows the established NCDB member pattern: a single new ZIP member
+(`testplan.json`) with a dedicated `TestplanReader`/`TestplanWriter` pair, lazily loaded
+through `NcdbUCIS`, and written through `NcdbWriter`. No changes to the UCIS standard
+interface (`ucis.py`) are needed — all testplan API is an NCDB extension.
+
+Two usage modes are explicitly supported:
+
+**Mode A — Embedded**: the testplan is stored as `testplan.json` inside the NCDB ZIP and
+retrieved via `db.getTestplan()`. The plan travels with the database.
+
+**Mode B — Standalone**: the testplan is kept as a separate `testplan.json` file and loaded
+directly with `Testplan.load(path)`. Analysis functions (`compute_closure()`,
+`stage_gate_status()`, report generators) accept a `Testplan` object and a UCIS database
+object as independent arguments, so both modes use identical downstream code.
+
+Design constraints:
+- The UCIS API (`db.historyNodes()`, `db.scopes()`, etc.) is unchanged.
+- Testplan data does not pollute history nodes or the scope tree.
+- Opening a database without a testplan has zero overhead.
+- Testpoint lookups by name or by exact test name are O(1) after cold-start load (~1 ms);
+  the wildcard fallback and per-stage queries scan the testpoint list.
+- The ZIP member is omitted if no testplan was ever set (sparse, like `toggle.bin`).
+
+---
+
+### 10.2 Python Data Model
+
+New file: **`src/ucis/ncdb/testplan.py`**
+
+```python
+from __future__ import annotations
+import json, re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Optional
+
+
+@dataclass
+class CovergroupEntry:
+    """One entry of the testplan's ``covergroups`` list."""
+    # Covergroup name; must match the SV covergroup scope name.
+    name: str
+    # Free-form description; defaults to the empty string.
+    desc: str = ""
+
+
+@dataclass
+class Testpoint:
+    """One testplan entry: a verification intent plus the tests mapped to it."""
+    name: str                       # testpoint identifier (snake_case)
+    stage: str                      # "V1" | "V2" | "V2S" | "V3" | custom
+    desc: str = ""                  # free-form description
+    tests: list[str] = field(default_factory=list)   # post-expansion test names
+    tags:  list[str] = field(default_factory=list)   # free-form tag strings
+    na:    bool = False              # tests: ["N/A"] — intentionally unmapped
+    source_template: str = ""        # original wildcard template before expansion
+
+
+@dataclass
+class Testplan:
+    format_version:   int = 1
+    source_file:      str = ""       # path to source .hjson (informational)
+    import_timestamp: str = ""       # ISO-8601 UTC when embedded in the CDB
+
+    testpoints:  list[Testpoint]      = field(default_factory=list)
+    covergroups: list[CovergroupEntry] = field(default_factory=list)
+
+    # ── In-memory indices (built lazily by _build_indices()) ──────────────
+
+    _tp_by_name:   dict = field(default_factory=dict, repr=False, compare=False)
+    _tp_by_test:   dict = field(default_factory=dict, repr=False, compare=False)
+    _tp_by_cg:     dict = field(default_factory=dict, repr=False, compare=False)
+    _indexed:      bool = field(default=False,        repr=False, compare=False)
+
+    # ── Index building ────────────────────────────────────────────────────
+
+    def _build_indices(self) -> None:
+        """Build O(1) lookup tables from the testpoints list. Called lazily."""
+        self._tp_by_name.clear()
+        self._tp_by_test.clear()
+        self._tp_by_cg.clear()
+        for tp in self.testpoints:
+            self._tp_by_name[tp.name] = tp
+            for t in tp.tests:
+                self._tp_by_test[t] = tp
+        for cg in self.covergroups:
+            # Map the covergroup back to every testpoint that owns it
+            # (Covergroups are listed per-testplan, not per-testpoint in OpenTitan format,
+            #  but the testpoints may reference them by naming convention.)
+            pass
+        self._indexed = True
+
+    def _ensure_indexed(self) -> None:
+        if not self._indexed:
+            self._build_indices()
+
+    # ── Public query API ──────────────────────────────────────────────────
+
+    def getTestpoint(self, name: str) -> Optional[Testpoint]:
+        """Return the testpoint with this name, or None."""
+        self._ensure_indexed()
+        return self._tp_by_name.get(name)
+
+    def testpointForTest(self, test_name: str) -> Optional[Testpoint]:
+        """Return the testpoint that owns *test_name*.
+
+        Match order:
+        1. Exact match: test_name in testpoint.tests
+        2. Seed-suffix strip: strip trailing ``_\\d+`` and retry
+        3. Wildcard: testpoint.tests entry ending ``_*`` prefix-matches test_name
+        """
+        self._ensure_indexed()
+        tp = self._tp_by_test.get(test_name)
+        if tp is not None:
+            return tp
+        # Strategy 2: strip seed suffix  (e.g. "uart_smoke_12345" → "uart_smoke")
+        stripped = re.sub(r'_\d+$', '', test_name)
+        if stripped != test_name:
+            tp = self._tp_by_test.get(stripped)
+            if tp is not None:
+                return tp
+        # Strategy 3: wildcard entries ("foo_*" matches "foo_bar")
+        for pattern, tp in self._tp_by_test.items():
+            if pattern.endswith('_*') and test_name.startswith(pattern[:-1]):
+                return tp
+        return None
+
+    def testpointsForStage(self, stage: str) -> list[Testpoint]:
+        """Return all testpoints targeting *stage* (e.g. "V2")."""
+        return [tp for tp in self.testpoints if tp.stage == stage]
+
+    def stages(self) -> list[str]:
+        """Return the ordered unique stages present in the testplan."""
+        _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}
+        seen = dict.fromkeys(tp.stage for tp in self.testpoints)
+        return sorted(seen, key=lambda s: _ORDER.get(s, 99))
+
+    # ── Serialization ─────────────────────────────────────────────────────
+
+    def to_dict(self) -> dict:
+        return {
+            "format_version":   self.format_version,
+            "source_file":      self.source_file,
+            "import_timestamp": self.import_timestamp,
+            "testpoints": [
+                {
+                    "name":            tp.name,
+                    "stage":           tp.stage,
+                    "desc":            tp.desc,
+                    "tests":           tp.tests,
+                    "tags":            tp.tags,
+                    "na":              tp.na,
+                    "source_template": tp.source_template,
+                }
+                for tp in self.testpoints
+            ],
+            "covergroups": [
+                {"name": cg.name, "desc": cg.desc}
+                for cg in self.covergroups
+            ],
+        }
+
+    def serialize(self) -> bytes:
+        return json.dumps(self.to_dict(), separators=(',', ':')).encode()
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Testplan":
+        tp = cls(
+            format_version=d.get("format_version", 1),
+            source_file=d.get("source_file", ""),
+            import_timestamp=d.get("import_timestamp", ""),
+        )
+        for rec in d.get("testpoints", []):
+            tp.testpoints.append(Testpoint(
+                name=rec["name"],
+                stage=rec.get("stage", ""),
+                desc=rec.get("desc", ""),
+                tests=rec.get("tests", []),
+                tags=rec.get("tags", []),
+                na=rec.get("na", False),
+                source_template=rec.get("source_template", ""),
+            ))
+        for rec in d.get("covergroups", []):
+            tp.covergroups.append(CovergroupEntry(
+                name=rec["name"], desc=rec.get("desc", "")
+            ))
+        return tp
+
+    @classmethod
+    def from_bytes(cls, data: bytes) -> "Testplan":
+        return cls.from_dict(json.loads(data.decode()))
+
+    @classmethod
+    def load(cls, path: str) -> "Testplan":
+        """Load a testplan from a standalone JSON file (Mode B)."""
+        with open(path, "rb") as f:
+            return cls.from_bytes(f.read())
+
+    def save(self, path: str) -> None:
+        """Write this testplan to a standalone JSON file (Mode B)."""
+        with open(path, "wb") as f:
+            f.write(self.serialize())
+```
+
+---
+
+### 10.3 ZIP Member: `testplan.json`
+
+A single new optional ZIP member added to the NCDB ZIP archive:
+
+```
+testplan.json   ← Testplan serialized as compact JSON (separators=(',',':'))
+                  Compression: ZIP_DEFLATE (same as other JSON members)
+                  Omitted entirely if no testplan has been set on the database
+```
+
+**Size estimate**: 500 testpoints × ~250 bytes/testpoint uncompressed ≈ 125 KB raw, ~20 KB
+compressed. Negligible relative to `scope_tree.bin` or `counts.bin`.
+
+The member stores the testplan **snapshot at the time it was imported into the CDB**: all
+`import_testplans` references resolved, all wildcards expanded to final test names,
+`tests: ["N/A"]` represented as `na: true` with the tests list empty.
+
+---
+
+### 10.4 NcdbUCIS Extension
+
+The `NcdbUCIS` class gains a testplan lazy-load unit alongside the existing `history` and
+`scopes` units:
+
+```python
+# In NcdbUCIS.__init__():
+self._loaded_testplan: bool = False
+self._testplan: Optional["Testplan"] = None  # None = "not present in file"
+self._testplan_dirty: bool = False           # True if setTestplan() was called
+
+# New public methods:
+
+def getTestplan(self) -> Optional["Testplan"]:
+    """Return the embedded Testplan, or None if none is stored.
+
+    The testplan unit is loaded on first access via ``_ensure_testplan()``;
+    later calls return the cached object.
+    """
+    self._ensure_testplan()
+    return self._testplan
+
+def setTestplan(self, tp: "Testplan") -> None:
+    """Embed *tp* in this database.  Written on the next write() call."""
+    from .testplan import Testplan
+    if tp.import_timestamp == "":
+        from datetime import datetime, timezone
+        tp.import_timestamp = datetime.now(timezone.utc).isoformat()
+    self._testplan = tp
+    self._testplan_dirty = True
+    self._loaded_testplan = True
+
+# New internal method:
+
+def _ensure_testplan(self) -> None:
+    if self._loaded_testplan:
+        return
+    self._loaded_testplan = True
+    self._read_zip()                        # populates _zf_cache if empty
+    raw = self._zf_cache.get(MEMBER_TESTPLAN)
+    if raw:
+        from .testplan import Testplan
+        self._testplan = Testplan.from_bytes(raw)
+```
+
+The `_read_zip()` call is already cached (`_zf_cache`), so calling `_ensure_testplan()` after
+`_ensure_history()` adds no I/O. The testplan is the lightest unit to load.
+
+---
+
+### 10.5 New Constant
+
+```python
+# In src/ucis/ncdb/constants.py:
+MEMBER_TESTPLAN = "testplan.json"
+```
+
+---
+
+### 10.6 NcdbWriter Integration
+
+```python
+# In NcdbWriter.write(), after writing other optional members:
+from .constants import MEMBER_TESTPLAN
+testplan = getattr(db, '_testplan', None)
+if testplan is not None:
+    zf.writestr(MEMBER_TESTPLAN, testplan.serialize())
+```
+
+The testplan is written only if one was set on the db object. This preserves the sparse
+member pattern — databases without testplans are byte-identical to today's output.
+
+---
+
+### 10.7 NcdbReader Integration
+
+```python
+# In NcdbReader.read(), after loading optional members:
+from .constants import MEMBER_TESTPLAN
+from .testplan import Testplan
+if MEMBER_TESTPLAN in names:
+    db._testplan = Testplan.from_bytes(zf.read(MEMBER_TESTPLAN))
+    db._loaded_testplan = True
+```
+
+`NcdbReader` returns a `MemUCIS` (not an `NcdbUCIS`), so the testplan is attached directly
+as a `_testplan` attribute. Code that uses `db.getTestplan()` should check for this attribute
+with `getattr(db, '_testplan', None)` as a fallback for non-`NcdbUCIS` databases.
+
+A thin mixin or helper function is preferred:
+
+```python
+# src/ucis/ncdb/testplan.py  (additional helper)
+def get_testplan(db) -> Optional[Testplan]:
+    """Retrieve testplan from any UCIS db object (NcdbUCIS or MemUCIS)."""
+    if hasattr(db, 'getTestplan'):
+        return db.getTestplan()
+    return getattr(db, '_testplan', None)
+
+def set_testplan(db, tp: Testplan) -> None:
+    """Attach testplan to any UCIS db object."""
+    if hasattr(db, 'setTestplan'):
+        db.setTestplan(tp)
+    else:
+        tp.import_timestamp = tp.import_timestamp or \
+            datetime.now(timezone.utc).isoformat()
+        db._testplan = tp
+```
+
+---
+
+### 10.8 NcdbMerger Integration
+
+The merger must propagate testplan data without silently losing it:
+
+#### Same-schema fast path (`_merge_same_schema`)
+
+All inputs share a schema hash, meaning they were generated from the same DUT build with
+the same testplan. Read `testplan.json` from the first source that has one and copy it
+verbatim to the output (no deserialization needed — raw bytes copy):
+
+```python
+# In _merge_same_schema(), after writing MEMBER_SOURCES:
+testplan_bytes = None
+for src in sources:
+    with zipfile.ZipFile(src) as zf:
+        if MEMBER_TESTPLAN in zf.namelist():
+            testplan_bytes = zf.read(MEMBER_TESTPLAN)
+            break
+if testplan_bytes is not None:
+    zf_out.writestr(MEMBER_TESTPLAN, testplan_bytes)
+```
+
+The `import_timestamp` in the testplan is intentionally left as-is (it records when the plan
+was first embedded, not when this merge happened).
+
+#### Cross-schema path (`_merge_cross_schema`)
+
+Different schemas may mean different DUTs, different testplan versions, or both. Strategy:
+
+1. Collect all unique `(source_file, import_timestamp)` pairs from input testplans.
+2. If all inputs have **identical JSON bytes** → copy verbatim to output.
+3. If inputs differ in `import_timestamp` only → take the most recent (highest timestamp).
+4. If inputs have **different `source_file`** values → emit a warning and omit the testplan
+   from the merged output (merging incompatible DUT plans is undefined).
+
+```python
+def _merge_testplans(self, sources: list[str]) -> Optional[bytes]:
+    """Return merged testplan bytes, or None with a warning if incompatible."""
+    candidates = {}  # source_file → (import_timestamp, bytes)
+    for src in sources:
+        with zipfile.ZipFile(src) as zf:
+            if MEMBER_TESTPLAN not in zf.namelist():
+                continue
+            raw = zf.read(MEMBER_TESTPLAN)
+            d = json.loads(raw)
+            sf = d.get("source_file", "")
+            ts = d.get("import_timestamp", "")
+            if sf not in candidates or ts > candidates[sf][0]:
+                candidates[sf] = (ts, raw)
+    if len(candidates) == 0:
+        return None
+    if len(candidates) == 1:
+        return next(iter(candidates.values()))[1]
+    import warnings
+    warnings.warn(
+        f"Merging databases with different testplans "
+        f"({list(candidates)}); testplan omitted from output.",
+        stacklevel=3,
+    )
+    return None
+```
+
+---
+
+### 10.9 Testpoint Closure Computation
+
+The closure computation lives in a standalone module (not inside `Testplan`) so that it can
+be used without importing the scope tree:
+
+New file: **`src/ucis/ncdb/testplan_closure.py`**
+
+```python
+from __future__ import annotations
+from enum import Enum
+from dataclasses import dataclass
+from typing import Optional
+
+from ucis.history_node_kind import HistoryNodeKind
+from ucis.test_status_t import TestStatusT
+from .testplan import Testplan, Testpoint
+
+
+class TPStatus(Enum):
+    """Closure state assigned to a testpoint by compute_closure()."""
+    CLOSED        = "CLOSED"        # at least one mapped test matched and none failed
+    PARTIAL       = "PARTIAL"       # some matched tests passed, some failed
+    FAILING       = "FAILING"       # at least one mapped test matched and none passed
+    NOT_RUN       = "NOT_RUN"       # none of the mapped tests appear in the DB
+    NA            = "N/A"           # testpoint intentionally unmapped
+    UNIMPLEMENTED = "UNIMPLEMENTED" # tests list is empty (plan written, test not yet)
+
+
+@dataclass
+class TestpointResult:
+    """Closure outcome for one testpoint, as produced by compute_closure()."""
+    testpoint: Testpoint            # the testpoint this result describes
+    status:    TPStatus             # derived closure state
+    matched_tests: list[str]        # test names that matched from the DB
+    pass_count:    int = 0          # matched tests whose status is OK
+    fail_count:    int = 0          # matched tests with any other status
+
+
+def compute_closure(testplan: Testplan, db) -> list[TestpointResult]:
+    """Compute pass/fail closure for every testpoint against *db*.
+
+    Args:
+        testplan: The Testplan embedded in (or associated with) *db*.
+        db:       Any UCIS database (NcdbUCIS, MemUCIS, …).
+
+    Returns:
+        One TestpointResult per testpoint, in testplan order.
+    """
+    # Build test-name → status lookup from history nodes (O(N_tests))
+    test_status: dict[str, TestStatusT] = {}
+    for node in db.historyNodes(HistoryNodeKind.TEST):
+        name = node.getLogicalName()
+        test_status[name] = node.getTestStatus()
+
+    results = []
+    for tp in testplan.testpoints:
+        if tp.na:
+            results.append(TestpointResult(tp, TPStatus.NA, []))
+            continue
+        if not tp.tests:
+            results.append(TestpointResult(tp, TPStatus.UNIMPLEMENTED, []))
+            continue
+
+        matched, passes, fails = [], 0, 0
+        for pattern in tp.tests:
+            # Exact match
+            if pattern in test_status:
+                matched.append(pattern)
+                if test_status[pattern] == TestStatusT.OK:
+                    passes += 1
+                else:
+                    fails += 1
+                continue
+            # Seed-suffix strip
+            import re
+            stripped = re.sub(r'_\d+$', '', pattern)
+            if stripped in test_status:
+                matched.append(stripped)
+                if test_status[stripped] == TestStatusT.OK:
+                    passes += 1
+                else:
+                    fails += 1
+                continue
+            # Wildcard prefix
+            if pattern.endswith('_*'):
+                prefix = pattern[:-1]
+                for tname, tstatus in test_status.items():
+                    if tname.startswith(prefix):
+                        matched.append(tname)
+                        if tstatus == TestStatusT.OK:
+                            passes += 1
+                        else:
+                            fails += 1
+
+        if not matched:
+            status = TPStatus.NOT_RUN
+        elif fails == 0:
+            status = TPStatus.CLOSED
+        elif passes == 0:
+            status = TPStatus.FAILING
+        else:
+            status = TPStatus.PARTIAL
+
+        results.append(TestpointResult(tp, status, matched, passes, fails))
+    return results
+
+
+def stage_gate_status(results: list[TestpointResult],
+                      stage: str, testplan: Testplan) -> dict:
+    """Determine whether the stage gate for *stage* is met.
+
+    A stage gate passes when ALL testpoints at that stage and all stages
+    below it in the standard ordering are CLOSED (or N/A).
+
+    Returns a dict:
+        {
+          "gate":    stage,
+          "pass":    bool,
+          "by_stage": { stage: {"total": N, "closed": N, "gaps": [...]} }
+        }
+    """
+    _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}
+    gate_level = _ORDER.get(stage, 99)
+    by_stage: dict = {}
+    for r in results:
+        s = r.testpoint.stage
+        if _ORDER.get(s, 99) > gate_level:
+            continue
+        entry = by_stage.setdefault(s, {"total": 0, "closed": 0, "gaps": []})
+        entry["total"] += 1
+        if r.status in (TPStatus.CLOSED, TPStatus.NA):
+            entry["closed"] += 1
+        else:
+            entry["gaps"].append(r.testpoint.name)
+    gate_pass = all(e["closed"] == e["total"] for e in by_stage.values())
+    return {"gate": stage, "pass": gate_pass, "by_stage": by_stage}
+```
+
+---
+
+### 10.10 Covergroup Join
+
+For Report C (coverage per testpoint), the join between testplan `covergroups` entries and
+UCIS scope nodes uses a DFS helper:
+
+```python
+# src/ucis/ncdb/testplan_closure.py  (additional helper)
+
+from ucis.scope_type_t import ScopeTypeT
+
+def find_covergroup_scopes(db, cg_name: str) -> list:
+    """Return all UCIS scope nodes whose name matches *cg_name* and whose
+    scope type is COVERGROUP (or COVERINSTANCE).  Requires scope tree loaded.
+    """
+    results = []
+    _CG_TYPES = {int(ScopeTypeT.COVERGROUP), int(ScopeTypeT.COVERINSTANCE)}
+
+    def _dfs(scope):
+        try:
+            st = int(scope.getScopeType())
+        except Exception:
+            st = -1
+        if st in _CG_TYPES and scope.getScopeName() == cg_name:
+            results.append(scope)
+        try:
+            for child in scope.scopes(ScopeTypeT.ALL):
+                _dfs(child)
+        except Exception:
+            pass
+
+    _dfs(db)
+    return results
+```
+
+**Efficiency note**: this is O(total_scopes). For repeated calls across many covergroups,
+build a name → scope index once:
+
+```python
+def build_covergroup_index(db) -> dict[str, list]:
+    """Build a dict mapping covergroup name → list of matching scope nodes."""
+    index: dict[str, list] = {}
+    _CG_TYPES = {int(ScopeTypeT.COVERGROUP), int(ScopeTypeT.COVERINSTANCE)}
+
+    def _dfs(scope):
+        try:
+            if int(scope.getScopeType()) in _CG_TYPES:
+                name = scope.getScopeName()
+                index.setdefault(name, []).append(scope)
+        except Exception:
+            pass
+        try:
+            for child in scope.scopes(ScopeTypeT.ALL):
+                _dfs(child)
+        except Exception:
+            pass
+
+    _dfs(db)
+    return index
+```
+
+---
+
+### 10.11 OpenTitan Hjson Import
+
+A convenience function converts an OpenTitan `.hjson` testplan file into a `Testplan`
+object ready for embedding:
+
+New file: **`src/ucis/ncdb/testplan_hjson.py`**
+
+```python
+"""Import an OpenTitan-format Hjson testplan into a Testplan object."""
+
+from __future__ import annotations
+import re
+from datetime import datetime, timezone
+from typing import Optional
+from .testplan import Testplan, Testpoint, CovergroupEntry
+
+
+def import_hjson(hjson_path: str,
+                 substitutions: Optional[dict] = None) -> Testplan:
+    """Parse *hjson_path* and return a fully resolved Testplan.
+
+    Args:
+        hjson_path:     Path to the .hjson testplan file.
+        substitutions:  Dict of wildcard substitutions, e.g.
+                        {"name": "uart", "intf": ["", "_jtag"]}.
+                        Applied to ``tests`` lists that contain ``{key}``
+                        patterns (OpenTitan wildcard expansion).
+    """
+    try:
+        import hjson
+    except ImportError:
+        import json as hjson   # fallback: .hjson without comments
+
+    with open(hjson_path) as f:
+        raw = hjson.load(f)
+
+    subs = substitutions or {}
+    dut_name = raw.get("name", "")
+    if dut_name and "name" not in subs:
+        subs["name"] = dut_name
+
+    tp = Testplan(
+        source_file=hjson_path,
+        import_timestamp=datetime.now(timezone.utc).isoformat(),
+    )
+
+    for rec in raw.get("testpoints", []):
+        raw_tests = rec.get("tests", [])
+        # Expand wildcards
+        expanded = _expand_tests(raw_tests, subs)
+        na = raw_tests == ["N/A"]
+        source_template = ",".join(raw_tests) if raw_tests != expanded else ""
+        tp.testpoints.append(Testpoint(
+            name=rec["name"],
+            stage=rec.get("stage", ""),
+            desc=rec.get("desc", ""),
+            tests=[] if na else expanded,
+            tags=rec.get("tags", []),
+            na=na,
+            source_template=source_template,
+        ))
+
+    for rec in raw.get("covergroups", []):
+        tp.covergroups.append(CovergroupEntry(
+            name=rec["name"], desc=rec.get("desc", "")
+        ))
+
+    return tp
+
+
+def _expand_tests(test_list: list[str], subs: dict) -> list[str]:
+    """Expand ``{key}`` wildcards in test names using *subs*.
+
+    If a substitution value is a list, the cartesian product is computed.
+    """
+    if not subs:
+        return [t for t in test_list if t != "N/A"]
+    results = []
+    for template in test_list:
+        if template == "N/A":
+            continue
+        expanded = _expand_template(template, subs)
+        results.extend(expanded)
+    return results
+
+
+def _expand_template(template: str, subs: dict) -> list[str]:
+    """Recursively expand a single test name template."""
+    m = re.search(r'\{(\w+)\}', template)
+    if not m:
+        return [template]
+    key = m.group(1)
+    values = subs.get(key, [""])
+    if isinstance(values, str):
+        values = [values]
+    result = []
+    for v in values:
+        expanded = template.replace(f"{{{key}}}", v, 1)
+        result.extend(_expand_template(expanded, subs))
+    return result
+```
+
+---
+
+### 10.12 Files to Create / Modify
+
+#### New Files
+
+| File | Purpose |
+|---|---|
+| `src/ucis/ncdb/testplan.py` | `Testplan`, `Testpoint`, `CovergroupEntry` data model + serialization + query API |
+| `src/ucis/ncdb/testplan_closure.py` | `compute_closure()`, `stage_gate_status()`, `find_covergroup_scopes()`, `build_covergroup_index()` |
+| `src/ucis/ncdb/testplan_hjson.py` | `import_hjson()` — OpenTitan Hjson → `Testplan` converter |
+
+#### Modified Files
+
+| File | Change |
+|---|---|
+| `src/ucis/ncdb/constants.py` | Add `MEMBER_TESTPLAN = "testplan.json"` |
+| `src/ucis/ncdb/ncdb_ucis.py` | Add `_loaded_testplan`, `_testplan`, `_testplan_dirty` fields; add `getTestplan()`, `setTestplan()`, `_ensure_testplan()` methods |
+| `src/ucis/ncdb/ncdb_writer.py` | Write `testplan.json` if `getattr(db, '_testplan', None)` is set |
+| `src/ucis/ncdb/ncdb_reader.py` | Read `testplan.json` if present, attach to returned `MemUCIS` |
+| `src/ucis/ncdb/ncdb_merger.py` | Call `_merge_testplans()` in both fast and cross-schema paths; write result if non-None |
+
+#### No UCIS interface changes
+
+`src/ucis/ucis.py` and all non-NCDB backends (`xml/`, `sqlite/`, `mem/`) are unchanged.
+The testplan feature is explicitly NCDB-native.
+
+---
+
+### 10.13 Usage Examples
+
+```python
+from ucis.ncdb.ncdb_ucis import NcdbUCIS
+from ucis.ncdb.testplan import get_testplan
+from ucis.ncdb.testplan_hjson import import_hjson
+from ucis.ncdb.testplan_closure import compute_closure, stage_gate_status
+
+# ── Embedding a testplan into a new CDB ──────────────────────────────────
+db = NcdbUCIS("regression.cdb")
+tp = import_hjson("hw/ip/uart/data/uart_testplan.hjson",
+                  substitutions={"name": "uart", "intf": ["", "_jtag"]})
+db.setTestplan(tp)
+db.write("regression_with_plan.cdb")
+
+# ── Reading back and computing closure ───────────────────────────────────
+db2 = NcdbUCIS("regression_with_plan.cdb")
+tp2 = db2.getTestplan()                   # lazy load; no scope tree needed
+
+results = compute_closure(tp2, db2)       # triggers history load only
+for r in results:
+    print(f"{r.testpoint.name:30s}  {r.testpoint.stage}  {r.status.value}")
+
+# ── Stage gate check ─────────────────────────────────────────────────────
+gate = stage_gate_status(results, "V2", tp2)
+if gate["pass"]:
+    print("V2 gate: PASS")
+else:
+    for stage, info in gate["by_stage"].items():
+        print(f"  {stage}: {info['closed']}/{info['total']} — gaps: {info['gaps']}")
+
+# ── Works with any db that has a testplan attached ───────────────────────
+tp3 = get_testplan(db)   # works for NcdbUCIS, MemUCIS, any db with _testplan
+
+# ── Mode B: standalone testplan file cross-analyzed against an NCDB ──────
+# Use this when the testplan was never embedded, or when you want to apply
+# a different testplan revision against an already-built database.
+from ucis.ncdb.testplan import Testplan
+
+tp_ext = Testplan.load("hw/ip/uart/data/uart_testplan.json")
+
+db_legacy = NcdbUCIS("old_regression.cdb")  # no embedded testplan
+results_ext = compute_closure(tp_ext, db_legacy)
+for r in results_ext:
+    print(f"{r.testpoint.name:30s}  {r.testpoint.stage}  {r.status.value}")
+
+# ── Mode B: save a resolved testplan to a standalone file ────────────────
+tp_resolved = import_hjson("hw/ip/uart/data/uart_testplan.hjson",
+                            substitutions={"name": "uart", "intf": ["", "_jtag"]})
+tp_resolved.save("artifacts/uart_testplan_resolved.json")
+# Later: Testplan.load("artifacts/uart_testplan_resolved.json")
+```
+
+---
+
+### 10.14 Lazy-Load Dependency Map
+
+```
+NcdbUCIS._ensure_testplan()  ──► reads testplan.json from _zf_cache
+                                  ↑ triggers _read_zip() if cache empty
+                                  (no dependency on history or scopes)
+
+compute_closure(tp, db)      ──► calls db.historyNodes()
+                                  ↑ triggers _ensure_history()
+                                  (no dependency on scopes)
+
+build_covergroup_index(db)   ──► calls db.scopes()
+                                  ↑ triggers _ensure_scopes()
+                                  (heaviest load — only for coverage reports)
+```
+
+The testplan can be read and queried without loading either the history nodes or the scope
+tree. Report A (Testpoint Closure Summary) loads only `testplan.json` + `history.json` —
+the two lightest members.
+
+---
+
+### 10.15 Standalone Testplan Mode (Mode B) — Cross-Analysis Workflow
+
+In Mode B the testplan is **not** stored inside the NCDB. This is the expected flow when
+cross-analyzing a standalone `testplan.json` file against an NCDB:
+
+```
+Input A: uart_testplan.json      ← standalone testplan (from RTL repo, CI artifact, etc.)
+Input B: regression.cdb          ← NCDB produced by simulation run (may have no embedded plan)
+
+Step 1: tp = Testplan.load("uart_testplan.json")
+Step 2: db = NcdbUCIS("regression.cdb")
+Step 3: results = compute_closure(tp, db)
+Step 4: gate   = stage_gate_status(results, "V2", tp)
+```
+
+#### When to use Mode B
+
+| Scenario | Mode |
+|---|---|
+| Testplan is embedded at import time and travels with the CDB | A (embedded) |
+| Testplan lives in RTL repo; CDB produced by CI without plan injection | B (standalone) |
+| Retro-fitting plan coverage onto pre-existing legacy databases | B (standalone) |
+| Comparing multiple testplan revisions against the same frozen NCDB | B (standalone) |
+| Ad-hoc analysis during bring-up before a canonical plan exists | B (standalone) |
+| Post-silicon debug: map chip-test results against a verification plan | B (standalone) |
+
+#### Producing a standalone testplan file
+
+From a raw OpenTitan Hjson file:
+
+```python
+from ucis.ncdb.testplan_hjson import import_hjson
+from ucis.ncdb.testplan import Testplan
+
+tp = import_hjson("hw/ip/uart/data/uart_testplan.hjson",
+                  substitutions={"name": "uart", "intf": ["", "_jtag"]})
+tp.save("artifacts/uart_testplan.json")   # save for later reuse
+```
+
+Or convert from an already-embedded plan to a standalone copy for sharing:
+
+```python
+db = NcdbUCIS("regression.cdb")
+tp = db.getTestplan()
+if tp:
+    tp.save("artifacts/uart_testplan.json")
+```
+
+#### Selecting which testplan to use at analysis time
+
+`compute_closure()` accepts a `Testplan` object from either source; the caller decides
+the priority:
+
+```python
+def get_analysis_testplan(db, standalone_path=None):
+    """Return a Testplan for analysis, preferring standalone over embedded."""
+    if standalone_path:
+        return Testplan.load(standalone_path)
+    tp = get_testplan(db)
+    if tp:
+        return tp
+    raise ValueError("No testplan available: provide --testplan <path> or embed one in the CDB")
+```
+
+This keeps the policy decision in the caller (CLI, script, or notebook) rather than in the
+library, so both modes remain fully supported without hidden precedence rules.
+
+---
+
+## Part 11: Competitive Analysis — Matching and Exceeding the State of Practice
+
+### 11.1 Industry Landscape
+
+The three dominant commercial EDA verification management platforms and their regression reporting
+capabilities are surveyed here, along with CI/CD-era test reporting tools (Allure, Grafana,
+TestRail, Zephyr Scale), and 2024–2025 AI/ML trends.
+
+#### 11.1.1 Cadence Verisium Manager (formerly vManager)
+
+Cadence positions Verisium Manager as an MDV (Metric-Driven Verification) execution platform.
+
+| Capability | Details |
+|---|---|
+| Hierarchical vPlan | Testpoints organized as a tree; each node has pass/fail/coverage status |
+| Real-time coverage merge | Incremental merge visible in dashboards *during* regression |
+| Failure clustering/buckets | Automated log similarity clustering → N failures → K buckets |
+| Failure signature linking | Representative waveform (FSDB) linked to each bucket |
+| Pass/fail trend dashboards | Per-testpoint pass rate over time; regression δ view |
+| Farm utilization metrics | CPU hours, parallel job efficiency |
+| Owner/priority on testpoints | Engineer assignment and priority fields |
+| Jenkins/GitLab CI integration | Pipeline plugins; automatic post-regression summaries |
+| REST API + Python API | Programmatic report generation |
+| Requirements traceability | Jira, Jama, IBM DOORS via OSLC adapters |
+| Email summary reports | Configurable post-regression email with key metrics |
+| Waiver management | Mark bins "not applicable" with rationale; tracked separately |
+| AI-assisted triage | ML clustering; root-cause suggestion from log patterns |
+
+Key insight: Cadence's differentiator is *live* coverage merging (see coverage grow in real time as
+tests complete) and tightly coupled waveform debug from failure buckets.
+
+#### 11.1.2 Synopsys VC ExecMan + VSO.ai
+
+VC ExecMan is the regression orchestration layer. VSO.ai is the AI analytics overlay introduced
+in 2023–2024.
+
+| Capability | Details |
+|---|---|
+| Hierarchical Verification Plan (HVP) | Tree of goals with coverage-linked metrics |
+| Per-test coverage contribution | Unique bins hit by each test; identifies redundant tests |
+| Minimum test set (MTS) computation | Greedy set cover to minimize regression time at target coverage |
+| Unreachable coverage detection | Constraint-conflict or dead-code identification |
+| ML-based test prioritization | Rank tests by predicted coverage ROI; 2–10× regression speedup (NVIDIA, AMD claims) |
+| Targeted rerun scheduling | Automatically reruns failed/low-coverage tests with adjusted seeds |
+| Predictive closure timeline | "At current rate, coverage closure in N days" |
+| Phase-aware analytics | Testbench bring-up → bug hunting → signoff: different optimization objectives |
+| SQL API for custom reports | ad-hoc queries against the regression DB |
+| Coverage root cause analysis (auto) | Pinpoints why bins are uncovered; generates actionable hints |
+| Integration with Verdi/waveforms | Failure → waveform handoff |
+| Customer-reported speedup | 2–10× regression reduction; 10× faster coverage hole closure |
+
+Key insight: VSO.ai's *minimum test set* and *per-test coverage contribution* are the most
+technically differentiated features. They require per-test coverage bins (contrib data), which
+NCDB already stores in `contrib/*.bin`.
+
+#### 11.1.3 Siemens Questa VRM + Verification IQ
+
+Siemens uses an RMDB (Regression Management DB) as the central store and Verification IQ as the
+analytics UI layer.
+
+| Capability | Details |
+|---|---|
+| Hierarchical testplan (UCDB-backed) | Testplan integrated into UCDB as first-class objects |
+| Testplan Author | GUI-assisted testplan creation with coverage scope linking |
+| OSLC integration | Polarion, Jama, Jira bi-directional traceability |
+| Functional safety traceability | ISO 26262, DO-254 reports; audit trails with signoff stamps |
+| Live UCDB merge during regression | Coverage visible before regression completes |
+| ML-assisted failure bucketing | Log analysis + assertion clustering |
+| Web dashboards | Executive and engineer views; drilldown |
+| Regression delta report | Compares current vs previous regression; new failures highlighted |
+| Closure forecasting | Trend-based prediction |
+| Email/Slack notifications | Configurable alert rules |
+| Cost/schedule tracking | Test budget vs actuals by stage |
+
+Key insight: Siemens leads on *functional safety compliance reporting* (ISO 26262) and on
+embedding testplan data inside the UCDB itself rather than a sidecar file — closest to our
+`testplan.json` ZIP member design.
+
+#### 11.1.4 CI/CD Era Tools (Allure, Grafana, TestRail, Zephyr Scale)
+
+These tools are prominent in software verification but are increasingly used in hardware projects
+running on CI infrastructure.
+
+| Tool | Strengths | Gaps vs EDA needs |
+|---|---|---|
+| Allure Report | Rich HTML output; CI integration; flaky test identification; trend view | No coverage metrics; no seed/testpoint concept |
+| Grafana | Fully custom dashboards; alerting; time-series metrics | Requires custom data pipeline; no EDA-native data |
+| TestRail | Full test case management; requirements traceability; compliance reports | No coverage metrics; no simulation-native integration |
+| Zephyr Scale | Jira-native; agile sprint alignment; regression cycles | No coverage metrics; weak EDA toolchain integration |
+
+Key patterns across all CI tools: pass/fail rate over time, flaky test detection, trend
+dashboards, and CI pipeline integration are table-stakes features.
+
+---
+
+### 11.2 Feature Gap Analysis
+
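+For each feature class, we assess whether it is **present in our design** (fully or
+partially), **absent (a gap)**, **not applicable** or low priority, or an area where our
+design **exceeds** the commercial tools.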
+
+| Feature | Commercial EDA | Our Design (Parts 1–10) | Gap? |
+|---|---|---|---|
+| Testplan hierarchy | ✓ (all three) | ✓ Part 9.3 / 10.2 | No |
+| Coverage per testpoint | ✓ (all three) | ✓ Report C (9.4) | No |
+| Stage gate readiness | ✓ (vPlan stages) | ✓ Report B (9.4) + 10.9 | No |
+| Testpoint pass rate trend (history) | ✓ | ✓ Report F (9.4) via history | No |
+| Regression delta report | ✓ (all three) | ✓ Report D (9.4) | No |
+| Historical stage progression | ✓ | ✓ Report E (9.4) | No |
+| Per-test coverage contribution | ✓ VSO.ai, vManager | **Partial** — contrib/*.bin exists, no compute_contribution() API | **Gap** |
+| Minimum test set computation | ✓ VSO.ai | **Absent** | **Gap** |
+| Failure clustering/buckets | ✓ (all three) | **Absent** | **Gap** |
+| Failure-to-waveform linking | ✓ vManager, VRM | Absent (out of pyucis scope) | N/A |
+| Predictive closure timeline | ✓ VSO.ai, VRM | **Absent** | **Gap** |
+| Unreachable bin detection | ✓ VSO.ai | Absent (simulator-level) | N/A |
+| Waiver management | ✓ (all three) | **Absent** | **Gap** |
+| Real-time/live merge | ✓ vManager, VRM | Absent (batch only) | Low priority |
+| Requirements ALM traceability | ✓ (all three) | **Absent** | **Gap** |
+| Functional safety (ISO 26262) | ✓ Siemens | **Absent** | **Gap** |
+| ML-based test prioritization | ✓ VSO.ai | **Absent** | **Gap** |
+| Targeted rerun scheduling | ✓ VSO.ai, ExecMan | Absent (scheduler is external) | N/A |
+| CI/CD pipeline integration | ✓ (all three) | **Absent** | **Gap** |
+| Flaky test identification | ✓ + CI tools | ✓ flake_score in history design | No |
+| CUSUM change-point detection | **Absent** in all three | ✓ Part 6 | **Exceed** |
+| Seed-correlated failure analytics | **Absent** | ✓ seed_id in history buckets | **Exceed** |
+| Per-test coverage provenance audit | **Absent** | ✓ squash_log.bin | **Exceed** |
+| Confidence-weighted closure | **Absent** | ✓ flake_score gates signoff | **Exceed** |
+| Open, inspectable format | **Absent** | ✓ ZIP+JSON; no license required | **Exceed** |
+| Cross-simulator UCIS interop | Proprietary formats | ✓ standard UCIS API | **Exceed** |
+| Stage gate + flake score integration | **Absent** | ✓ combined in compute_closure() | **Exceed** |
+
+---
+
+### 11.3 Features to Add to Match Commercial Tools
+
+The following features are absent from our design but are expected by professional users who
+have used commercial tools. They should be added to the design and eventually implemented.
+
+#### 11.3.1 Per-Test Coverage Contribution Report (Report I)
+
+**What**: Rank all tests in a regression by the number of *unique* coverage bins they contribute
+— bins that no other test hit. Identify redundant tests (zero unique contribution).
+
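+**How (in NCDB)**: `contrib/*.bin` stores per-test bin hit vectors. A set-cover query over these
+vectors yields unique contribution per test. The result is a ranked list suitable for regression
+pruning. Prune iteratively, re-computing uniqueness after each removal: two tests that hit
+identical bins both show zero unique contribution, yet removing both would lose that coverage.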
+
+```
+Report I: Per-Test Coverage Contribution
+
+Test                  Total Bins Hit   Unique Bins   Redundant?
+--------------------  ---------------  ------------  ----------
+smoke_basic_0          12840            3210          No
+directed_arith_0        8901             401          No
+directed_arith_1        8820               0          YES (fully covered by directed_arith_0)
+rand_full_0            45000            8100          No
+...
+
+Suggested pruning: remove 14 tests with 0 unique contribution → save ~12% regression time
+```
+
+**Design addition**: `compute_contribution(db)` function in `testplan_closure.py` that iterates
+`contrib/*.bin` and computes unique bins per test. Returns `List[TestContribution]` with
+`test_name`, `total_bins`, `unique_bins`, `unique_fraction`.
+
+#### 11.3.2 Minimum Test Set Report (Report J)
+
+**What**: Given a target coverage threshold (e.g., 95%), compute the smallest subset of tests
+that achieves that threshold. This is the (NP-hard) set-cover problem; a greedy approximation
+costs O(n·m) per selection round for n tests × m bins, i.e. O(k·n·m) for k selected tests.
+
+**How**: Greedy algorithm: repeatedly select the test that covers the most still-uncovered
+bins (its marginal contribution) until the threshold is met or no further coverage is possible.
+
+```
+Report J: Minimum Test Set for 95% Closure
+
+  Original regression: 420 tests, 18.2 CPU-hours
+  Minimum test set:     87 tests,  4.1 CPU-hours (77% reduction)
+
+  Included tests (top 10 by contribution):
+    rand_full_0          → 12.4% of total bins (unique)
+    rand_full_1          →  6.1%
+    directed_fsm_0       →  4.8%
+    ...
+
+  Coverage achieved: 95.3%  (target: 95.0%)
+  Excluded: 333 tests with <0.01% unique contribution each
+```
+
+**Design addition**: `compute_minimum_test_set(db, target_coverage=0.95)` in
+`testplan_closure.py`. Returns `MinimumTestSet` with `included_tests`, `excluded_tests`,
+`achieved_coverage`, `cpu_hours_saved`.
+
+#### 11.3.3 Waiver Management (ZIP member `waivers.json`)
+
+**What**: Coverage bins that are intentionally uncovered should be marked with a rationale
+and approver, and excluded from closure calculations. This is required for ISO 26262 and
+other compliance workflows.
+
+**Format**: New ZIP member `waivers.json` with the following schema:
+
+```json
+{
+  "schema_version": 1,
+  "waivers": [
+    {
+      "id": "W-001",
+      "scope_pattern": "top.dut.arith.covergroup_t.*",
+      "bin_pattern": "overflow_corner",
+      "rationale": "Hardware prevents this condition by design (see spec §3.4.2)",
+      "approver": "jane.doe@example.com",
+      "approved_at": "2024-11-15",
+      "expires_at": null,
+      "status": "active"
+    }
+  ]
+}
+```
+
+**Impact on closure**: `compute_closure()` accepts an optional `waivers: List[Waiver]` argument.
+Waived bins are excluded from denominator and reported separately in all closure metrics.
+
+**New file**: `src/ucis/ncdb/waivers.py` — `Waiver` dataclass, `WaiverSet.load()`,
+`WaiverSet.save()`, `WaiverSet.matches_scope(scope_path, bin_name)`.
+
+**NcdbUCIS extension**: Add `getWaivers()` / `setWaivers()` analogous to testplan.
+
+#### 11.3.4 Predictive Closure Timeline (Report K)
+
+**What**: Given historical coverage growth trend over successive regressions, estimate when
+coverage will reach the target if the current rate is maintained.
+
+**How**: Fit a logarithmic or asymptotic curve to the (regression_number, coverage_pct) series
+stored in history. Extrapolate to the target coverage. Report confidence interval.
+
+```
+Report K: Predictive Closure Timeline
+
+  Current coverage:  78.4% (target: 95.0%)
+  Regressions so far: 34
+  Trend model: logarithmic fit  R²=0.94
+
+  Projection (95% CI):
+    Optimistic (upper CI):  +8 regressions
+    Median estimate:        +14 regressions
+    Pessimistic (lower CI): +23 regressions
+
+  Warning: coverage growth rate has been declining since regression #28
+  (CUSUM change point detected; see Part 6 change-point analysis).
+```
+
+**Design addition**: `compute_closure_forecast(history_series, target=0.95)` function in
+`testplan_closure.py`. Takes a `List[Tuple[int, float]]` of `(regression_id, coverage)` pairs,
+with coverage expressed on the same scale as `target`, and returns a `Forecast`
+with `median_regressions_to_target`, `ci_lower`, `ci_upper`, `model_fit_r2`, `warning`.
+
+#### 11.3.5 CI/CD Integration — JUnit XML and GitHub Annotations Export
+
+**What**: Export regression results in JUnit XML format (testpoint pass/fail as test cases)
+so that CI/CD systems (GitHub Actions, GitLab CI, Jenkins) can display native pass/fail
+annotations and trend graphs.
+
+**How**: Map each testpoint to a JUnit `<testcase>`. Stage → `<testsuite>`. Failures → `<failure>`
+with message. Duration → `time=` attribute.
+
+**Design addition**: `testplan_export.py` with:
+- `export_junit_xml(results: List[TestpointResult], output_path: str)` — writes JUnit XML
+- `export_github_annotations(results)` — writes GitHub Actions `::error::` / `::warning::` lines
+- `export_summary_markdown(results, history)` — writes GitHub Actions Job Summary markdown
+
+#### 11.3.6 Requirements Traceability Link (ALM Integration)
+
+**What**: Each testpoint can carry a link to an external ALM item (Jira issue, Polarion
+requirement, GitHub issue). The end-of-regression report then includes traceability from
+coverage point → testpoint → requirement → sign-off status.
+
+**How**: Add optional `requirements` field to `Testpoint` dataclass:
+
+```python
+@dataclass
+class RequirementLink:
+    system: str          # "jira", "github", "polarion", "jama"
+    project: str         # "PROJ"
+    item_id: str         # "PROJ-1234"
+    url: str             # full URL (optional)
+
+@dataclass
+class Testpoint:
+    ...
+    requirements: List[RequirementLink] = field(default_factory=list)
+```
+
+Report output: traceability matrix (requirement → testpoints that cover it → closure status).
+No live sync with ALM tools needed for v1; links are maintained in `testplan.json`.
+
+#### 11.3.7 Functional Safety Traceability Report (Report L)
+
+**What**: ISO 26262 and DO-254 require a documented claim that each safety requirement has
+been verified. Generate a traceability matrix in CSV or PDF-friendly form.
+
+```
+Report L: Functional Safety Traceability Matrix
+
+Requirement ID  Description                   Testpoints          Status    Evidence
+--------------  ----------------------------  ------------------  --------  --------
+ISO-FUNC-001    Overflow detection            tp_arith_overflow   CLOSED    merged.cdb@reg_042
+ISO-FUNC-002    Reset recovery < 3 cycles     tp_reset_timing     OPEN      —
+...
+```
+
+**Design addition**: `export_safety_matrix(results, waivers, output_path, format="csv")`
+in `testplan_export.py`.
+
+---
+
+### 11.4 Opportunities to Exceed Commercial Tools
+
+The following capabilities are *absent from all three commercial platforms* or are only
+partially implemented. They represent genuine differentiation opportunities.
+
+#### 11.4.1 Seed-Correlated Failure Analytics
+
+**What commercial tools do**: Record pass/fail per run. Some expose the seed value as metadata.
+None correlate *which seeds systematically produce failures* vs. which seeds expose rare bugs.
+
+**What we uniquely offer**: The binary history design (Part 2) stores `seed_id` per run as a
+32-bit field in the bucket record. This enables:
+
+- **Seed reliability score**: For a given test, what fraction of seeds result in pass? A test
+  passing on seed 0 but failing on seeds > 1e8 indicates a seed-dependent bug.
+- **Seed range heat-map**: Bin seeds by value range; identify if certain seed regions reliably
+  expose a failure. (EDA-unique: commercial tools do not expose this.)
+- **Seed reuse recommendation**: For signoff regressions, prefer seeds with historically high
+  pass rates for "stable validation", plus seeds in the high-failure-rate range for "stress
+  regression".
+
+**Report M: Seed Reliability Analysis**
+```
+Test: rand_arith_*
+
+Seed range [0, 1M]:       pass rate 99.2% (stable)
+Seed range [1M, 10M]:     pass rate 94.1% (moderate stress)
+Seed range [100M, 200M]:  pass rate 71.3% ← HIGH FAILURE ZONE
+
+Recommended signoff seeds: {42, 1234, 9999}  (historically 100% pass)
+Stress regression seeds:   {100000001, 100500000, 101234567} (expose most bugs)
+```
+
+#### 11.4.2 CUSUM Change-Point Detection with RTL Commit Correlation
+
+**What commercial tools do**: Show coverage trend over time. None apply statistical
+process control (SPC) to detect *when* coverage growth stalled or *when* test reliability
+degraded.
+
+**What we uniquely offer** (already in Part 6 design):
+- CUSUM algorithm applied per-testpoint pass rate series
+- Change-point mapped to regression number → can be correlated with RTL commit history
+- Alert: "Test X reliability dropped at regression #47; closest RTL commit: [hash]"
+
+This enables *root-cause attribution at the RTL commit level* without any ML, purely from
+the history database. No commercial tool offers this for individual testpoint reliability.
+
+#### 11.4.3 Confidence-Weighted Coverage Closure
+
+**What commercial tools do**: Report coverage as a simple percentage. A bin hit by a test
+that has a 30% pass rate is counted the same as a bin hit by a 100%-reliable test.
+
+**What we uniquely offer**: Weight coverage by the reliability of the tests that hit it.
+
+```
+Standard closure:   92.4%
+Confidence-weighted closure: 87.1%  ← bins hit only by flaky tests are discounted
+
+Confidence-weighted closure is recommended for signoff claims.
+Bins with weight < 0.5 (hit only by tests with flake_score > 0.5):
+  top.dut.fsm.state_machine_t → state_c_to_d: weight=0.31 (unreliable)
+```
+
+This gives a *conservative* closure claim that accounts for test reliability. Commercial
+tools have no equivalent concept. It requires both coverage data and historical pass-rate
+data — exactly what our merged NCDB provides.
+
+#### 11.4.4 Coverage Provenance Audit Trail
+
+**What commercial tools do**: Show current coverage. No tool records *which regression
+contributed which bins* in the merged database, and with which squash policy.
+
+**What we uniquely offer** (Part 3/4 design): `squash_log.bin` records the exact merge
+parameters (squash policy, threshold, regression IDs) used to build each version of
+`counts.bin`. An auditor can answer:
+
+> "This bin was closed in regression #38 with squash policy `threshold=3,window=10`; if we
+> applied a stricter policy (`threshold=5`), it would still be closed (hit 7 times)."
+
+This is unique to our design and directly supports signoff claims in compliance workflows.
+
+#### 11.4.5 Open Format — No Vendor Lock-In
+
+**What commercial tools do**: Store data in proprietary binary databases. Customers cannot
+access data outside the vendor's tools. Tool upgrades may break existing data.
+
+**What we uniquely offer**:
+- Everything is in a ZIP file with JSON metadata and documented binary layouts.
+- Any Python program can read and process the data with zero licensing cost.
+- Users can write custom reports, scripts, and integrations without a vendor API contract.
+- The UCIS API layer means data is portable across simulators (VCS, Xcelium, Riviera-PRO,
+  Verilator) — commercial tools are all simulator-specific.
+
+#### 11.4.6 Stage Gate + Flake Score Integration
+
+**What commercial tools do**: V-plan stages are pass/fail based on count of tests run and
+static coverage thresholds. Flaky tests are identified separately, never integrated into
+gate criteria.
+
+**What we uniquely offer**: `compute_closure()` (Part 10.9) returns a `TPStatus` that
+incorporates both the testpoint's coverage metric and the flake_score of the tests that
+exercised it. A stage gate can require not just coverage but *reliable* coverage:
+
+```python
+gate_V2 = stage_gate_status(
+    results, stage="V2", testplan=tp,
+    require_flake_score_below=0.2,   # gate fails if covering tests are unreliable
+    require_coverage_pct=90.0
+)
+```
+
+No commercial tool integrates test reliability into stage gate logic.
+
+---
+
+### 11.5 Summary Comparison Table
+
+| Dimension | Cadence Verisium | Synopsys VSO.ai | Siemens VRM/VIQ | **PyUCIS (our design)** |
+|---|---|---|---|---|
+| Testplan hierarchy | ✓ | ✓ | ✓ | ✓ |
+| Coverage per testpoint | ✓ | ✓ | ✓ | ✓ |
+| Stage gate | ✓ | ✓ | ✓ | ✓ |
+| Testpoint trend (history) | ✓ | ✓ | ✓ | ✓ |
+| Regression delta | ✓ | ✓ | ✓ | ✓ |
+| Failure clustering | ✓ | ✓ | ✓ | ✗ (gap) |
+| Per-test contribution ranking | ✓ | ✓ VSO.ai | ✓ | ✗→ **add** |
+| Minimum test set | ✗ | ✓ VSO.ai | ✗ | ✗→ **add** |
+| Predictive closure timeline | ✗ | ✓ | ✓ | ✗→ **add** |
+| Waiver management | ✓ | ✓ | ✓ | ✗→ **add** |
+| ALM requirements traceability | ✓ | ✓ | ✓ OSLC | ✗→ **add (links only)** |
+| ISO 26262 safety reports | ✗ | ✗ | ✓ | ✗→ **add (export only)** |
+| CI/CD JUnit export | ✗ | ✗ | ✗ | ✗→ **add** |
+| Live/incremental merge | ✓ | ✓ | ✓ | ✗ (low priority) |
+| CUSUM change-point detection | ✗ | ✗ | ✗ | ✓ **unique** |
+| Seed-correlated failure analytics | ✗ | ✗ | ✗ | ✓ **unique** |
+| Confidence-weighted closure | ✗ | ✗ | ✗ | ✓ **unique** |
+| Coverage provenance audit trail | ✗ | ✗ | ✗ | ✓ **unique** |
+| Stage gate + flake score | ✗ | ✗ | ✗ | ✓ **unique** |
+| Open format / no lock-in | ✗ | ✗ | ✗ | ✓ **unique** |
+| Cross-simulator UCIS portability | ✗ | ✗ | ✗ | ✓ **unique** |
+| Zero license cost | ✗ | ✗ | ✗ | ✓ **unique** |
+
+---
+
+### 11.6 Revised Report Catalog
+
+Combining the original 8 reports (Part 9.4) with new reports from the competitive analysis:
+
+| ID | Name | Source Data | Priority |
+|---|---|---|---|
+| A | Testpoint Closure Summary | testplan + current UCIS | P0 |
+| B | Stage Gate Readiness | testplan + current UCIS | P0 |
+| C | Coverage per Testpoint | testplan + covergroup scopes | P0 |
+| D | Regression Delta | current vs previous UCIS | P0 |
+| E | Historical Stage Progression | history + testplan | P1 |
+| F | Testpoint Reliability (flake rate) | history + testplan | P1 |
+| G | Unexercised Covergroup Report | current UCIS + testplan | P1 |
+| H | Test Budget by Stage | testplan (counts/weights) | P2 |
+| I | Per-Test Coverage Contribution | contrib/*.bin | P1 |
+| J | Minimum Test Set | contrib/*.bin + target | P2 |
+| K | Predictive Closure Timeline | history (coverage series) | P2 |
+| L | Functional Safety Traceability Matrix | testplan + requirements links | P2 |
+| M | Seed Reliability Analysis | history (seed_id series) | P2 |
+
+P0 = essential for v1; P1 = high value, implement in v1 if time allows; P2 = future work.
+
+---
+
+## References
+
+- Atlassian "Flakinator": https://www.atlassian.com/blog/atlassian-engineering/taming-test-flakiness-how-we-built-a-scalable-tool-to-detect-and-manage-flaky-tests
+- Google flaky test mitigation: https://talent500.com/blog/google-flaky-test-mitigation-strategies/
+- Cadence Verisium Manager: https://www.cadence.com/en_US/home/tools/system-design-and-verification/ai-driven-verification/verisium-manager.html
+- Synopsys VC ExecMan: https://www.synopsys.com/verification/soc-verification-automation/vc-execution-manager.html
+- Synopsys VSO.ai: https://www.synopsys.com/ai/ai-powered-eda/vso-ai.html
+- Siemens Questa VRM + Verification IQ: https://eda.sw.siemens.com/en-US/eda/questa/vrm/
+- Seed Selector algorithm: https://link.springer.com/chapter/10.1007/978-3-031-53960-2_22 (42%+ regression speedup via seed value ranking)
+- Time series columnar encoding: https://www.vldb.org/pvldb/vol15/p2148-song.pdf
+- CUSUM control charts: standard statistical process control literature
+- OpenTitan testplanner: https://opentitan.org/book/util/dvsim/doc/testplanner.html
+- UCIS 1.0 LRM: Section 4.3 (History Nodes), Table 4-2 (History Node Types), Table 4-3 (Pre-defined Attributes)
+- JUnit XML schema: https://github.com/testmoapp/junitxml
diff --git a/doc/source/reference/cli.rst b/doc/source/reference/cli.rst
index d22e3b5..f37c9b7 100644
--- a/doc/source/reference/cli.rst
+++ b/doc/source/reference/cli.rst
@@ -14,3 +14,5 @@ For workflow-oriented usage see the user guides:
 * :doc:`../working-with-coverage/analyzing` — ``show`` commands workflow
 * :doc:`../working-with-coverage/merging` — ``merge`` options
 * :doc:`../reporting/exporting` — ``show code-coverage`` export formats
+* :doc:`../working-with-coverage/test-history` — ``history query`` / ``history stats``
+* :doc:`../working-with-coverage/testplan` — ``testplan import`` / ``testplan closure`` / ``testplan export-junit``
diff --git a/doc/source/reference/formats/ncdb-format.rst b/doc/source/reference/formats/ncdb-format.rst
index 9406219..577a3f1 100644
--- a/doc/source/reference/formats/ncdb-format.rst
+++ b/doc/source/reference/formats/ncdb-format.rst
@@ -1090,3 +1090,298 @@ To read an NCDB file without PyUCIS:
    * :doc:`sqlite-schema` — SQLite backend schema reference
    * :doc:`xml-interchange` — XML interchange format
    * :ref:`working-with-coverage-merging` — How to merge databases using the CLI
+
+-----------
+
+.. _ncdb-format-v2-history:
+
+*************************
+7. V2 binary test history
+*************************
+
+When ``manifest.json`` contains ``"history_format": "v2"`` the archive holds
+six additional binary members.  All integers are **little-endian** unless
+noted.
+
+7.1 ``history/test_registry.bin``
+==================================
+
+Maps stable integer IDs to test names and seed strings.  IDs are assigned by
+insertion order and never reassigned.
+
+.. code-block:: none
+
+    Header (17 bytes):
+      magic       u32   0x54524547  ('TREG')
+      version     u8    1
+      next_run_id u32   monotonically-increasing run counter
+      num_names   u32
+      num_seeds   u32
+
+    Offset tables (immediately after header):
+      name_offsets  u32[num_names]  byte offset into name heap
+      seed_offsets  u32[num_seeds]  byte offset into seed heap
+
+    Heaps (NUL-terminated UTF-8 strings):
+      name_heap  NUL-terminated strings in name_id order
+      seed_heap  NUL-terminated strings in seed_id order
+
+7.2 ``history/test_stats.bin``
+================================
+
+One 72-byte entry per test name (indexed by name_id).
+
+.. code-block:: none
+
+    Header (9 bytes):
+      magic      u32   0x54535453  ('TSTS')
+      version    u8    1
+      num_entries u32
+
+    Entry (72 bytes, repeated num_entries times):
+      name_id      u32
+      total_runs   u32
+      pass_count   u32
+      fail_count   u32
+      error_count  u32
+      skip_count   u32
+      timeout_count u32
+      _reserved    u32   (padding, always 0)
+      mean_ms      f32   Welford running mean of runtime in milliseconds
+      m2_ms        f32   Welford running sum of squared deviations from the mean (variance = m2/n)
+      cusum_pos    f32   CUSUM positive accumulator for change detection
+      cusum_neg    f32   CUSUM negative accumulator
+      _pad1        f32   (reserved, 0.0)
+      _pad2        f32   (reserved, 0.0)
+      _pad3        f32   (reserved, 0.0)
+      flakiness_score i16  fixed-point 0–10000 representing 0.00–100.00 %
+      tag          u8[6] short ASCII label (NUL-padded)
+      last_status  u8    most-recent HIST_STATUS_* value
+      _trailing    u8    padding
+
+7.3 ``history/bucket_index.bin``
+==================================
+
+Index over the per-bucket run-record files.
+
+.. code-block:: none
+
+    Header (9 bytes):
+      magic       u32   0x42494458  ('BIDX')
+      version     u8    1
+      num_buckets u32
+
+    Entry (28 bytes, sorted by bucket_seq):
+      bucket_seq  u32
+      ts_start    u32   Unix timestamp of first record in bucket
+      ts_end      u32   Unix timestamp of last record in bucket
+      num_records u32
+      fail_count  u32
+      min_name_id u32
+      max_name_id u32
+
+7.4 ``history/NNNNNN.bin``
+============================
+
+Each bucket holds up to 10 000 run records, compressed with LZMA (sealed
+buckets) or DEFLATE level 1 (current open bucket).  After decompression:
+
+.. code-block:: none
+
+    Header (16 bytes):
+      magic       u32   0x42434B54  ('BCKT')
+      version     u8    1
+      num_records u32
+      num_names   u16
+      _pad        u8    (padding)
+      ts_base     u32   Unix timestamp of first record
+
+    Name index (12 bytes per unique name in this bucket):
+      name_id     u32   global name_id from test_registry
+      offset      u32   byte offset into name's record data
+      count       u16   number of records for this name
+      _pad        u8[2]
+
+    Columnar record data (one column per name, name_id order):
+      seeds[]         u8[count]           local seed index (≤ 255 unique/bucket)
+      ts_deltas[]     varint[count]       delta-encoded seconds from ts_base
+      status_flags[]  u8[count]           nibble-packed (high=status, low=flags)
+
+    Seed dictionary (appended after all record data):
+      num_local_seeds u8
+      seed_ids[]      u32[num_local_seeds]  global seed_ids
+
+Varint encoding: each value uses 1–5 bytes; the high bit of each byte
+indicates that more bytes follow (7 bits of value per byte, little-endian).
+
+7.5 ``history/contrib_index.bin``
+====================================
+
+Tracks which test runs contributed coverage so that squash can be replayed.
+
+.. code-block:: none
+
+    Header (12 bytes):
+      magic        u32   0x43494458  ('CIDX')
+      version      u8    1
+      policy       u8    merge-policy constant
+      watermark    u32   highest squashed run_id
+      num_active   u32
+
+    Entry (16 bytes, one per unsquashed run):
+      run_id    u32
+      name_id   u32
+      status    u8
+      flags     u8
+      _pad      u8[2]
+      ts        u32
+
+7.6 ``history/squash_log.bin``
+================================
+
+Append-only provenance log for squash events.
+
+.. code-block:: none
+
+    Header (9 bytes):
+      magic      u32   0x53514C47  ('SQLG')
+      version    u8    1
+      num_entries u32
+
+    Entry (24 bytes):
+      ts        u32   Unix timestamp of squash operation
+      policy    u8    merge-policy used
+      _pad      u8[3]
+      from_run  u32   first run_id squashed
+      to_run    u32   last run_id squashed (inclusive)
+      num_runs  u32   total runs processed
+      pass_runs u32   runs that passed
+
+----
+
+**********************************
+8. Testplan and Waivers JSON
+**********************************
+
+``testplan.json`` and ``waivers.json`` are optional UTF-8 JSON members
+stored at the ZIP root.  They are written by :class:`~ucis.ncdb.ncdb_writer.NcdbWriter`
+when the corresponding objects are attached to the database and are read
+transparently by :class:`~ucis.ncdb.ncdb_reader.NcdbReader`.
+
+8.1 ``testplan.json``
+======================
+
+.. code-block:: json
+
+    {
+      "format_version": 1,
+      "source_file": "uart.hjson",
+      "import_timestamp": "2025-01-01T00:00:00+00:00",
+      "testpoints": [
+        {
+          "name": "uart_reset",
+          "stage": "V1",
+          "desc": "Verify reset",
+          "tests": ["uart_smoke", "uart_reset_*"],
+          "tags": ["smoke"],
+          "na": false,
+          "source_template": "",
+          "requirements": [
+            {"id": "REQ-001", "desc": "Reset spec"}
+          ]
+        }
+      ],
+      "covergroups": [
+        {"name": "cg_reset", "desc": "Reset coverage"}
+      ]
+    }
+
+.. list-table:: testplan.json — top-level fields
+   :header-rows: 1
+   :widths: 25 15 60
+
+   * - Field
+     - Type
+     - Description
+   * - ``format_version``
+     - int
+     - Schema version; currently ``1``
+   * - ``source_file``
+     - string
+     - Path to the Hjson/JSON source that produced this plan
+   * - ``import_timestamp``
+     - ISO-8601 string
+     - UTC timestamp when the plan was last imported
+   * - ``testpoints``
+     - array
+     - Ordered list of :class:`~ucis.ncdb.testplan.Testpoint` objects
+   * - ``covergroups``
+     - array
+     - Ordered list of :class:`~ucis.ncdb.testplan.CovergroupEntry` objects
+
+Merger behaviour
+   When merging two ``.cdb`` files that both contain ``testplan.json``:
+
+   * **Same ``source_file``** — the entry with the later
+     ``import_timestamp`` is kept.
+   * **Different ``source_file``** — a warning is emitted and the merged
+     output contains no testplan.
+
+8.2 ``waivers.json``
+======================
+
+.. code-block:: json
+
+    {
+      "format_version": 1,
+      "waivers": [
+        {
+          "id": "W-001",
+          "scope_pattern": "top/uart/**",
+          "bin_pattern": "reset_*",
+          "rationale": "Deferred to V2",
+          "approver": "jdoe",
+          "approved_at": "2025-01-01T00:00:00",
+          "expires_at": "2026-01-01T00:00:00",
+          "status": "active"
+        }
+      ]
+    }
+
+.. list-table:: waivers.json — Waiver fields
+   :header-rows: 1
+   :widths: 25 15 60
+
+   * - Field
+     - Type
+     - Description
+   * - ``id``
+     - string
+     - Unique waiver identifier
+   * - ``scope_pattern``
+     - glob string
+     - Hierarchy path pattern; ``*`` = single segment, ``**`` = any depth
+   * - ``bin_pattern``
+     - glob string
+     - Coverage bin name pattern; same glob syntax as scope_pattern
+   * - ``rationale``
+     - string
+     - Human-readable reason for the waiver
+   * - ``approver``
+     - string
+     - Name or email of the approver
+   * - ``approved_at``
+     - ISO-8601 string
+     - Approval timestamp
+   * - ``expires_at``
+     - ISO-8601 string
+     - Expiry timestamp; empty string means no expiry
+   * - ``status``
+     - ``"active"`` | ``"expired"``
+     - Current status; :meth:`~ucis.ncdb.waivers.WaiverSet.active_at` filters
+       on both this field and ``expires_at``
+
+Merger behaviour
+   Waivers are unioned by ``id`` across all source files.  When the same
+   ``id`` appears in multiple sources the entry with the latest
+   ``approved_at`` is kept.
diff --git a/doc/source/working-with-coverage/exploring-tui.rst b/doc/source/working-with-coverage/exploring-tui.rst
index a7d5d65..f30cd69 100644
--- a/doc/source/working-with-coverage/exploring-tui.rst
+++ b/doc/source/working-with-coverage/exploring-tui.rst
@@ -53,6 +53,24 @@ Press the number key at any time to switch views.
     Statistical analysis: hit-count distribution histogram, mean/median/stddev,
     coverage tier breakdown (complete / high / medium / low), bin utilization rate.
 
+**6 — Code Coverage**
+    File-level code coverage table showing statement, branch, and toggle hit
+    rates per source file.  Requires a database with code-coverage data (e.g.
+    imported from Verilator).
+
+**7 — Test History**
+    Per-test contribution analysis.  Each row shows total and unique coverage
+    items hit by that test.  When the database includes v2 history (NCDB v2),
+    the detail panel also shows flake score, pass/fail counts, and mean CPU
+    time.  Sort with ``N`` (name), ``D`` (date), ``C`` (coverage), ``U``
+    (unique).
+
+**8 — Testplan**
+    Testplan closure status.  Requires a testplan embedded in the database
+    (see :doc:`testplan`).  Shows every testpoint with its stage, closure
+    status, and pass/fail counts.  The header displays a stage roll-up with
+    colour-coded progress.  Press ``r`` to refresh.
+
 Global Keys
 ===========
 
@@ -62,7 +80,7 @@ Global Keys
 
    * - Key
      - Action
-   * - ``1`` – ``5``
+   * - ``1`` – ``8``
      - Switch view
    * - ``?``
      - Help overlay
diff --git a/doc/source/working-with-coverage/index.rst b/doc/source/working-with-coverage/index.rst
index 6877174..38a72bf 100644
--- a/doc/source/working-with-coverage/index.rst
+++ b/doc/source/working-with-coverage/index.rst
@@ -16,3 +16,6 @@ After importing your coverage data, the typical analysis workflow is:
    exploring-tui
    analyzing
    comparing
+   test-history
+   testplan
+   reports
diff --git a/doc/source/working-with-coverage/reports.rst b/doc/source/working-with-coverage/reports.rst
new file mode 100644
index 0000000..8a9b018
--- /dev/null
+++ b/doc/source/working-with-coverage/reports.rst
@@ -0,0 +1,332 @@
+.. _reports:
+
+#############################
+Reports and CI/CD Integration
+#############################
+
+The :mod:`ucis.ncdb.reports` and :mod:`ucis.ncdb.testplan_export` modules
+provide structured reports for testplan closure, regression delta, and
+CI/CD export.  Every report function returns a typed dataclass with a
+``to_json()`` method; companion ``format_*()`` functions render the
+dataclass to human-readable text.
+
+.. contents:: On this page
+   :local:
+   :depth: 2
+
+-----------
+
+************************
+Closure and gate reports
+************************
+
+.. code-block:: python
+
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    from ucis.ncdb.testplan import get_testplan
+    from ucis.ncdb.testplan_closure import compute_closure
+    from ucis.ncdb.reports import (
+        report_testpoint_closure,
+        format_testpoint_closure,
+        report_stage_gate,
+        format_stage_gate,
+    )
+
+    db = NcdbUCIS("coverage.cdb")
+    plan = get_testplan(db)
+    results = compute_closure(plan, db)
+
+    # Print the closure table
+    summary = report_testpoint_closure(results)
+    print(format_testpoint_closure(summary))
+
+    # Evaluate a stage gate
+    gate = report_stage_gate(results, "V2", plan)
+    print(format_stage_gate(gate))
+
+    # Machine-readable JSON
+    import json
+    data = json.loads(summary.to_json())
+
+Stage-rollup output example::
+
+    Testpoint                          Stage  Status     Pass   Fail
+    ----------------------------------------------------------------
+    uart_reset                         V1     ✓ CLOSED      5      0
+    uart_loopback                      V2     ✗ FAILING     0      3
+    ----------------------------------------------------------------
+
+    Stage roll-up:
+      V1     [████████████████████] 1/1 (100.0%)
+      V2     [░░░░░░░░░░░░░░░░░░░░] 0/1 (0.0%)
+
+    Total: 1/2 closed  (0 N/A)
+
+-----------
+
+**********************
+Regression delta
+**********************
+
+Compare two closure result sets to find testpoints that changed
+status between runs::
+
+    from ucis.ncdb.reports import report_regression_delta, format_regression_delta
+
+    # Load two snapshots
+    results_baseline = compute_closure(plan, db_baseline)
+    results_current  = compute_closure(plan, db_current)
+
+    delta = report_regression_delta(results_current, results_baseline)
+    print(format_regression_delta(delta))
+    # Regression delta: +1 closed, -0 newly failing, 1 still open
+
+    # Machine-readable
+    print(delta.to_json())
+
+-----------
+
+**********************
+Reliability report
+**********************
+
+Compute per-testpoint flake scores from v2 history data::
+
+    from ucis.ncdb.reports import report_testpoint_reliability, format_testpoint_reliability
+
+    report = report_testpoint_reliability(results, db)
+    print(format_testpoint_reliability(report))
+
+Output example::
+
+    Testpoint                          Flake    Pass    Fail
+    --------------------------------------------------------
+    uart_loopback                      0.800       2       8  ⚠
+    uart_reset                         0.000      10       0
+
+-----------
+
+***********************
+Unexercised covergroups
+***********************
+
+Identify zero-hit or low-coverage covergroups::
+
+    from ucis.ncdb.reports import (
+        report_unexercised_covergroups,
+        format_unexercised_covergroups,
+    )
+
+    report = report_unexercised_covergroups(db, plan, low_threshold=50.0)
+    print(format_unexercised_covergroups(report))
+
+-----------
+
+**********************
+Coverage contribution
+**********************
+
+Show which tests contribute the most unique coverage bins
+(requires v2 history with contribution data)::
+
+    from ucis.ncdb.reports import (
+        report_coverage_contribution,
+        format_coverage_contribution,
+    )
+
+    report = report_coverage_contribution(db)
+    print(format_coverage_contribution(report))
+
+-----------
+
+**********************
+JUnit XML export
+**********************
+
+Export closure results as a JUnit XML file for CI dashboards::
+
+    from ucis.ncdb.testplan_export import export_junit_xml
+
+    export_junit_xml(results, "closure_results.xml")
+
+Or via the CLI::
+
+    pyucis testplan export-junit coverage.cdb --out closure_results.xml
+
+The XML maps each testpoint to a ``<testcase>`` element.  FAILING and
+PARTIAL testpoints become ``<failure>`` elements; NOT_RUN becomes
+``<skipped>``.
+
+-----------
+
+**********************
+GitHub Annotations
+**********************
+
+Emit GitHub Actions `workflow commands`_ for inline PR annotations::
+
+    from ucis.ncdb.testplan_export import export_github_annotations
+
+    export_github_annotations(results)  # writes to stdout
+
+    # Or capture to a string
+    import io
+    buf = io.StringIO()
+    export_github_annotations(results, output=buf)
+    print(buf.getvalue())
+
+In a GitHub Actions workflow::
+
+    - name: Compute closure
+      run: |
+        python -c "
+        from ucis.ncdb.ncdb_ucis import NcdbUCIS
+        from ucis.ncdb.testplan import get_testplan
+        from ucis.ncdb.testplan_closure import compute_closure
+        from ucis.ncdb.testplan_export import export_github_annotations
+        db = NcdbUCIS('coverage.cdb')
+        plan = get_testplan(db)
+        results = compute_closure(plan, db)
+        export_github_annotations(results)
+        "
+
+.. _workflow commands: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions
+
+-----------
+
+**********************
+Test budget by stage
+**********************
+
+Estimate CPU-hour cost per stage from v2 test history mean CPU times::
+
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    from ucis.ncdb.testplan import get_testplan
+    from ucis.ncdb.reports import report_test_budget, format_test_budget
+
+    db   = NcdbUCIS("coverage.cdb")
+    plan = get_testplan(db)
+    rpt  = report_test_budget(plan, db)
+    print(format_test_budget(rpt))
+
+    # JSON export
+    import json
+    print(json.loads(rpt.to_json())["stage_totals"])
+
+Testpoints whose tests have no CPU time recorded appear in
+``rpt.missing_stats``.
+
+-----------
+
+**************************
+Safety traceability matrix
+**************************
+
+Build a requirement-to-testpoint matrix (suitable for safety audits)::
+
+    from ucis.ncdb.reports import report_safety_matrix, format_safety_matrix
+
+    rpt = report_safety_matrix(results)          # results from compute_closure
+    print(format_safety_matrix(rpt))
+
+    # CSV for a spreadsheet or audit tool
+    with open("traceability.csv", "w") as f:
+        f.write(rpt.to_csv())
+
+    # Add a WaiverSet to flag waived testpoints
+    from ucis.ncdb.waivers import WaiverSet
+    waivers = WaiverSet.from_file("waivers.hjson")
+    rpt = report_safety_matrix(results, waivers=waivers)
+
+-----------
+
+*************************
+Seed reliability heat-map
+*************************
+
+Identify seeds that are disproportionately flaky::
+
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    from ucis.ncdb.reports import report_seed_reliability, format_seed_reliability
+
+    db  = NcdbUCIS("coverage.cdb")
+    rpt = report_seed_reliability(db, "uart_smoke")
+    print(format_seed_reliability(rpt))
+    # Seeds with flake_score ≥ 0.2 are flagged with ⚠
+
+    # JSON heat-map for a custom dashboard
+    import json
+    data = json.loads(rpt.to_json())
+    for row in data["rows"]:
+        if row["flake"] >= 0.2:
+            print(f"Seed {row['seed']}: {row['fail']} failures")
+
+-----------
+
+**********************
+GitHub Step Summary
+**********************
+
+Write a markdown table to ``$GITHUB_STEP_SUMMARY``::
+
+    import os
+    from ucis.ncdb.testplan_export import export_summary_markdown
+
+    md = export_summary_markdown(results, stage_gate=gate)
+    with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
+        f.write(md)
+
+The output includes a stage roll-up table, per-testpoint status rows,
+and (when *stage_gate* is supplied) a gate verdict with a list of
+blocking testpoints.
+
+-----------
+
+**********************
+API reference
+**********************
+
+.. autofunction:: ucis.ncdb.reports.report_testpoint_closure
+.. autofunction:: ucis.ncdb.reports.format_testpoint_closure
+.. autoclass:: ucis.ncdb.reports.ClosureSummary
+
+.. autofunction:: ucis.ncdb.reports.report_stage_gate
+.. autofunction:: ucis.ncdb.reports.format_stage_gate
+.. autoclass:: ucis.ncdb.reports.StageGateReport
+
+.. autofunction:: ucis.ncdb.reports.report_regression_delta
+.. autofunction:: ucis.ncdb.reports.format_regression_delta
+.. autoclass:: ucis.ncdb.reports.RegressionDelta
+
+.. autofunction:: ucis.ncdb.reports.report_testpoint_reliability
+.. autofunction:: ucis.ncdb.reports.format_testpoint_reliability
+.. autoclass:: ucis.ncdb.reports.TestpointReliability
+
+.. autofunction:: ucis.ncdb.reports.report_unexercised_covergroups
+.. autofunction:: ucis.ncdb.reports.format_unexercised_covergroups
+.. autoclass:: ucis.ncdb.reports.UnexercisedCovergroups
+
+.. autofunction:: ucis.ncdb.reports.report_coverage_contribution
+.. autofunction:: ucis.ncdb.reports.format_coverage_contribution
+.. autoclass:: ucis.ncdb.reports.CoverageContribution
+
+.. autofunction:: ucis.ncdb.reports.report_test_budget
+.. autofunction:: ucis.ncdb.reports.format_test_budget
+.. autoclass:: ucis.ncdb.reports.TestBudget
+
+.. autofunction:: ucis.ncdb.reports.report_safety_matrix
+.. autofunction:: ucis.ncdb.reports.format_safety_matrix
+.. autoclass:: ucis.ncdb.reports.SafetyMatrix
+
+.. autofunction:: ucis.ncdb.reports.report_seed_reliability
+.. autofunction:: ucis.ncdb.reports.format_seed_reliability
+.. autoclass:: ucis.ncdb.reports.SeedReliability
+
+.. autofunction:: ucis.ncdb.testplan_export.export_junit_xml
+.. autofunction:: ucis.ncdb.testplan_export.export_github_annotations
+.. autofunction:: ucis.ncdb.testplan_export.export_summary_markdown
+
+.. seealso::
+
+   * :ref:`testplan` — Testplan format and closure computation
+   * :ref:`test-history` — Binary test history API
diff --git a/doc/source/working-with-coverage/test-history.rst b/doc/source/working-with-coverage/test-history.rst
new file mode 100644
index 0000000..cb1912a
--- /dev/null
+++ b/doc/source/working-with-coverage/test-history.rst
@@ -0,0 +1,234 @@
+.. _test-history:
+
+############
+Test History
+############
+
+PyUCIS stores a rich, time-series history of every test run inside each NCDB
+``.cdb`` file.  Introduced in NCDB v2, this *binary test history* is separate
+from the legacy UCIS history-node model and is designed for:
+
+* **Trend analysis** — identify flaky or consistently-failing tests over
+  hundreds or thousands of runs.
+* **Regression detection** — spot when a test's pass rate drops using a
+  CUSUM change-point algorithm.
+* **Coverage provenance** — trace exactly which test runs contributed to the
+  squashed coverage numbers.
+
+.. contents:: On this page
+   :local:
+   :depth: 2
+
+-----------
+
+**********************
+Quick-start
+**********************
+
+Record test results with :meth:`~ucis.ncdb.ncdb_ucis.NcdbUCIS.add_test_run`::
+
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    from ucis.ncdb.ncdb_writer import NcdbWriter
+    from ucis.ncdb.constants import HIST_STATUS_OK, HIST_STATUS_FAIL
+    from ucis.mem.mem_ucis import MemUCIS
+
+    # Create or open a .cdb
+    NcdbWriter().write(MemUCIS(), "coverage.cdb")   # once, to initialise
+    db = NcdbUCIS("coverage.cdb")
+
+    # Record runs
+    db.add_test_run("uart_smoke", seed="42", status=HIST_STATUS_OK,
+                    ts=1700000000, has_coverage=True)
+    db.add_test_run("uart_smoke", seed="43", status=HIST_STATUS_FAIL,
+                    ts=1700003600, has_coverage=False)
+
+    # Save
+    NcdbWriter().write(db, "coverage.cdb")
+
+Query the results::
+
+    db2 = NcdbUCIS("coverage.cdb")
+
+    # All runs for one test
+    history = db2.query_test_history("uart_smoke")
+    for rec in history:
+        print(rec.ts, rec.status)
+
+    # Aggregate statistics
+    entry = db2.get_test_stats("uart_smoke")
+    print(f"total={entry.total_runs}  pass={entry.pass_count}  fail={entry.fail_count}")
+
+    # Top-flaky across all tests
+    for entry in db2.top_flaky_tests(10):
+        print(entry.name_id, entry.flakiness_score)
+
+-----------
+
+**********************
+API reference
+**********************
+
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.add_test_run
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.query_test_history
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.get_test_stats
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.top_flaky_tests
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.top_failing_tests
+.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.squash_coverage
+
+-----------
+
+**********************
+Status and flag values
+**********************
+
+Status constants (in :mod:`ucis.ncdb.constants`):
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Constant
+     - Meaning
+   * - ``HIST_STATUS_OK``
+     - Run passed
+   * - ``HIST_STATUS_FAIL``
+     - Run failed
+   * - ``HIST_STATUS_ERROR``
+     - Test infrastructure error (not a test-logic failure)
+   * - ``HIST_STATUS_TIMEOUT``
+     - Run exceeded wall-clock budget
+   * - ``HIST_STATUS_SKIP``
+     - Run was explicitly skipped
+
+Flag constants (combinable with ``|``):
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Constant
+     - Meaning
+   * - ``HIST_FLAG_HAS_COV``
+     - Run produced coverage data (counts.bin was updated)
+   * - ``HIST_FLAG_REGRESS``
+     - Run is part of a regression sweep
+   * - ``HIST_FLAG_RERUN``
+     - This is a re-run of a previously recorded test
+
+-----------
+
+**********************
+Time-range queries
+**********************
+
+:meth:`~ucis.ncdb.ncdb_ucis.NcdbUCIS.query_test_history` accepts optional
+``ts_from`` and ``ts_to`` Unix-timestamp bounds::
+
+    import time
+    yesterday = int(time.time()) - 86400
+
+    # Only runs from the last 24 hours
+    recent = db.query_test_history("my_test", ts_from=yesterday)
+
+The call uses the bucket index to skip buckets whose time ranges do not
+overlap the requested window, so queries over large history stores are fast
+even when only a small window is requested.
+
+-----------
+
+**********************
+Merging history
+**********************
+
+History is merged automatically when two or more ``.cdb`` files are combined
+with :class:`~ucis.ncdb.ncdb_merger.NcdbMerger`::
+
+    from ucis.ncdb.ncdb_merger import NcdbMerger
+
+    NcdbMerger().merge(["run_a.cdb", "run_b.cdb"], "merged.cdb")
+
+The merger performs:
+
+1. **Registry union** — all test names and seed strings from all sources are
+   collected into a single merged registry, preserving insertion order.
+2. **Stats merge** — per-test aggregate metrics (mean runtime, variance, pass
+   rate) are combined using Chan's parallel formula for numerically stable
+   Welford-style mean/variance.
+3. **Bucket remap** — name_ids in each source's bucket files are remapped to
+   the merged registry before being written to the output.
+4. **Contrib-index remap** — run_ids in the contribution index are offset by
+   the source's base run_id so merged run_ids remain globally unique.
+
+.. note::
+
+   Merging does not de-duplicate runs: merging a file with itself leaves
+   rate-based statistics (such as pass rate and mean runtime) unchanged, but
+   every run count doubles, so the operation is not strictly idempotent.
+
+-----------
+
+**********************
+Squash coverage
+**********************
+
+Over time a ``.cdb`` accumulates contribution entries for every test run
+that produced coverage.  Squashing compresses these entries into the main
+coverage counts and frees space::
+
+    db.squash_coverage(policy=POLICY_PASS_ONLY)
+    NcdbWriter().write(db, "coverage.cdb")
+
+The squash event is recorded in the squash log so that provenance is never
+lost.  The ``policy`` argument controls which runs are squashed:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Constant
+     - Behaviour
+   * - ``POLICY_PASS_ONLY``
+     - Squash only runs with ``HIST_STATUS_OK``
+   * - ``POLICY_ALL``
+     - Squash all runs regardless of status
+
+-----------
+
+**********************
+Binary format overview
+**********************
+
+The v2 test history is stored as several members inside the NCDB ZIP archive.
+A ``history_format`` key in ``manifest.json`` selects the version:
+
+* ``"v1"`` — legacy UCIS history-node model (no binary history)
+* ``"v2"`` — binary test history (this section)
+
+Binary members added for v2:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 35 65
+
+   * - ZIP member
+     - Contents
+   * - ``history/test_registry.bin``
+     - Ordered list of test names and seed strings with stable integer IDs
+   * - ``history/test_stats.bin``
+     - Per-test aggregate metrics (72 bytes/test)
+   * - ``history/bucket_index.bin``
+     - Index of time-bucketed run-record files (28 bytes/entry)
+   * - ``history/NNNNNN.bin``
+     - Individual run-record buckets (LZMA or DEFLATE compressed)
+   * - ``history/contrib_index.bin``
+     - Per-run coverage-contribution entries
+   * - ``history/squash_log.bin``
+     - Append-only log of squash events
+
+For the full binary layout see :ref:`ncdb-format-v2-history` in the format
+reference.
+
+.. seealso::
+
+   * :ref:`ncdb-format` — Full NCDB binary format specification
+   * :doc:`merging` — How to merge ``.cdb`` files on the command line
+   * :doc:`analyzing` — Query and report coverage from the CLI
diff --git a/doc/source/working-with-coverage/testplan.rst b/doc/source/working-with-coverage/testplan.rst
new file mode 100644
index 0000000..ed20e9f
--- /dev/null
+++ b/doc/source/working-with-coverage/testplan.rst
@@ -0,0 +1,259 @@
+.. _testplan:
+
+####################
+Testplan Integration
+####################
+
+PyUCIS can embed a structured *testplan* inside each NCDB ``.cdb`` file.
+A testplan describes the verification tasks (testpoints) and functional
+coverage groups expected for a design.  Together with the binary test
+history (see :ref:`test-history`) it enables:
+
+* **Closure reporting** — did every testpoint's tests actually pass?
+* **Stage gate evaluation** — are all V1/V2/V3 testpoints closed?
+* **Merge propagation** — the testplan travels with the database so
+  reports always use the correct plan.
+
+.. contents:: On this page
+   :local:
+   :depth: 2
+
+-----------
+
+**********************
+Quick-start
+**********************
+
+Import an OpenTitan-style Hjson testplan and embed it in a ``.cdb``::
+
+    from ucis.ncdb.testplan_hjson import import_hjson
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    from ucis.ncdb.ncdb_writer import NcdbWriter
+
+    plan = import_hjson("uart_testplan.hjson",
+                        substitutions={"baud": ["9600", "115200"]})
+
+    db = NcdbUCIS("coverage.cdb")
+    db.setTestplan(plan)
+    NcdbWriter().write(db, "coverage.cdb")
+
+Compute closure against the embedded testplan::
+
+    from ucis.ncdb.testplan_closure import compute_closure, stage_gate_status
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+
+    db = NcdbUCIS("coverage.cdb")
+    plan = db.getTestplan()
+    results = compute_closure(plan, db)
+
+    for r in results:
+        print(f"{r.testpoint.name:30s} {r.status.value}")
+
+    gate = stage_gate_status(results, "V2", plan)
+    print(gate["message"])
+
+-----------
+
+**********************
+Testplan format
+**********************
+
+A testplan is stored as ``testplan.json`` inside the NCDB ZIP and is also
+exportable as a standalone JSON file.  The schema is::
+
+    {
+      "format_version": 1,
+      "source_file": "path/to/uart.hjson",
+      "import_timestamp": "2025-01-01T00:00:00+00:00",
+      "testpoints": [
+        {
+          "name": "uart_reset",
+          "stage": "V1",
+          "desc": "Verify reset behaviour",
+          "tests": ["uart_smoke", "uart_init_*"],
+          "tags": ["smoke"],
+          "na": false,
+          "source_template": ""
+        }
+      ],
+      "covergroups": [
+        {"name": "cg_uart_reset", "desc": "Reset coverage"}
+      ]
+    }
+
+Stages follow the OpenTitan V1 → V2 → V2S → V3 hierarchy; custom strings
+are also accepted and sort after V3 in gate evaluation.
+
+-----------
+
+**********************
+Importing Hjson
+**********************
+
+Use :func:`~ucis.ncdb.testplan_hjson.import_hjson` to parse an OpenTitan
+``.hjson`` testplan (or a standard ``.json`` file)::
+
+    plan = import_hjson(
+        "uart_testplan.hjson",
+        substitutions={
+            "uart":  ["uart0", "uart1"],
+            "mode":  ["loopback", "normal"],
+        },
+    )
+
+The ``substitutions`` dict provides values for ``{key}`` placeholders in
+test name templates.  A list value generates the cartesian product of all
+combinations::
+
+    # Template: "{uart}_{mode}_test"
+    # Substitutions: uart=["uart0","uart1"], mode=["loopback","normal"]
+    # Expands to: uart0_loopback_test, uart0_normal_test,
+    #             uart1_loopback_test, uart1_normal_test
+
+Tests listed as ``["N/A"]`` are treated as intentionally unmapped
+(``testpoint.na = True``).
+
+-----------
+
+**********************
+Closure computation
+**********************
+
+:func:`~ucis.ncdb.testplan_closure.compute_closure` evaluates each
+testpoint against the test history stored in the database:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 80
+
+   * - Status
+     - Meaning
+   * - ``CLOSED``
+     - All mapped tests have at least one passing run
+   * - ``PARTIAL``
+     - Some passing, some failing
+   * - ``FAILING``
+     - All mapped tests failed
+   * - ``NOT_RUN``
+     - None of the mapped tests appear in the database
+   * - ``N/A``
+     - Testpoint has ``na = True``
+   * - ``UNIMPLEMENTED``
+     - Testpoint has an empty ``tests`` list
+
+Test name matching uses three strategies in order:
+
+1. **Exact** — the test name appears literally in ``testpoint.tests``.
+2. **Seed-suffix strip** — trailing ``_\d+`` is removed and the result
+   matched exactly (e.g. ``uart_smoke_42`` → ``uart_smoke``).
+3. **Wildcard** — any ``testpoint.tests`` entry ending in ``_*`` is used
+   as a prefix match.
+
+-----------
+
+**********************
+Stage gate evaluation
+**********************
+
+:func:`~ucis.ncdb.testplan_closure.stage_gate_status` determines whether
+a regression is ready to advance to the next stage::
+
+    gate = stage_gate_status(results, "V2", plan)
+    if gate["passed"]:
+        print("Ready to tape-out!")
+    else:
+        for r in gate["blocking"]:
+            print(f"  BLOCKING: {r.testpoint.name}")
+
+The gate passes when all testpoints at the target stage **and all stages
+below it** (V1 < V2 < V2S < V3) are CLOSED or N/A.
+
+-----------
+
+**********************
+Waivers
+**********************
+
+Coverage and test failures can be suppressed with a
+:class:`~ucis.ncdb.waivers.WaiverSet`::
+
+    from ucis.ncdb.waivers import Waiver, WaiverSet
+
+    ws = WaiverSet([
+        Waiver(
+            id="W-001",
+            scope_pattern="top/uart/**",
+            bin_pattern="reset_*",
+            rationale="Reset coverage deferred to V2",
+            approver="eng",
+            approved_at="2025-01-01T00:00:00",
+            expires_at="2026-01-01T00:00:00",
+        )
+    ])
+
+    db.setWaivers(ws)
+    NcdbWriter().write(db, "coverage.cdb")
+
+Scope patterns use glob syntax: ``*`` matches a single path segment,
+``**`` matches any number of segments.  Expiry enforcement is the
+caller's responsibility — use :meth:`~ucis.ncdb.waivers.WaiverSet.active_at`
+to filter out expired waivers before passing to closure::
+
+    import time
+    now = time.strftime("%Y-%m-%dT%H:%M:%S")
+    active_waivers = ws.active_at(now)
+
+-----------
+
+**********************
+Modes A and B
+**********************
+
+**Mode A (embedded)** — testplan stored inside the ``.cdb``::
+
+    db.setTestplan(plan)
+    NcdbWriter().write(db, "coverage.cdb")
+
+    # Read back — travels with the database
+    db2 = NcdbUCIS("coverage.cdb")
+    plan2 = db2.getTestplan()
+
+**Mode B (standalone)** — testplan kept as a separate file::
+
+    plan.save("uart_testplan_snapshot.json")
+
+    # Load later and pass to analysis functions
+    plan = Testplan.load("uart_testplan_snapshot.json")
+    results = compute_closure(plan, db)
+
+Both modes produce the same :class:`~ucis.ncdb.testplan.Testplan` object.
+The helper :func:`~ucis.ncdb.testplan.get_testplan` works with both::
+
+    from ucis.ncdb.testplan import get_testplan
+    plan = get_testplan(db)   # works for NcdbUCIS or MemUCIS
+
+-----------
+
+**********************
+API reference
+**********************
+
+.. autofunction:: ucis.ncdb.testplan_hjson.import_hjson
+.. autoclass:: ucis.ncdb.testplan.Testplan
+   :members: getTestpoint, testpointForTest, testpointsForStage, stages,
+             add_testpoint, serialize, from_bytes, load, save, stamp_import_time
+.. autoclass:: ucis.ncdb.testplan.Testpoint
+.. autoclass:: ucis.ncdb.testplan.CovergroupEntry
+.. autoclass:: ucis.ncdb.testplan.RequirementLink
+.. autofunction:: ucis.ncdb.testplan_closure.compute_closure
+.. autofunction:: ucis.ncdb.testplan_closure.stage_gate_status
+.. autoclass:: ucis.ncdb.testplan_closure.TPStatus
+.. autoclass:: ucis.ncdb.testplan_closure.TestpointResult
+.. autoclass:: ucis.ncdb.waivers.WaiverSet
+   :members: add, matches_scope, active_at, get, serialize, from_bytes, load, save
+.. autoclass:: ucis.ncdb.waivers.Waiver
+
+.. seealso::
+
+   * :ref:`test-history` — Binary test history API
+   * :ref:`ncdb-format` — NCDB binary format specification
diff --git a/src/ucis/__main__.py b/src/ucis/__main__.py
index 64a3355..bf06819 100644
--- a/src/ucis/__main__.py
+++ b/src/ucis/__main__.py
@@ -7,6 +7,7 @@
 from ucis.cmd import cmd_list_db_formats
 from ucis.cmd import cmd_list_report_formats
 from ucis.cmd import cmd_report, cmd_merge, cmd_convert, cmd_show
+from ucis.cmd import cmd_history, cmd_testplan
 import sys
 import traceback
 import os
@@ -294,7 +295,121 @@ def get_parser():
         help="Specifies the format of the input database. Defaults to 'xml'")
     view.add_argument("db", help="Path to the coverage database")
     view.set_defaults(func=lambda args: _launch_tui(args))
-    
+
+    # -----------------------------------------------------------------------
+    # history subcommand
+    # -----------------------------------------------------------------------
+    history = subparser.add_parser(
+        "history",
+        help="Query and display test history from an NCDB .cdb file",
+    )
+    history_sub = history.add_subparsers(dest="history_cmd")
+    history_sub.required = True
+
+    history_query = history_sub.add_parser(
+        "query",
+        help="Display history records for a specific test",
+    )
+    history_query.add_argument("db", help="Path to the NCDB .cdb file")
+    history_query.add_argument("test_name", help="Test name to query")
+    history_query.add_argument("--from", dest="from_",
+        metavar="DATE", default=None,
+        help="Start date (ISO 8601 or Unix timestamp)")
+    history_query.add_argument("--to", default=None,
+        metavar="DATE",
+        help="End date (ISO 8601 or Unix timestamp)")
+    history_query.add_argument("--out", "-o", default=None,
+        help="Output file (default: stdout)")
+    history_query.add_argument(
+        "--output-format", "-of", default="text",
+        choices=["text", "json"],
+        help="Output format (default: text)",
+    )
+    history_query.set_defaults(func=cmd_history.cmd_history_query)
+
+    history_stats = history_sub.add_parser(
+        "stats",
+        help="Show test statistics (flaky, failing, or named test)",
+    )
+    history_stats.add_argument("db", help="Path to the NCDB .cdb file")
+    history_stats.add_argument("test_name", nargs="?", default=None,
+        help="Show stats for a specific test name")
+    history_stats.add_argument("--top-flaky", metavar="N", type=int, default=None,
+        help="Show top N flaky tests")
+    history_stats.add_argument("--top-failing", metavar="N", type=int, default=None,
+        help="Show top N failing tests")
+    history_stats.add_argument("--out", "-o", default=None,
+        help="Output file (default: stdout)")
+    history_stats.add_argument(
+        "--output-format", "-of", default="text",
+        choices=["text", "json"],
+        help="Output format (default: text)",
+    )
+    history_stats.set_defaults(func=cmd_history.cmd_history_stats)
+
+    # -----------------------------------------------------------------------
+    # testplan subcommand
+    # -----------------------------------------------------------------------
+    testplan = subparser.add_parser(
+        "testplan",
+        help="Manage and evaluate testplans embedded in NCDB .cdb files",
+    )
+    testplan_sub = testplan.add_subparsers(dest="testplan_cmd")
+    testplan_sub.required = True
+
+    testplan_import = testplan_sub.add_parser(
+        "import",
+        help="Import an Hjson/JSON testplan and embed it in a .cdb file",
+    )
+    testplan_import.add_argument("db", help="Path to the NCDB .cdb file")
+    testplan_import.add_argument("hjson_path",
+        help="Path to the .hjson or .json testplan file")
+    testplan_import.add_argument(
+        "--subs", metavar="KEY=VAL", action="append", default=[],
+        help="Template substitution (repeatable): e.g. --subs uart=uart0",
+    )
+    testplan_import.set_defaults(func=cmd_testplan.cmd_testplan_import)
+
+    testplan_closure = testplan_sub.add_parser(
+        "closure",
+        help="Compute and display testpoint closure",
+    )
+    testplan_closure.add_argument("db", help="Path to the NCDB .cdb file")
+    testplan_closure.add_argument("--testplan", default=None,
+        metavar="PATH",
+        help="External testplan JSON file (overrides embedded)")
+    testplan_closure.add_argument("--waivers", default=None,
+        metavar="PATH",
+        help="External waivers JSON file (overrides embedded)")
+    testplan_closure.add_argument("--stage", default=None,
+        metavar="STAGE",
+        help="Evaluate a stage gate (e.g. V2)")
+    testplan_closure.add_argument("--out", "-o", default=None,
+        help="Output file (default: stdout)")
+    testplan_closure.add_argument(
+        "--output-format", "-of", default="text",
+        choices=["text", "json"],
+        help="Output format (default: text)",
+    )
+    testplan_closure.set_defaults(func=cmd_testplan.cmd_testplan_closure)
+
+    testplan_export_junit = testplan_sub.add_parser(
+        "export-junit",
+        help="Export testpoint closure results as JUnit XML",
+    )
+    testplan_export_junit.add_argument("db", help="Path to the NCDB .cdb file")
+    testplan_export_junit.add_argument("--testplan", default=None,
+        metavar="PATH",
+        help="External testplan JSON file (overrides embedded)")
+    testplan_export_junit.add_argument("--out", "-o", default=None,
+        help="Output XML file (default: closure_results.xml)")
+    testplan_export_junit.add_argument("--suite-name", default=None,
+        metavar="NAME",
+        help="JUnit testsuite name attribute")
+    testplan_export_junit.set_defaults(
+        func=cmd_testplan.cmd_testplan_export_junit
+    )
+
     return parser
 
 def _launch_tui(args):
diff --git a/src/ucis/cmd/cmd_history.py b/src/ucis/cmd/cmd_history.py
new file mode 100644
index 0000000..d4af37e
--- /dev/null
+++ b/src/ucis/cmd/cmd_history.py
@@ -0,0 +1,140 @@
+"""``pyucis history`` CLI subcommands.
+
+Subcommands
+-----------
+query   Display history records for a specific test name.
+stats   Show aggregate statistics (top-failing, top-flaky, or named test).
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import time
+
+
+def _open_ncdb(path: str):
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    return NcdbUCIS(path)
+
+
+def _ts(ts_arg: str | None) -> int | None:
+    """Parse an ISO date string or integer unix timestamp (``None`` passes through)."""
+    if ts_arg is None:
+        return None
+    try:
+        return int(ts_arg)
+    except ValueError:
+        import datetime
+        dt = datetime.datetime.fromisoformat(ts_arg)
+        return int(dt.timestamp())  # naive datetimes are interpreted as local time
+
+
+# ---------------------------------------------------------------------------
+# history query
+# ---------------------------------------------------------------------------
+
+def cmd_history_query(args) -> None:
+    """Execute ``pyucis history query``."""
+    db = _open_ncdb(args.db)
+    ts_from = _ts(getattr(args, "from_", None))
+    ts_to = _ts(getattr(args, "to", None))
+
+    records = db.query_test_history(args.test_name, ts_from=ts_from, ts_to=ts_to)
+
+    fmt = getattr(args, "output_format", "text")
+    out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout
+
+    try:
+        if fmt == "json":
+            data = [
+                {
+                    "ts": r.ts,
+                    "date": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(r.ts)),
+                    "status": "pass" if r.status == 0 else "fail",
+                    "seed_id": r.seed_id,
+                }
+                for r in records
+            ]
+            out.write(json.dumps(data, indent=2) + "\n")
+        else:
+            out.write(
+                f"{'Date':<20} {'Status':<8} {'Seed':>12}\n"
+            )
+            out.write("-" * 42 + "\n")
+            for r in records:
+                date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(r.ts))
+                status = "pass" if r.status == 0 else "fail"
+                out.write(f"{date:<20} {status:<8} {r.seed_id:>12}\n")
+            out.write(f"\nTotal records: {len(records)}\n")
+    finally:
+        if out is not sys.stdout:
+            out.close()
+
+
+# ---------------------------------------------------------------------------
+# history stats
+# ---------------------------------------------------------------------------
+
+def cmd_history_stats(args) -> None:
+    """Execute ``pyucis history stats``."""
+    db = _open_ncdb(args.db)
+    fmt = getattr(args, "output_format", "text")
+    out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout
+
+    try:
+        top_flaky = getattr(args, "top_flaky", None)
+        top_failing = getattr(args, "top_failing", None)
+        test_name = getattr(args, "test_name", None)
+
+        if test_name:
+            stats = db.get_test_stats(test_name)
+            if stats is None:
+                out.write(f"No stats found for test '{test_name}'\n")
+                return
+            if fmt == "json":
+                d = {
+                    "name": test_name,
+                    "total_runs": stats.total_runs,
+                    "pass_count": stats.pass_count,
+                    "fail_count": stats.fail_count,
+                    "flake_score": stats.flake_score,
+                    "mean_cpu_time": stats.mean_cpu_time,
+                    "grade_score": stats.grade_score,
+                    "last_status": stats.last_status,
+                }
+                out.write(json.dumps(d, indent=2) + "\n")
+            else:
+                out.write(f"Test: {test_name}\n")
+                out.write(f"  Total runs:  {stats.total_runs}\n")
+                out.write(f"  Pass:        {stats.pass_count}\n")
+                out.write(f"  Fail:        {stats.fail_count}\n")
+                out.write(f"  Flake score: {stats.flake_score:.3f}\n")
+                out.write(f"  Mean CPU:    {stats.mean_cpu_time:.2f}s\n")
+                out.write(f"  Grade score: {stats.grade_score:.3f}\n")
+            return
+
+        rows = []
+        if top_flaky:
+            rows = db.top_flaky_tests(n=top_flaky)
+            title = f"Top {top_flaky} flaky tests"
+        elif top_failing:
+            rows = db.top_failing_tests(n=top_failing)
+            title = f"Top {top_failing} failing tests"
+        else:
+            rows = db.top_flaky_tests(n=20)
+            title = "Top 20 flaky tests"
+
+        if fmt == "json":
+            out.write(json.dumps(rows, indent=2) + "\n")
+        else:
+            out.write(f"{title}\n")
+            out.write("-" * 60 + "\n")
+            col = max((len(r[0]) for r in rows), default=10) + 2 if rows else 30
+            out.write(f"{'Test':<{col}} {'Score':>8} {'Pass':>7} {'Fail':>7}\n")
+            out.write("-" * (col + 26) + "\n")
+            for name, score, pc, fc in rows:
+                out.write(f"{name:<{col}} {score:>8.3f} {pc:>7} {fc:>7}\n")
+    finally:
+        if out is not sys.stdout:
+            out.close()
diff --git a/src/ucis/cmd/cmd_testplan.py b/src/ucis/cmd/cmd_testplan.py
new file mode 100644
index 0000000..890c319
--- /dev/null
+++ b/src/ucis/cmd/cmd_testplan.py
@@ -0,0 +1,153 @@
+"""``pyucis testplan`` CLI subcommands.
+
+Subcommands
+-----------
+import      Import an Hjson/JSON testplan and embed it in a .cdb file.
+closure     Compute testpoint closure and display a report.
+export-junit  Export closure results as JUnit XML.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+
+
+def _open_ncdb(path: str):
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    return NcdbUCIS(path)
+
+
+# ---------------------------------------------------------------------------
+# testplan import
+# ---------------------------------------------------------------------------
+
+def cmd_testplan_import(args) -> None:
+    """Execute ``pyucis testplan import``."""
+    from ucis.ncdb.testplan_hjson import import_hjson
+    from ucis.ncdb.ncdb_writer import NcdbWriter
+    import os
+
+    # Parse repeatable "KEY=VAL" pairs; repeated keys accumulate into a list.
+    subs: dict = {}
+    for s in getattr(args, "subs", []) or []:
+        k, sep, v = s.partition("=")
+        if not sep:
+            # A pair without '=' cannot be applied; warn rather than drop it silently.
+            print(f"Warning: ignoring malformed --subs '{s}' (expected KEY=VAL)",
+                  file=sys.stderr)
+            continue
+        subs.setdefault(k, []).append(v)
+
+    plan = import_hjson(args.hjson_path, substitutions=subs if subs else None)
+    db = _open_ncdb(args.db)
+    db.setTestplan(plan)
+
+    # Write to a temp file then replace, so the original is never half-written
+    tmp = args.db + ".tmp"
+    NcdbWriter().write(db, tmp)
+    os.replace(tmp, args.db)
+
+    print(
+        f"Imported testplan from '{args.hjson_path}': "
+        f"{len(plan.testpoints)} testpoints, "
+        f"{len(plan.covergroups)} covergroups"
+    )
+
+
+# ---------------------------------------------------------------------------
+# testplan closure
+# ---------------------------------------------------------------------------
+
+def cmd_testplan_closure(args) -> None:
+    """Execute ``pyucis testplan closure``."""
+    from ucis.ncdb.testplan import get_testplan, Testplan
+    from ucis.ncdb.testplan_closure import compute_closure
+    from ucis.ncdb.waivers import WaiverSet
+    from ucis.ncdb.reports import (
+        report_testpoint_closure,
+        format_testpoint_closure,
+        report_stage_gate,
+        format_stage_gate,
+    )
+
+    db = _open_ncdb(args.db)
+
+    # Load testplan
+    testplan_path = getattr(args, "testplan", None)
+    if testplan_path:
+        plan = Testplan.load(testplan_path)
+    else:
+        plan = get_testplan(db)
+
+    if plan is None:
+        print(
+            "Error: no testplan found. Embed one with "
+            "'pyucis testplan import' or supply --testplan.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    # Load waivers
+    waivers = None
+    waivers_path = getattr(args, "waivers", None)
+    if waivers_path:
+        from ucis.ncdb.waivers import WaiverSet
+        waivers = WaiverSet.load(waivers_path)
+    elif hasattr(db, "getWaivers"):
+        waivers = db.getWaivers()
+
+    results = compute_closure(plan, db, waivers=waivers)
+
+    fmt = getattr(args, "output_format", "text")
+    out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout
+
+    try:
+        if fmt == "json":
+            summary = report_testpoint_closure(results)
+            out.write(summary.to_json() + "\n")
+        else:
+            summary = report_testpoint_closure(results)
+            out.write(format_testpoint_closure(summary) + "\n")
+
+            # Stage gate (if requested)
+            stage = getattr(args, "stage", None)
+            if stage:
+                gate = report_stage_gate(results, stage, plan)
+                out.write("\n" + format_stage_gate(gate) + "\n")
+    finally:
+        if out is not sys.stdout:
+            out.close()
+
+
+# ---------------------------------------------------------------------------
+# testplan export-junit
+# ---------------------------------------------------------------------------
+
+def cmd_testplan_export_junit(args) -> None:
+    """Execute ``pyucis testplan export-junit``."""
+    from ucis.ncdb.testplan import get_testplan, Testplan
+    from ucis.ncdb.testplan_closure import compute_closure
+    from ucis.ncdb.testplan_export import export_junit_xml
+
+    db = _open_ncdb(args.db)
+
+    testplan_path = getattr(args, "testplan", None)
+    if testplan_path:
+        plan = Testplan.load(testplan_path)
+    else:
+        plan = get_testplan(db)
+
+    if plan is None:
+        print(
+            "Error: no testplan found. Embed one with "
+            "'pyucis testplan import' or supply --testplan.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    results = compute_closure(plan, db)
+    output_path = getattr(args, "out", None) or "closure_results.xml"
+    suite_name = getattr(args, "suite_name", None) or "testplan_closure"
+    export_junit_xml(results, output_path, suite_name=suite_name)
+    print(f"JUnit XML written to '{output_path}'")
diff --git a/src/ucis/ncdb/bucket_index.py b/src/ucis/ncdb/bucket_index.py
new file mode 100644
index 0000000..43e05ed
--- /dev/null
+++ b/src/ucis/ncdb/bucket_index.py
@@ -0,0 +1,171 @@
+"""
+history/bucket_index.bin — index mapping bucket sequence numbers to date
+ranges and aggregate counts.
+
+This 28-byte-per-entry index allows regression trend queries and targeted
+bucket reads without opening individual bucket files.
+
+Binary layout (little-endian)::
+
+    magic         u32   0x42494458  ('BIDX')
+    version       u8    1
+    num_buckets   u32
+
+    entries[num_buckets]:     sorted by bucket_seq
+      bucket_seq   u32
+      ts_start     u32   unix timestamp of first record in bucket
+      ts_end       u32   unix timestamp of last record in bucket
+      num_records  u32
+      fail_count   u32   enables pass-rate trend without opening bucket
+      min_name_id  u32
+      max_name_id  u32
+
+28 bytes per entry.  3650 entries (10 years) ≈ 100 KB.
+"""
+
+from __future__ import annotations
+
+import struct
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+MAGIC   = 0x42494458   # 'BIDX'
+VERSION = 1
+
+_HDR   = struct.Struct("<IBI")    # magic, version, num_buckets
+_ENTRY = struct.Struct("<IIIIIII")  # 7 × u32 = 28 bytes per entry
+# Fields: bucket_seq(4) ts_start(4) ts_end(4) num_records(4) fail_count(4) min_name_id(4) max_name_id(4) = 28 bytes
+# NOTE: the design doc quotes "24 bytes/entry" but lists these same 7 fields × 4 bytes; 28 is the correct size.
+assert _ENTRY.size == 28
+
+
+@dataclass
+class BucketIndexEntry:
+    """One entry in the bucket index."""
+    bucket_seq:  int
+    ts_start:    int
+    ts_end:      int
+    num_records: int
+    fail_count:  int
+    min_name_id: int
+    max_name_id: int
+
+    @property
+    def pass_rate(self) -> float:
+        if self.num_records == 0:
+            return 1.0
+        return (self.num_records - self.fail_count) / self.num_records
+
+
+class BucketIndex:
+    """In-memory representation of ``history/bucket_index.bin``.
+
+    Example::
+
+        idx = BucketIndex()
+        idx.add_bucket(seq=0, ts_start=1700000000, ts_end=1700086399,
+                       num_records=5000, fail_count=12,
+                       min_name_id=0, max_name_id=99)
+        data = idx.serialize()
+        idx2 = BucketIndex.deserialize(data)
+    """
+
+    def __init__(self) -> None:
+        self._entries: List[BucketIndexEntry] = []
+
+    def add_bucket(self, seq: int, ts_start: int, ts_end: int,
+                   num_records: int, fail_count: int,
+                   min_name_id: int, max_name_id: int) -> None:
+        """Add or update the index entry for bucket *seq*.
+
+        Entries are kept sorted by *seq*.
+        """
+        entry = BucketIndexEntry(
+            bucket_seq=seq, ts_start=ts_start, ts_end=ts_end,
+            num_records=num_records, fail_count=fail_count,
+            min_name_id=min_name_id, max_name_id=max_name_id,
+        )
+        # Replace existing or insert in sorted order
+        for i, e in enumerate(self._entries):
+            if e.bucket_seq == seq:
+                self._entries[i] = entry
+                return
+            if e.bucket_seq > seq:
+                self._entries.insert(i, entry)
+                return
+        self._entries.append(entry)
+
+    def buckets_in_range(self, ts_from: int, ts_to: int) -> List[BucketIndexEntry]:
+        """Return entries whose time range overlaps [ts_from, ts_to]."""
+        return [e for e in self._entries
+                if e.ts_end >= ts_from and e.ts_start <= ts_to]
+
+    def buckets_for_name(self, name_id: int,
+                         ts_from: Optional[int] = None,
+                         ts_to:   Optional[int] = None) -> List[BucketIndexEntry]:
+        """Return entries that may contain records for *name_id*.
+
+        Filters by ``min_name_id ≤ name_id ≤ max_name_id`` and optionally
+        by time range.
+        """
+        results = []
+        for e in self._entries:
+            if e.min_name_id > name_id or e.max_name_id < name_id:
+                continue
+            if ts_from is not None and e.ts_end < ts_from:
+                continue
+            if ts_to is not None and e.ts_start > ts_to:
+                continue
+            results.append(e)
+        return results
+
+    def pass_rate_series(self) -> List[Tuple[int, float]]:
+        """Return ``(ts_start, pass_rate)`` pairs for all buckets in order."""
+        return [(e.ts_start, e.pass_rate) for e in self._entries]
+
+    @property
+    def num_buckets(self) -> int:
+        return len(self._entries)
+
+    def next_seq(self) -> int:
+        """Return the sequence number for the next new bucket."""
+        if not self._entries:
+            return 0
+        return self._entries[-1].bucket_seq + 1
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the index to bytes for storage in the ZIP archive."""
+        # Collect the packed pieces and join once (no repeated bytes copies).
+        parts = [_HDR.pack(MAGIC, VERSION, len(self._entries))]
+        parts.extend(_ENTRY.pack(e.bucket_seq, e.ts_start, e.ts_end,
+                                 e.num_records, e.fail_count,
+                                 e.min_name_id, e.max_name_id)
+                     for e in self._entries)
+        return b"".join(parts)
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "BucketIndex":
+        """Reconstruct a BucketIndex from raw bytes.
+
+        Raises:
+            ValueError: if magic or version is wrong, or *data* is truncated.
+        """
+        if len(data) < _HDR.size:
+            raise ValueError("bucket_index data too short for header")
+        magic, version, num_buckets = _HDR.unpack_from(data, 0)
+        if magic != MAGIC:
+            raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported bucket_index version {version}")
+        needed = _HDR.size + num_buckets * _ENTRY.size
+        if len(data) < needed:
+            raise ValueError(
+                f"Truncated bucket_index: need {needed} bytes, got {len(data)}")
+
+        idx = cls()
+        for i in range(num_buckets):
+            fields = _ENTRY.unpack_from(data, _HDR.size + i * _ENTRY.size)
+            idx._entries.append(BucketIndexEntry(*fields))  # same field order
+        return idx
diff --git a/src/ucis/ncdb/constants.py b/src/ucis/ncdb/constants.py
index 16289ab..795dd30 100644
--- a/src/ucis/ncdb/constants.py
+++ b/src/ucis/ncdb/constants.py
@@ -11,9 +11,13 @@
 # ── Format identity ────────────────────────────────────────────────────────
 
 NCDB_FORMAT = "NCDB"
-NCDB_VERSION = "1.0"
+NCDB_VERSION = "2.0"
 NCDB_GENERATOR = "pyucis-ncdb"
 
+# History format versions stored in manifest.json
+HISTORY_FORMAT_V1 = "v1"   # legacy: history.json only
+HISTORY_FORMAT_V2 = "v2"   # binary bucket store + history.json for MERGE nodes
+
 # ── CDB file header magic ──────────────────────────────────────────────────
 
 SQLITE_MAGIC = b"SQLite format 3\x00"   # 16 bytes
@@ -37,6 +41,36 @@
 MEMBER_PROPERTIES  = "properties.json"
 MEMBER_CONTRIB_DIR = "contrib/"
 
+# ── v2 history store ZIP member names ─────────────────────────────────────
+
+MEMBER_TEST_REGISTRY  = "test_registry.bin"
+MEMBER_TEST_STATS     = "test_stats.bin"
+MEMBER_BUCKET_INDEX   = "history/bucket_index.bin"
+MEMBER_CONTRIB_INDEX  = "contrib_index.bin"
+MEMBER_SQUASH_LOG     = "squash_log.bin"
+MEMBER_TESTPLAN       = "testplan.json"
+MEMBER_WAIVERS        = "waivers.json"
+
+# ── v2 history bucket directory prefix ────────────────────────────────────
+
+HISTORY_BUCKET_DIR    = "history/"
+HISTORY_BUCKET_MAX_RECORDS = 10_000
+
+# ── v2 test-run status codes (high nibble of status_flags byte) ───────────
+
+HIST_STATUS_OK      = 0
+HIST_STATUS_FAIL    = 1
+HIST_STATUS_ERROR   = 2
+HIST_STATUS_FATAL   = 3
+HIST_STATUS_COMPILE = 4
+
+# ── v2 test-run flag bits (low nibble of status_flags byte) ───────────────
+
+HIST_FLAG_SEED_IS_HASH        = 0x01
+HIST_FLAG_IS_RERUN             = 0x02
+HIST_FLAG_HAS_COVERAGE         = 0x04
+HIST_FLAG_WAS_SQUASHED         = 0x08
+
 # ── V2 scope_tree.bin encoding markers ────────────────────────────────────
 
 SCOPE_MARKER_REGULAR     = 0x00
diff --git a/src/ucis/ncdb/contrib_index.py b/src/ucis/ncdb/contrib_index.py
new file mode 100644
index 0000000..45caa18
--- /dev/null
+++ b/src/ucis/ncdb/contrib_index.py
@@ -0,0 +1,178 @@
+"""
+contrib_index.bin — pass-only merge support index.
+
+Every test run that produced coverage data has an 8-byte entry here.  Status
+is cached so merge decisions require no bucket scanning.
+
+Binary layout (little-endian)::
+
+    magic            u32   0x43494458  ('CIDX')
+    version          u8    1
+    merge_policy     u8    0=all  1=pass_only  2=exclude_error_and_rerun  3=strict
+    squash_watermark u32   highest run_id already baked into counts.bin
+    num_active       u32   number of entries (not yet squashed)
+
+    entries[num_active]:   sorted by run_id
+      run_id    u32
+      name_id   u16
+      status    u8
+      flags     u8         bit0=is_rerun  bit1=first_attempt_passed
+
+8 bytes per entry.
+"""
+
+from __future__ import annotations
+
+import struct
+from dataclasses import dataclass
+from typing import List
+
+from ucis.ncdb.constants import (
+    HIST_STATUS_OK,
+    HIST_FLAG_IS_RERUN,
+)
+
+MAGIC   = 0x43494458   # 'CIDX'
+VERSION = 1            # the only version ContribIndex.deserialize accepts
+
+# Merge policies (values of the merge_policy header byte)
+POLICY_ALL                   = 0
+POLICY_PASS_ONLY             = 1
+POLICY_EXCLUDE_ERROR_RERUN   = 2
+POLICY_STRICT                = 3   # exclude coverage from tests that only pass on retry
+
+# contrib_index entry flags (own numbering, distinct from the HIST_FLAG_* bits)
+FLAG_IS_RERUN              = 0x01
+FLAG_FIRST_ATTEMPT_PASSED  = 0x02
+
+_HDR   = struct.Struct("<IBBII")   # magic, version, policy, watermark, num_active (14 bytes)
+_ENTRY = struct.Struct("<IHBB")    # run_id, name_id, status, flags (8 bytes)
+
+
+@dataclass
+class ContribIndexEntry:
+    """A single 8-byte row of ``contrib_index.bin`` in decoded form."""
+    run_id: int    # run identifier; the index keeps its rows ordered by it
+    name_id: int   # test-name identifier
+    status: int    # status code; the index compares it with HIST_STATUS_OK
+    flags: int     # bit field built from the FLAG_* masks of this module
+
+    @property
+    def is_rerun(self) -> bool:
+        return (self.flags & FLAG_IS_RERUN) != 0
+
+    @property
+    def first_attempt_passed(self) -> bool:
+        return (self.flags & FLAG_FIRST_ATTEMPT_PASSED) != 0
+
+
+class ContribIndex:
+    """In-memory representation of ``contrib_index.bin``.
+
+    Example::
+
+        ci = ContribIndex()
+        ci.add_entry(run_id=0, name_id=0, status=HIST_STATUS_OK, flags=0)
+        ci.add_entry(run_id=1, name_id=1, status=HIST_STATUS_FAIL, flags=0)
+        passing = ci.passing_run_ids(policy=POLICY_PASS_ONLY)   # [0]
+    """
+
+    def __init__(self, merge_policy: int = POLICY_PASS_ONLY,
+                 squash_watermark: int = 0) -> None:
+        self.merge_policy     = merge_policy
+        self.squash_watermark = squash_watermark
+        self._entries: List[ContribIndexEntry] = []
+
+    def add_entry(self, run_id: int, name_id: int,
+                  status: int, flags: int) -> None:
+        """Add a contrib entry, keeping ``_entries`` sorted by run_id."""
+        entry = ContribIndexEntry(run_id=run_id, name_id=name_id,
+                                  status=status, flags=flags)
+        entries = self._entries
+        pos = len(entries)
+        # Usually a plain append; after a merge run_ids may arrive out of
+        # order, so step back past every entry with a larger run_id.
+        while pos > 0 and entries[pos - 1].run_id > run_id:
+            pos -= 1
+        entries.insert(pos, entry)
+
+    def passing_run_ids(self, policy: int = POLICY_PASS_ONLY) -> List[int]:
+        """Return the run_ids accepted by *policy*, in stored order.
+
+        Policies:
+            POLICY_ALL                 — all entries
+            POLICY_PASS_ONLY           — status == OK
+            POLICY_EXCLUDE_ERROR_RERUN — status == OK
+            POLICY_STRICT              — status == OK and not (is_rerun and not first_attempt_passed)
+
+        Any other value selects nothing; ``self.merge_policy`` is not used.
+        """
+        if policy == POLICY_ALL:
+            return [e.run_id for e in self._entries]
+        if policy in (POLICY_PASS_ONLY, POLICY_EXCLUDE_ERROR_RERUN):
+            return [e.run_id for e in self._entries
+                    if e.status == HIST_STATUS_OK]
+        if policy == POLICY_STRICT:
+            # Drop passing reruns whose first attempt did not pass.
+            return [e.run_id for e in self._entries
+                    if e.status == HIST_STATUS_OK
+                    and not (e.is_rerun and not e.first_attempt_passed)]
+        return []
+
+    def set_squash_watermark(self, run_id: int) -> None:
+        """Set the squash watermark to *run_id* (no monotonicity check)."""
+        self.squash_watermark = run_id
+
+    def remove_entries_up_to(self, run_id: int) -> None:
+        """Remove all entries with run_id ≤ *run_id* (called after squash)."""
+        self._entries = [e for e in self._entries if e.run_id > run_id]
+
+    def max_run_id(self) -> int:
+        """Return the highest run_id in active entries, or squash_watermark."""
+        if self._entries:
+            return max(e.run_id for e in self._entries)
+        return self.squash_watermark
+
+    @property
+    def num_active(self) -> int:
+        """Number of active (not yet squashed) entries."""
+        return len(self._entries)
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the index to bytes for storage in the ZIP archive."""
+        parts = [_HDR.pack(MAGIC, VERSION, self.merge_policy,
+                           self.squash_watermark, len(self._entries))]
+        # Collect the rows and join once; repeated ``bytes +=`` is quadratic.
+        parts.extend(_ENTRY.pack(e.run_id, e.name_id, e.status, e.flags)
+                     for e in self._entries)
+        return b"".join(parts)
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "ContribIndex":
+        """Reconstruct a ContribIndex from raw bytes.
+
+        Raises:
+            ValueError: if magic or version is wrong, or if *data* is too
+                short for the header or for the entry count it announces.
+        """
+        if len(data) < _HDR.size:
+            raise ValueError(f"contrib_index truncated: {len(data)} bytes, "
+                             f"header needs {_HDR.size}")
+        magic, version, merge_policy, squash_watermark, num_active = \
+            _HDR.unpack_from(data, 0)
+        if magic != MAGIC:
+            raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported contrib_index version {version}")
+        end = _HDR.size + num_active * _ENTRY.size
+        if len(data) < end:
+            raise ValueError(f"contrib_index truncated: {num_active} entries "
+                             f"need {end} bytes, got {len(data)}")
+
+        ci = cls(merge_policy=merge_policy, squash_watermark=squash_watermark)
+        ci._entries = [
+            ContribIndexEntry(*row)   # row = (run_id, name_id, status, flags)
+            for row in _ENTRY.iter_unpack(data[_HDR.size:end])]
+        return ci
diff --git a/src/ucis/ncdb/history_buckets.py b/src/ucis/ncdb/history_buckets.py
new file mode 100644
index 0000000..207be13
--- /dev/null
+++ b/src/ucis/ncdb/history_buckets.py
@@ -0,0 +1,328 @@
+"""
+history/NNNNNN.bin — columnar bounded bucket files for test-run records.
+
+Each bucket stores up to HISTORY_BUCKET_MAX_RECORDS test-run records in a
+columnar layout optimised for DEFLATE/LZMA compression.  Records within a
+bucket are sorted by (name_id, ts).
+
+Binary layout (little-endian, stored compressed inside the ZIP)::
+
+    Header (16 bytes):
+      magic           u32   0x48445942  ('HDYB')
+      version         u8    1
+      num_records     u32
+      num_names       u16   unique name_ids in this bucket; then 1 pad byte
+      ts_base         u32   unix timestamp of first record
+
+    Name index  (num_names entries of 12 bytes, sorted by name_id):
+      name_id         u32
+      start_row       u32   first record index for this name
+      count           u16   number of records for this name; then 2 pad bytes
+
+    Seed dictionary (local — enables 1-byte seed references):
+      num_seeds       u16
+      seed_ids        u32[num_seeds]   global seed_ids from test_registry
+
+    Columns (independent arrays — each compresses optimally):
+      seeds[]         u8[num_records]   index into local seed dictionary
+      ts_deltas[]     varint[num_records]  seconds since ts_base, delta per name group
+      status_flags[]  u8[num_records]   nibble-packed (high=status, low=flags)
+
+Status nibble values: 0=OK 1=FAIL 2=ERROR 3=FATAL 4=COMPILE
+Flag bits: bit0=seed_is_hash  bit1=is_rerun  bit2=has_coverage  bit3=was_squashed
+"""
+
+from __future__ import annotations
+
+import struct
+import zipfile
+from dataclasses import dataclass, field
+from typing import Dict, Iterable, List, Optional, Tuple
+
+from ucis.ncdb.constants import (
+    HISTORY_BUCKET_MAX_RECORDS,
+    HIST_STATUS_OK,
+)
+from ucis.ncdb.varint import decode_varints, encode_varints
+
+MAGIC   = 0x48445942   # 'HDYB'
+VERSION = 1            # the only version BucketReader accepts
+
+# Bucket header: magic(4) version(1) num_records(4) num_names(2) pad(1) ts_base(4) = 16 bytes
+_BUCKET_HDR  = struct.Struct("<IBIHxI")   # "<" = little-endian; "x" is the explicit pad byte
+# Name index entry: name_id(4) start_row(4) count(2) pad(2) = 12 bytes
+_BUCKET_NAME = struct.Struct("<IIHxx")    # unpacks to (name_id, start_row, count)
+
+
+@dataclass
+class BucketRecord:
+    """A single test-run row in decoded form, as held by writer and reader."""
+    name_id: int   # test-name identifier the row is grouped under
+    seed_id: int   # global seed_id from test_registry
+    ts: int        # absolute unix timestamp
+    status: int    # one of the HIST_STATUS_* codes
+    flags: int     # OR of HIST_FLAG_* bits
+
+
+class BucketWriter:
+    """Accumulates test-run records in memory and serialises them on demand.
+
+    Example::
+
+        w = BucketWriter()
+        w.add(name_id=0, seed_id=0, ts=1700000000, status=HIST_STATUS_OK, flags=0)
+        data = w.seal()   # returns compressed bytes ready for ZIP storage
+    """
+
+    def __init__(self) -> None:
+        self._records: List[BucketRecord] = []
+        # local seed dict: global seed_id → local index (u8, max 255 seeds/bucket)
+        self._seed_local: Dict[int, int] = {}
+        self._seed_ids: List[int] = []   # local_idx → global seed_id
+
+    def add(self, name_id: int, seed_id: int, ts: int,
+            status: int, flags: int) -> None:
+        """Append one test-run record.
+
+        Args:
+            name_id:  Integer name_id from TestRegistry.
+            seed_id:  Integer seed_id from TestRegistry.
+            ts:       Unix timestamp.
+            status:   HIST_STATUS_* constant.
+            flags:    Combination of HIST_FLAG_* bits.
+
+        Raises:
+            OverflowError: if *seed_id* is new and the bucket already holds
+                255 distinct seeds; the record is not added in that case.
+        """
+        if seed_id not in self._seed_local:
+            idx = len(self._seed_ids)
+            if idx >= 255:
+                raise OverflowError("Bucket seed dictionary full (255 entries max)")
+            self._seed_local[seed_id] = idx
+            self._seed_ids.append(seed_id)
+        self._records.append(BucketRecord(name_id=name_id, seed_id=seed_id,
+                                          ts=ts, status=status, flags=flags))
+
+    @property
+    def num_records(self) -> int:
+        """Number of records added so far."""
+        return len(self._records)
+
+    def is_full(self) -> bool:
+        """True once HISTORY_BUCKET_MAX_RECORDS records are buffered.
+
+        The 255-entry seed dictionary limit is not part of this check.
+        """
+        return len(self._records) >= HISTORY_BUCKET_MAX_RECORDS
+
+    def seal(self, use_lzma: bool = True) -> bytes:
+        """Serialise and compress the bucket.
+
+        Args:
+            use_lzma: If True, attempt LZMA compression; fall back to
+                      DEFLATE level 9 if liblzma is unavailable.
+
+        Returns:
+            Compressed bytes ready to store as a ZIP member.
+        """
+        raw = self._encode()
+        return _compress(raw, high_quality=True, use_lzma=use_lzma)
+
+    def seal_fast(self) -> bytes:
+        """Serialise with fast (DEFLATE level 1) compression for the current-day bucket."""
+        raw = self._encode()
+        return _compress(raw, high_quality=False, use_lzma=False)
+
+    def _encode(self) -> bytes:
+        """Build the uncompressed bucket image.
+
+        Layout: header, name index, seed dictionary, then the seeds,
+        ts_deltas and status_flags columns (see the module docstring).
+        """
+        # Sort records by (name_id, ts)
+        records = sorted(self._records, key=lambda r: (r.name_id, r.ts))
+        # ts_base must be the earliest timestamp in the bucket: the first row
+        # of every name group is stored as ``ts - ts_base``.  Taking the first
+        # *sorted* record (lowest name_id) instead gives a negative delta for
+        # any other test whose first run is older.
+        ts_base = min((r.ts for r in records), default=0)
+
+        # Name index: rows are grouped by name_id, so one pass suffices.
+        name_index_entries: List[Tuple[int, int, int]] = []
+        for row, r in enumerate(records):
+            if name_index_entries and name_index_entries[-1][0] == r.name_id:
+                nid, start_row, cnt = name_index_entries[-1]
+                name_index_entries[-1] = (nid, start_row, cnt + 1)
+            else:
+                name_index_entries.append((r.name_id, row, 1))
+
+        # Columns
+        seed_col = bytearray()
+        ts_delta_values = []
+        status_flags_col = bytearray()
+        prev_ts_per_name: Dict[int, int] = {}
+        for r in records:
+            seed_col.append(self._seed_local[r.seed_id])
+            # Delta against the previous run of the same test (or ts_base).
+            ts_delta_values.append(r.ts - prev_ts_per_name.get(r.name_id, ts_base))
+            prev_ts_per_name[r.name_id] = r.ts
+            # High nibble = status, low nibble = flags.
+            status_flags_col.append(((r.status & 0x0F) << 4) | (r.flags & 0x0F))
+
+        # Seed dict: u16 count followed by the global seed_ids
+        num_seeds = len(self._seed_ids)
+        return b"".join((
+            _BUCKET_HDR.pack(MAGIC, VERSION, len(records),
+                             len(name_index_entries), ts_base),     # 16 bytes
+            *(_BUCKET_NAME.pack(*e) for e in name_index_entries),  # 12 bytes each
+            struct.pack(f"<H{num_seeds}I", num_seeds, *self._seed_ids),
+            bytes(seed_col),
+            encode_varints(ts_delta_values),
+            bytes(status_flags_col),
+        ))
+
+
+class BucketReader:
+    """Decoder for one compressed bucket file.
+
+    The constructor decompresses and decodes the whole bucket eagerly; the
+    query methods then operate on the in-memory record list.
+
+    Args:
+        data: Compressed bytes as stored in the ZIP archive.
+
+    Example::
+
+        reader = BucketReader(compressed_data)
+        for rec in reader.records_for_name(name_id=3):
+            print(rec.ts, rec.status)
+    """
+
+    def __init__(self, data: bytes) -> None:
+        self._parse(_decompress(data))
+
+    def _parse(self, raw: bytes) -> None:
+        """Decode *raw*; ValueError on a wrong magic or unsupported version."""
+        header = _BUCKET_HDR.unpack_from(raw, 0)
+        magic, version, num_records, num_names, ts_base = header
+        if magic != MAGIC:
+            raise ValueError(f"Bad bucket magic 0x{magic:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported bucket version {version}")
+
+        self._num_records = num_records
+        self._ts_base     = ts_base
+        pos = _BUCKET_HDR.size
+
+        # Name index: (name_id, start_row, count) triples
+        self._name_index: List[Tuple[int, int, int]] = []
+        for _ in range(num_names):
+            self._name_index.append(_BUCKET_NAME.unpack_from(raw, pos))
+            pos += _BUCKET_NAME.size
+
+        # Seed dictionary: u16 count, then that many u32 global seed_ids
+        (num_seeds,) = struct.unpack_from("<H", raw, pos)
+        pos += 2
+        seed_ids: List[int] = []
+        if num_seeds:
+            seed_ids.extend(struct.unpack_from(f"<{num_seeds}I", raw, pos))
+            pos += 4 * num_seeds
+        self._seed_ids = seed_ids  # local_idx → global seed_id
+
+        # Columns: seeds (1 byte/row), ts deltas (varints), status_flags
+        self._seed_col = raw[pos: pos + num_records]
+        pos += num_records
+        ts_delta_vals, pos = decode_varints(raw, num_records, pos)
+        self._status_flags = raw[pos: pos + num_records]
+
+        last_ts: Dict[int, int] = {}
+        self._records: List[BucketRecord] = []
+
+        # Expand the name index into a per-row name_id table
+        row_name: List[int] = [0] * num_records
+        for nid, first_row, cnt in self._name_index:
+            for row in range(first_row, first_row + cnt):
+                row_name[row] = nid
+
+        # Rebuild absolute timestamps: each delta is relative to the previous
+        # row of the same name_id, or to ts_base for a name's first row
+        n_seeds = len(self._seed_ids)
+        for row, nid in enumerate(row_name):
+            ts = last_ts.get(nid, ts_base) + ts_delta_vals[row]
+            last_ts[nid] = ts
+            local_seed = self._seed_col[row]
+            # An out-of-range local seed index decodes as seed_id 0
+            seed_id = self._seed_ids[local_seed] if local_seed < n_seeds else 0
+            packed = self._status_flags[row]
+            self._records.append(BucketRecord(
+                name_id=nid, seed_id=seed_id, ts=ts,
+                status=(packed >> 4) & 0x0F, flags=packed & 0x0F,
+            ))
+
+    def records_for_name(self, name_id: int) -> List[BucketRecord]:
+        """Look up the records of one test by bisecting the name index.
+
+        Returns:
+            List of BucketRecord (may be empty if name_id not in this bucket).
+        """
+        index = self._name_index
+        low, high = 0, len(index)
+        # Lower-bound search: first entry whose name_id is >= the target
+        while low < high:
+            middle = (low + high) // 2
+            if index[middle][0] >= name_id:
+                high = middle
+            else:
+                low = middle + 1
+        if low < len(index) and index[low][0] == name_id:
+            _, first_row, cnt = index[low]
+            return self._records[first_row: first_row + cnt]
+        return []
+
+    def all_records(self) -> Iterable[BucketRecord]:
+        """Return an iterator over every record, in row order."""
+        return iter(self._records)
+
+    @property
+    def num_records(self) -> int:
+        return self._num_records
+
+
+# ── compression helpers ───────────────────────────────────────────────────
+
+def _compress(data: bytes, high_quality: bool, use_lzma: bool) -> bytes:
+    """Compress *data*: DEFLATE level 1 unless *high_quality* (mutable bucket);
+    otherwise XZ/LZMA when *use_lzma* is set and usable, else DEFLATE level 9."""
+    import zlib
+
+    if not high_quality:
+        return zlib.compress(data, level=1)
+    if use_lzma:
+        # Import in its own try: an ``except`` clause that names
+        # lzma.LZMAError cannot be evaluated when the import itself failed.
+        try:
+            import lzma
+        except ImportError:
+            lzma = None
+        if lzma is not None:
+            try:
+                return lzma.compress(data, format=lzma.FORMAT_XZ)
+            except lzma.LZMAError:
+                pass   # fall back to DEFLATE below
+    return zlib.compress(data, level=9)
+
+
+def _decompress(data: bytes) -> bytes:
+    """Inflate a bucket payload, choosing XZ or zlib from its leading bytes."""
+    import zlib
+
+    xz_signature = b"\xfd7zXZ\x00"   # 0xFD 0x37 0x7A 0x58 0x5A 0x00
+    if data[:6] != xz_signature:
+        # Anything without the XZ signature is handed to zlib
+        return zlib.decompress(data)
+    try:
+        import lzma
+        return lzma.decompress(data, format=lzma.FORMAT_XZ)
+    except ImportError:
+        raise RuntimeError("lzma module not available; cannot decompress sealed bucket")
diff --git a/src/ucis/ncdb/manifest.py b/src/ucis/ncdb/manifest.py
index 67065c9..eee3722 100644
--- a/src/ucis/ncdb/manifest.py
+++ b/src/ucis/ncdb/manifest.py
@@ -11,7 +11,7 @@
 from datetime import datetime, timezone
 from typing import Optional
 
-from .constants import NCDB_FORMAT, NCDB_VERSION, NCDB_GENERATOR
+from .constants import NCDB_FORMAT, NCDB_VERSION, NCDB_GENERATOR, HISTORY_FORMAT_V1
 
 
 @dataclass
@@ -28,6 +28,7 @@ class Manifest:
     covered_bins:   int = 0
     schema_hash:    str = ""
     generator:      str = NCDB_GENERATOR
+    history_format: str = HISTORY_FORMAT_V1   # "v1" (JSON) or "v2" (binary + JSON)
 
     def serialize(self) -> bytes:
         d = asdict(self)
diff --git a/src/ucis/ncdb/ncdb_merger.py b/src/ucis/ncdb/ncdb_merger.py
index a322a17..4dc8a2e 100644
--- a/src/ucis/ncdb/ncdb_merger.py
+++ b/src/ucis/ncdb/ncdb_merger.py
@@ -15,13 +15,22 @@
 
 History nodes from all sources are accumulated in the output.  A new
 MERGE HistoryNode is appended to record the operation.
+
+v2 binary history (if present in any source) is merged correctly:
+  - TestRegistry names/seeds are unioned; stable name_id remaps are computed
+  - TestStatsTable counters are summed and derived scores recomputed
+  - Bucket files are decoded, name_ids remapped, re-encoded and sealed
+  - BucketIndex is rebuilt; run_ids are offset to keep them disjoint
+  - ContribIndex entries are remapped and concatenated
+  - SquashLog entries are concatenated (no run_id adjustment needed)
 """
 
 import zipfile
 import json
 import struct
+import math
 from datetime import datetime, timezone
-from typing import List
+from typing import Dict, List, Optional, Tuple
 
 from .ncdb_reader import NcdbReader
 from .ncdb_writer import NcdbWriter
@@ -31,6 +40,11 @@
 from .constants import (
     MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE,
     MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES,
+    MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS,
+    MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG,
+    HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2,
+    HIST_STATUS_OK, HIST_STATUS_FAIL,
+    MEMBER_TESTPLAN, MEMBER_WAIVERS,
 )
 from ucis.ncdb._accel import add_uint32_arrays as _add_arrays, HAS_ACCEL as _HAS_ACCEL
 
@@ -89,6 +103,11 @@ def _merge_same_schema(self, sources, manifests, target):
 
         # Build new manifest using first source's schema data
         first_manifest = manifests[0]
+
+        # Determine if any source has v2 binary history
+        any_v2 = any(m.history_format == HISTORY_FORMAT_V2 for m in manifests)
+        history_format = HISTORY_FORMAT_V2 if any_v2 else first_manifest.history_format
+
         new_manifest = Manifest(
             format=first_manifest.format,
             version=first_manifest.version,
@@ -103,17 +122,36 @@ def _merge_same_schema(self, sources, manifests, target):
             covered_bins=sum(1 for c in merged_counts if c > 0),
             schema_hash=first_manifest.schema_hash,
             generator=first_manifest.generator,
+            history_format=history_format,
         )
 
         # Read schema members verbatim from first source
         with zipfile.ZipFile(sources[0], "r") as zf:
+            zf_names = zf.namelist()
             strings_bytes    = zf.read(MEMBER_STRINGS)
             scope_tree_bytes = zf.read(MEMBER_SCOPE_TREE)
             sources_bytes    = zf.read(MEMBER_SOURCES)
+            # Gather existing contrib/* members from all sources (copy verbatim)
+            contrib_members_all: Dict[str, bytes] = {}
+
+        for src in sources:
+            with zipfile.ZipFile(src, "r") as zf:
+                for n_member in zf.namelist():
+                    if n_member.startswith("contrib/"):
+                        contrib_members_all[n_member] = zf.read(n_member)
 
         counts_bytes  = CountsWriter().serialize(merged_counts)
         history_bytes = HistoryWriter().serialize(all_history)
 
+        # Merge v2 binary history if present in any source
+        v2_members: Dict[str, bytes] = {}
+        if any_v2:
+            v2_members = self._merge_v2_history(sources, manifests)
+
+        # Merge testplan and waivers
+        testplan_bytes = self._merge_testplans(sources)
+        waivers_bytes  = self._merge_waivers(sources)
+
         with zipfile.ZipFile(target, "w", compression=zipfile.ZIP_DEFLATED) as zf:
             zf.writestr(MEMBER_MANIFEST,   new_manifest.serialize())
             zf.writestr(MEMBER_STRINGS,    strings_bytes)
@@ -121,6 +159,15 @@ def _merge_same_schema(self, sources, manifests, target):
             zf.writestr(MEMBER_COUNTS,     counts_bytes)
             zf.writestr(MEMBER_HISTORY,    history_bytes)
             zf.writestr(MEMBER_SOURCES,    sources_bytes)
+            for member_name, member_bytes in contrib_members_all.items():
+                zf.writestr(member_name, member_bytes)
+            for member_name, member_bytes in v2_members.items():
+                zf.writestr(member_name, member_bytes,
+                            compress_type=zipfile.ZIP_STORED)
+            if testplan_bytes:
+                zf.writestr(MEMBER_TESTPLAN, testplan_bytes)
+            if waivers_bytes:
+                zf.writestr(MEMBER_WAIVERS, waivers_bytes)
 
     # ── Cross-schema fallback ─────────────────────────────────────────────
 
@@ -165,8 +212,266 @@ def _merge_cross_schema(self, sources, target):
         for db in dbs:
             db.close()
 
+    # ── v2 binary history merge ───────────────────────────────────────────
+
+    def _merge_v2_history(self, sources: List[str],
+                          manifests: List[Manifest]) -> Dict[str, bytes]:
+        """Merge v2 binary history from all sources; return member-name → bytes.
+
+        Sources whose manifest is not v2 contribute an empty history state."""
+        from .test_registry import TestRegistry
+        from .test_stats import TestStatsTable, TestStatsEntry
+        from .bucket_index import BucketIndex
+        from .contrib_index import ContribIndex, POLICY_PASS_ONLY
+        from .squash_log import SquashLog
+
+        # --- Step 1: load per-source v2 state ---
+        src_states = []
+        for src, mf in zip(sources, manifests):
+            if mf.history_format == HISTORY_FORMAT_V2:
+                src_states.append(self._read_v2_state(src))
+            else:
+                # Source has no v2 history — use empty state
+                src_states.append({
+                    'registry': TestRegistry(),
+                    'stats': TestStatsTable(),
+                    'bucket_index': BucketIndex(),
+                    'buckets': {},   # seq → compressed_bytes
+                    'contrib_index': ContribIndex(merge_policy=POLICY_PASS_ONLY),
+                    'squash_log': SquashLog(),
+                })
+
+        # --- Step 2: build merged registry (union of all names/seeds) ---
+        # Two-pass approach: first insert ALL names/seeds so the sorted order
+        # is final, then recompute remaps against the stable merged registry.
+        merged_reg = TestRegistry()
+
+        # Pass 1: insert all names and seeds to finalise the merged registry
+        for state in src_states:
+            reg = state['registry']
+            for name in reg._names:
+                merged_reg.lookup_name_id(name)
+            for seed in reg._seeds:
+                merged_reg.lookup_seed_id(seed)
+
+        # Pass 2: build per-source remaps against the now-stable merged registry
+        name_remaps: List[Dict[int, int]] = []
+        seed_remaps: List[Dict[int, int]] = []
+        for state in src_states:
+            reg = state['registry']
+            n_remap: Dict[int, int] = {
+                old_id: merged_reg._name_to_id[name]
+                for old_id, name in enumerate(reg._names)
+            }
+            s_remap: Dict[int, int] = {
+                old_id: merged_reg._seed_to_id[seed]
+                for old_id, seed in enumerate(reg._seeds)
+            }
+            name_remaps.append(n_remap)
+            seed_remaps.append(s_remap)
+
+        # --- Step 3: compute run_id offsets (disjoint run_id ranges) ---
+        run_id_offsets: List[int] = []
+        offset = 0
+        for state in src_states:
+            run_id_offsets.append(offset)
+            offset += state['registry'].next_run_id
+        # Advance the merged registry's counter (one assign_run_id() per run)
+        for _ in range(offset):
+            merged_reg.assign_run_id()
+
+        # --- Step 4: merge TestStatsTable ---
+        merged_stats = TestStatsTable()
+        # Ensure enough slots: one entry per merged name, indexed by name_id
+        for _ in range(merged_reg.num_names):
+            merged_stats._entries.append(
+                TestStatsEntry(name_id=len(merged_stats._entries)))
+
+        for src_idx, state in enumerate(src_states):
+            n_remap = name_remaps[src_idx]
+            src_stats = state['stats']
+            for old_id, src_entry in enumerate(src_stats._entries):
+                if src_entry.total_runs == 0:
+                    continue
+                new_id = n_remap.get(old_id, old_id)
+                _merge_stats_entry(merged_stats._entries[new_id], src_entry, new_id)
+
+        # --- Step 5: merge bucket files ---
+        merged_buckets: Dict[int, bytes] = {}
+        merged_bidx = BucketIndex()
+        new_seq = 0
+
+        for src_idx, state in enumerate(src_states):
+            n_remap = name_remaps[src_idx]
+            s_remap = seed_remaps[src_idx]
+            src_bidx = state['bucket_index']
+
+            for bidx_entry in src_bidx._entries:
+                old_seq = bidx_entry.bucket_seq
+                compressed = state['buckets'].get(old_seq)
+                if compressed is None:
+                    continue
+                # Rewrite the bucket with merged ids (skipped only when both remaps are empty)
+                if n_remap or s_remap:
+                    compressed = _remap_bucket(compressed, n_remap, s_remap)
+                merged_buckets[new_seq] = compressed
+                # Remap name_ids in the index entry
+                min_nid = n_remap.get(bidx_entry.min_name_id, bidx_entry.min_name_id)
+                max_nid = n_remap.get(bidx_entry.max_name_id, bidx_entry.max_name_id)
+                merged_bidx.add_bucket(
+                    new_seq, bidx_entry.ts_start, bidx_entry.ts_end,
+                    bidx_entry.num_records, bidx_entry.fail_count,
+                    min(min_nid, max_nid), max(min_nid, max_nid),
+                )
+                new_seq += 1
+
+        # --- Step 6: merge ContribIndex ---
+        merged_cidx = ContribIndex(merge_policy=POLICY_PASS_ONLY)
+        for src_idx, state in enumerate(src_states):
+            n_remap = name_remaps[src_idx]
+            rid_offset = run_id_offsets[src_idx]
+            ci = state['contrib_index']
+            for entry in ci._entries:
+                merged_cidx.add_entry(
+                    run_id=entry.run_id + rid_offset,
+                    name_id=n_remap.get(entry.name_id, entry.name_id),
+                    status=entry.status,
+                    flags=entry.flags,
+                )
+            # Advance watermark
+            if ci.squash_watermark > 0:
+                merged_cidx.set_squash_watermark(
+                    max(merged_cidx.squash_watermark,
+                        ci.squash_watermark + rid_offset))
+
+        # --- Step 7: merge SquashLog (append-only, no run_id adjustment) ---
+        # NOTE(review): from_run/to_run are copied without the Step 6 rid_offset — confirm this is intended.
+        merged_slog = SquashLog()
+        for state in src_states:
+            for entry in state['squash_log'].entries():
+                merged_slog.append(
+                    ts=entry.ts, policy=entry.policy,
+                    from_run=entry.from_run, to_run=entry.to_run,
+                    num_runs=entry.num_runs, pass_runs=entry.pass_runs,
+                )
+
+        # --- Assemble output members ---
+        result: Dict[str, bytes] = {}
+        result[MEMBER_TEST_REGISTRY] = merged_reg.serialize()
+        result[MEMBER_TEST_STATS]    = merged_stats.serialize()
+        result[MEMBER_BUCKET_INDEX]  = merged_bidx.serialize()
+        result[MEMBER_CONTRIB_INDEX] = merged_cidx.serialize()
+        result[MEMBER_SQUASH_LOG]    = merged_slog.serialize()
+        for seq, data in merged_buckets.items():
+            result[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = data
+
+        return result
+
+    def _read_v2_state(self, path: str) -> dict:
+        """Load the v2 history members of the archive at *path*.
+
+        Absent or empty members yield freshly constructed default objects.
+        Result keys: registry, stats, bucket_index, buckets (bucket sequence
+        number → compressed bytes), contrib_index and squash_log.
+        """
+        from .test_registry import TestRegistry
+        from .test_stats import TestStatsTable
+        from .bucket_index import BucketIndex
+        from .contrib_index import ContribIndex, POLICY_PASS_ONLY
+        from .squash_log import SquashLog
+
+        fixed = (MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS, MEMBER_BUCKET_INDEX,
+                 MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG)
+        bucket_blobs: Dict[int, bytes] = {}
+        with zipfile.ZipFile(path, "r") as archive:
+            members = archive.namelist()
+            raw = {m: (archive.read(m) if m in members else b'') for m in fixed}
+            for member in members:
+                # Bucket files are history/NNNNNN.bin; the index shares the dir
+                if (member == MEMBER_BUCKET_INDEX or not (
+                        member.startswith(HISTORY_BUCKET_DIR) and member.endswith(".bin"))):
+                    continue
+                stem = member[len(HISTORY_BUCKET_DIR):].split(".")[0]
+                try:
+                    seq = int(stem)
+                    bucket_blobs[seq] = archive.read(member)
+                except ValueError:
+                    pass   # non-numeric file name: not a bucket, ignore
+
+        reg, stats, bidx, cidx, slog = (raw[m] for m in fixed)
+        return {
+            'registry': TestRegistry.deserialize(reg) if reg else TestRegistry(),
+            'stats': TestStatsTable.deserialize(stats) if stats else TestStatsTable(),
+            'bucket_index': BucketIndex.deserialize(bidx) if bidx else BucketIndex(),
+            'buckets': bucket_blobs,
+            'contrib_index': (ContribIndex.deserialize(cidx) if cidx
+                              else ContribIndex(merge_policy=POLICY_PASS_ONLY)),
+            'squash_log': SquashLog.deserialize(slog) if slog else SquashLog(),
+        }
+
     # ── Helpers ───────────────────────────────────────────────────────────
 
+    def _merge_testplans(self, sources: list):
+        """Pick the testplan to embed in the merged output, if any.
+
+        * No source carries a testplan → None.
+        * Every testplan names the same ``source_file`` → the raw bytes of
+          the one with the greatest ``import_timestamp`` (the first such
+          source wins a tie).
+        * Several distinct ``source_file`` values → a warning is issued and
+          None is returned (incompatible plans).
+        """
+        import warnings
+        import json as _json
+        newest = {}   # source_file → (import_timestamp, raw_bytes)
+        for path in sources:
+            with zipfile.ZipFile(path, "r") as archive:
+                if MEMBER_TESTPLAN not in archive.namelist():
+                    continue
+                blob = archive.read(MEMBER_TESTPLAN)
+            plan = _json.loads(blob)
+            origin = plan.get("source_file", "")
+            stamp = plan.get("import_timestamp", "")
+            best = newest.get(origin)
+            if best is None or stamp > best[0]:
+                newest[origin] = (stamp, blob)
+        if len(newest) > 1:
+            warnings.warn(
+                f"Merging databases with different testplans "
+                f"({list(newest.keys())}); testplan omitted from output.",
+                stacklevel=4,
+            )
+            return None
+        # Zero or one candidate: hand back its bytes, or None when empty
+        return next((blob for _, blob in newest.values()), None)
+
+    def _merge_waivers(self, sources: list):
+        """Combine the waiver lists of all *sources* into one JSON document.
+
+        Waivers are keyed by ``id``; for a repeated id the entry with the
+        greater ``approved_at`` string is kept (the earlier source on ties).
+        Returns None when no source holds a waivers member.
+        """
+        import json as _json
+        by_id: dict = {}   # id → waiver dict
+        seen_member = False
+        for path in sources:
+            with zipfile.ZipFile(path, "r") as archive:
+                if MEMBER_WAIVERS not in archive.namelist():
+                    continue
+                seen_member = True
+                doc = _json.loads(archive.read(MEMBER_WAIVERS))
+            for waiver in doc.get("waivers", []):
+                key = waiver.get("id", "")
+                kept = by_id.get(key)
+                if kept is None or (waiver.get("approved_at", "")
+                                    > kept.get("approved_at", "")):
+                    by_id[key] = waiver
+        if not seen_member:
+            return None
+        payload = {"format_version": 1, "waivers": list(by_id.values())}
+        return _json.dumps(payload, separators=(',', ':')).encode()
+
     def _read_manifest(self, path: str) -> Manifest:
         with zipfile.ZipFile(path, "r") as zf:
             return Manifest.from_bytes(zf.read(MEMBER_MANIFEST))
@@ -190,3 +495,93 @@ def _make_merge_node(self, target: str, sources: List[str]) -> MemHistoryNode:
         node.setToolCategory("ncdb-merger")
         node.setComment(f"Merged from: {', '.join(sources)}")
         return node
+
+
+# ── Module-level helpers ──────────────────────────────────────────────────
+
+
+def _merge_stats_entry(dst, src, new_name_id: int) -> None:
+    """Fold the statistics of *src* into *dst*, mutating *dst* in place.
+
+    Args:
+        dst: Entry that receives the combined statistics.
+        src: Entry whose statistics are added; it is only read.
+        new_name_id: ``name_id`` to stamp on *dst*.
+    """
+    dst.name_id = new_name_id
+
+    # Timestamps: earliest non-zero first_ts, latest of the two others.
+    src_first = src.first_ts
+    if src_first > 0 and (dst.first_ts == 0 or src_first < dst.first_ts):
+        dst.first_ts = src_first
+    for ts_field in ("last_ts", "last_green_ts"):
+        setattr(dst, ts_field,
+                max(getattr(dst, ts_field), getattr(src, ts_field)))
+
+    # Plain additive counters.  The pre-merge run count is kept because the
+    # mean/M2 combination below needs both partial sample sizes.
+    runs_before = dst.total_runs
+    for counter in ("total_runs", "pass_count", "fail_count",
+                    "error_count", "transition_count"):
+        setattr(dst, counter, getattr(dst, counter) + getattr(src, counter))
+
+    # Pairwise combination of (mean, M2) for cpu_time (Chan's formula).
+    runs_added = src.total_runs
+    runs_after = runs_before + runs_added
+    if runs_added > 0 and dst.total_runs > 0 and runs_after > 0:
+        mean_a = dst.mean_cpu_time
+        mean_b = src.mean_cpu_time
+        delta = mean_b - mean_a
+        dst.mean_cpu_time = (runs_before * mean_a + runs_added * mean_b) / runs_after
+        dst.m2_cpu_time = (dst.m2_cpu_time + src.m2_cpu_time
+                           + delta * delta * runs_before * runs_added / runs_after)
+
+    # Derived scores are recomputed from the accumulated counters.
+    total = dst.total_runs
+    if total > 0:
+        dst.flake_score = dst.transition_count / max(total - 1, 1)
+        dst.fail_rate = dst.fail_count / total
+    else:
+        dst.flake_score = 0.0
+        dst.fail_rate = 0.0
+
+    pass_rate = dst.pass_count / total if total else 1.0
+    if dst.mean_cpu_time > 0:
+        # Speed factor falls linearly to 0 at a mean of 3600 seconds.
+        speed = max(0.0, 1.0 - dst.mean_cpu_time / 3600.0)
+    else:
+        speed = 1.0
+    dst.grade_score = pass_rate * (1.0 - dst.flake_score) * speed
+
+    # Keep whichever streak has the larger magnitude.
+    if abs(dst.streak) < abs(src.streak):
+        dst.streak = src.streak
+
+    # CUSUM and seed count: keep the larger of the two.
+    for peak_field in ("cusum_value", "total_seeds_seen"):
+        setattr(dst, peak_field,
+                max(getattr(dst, peak_field), getattr(src, peak_field)))
+
+
+def _remap_bucket(compressed: bytes, n_remap: Dict[int, int],
+                  s_remap: Dict[int, int]) -> bytes:
+    """Rewrite a compressed bucket with remapped name and seed ids.
+
+    Ids absent from a remap table are kept as they are.  When no record in
+    the bucket would change, *compressed* itself is returned so the bucket
+    is not re-encoded.
+
+    Args:
+        compressed: Sealed bucket bytes readable by ``BucketReader``.
+        n_remap: Old ``name_id`` → new ``name_id``.
+        s_remap: Old ``seed_id`` → new ``seed_id``.
+
+    Returns:
+        The original bytes, or a newly sealed (LZMA) bucket.
+    """
+    from .history_buckets import BucketReader, BucketWriter
+
+    def _new_name(rec) -> int:
+        return n_remap.get(rec.name_id, rec.name_id)
+
+    def _new_seed(rec) -> int:
+        return s_remap.get(rec.seed_id, rec.seed_id)
+
+    records = list(BucketReader(compressed).all_records())
+
+    # Fast path: leave the bucket alone unless at least one id changes.
+    for rec in records:
+        if _new_name(rec) != rec.name_id or _new_seed(rec) != rec.seed_id:
+            break
+    else:
+        return compressed
+
+    # Records are re-added ordered by (new name id, timestamp).
+    records.sort(key=lambda rec: (_new_name(rec), rec.ts))
+    out = BucketWriter()
+    for rec in records:
+        out.add(
+            name_id=_new_name(rec),
+            seed_id=_new_seed(rec),
+            ts=rec.ts,
+            status=rec.status,
+            flags=rec.flags,
+        )
+    return out.seal(use_lzma=True)
+
diff --git a/src/ucis/ncdb/ncdb_reader.py b/src/ucis/ncdb/ncdb_reader.py
index c335350..41a1097 100644
--- a/src/ucis/ncdb/ncdb_reader.py
+++ b/src/ucis/ncdb/ncdb_reader.py
@@ -28,6 +28,10 @@
     MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_CONTRIB_DIR, MEMBER_FORMAL,
     NCDB_FORMAT,
     MEMBER_COVERITEM_FLAGS,
+    MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS,
+    MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG,
+    HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2,
+    MEMBER_TESTPLAN, MEMBER_WAIVERS,
 )
 
 from ucis.mem.mem_ucis import MemUCIS
@@ -74,25 +78,28 @@ class NcdbReader:
     def read(self, path: str) -> MemUCIS:
         with zipfile.ZipFile(path, "r") as zf:
             names = zf.namelist()
-            manifest_bytes    = zf.read(MEMBER_MANIFEST)
-            strings_bytes     = zf.read(MEMBER_STRINGS)
-            scope_tree_bytes  = zf.read(MEMBER_SCOPE_TREE)
-            counts_bytes      = zf.read(MEMBER_COUNTS)
-            history_bytes     = zf.read(MEMBER_HISTORY)
-            sources_bytes     = zf.read(MEMBER_SOURCES)
-            attrs_bytes  = zf.read(MEMBER_ATTRS)       if MEMBER_ATTRS       in names else b''
-            tags_bytes   = zf.read(MEMBER_TAGS)        if MEMBER_TAGS        in names else b''
-            props_bytes  = zf.read(MEMBER_PROPERTIES)  if MEMBER_PROPERTIES  in names else b''
-            toggle_bytes = zf.read(MEMBER_TOGGLE)        if MEMBER_TOGGLE        in names else b''
-            fsm_bytes    = zf.read(MEMBER_FSM)           if MEMBER_FSM           in names else b''
-            cross_bytes  = zf.read(MEMBER_CROSS)         if MEMBER_CROSS         in names else b''
-            du_bytes     = zf.read(MEMBER_DESIGN_UNITS)  if MEMBER_DESIGN_UNITS  in names else b''
-            formal_bytes = zf.read(MEMBER_FORMAL)         if MEMBER_FORMAL         in names else b''
-            ci_flags_bytes = zf.read(MEMBER_COVERITEM_FLAGS) if MEMBER_COVERITEM_FLAGS in names else b''
-            # Collect all contrib/* members
-            contrib_members = {
-                n: zf.read(n) for n in names if n.startswith(MEMBER_CONTRIB_DIR)
-            }
+            # Read all members into a dict for uniform access
+            zf_data = {n: zf.read(n) for n in names}
+
+        manifest_bytes    = zf_data[MEMBER_MANIFEST]
+        strings_bytes     = zf_data[MEMBER_STRINGS]
+        scope_tree_bytes  = zf_data[MEMBER_SCOPE_TREE]
+        counts_bytes      = zf_data[MEMBER_COUNTS]
+        history_bytes     = zf_data[MEMBER_HISTORY]
+        sources_bytes     = zf_data[MEMBER_SOURCES]
+        attrs_bytes  = zf_data.get(MEMBER_ATTRS,             b'')
+        tags_bytes   = zf_data.get(MEMBER_TAGS,              b'')
+        props_bytes  = zf_data.get(MEMBER_PROPERTIES,        b'')
+        toggle_bytes = zf_data.get(MEMBER_TOGGLE,            b'')
+        fsm_bytes    = zf_data.get(MEMBER_FSM,               b'')
+        cross_bytes  = zf_data.get(MEMBER_CROSS,             b'')
+        du_bytes     = zf_data.get(MEMBER_DESIGN_UNITS,      b'')
+        formal_bytes = zf_data.get(MEMBER_FORMAL,            b'')
+        ci_flags_bytes = zf_data.get(MEMBER_COVERITEM_FLAGS, b'')
+        # Collect all contrib/* members
+        contrib_members = {
+            n: zf_data[n] for n in names if n.startswith(MEMBER_CONTRIB_DIR)
+        }
 
         manifest = Manifest.from_bytes(manifest_bytes)
         if manifest.format != NCDB_FORMAT:
@@ -199,4 +206,71 @@ def read(self, path: str) -> MemUCIS:
         if attrs_bytes:
             AttrsReader().deserialize(attrs_bytes, db)
 
+        # v2 binary history members (optional — present only in v2 archives)
+        if manifest.history_format == HISTORY_FORMAT_V2:
+            _load_v2_history(db, {name: zf_data.get(name, b'')
+                                  for name in (MEMBER_TEST_REGISTRY,
+                                               MEMBER_TEST_STATS,
+                                               MEMBER_BUCKET_INDEX,
+                                               MEMBER_CONTRIB_INDEX,
+                                               MEMBER_SQUASH_LOG)},
+                             {n: d for n, d in zf_data.items()
+                              if n.startswith(HISTORY_BUCKET_DIR)
+                              and n.endswith(".bin")
+                              and n != MEMBER_BUCKET_INDEX})
+
+        # Testplan (optional)
+        testplan_raw = zf_data.get(MEMBER_TESTPLAN, b'')
+        if testplan_raw:
+            from .testplan import Testplan
+            db._testplan = Testplan.from_bytes(testplan_raw)
+            db._loaded_testplan = True
+
+        # Waivers (optional)
+        waivers_raw = zf_data.get(MEMBER_WAIVERS, b'')
+        if waivers_raw:
+            from .waivers import WaiverSet
+            db._waivers = WaiverSet.from_bytes(waivers_raw)
+            db._loaded_waivers = True
+
         return db
+
+
+def _load_v2_history(db: MemUCIS, v2_members: dict, bucket_data: dict) -> None:
+    """Populate the v2 binary-history attributes on *db*.
+
+    Intended for callers that go through NcdbReader and therefore hold a
+    plain MemUCIS: the objects are stored under the same private attribute
+    names that NcdbUCIS uses.
+
+    Args:
+        db: Database object that receives the attributes.
+        v2_members: Member name → raw bytes for the fixed v2 members; an
+            empty or missing value yields a freshly constructed object.
+        bucket_data: Bucket member name → compressed bucket bytes.
+    """
+    from .test_registry import TestRegistry
+    from .test_stats import TestStatsTable
+    from .bucket_index import BucketIndex
+    from .contrib_index import ContribIndex, POLICY_PASS_ONLY
+    from .squash_log import SquashLog
+
+    def _blob(member: str) -> bytes:
+        return v2_members.get(member, b'')
+
+    raw = _blob(MEMBER_TEST_REGISTRY)
+    db._test_registry = TestRegistry.deserialize(raw) if raw else TestRegistry()
+
+    raw = _blob(MEMBER_TEST_STATS)
+    db._test_stats = TestStatsTable.deserialize(raw) if raw else TestStatsTable()
+
+    raw = _blob(MEMBER_BUCKET_INDEX)
+    db._bucket_index = BucketIndex.deserialize(raw) if raw else BucketIndex()
+
+    raw = _blob(MEMBER_CONTRIB_INDEX)
+    if raw:
+        db._contrib_index = ContribIndex.deserialize(raw)
+    else:
+        db._contrib_index = ContribIndex(merge_policy=POLICY_PASS_ONLY)
+
+    raw = _blob(MEMBER_SQUASH_LOG)
+    db._squash_log = SquashLog.deserialize(raw) if raw else SquashLog()
+
+    # Key each bucket by the integer before the first "." of its file name;
+    # members whose name does not parse as an integer are skipped.
+    db._sealed_buckets = sealed = {}
+    prefix_len = len(HISTORY_BUCKET_DIR)
+    for member, data in bucket_data.items():
+        stem = member[prefix_len:].split(".")[0]
+        try:
+            seq = int(stem)
+        except ValueError:
+            continue
+        sealed[seq] = data
diff --git a/src/ucis/ncdb/ncdb_ucis.py b/src/ucis/ncdb/ncdb_ucis.py
index 18c8016..be1d1da 100644
--- a/src/ucis/ncdb/ncdb_ucis.py
+++ b/src/ucis/ncdb/ncdb_ucis.py
@@ -13,10 +13,20 @@
         ...                          # only history.json is parsed here
     for scope in db.scopes(...):
         ...                          # scope_tree + counts parsed on first call
+
+Binary history v2 usage::
+
+    db = NcdbUCIS("coverage.cdb")
+    run_id = db.add_test_run("uart_smoke", seed="12345",
+                             status=HIST_STATUS_OK, has_coverage=True)
+    entry  = db.get_test_stats("uart_smoke")
+    print(entry.flake_score)
 """
 
+import time
 import zipfile
 import json
+from typing import Dict, List, Optional
 
 from ucis.mem.mem_ucis import MemUCIS
 from ucis.history_node_kind import HistoryNodeKind
@@ -27,6 +37,10 @@
     MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES,
     MEMBER_TOGGLE, MEMBER_FSM, MEMBER_CROSS, MEMBER_DESIGN_UNITS,
     MEMBER_CONTRIB_DIR, MEMBER_FORMAL,
+    MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS,
+    MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG,
+    HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2,
+    HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE,
     NCDB_FORMAT,
 )
 from .manifest import Manifest
@@ -43,6 +57,7 @@ class NcdbUCIS(MemUCIS):
     - **history**: loaded when ``historyNodes()`` is first called.
     - **scopes**: loaded when ``scopes()`` or any scope-creation method is
       called for the first time.
+    - **v2_history**: loaded on demand when any v2 API method is called.
 
     Once loaded, a unit is never re-read.
     """
@@ -53,9 +68,30 @@ def __init__(self, path: str):
         self._loaded_history = False
         self._loaded_scopes = False
         self._loaded_attrs = False
+        self._loaded_v2_history = False
         self._du_index: dict = {}   # name → DU scope (populated after _ensure_scopes)
         self._zf_cache: dict = {}   # member name → bytes (populated on first open)
 
+        # Binary history v2 state (None until _ensure_v2_history() is called)
+        self._test_registry = None
+        self._test_stats = None
+        self._bucket_index = None
+        self._contrib_index = None
+        self._squash_log = None
+        self._current_bucket_writer = None
+        self._sealed_buckets: Dict[int, bytes] = {}  # seq → compressed bytes
+        self._history_v2_dirty: bool = False
+
+        # Testplan lazy state
+        self._loaded_testplan: bool = False
+        self._testplan = None          # Optional[Testplan]
+        self._testplan_dirty: bool = False
+
+        # Waivers lazy state
+        self._loaded_waivers: bool = False
+        self._waivers = None           # Optional[WaiverSet]
+        self._waivers_dirty: bool = False
+
     # ── Public extra API ──────────────────────────────────────────────────
 
     @property
@@ -73,6 +109,295 @@ def getDesignUnit(self, name: str):
         self._ensure_scopes()
         return self._du_index.get(name)
 
+    # ── Binary history v2 API ─────────────────────────────────────────────
+
+    def add_test_run(self, name: str, seed="0", status: int = 0,
+                     ts: Optional[int] = None,
+                     cpu_time: Optional[float] = None,
+                     has_coverage: bool = False,
+                     is_rerun: bool = False) -> int:
+        """Append one test run to the binary history store.
+
+        The v2 history state is loaded (or created empty) on first use, so
+        no explicit opt-in is needed before calling this method.
+
+        Args:
+            name:         Test base-name (e.g. ``"uart_smoke"``).
+            seed:         Test seed; stored as ``str(seed)``.
+            status:       One of the ``HIST_STATUS_*`` constants.
+            ts:           Unix timestamp; ``int(time.time())`` when omitted.
+            cpu_time:     CPU/wall time in seconds (optional).
+            has_coverage: True if this run produced coverage data; such runs
+                          are also entered in the contrib index.
+            is_rerun:     True if this is a retry of a previously-failed run.
+
+        Returns:
+            The run_id assigned to this run.
+        """
+        self._ensure_v2_history()
+        when = int(time.time()) if ts is None else ts
+
+        registry = self._test_registry
+        name_id = registry.lookup_name_id(name)
+        seed_id = registry.lookup_seed_id(str(seed))
+        run_id = registry.assign_run_id()
+
+        self._test_stats.update(name_id, status, when,
+                                cpu_time=cpu_time, seed_id=seed_id)
+
+        flags = ((HIST_FLAG_IS_RERUN if is_rerun else 0)
+                 | (HIST_FLAG_HAS_COVERAGE if has_coverage else 0))
+        writer = self._current_bucket_writer
+        writer.add(name_id, seed_id, when, status, flags)
+        if writer.is_full():
+            self._seal_current_bucket()
+
+        if has_coverage:
+            from .contrib_index import FLAG_IS_RERUN as CI_IS_RERUN
+            self._contrib_index.add_entry(
+                run_id, name_id, status, CI_IS_RERUN if is_rerun else 0)
+
+        self._history_v2_dirty = True
+        return run_id
+
+    def query_test_history(self, name: str,
+                           ts_from: Optional[int] = None,
+                           ts_to: Optional[int] = None) -> list:
+        """Return all BucketRecord objects for *name* across all buckets.
+
+        Sealed buckets selected by the bucket index are scanned first, then
+        the current (unsaved) bucket.  Records are returned in that scan
+        order; the combined list is not re-sorted.
+
+        Reading the unsaved bucket is best-effort: if it fails, a
+        ``RuntimeWarning`` is issued and the records found in sealed buckets
+        are still returned.
+
+        Args:
+            name:    Test name to query.
+            ts_from: Optional lower bound timestamp (inclusive).
+            ts_to:   Optional upper bound timestamp (inclusive).
+
+        Returns:
+            List of :class:`~ucis.ncdb.history_buckets.BucketRecord`.
+        """
+        from .history_buckets import BucketReader
+
+        self._ensure_v2_history()
+        if name not in self._test_registry._name_to_id:
+            return []
+        name_id = self._test_registry._name_to_id[name]
+
+        def _matching(data: bytes) -> list:
+            # Decode one bucket and keep this test's records inside the
+            # requested (inclusive) timestamp window.
+            return [
+                r for r in BucketReader(data).records_for_name(name_id)
+                if (ts_from is None or r.ts >= ts_from)
+                and (ts_to is None or r.ts <= ts_to)
+            ]
+
+        results = []
+        for entry in self._bucket_index.buckets_for_name(
+                name_id, ts_from=ts_from, ts_to=ts_to):
+            seq = entry.bucket_seq
+            data = self._sealed_buckets.get(seq)
+            if data is None:
+                # Not held in memory: fall back to the ZIP member cache.
+                self._read_zip()
+                data = self._zf_cache.get(f"{HISTORY_BUCKET_DIR}{seq:06d}.bin")
+                if data is None:
+                    continue
+            results.extend(_matching(data))
+
+        # Also check the current (unsaved) bucket
+        writer = self._current_bucket_writer
+        if writer is not None and writer.num_records > 0:
+            try:
+                results.extend(_matching(writer.seal_fast()))
+            except Exception as exc:
+                # Stay best-effort, but make the failure visible instead of
+                # silently returning an incomplete history.
+                import warnings
+                warnings.warn(
+                    f"query_test_history: could not read unsaved records "
+                    f"for {name!r}: {exc}",
+                    RuntimeWarning,
+                    stacklevel=2,
+                )
+
+        return results
+
+    def get_test_stats(self, name: str):
+        """Look up the aggregate statistics recorded for test *name*.
+
+        Returns:
+            :class:`~ucis.ncdb.test_stats.TestStatsEntry`, or None when the
+            name is not registered, has no stats entry, or its entry shows
+            zero total runs.
+        """
+        self._ensure_v2_history()
+        ids = self._test_registry._name_to_id
+        if name not in ids:
+            return None
+        entry = self._test_stats.get(ids[name])
+        if entry is None or entry.total_runs != 0:
+            return entry
+        return None
+
+    def top_flaky_tests(self, n: int = 20) -> list:
+        """Return the *n* flakiest tests as ranked by the stats table.
+
+        Args:
+            n: Maximum number of entries requested.
+
+        Returns:
+            List of :class:`~ucis.ncdb.test_stats.TestStatsEntry`.
+        """
+        self._ensure_v2_history()
+        stats = self._test_stats
+        return stats.top_flaky(n)
+
+    def top_failing_tests(self, n: int = 20) -> list:
+        """Return the *n* most consistently failing tests from the stats table.
+
+        Args:
+            n: Maximum number of entries requested.
+
+        Returns:
+            List of :class:`~ucis.ncdb.test_stats.TestStatsEntry`.
+        """
+        self._ensure_v2_history()
+        stats = self._test_stats
+        return stats.top_failing(n)
+
+    def squash_coverage(self, policy: int = 1) -> None:
+        """Retire all active contrib entries and log the squash.
+
+        Appends one squash-log record covering the range from the previous
+        watermark to the highest run_id in the contrib index, removes the
+        contrib entries up to that run_id, advances the watermark, and marks
+        the v2 history dirty.
+
+        NOTE(review): this method only updates the bookkeeping; it does not
+        itself touch counts.bin — presumably the merged counts are produced
+        elsewhere; confirm.
+
+        Args:
+            policy: Merge policy constant from :mod:`~ucis.ncdb.contrib_index`.
+        """
+        self._ensure_v2_history()
+        contrib = self._contrib_index
+
+        # Snapshot everything the log entry needs before entries are removed.
+        passing = contrib.passing_run_ids(policy)
+        watermark = contrib.max_run_id()
+        from_run = contrib.squash_watermark
+        num_runs = contrib.num_active
+
+        self._squash_log.append(
+            ts=int(time.time()),
+            policy=policy,
+            from_run=from_run,
+            to_run=watermark,
+            num_runs=num_runs,
+            pass_runs=len(passing),
+        )
+        contrib.remove_entries_up_to(watermark)
+        contrib.set_squash_watermark(watermark)
+        self._history_v2_dirty = True
+
+    def get_v2_members(self) -> Dict[str, bytes]:
+        """Return a dict of member-name → bytes for all v2 binary members.
+
+        Called by NcdbWriter to include v2 data in the ZIP output.
+
+        If the v2 state has not been loaded yet but the backing archive
+        already contains a test registry member, the state is loaded first
+        so that re-writing the database carries its existing history over.
+        Returns an empty dict when there is no v2 history at all.
+
+        The in-memory bucket index and the open bucket writer are not
+        modified: the open bucket is emitted under the next free sequence
+        number together with a copy of the index that describes it.
+        """
+        from .bucket_index import BucketIndex
+        from .constants import HIST_STATUS_FAIL
+
+        if not self._loaded_v2_history:
+            self._read_zip()
+            if MEMBER_TEST_REGISTRY in self._zf_cache:
+                self._ensure_v2_history()
+        if self._test_registry is None:
+            return {}
+
+        members: Dict[str, bytes] = {
+            MEMBER_TEST_REGISTRY: self._test_registry.serialize(),
+            MEMBER_TEST_STATS: self._test_stats.serialize(),
+            MEMBER_CONTRIB_INDEX: self._contrib_index.serialize(),
+            MEMBER_SQUASH_LOG: self._squash_log.serialize(),
+        }
+
+        # Sealed buckets (copy verbatim — already compressed)
+        for seq, data in self._sealed_buckets.items():
+            members[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = data
+
+        # Current (open) bucket — fast DEFLATE; add synthetic index entry so
+        # the merger (and reader) can discover it via bucket_index.
+        out_bidx = self._bucket_index
+        writer = self._current_bucket_writer
+        if writer is not None and writer.num_records > 0:
+            seq = self._bucket_index.next_seq()
+            members[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = writer.seal_fast()
+
+            recs = writer._records
+            # Serialize/deserialize round-trip yields an independent copy, so
+            # the synthetic entry never reaches the live index.
+            out_bidx = BucketIndex.deserialize(self._bucket_index.serialize())
+            out_bidx.add_bucket(
+                seq,
+                min(r.ts for r in recs),
+                max(r.ts for r in recs),
+                len(recs),
+                sum(1 for r in recs if r.status == HIST_STATUS_FAIL),
+                min(r.name_id for r in recs),
+                max(r.name_id for r in recs),
+            )
+
+        members[MEMBER_BUCKET_INDEX] = out_bidx.serialize()
+        return members
+
+    # ── Testplan API ──────────────────────────────────────────────────────
+
+    def getTestplan(self):
+        """Return the testplan embedded in this database, if any.
+
+        The testplan member is read from the archive on first access.
+
+        Returns:
+            :class:`~ucis.ncdb.testplan.Testplan` or ``None``.
+        """
+        if not self._loaded_testplan:
+            self._ensure_testplan()
+        return self._testplan
+
+    def setTestplan(self, tp) -> None:
+        """Attach *tp* to this database as its embedded testplan.
+
+        If *tp* has no import timestamp yet, it is stamped here.  The
+        testplan is written to ``testplan.json`` on the next
+        :meth:`~ucis.ncdb.ncdb_writer.NcdbWriter.write` call.
+
+        Args:
+            tp: :class:`~ucis.ncdb.testplan.Testplan` instance.
+        """
+        if not tp.import_timestamp:
+            tp.stamp_import_time()
+        self._testplan = tp
+        # Marking it loaded stops a later lazy read from replacing *tp*.
+        self._testplan_dirty, self._loaded_testplan = True, True
+
+    def _ensure_testplan(self) -> None:
+        """Load the testplan member from the archive once, if present."""
+        if not self._loaded_testplan:
+            # The flag is set before reading, so the load is attempted once.
+            self._loaded_testplan = True
+            self._read_zip()
+            from .constants import MEMBER_TESTPLAN
+            payload = self._zf_cache.get(MEMBER_TESTPLAN)
+            if payload:
+                from .testplan import Testplan
+                self._testplan = Testplan.from_bytes(payload)
+
+    # ── Waivers API ───────────────────────────────────────────────────────
+
+    def getWaivers(self):
+        """Return the waiver set embedded in this database, if any.
+
+        The waivers member is read from the archive on first access.
+
+        Returns:
+            :class:`~ucis.ncdb.waivers.WaiverSet` or ``None``.
+        """
+        if not self._loaded_waivers:
+            self._ensure_waivers()
+        return self._waivers
+
+    def setWaivers(self, ws) -> None:
+        """Attach *ws* to this database as its embedded waiver set.
+
+        Args:
+            ws: :class:`~ucis.ncdb.waivers.WaiverSet` instance.
+        """
+        self._waivers = ws
+        # Marking it loaded stops a later lazy read from replacing *ws*.
+        self._waivers_dirty, self._loaded_waivers = True, True
+
+    def _ensure_waivers(self) -> None:
+        """Load the waivers member from the archive once, if present."""
+        if not self._loaded_waivers:
+            # The flag is set before reading, so the load is attempted once.
+            self._loaded_waivers = True
+            self._read_zip()
+            from .constants import MEMBER_WAIVERS
+            payload = self._zf_cache.get(MEMBER_WAIVERS)
+            if payload:
+                from .waivers import WaiverSet
+                self._waivers = WaiverSet.from_bytes(payload)
+
     # ── MemUCIS overrides — trigger lazy loads ─────────────────────────
 
     def historyNodes(self, kind: HistoryNodeKind):
@@ -187,6 +512,94 @@ def _ensure_scopes(self) -> None:
             from .formal import FormalReader
             FormalReader().apply(self, formal_data)
 
+    def _ensure_v2_history(self) -> None:
+        """Make the v2 history state available, loading it at most once.
+
+        Members found in the ZIP cache are deserialized; anything missing is
+        replaced by an empty object.
+        """
+        if not self._loaded_v2_history:
+            self._loaded_v2_history = True
+            self._read_zip()
+            self._load_v2_history(self._zf_cache)
+
+    def _load_v2_history(self, zf_cache: dict) -> None:
+        """Build the v2 history objects from *zf_cache*.
+
+        Args:
+            zf_cache: Member name → raw bytes.  A fixed member that is
+                absent is replaced by a freshly constructed empty object.
+        """
+        from .test_registry import TestRegistry
+        from .test_stats import TestStatsTable
+        from .bucket_index import BucketIndex
+        from .contrib_index import ContribIndex, POLICY_PASS_ONLY
+        from .squash_log import SquashLog
+        from .history_buckets import BucketWriter
+
+        def _load(member, decode, make_default):
+            # Decode the member when the archive has it, else start empty.
+            if member in zf_cache:
+                return decode(zf_cache[member])
+            return make_default()
+
+        self._test_registry = _load(
+            MEMBER_TEST_REGISTRY, TestRegistry.deserialize, TestRegistry)
+        self._test_stats = _load(
+            MEMBER_TEST_STATS, TestStatsTable.deserialize, TestStatsTable)
+        self._bucket_index = _load(
+            MEMBER_BUCKET_INDEX, BucketIndex.deserialize, BucketIndex)
+        self._contrib_index = _load(
+            MEMBER_CONTRIB_INDEX, ContribIndex.deserialize,
+            lambda: ContribIndex(merge_policy=POLICY_PASS_ONLY))
+        self._squash_log = _load(
+            MEMBER_SQUASH_LOG, SquashLog.deserialize, SquashLog)
+
+        # Keep every sealed bucket as its compressed bytes, keyed by the
+        # sequence number in its file name ("history/000001.bin" → 1).
+        # Names that do not parse as an integer are skipped.
+        self._sealed_buckets = sealed = {}
+        for member, data in zf_cache.items():
+            if member == MEMBER_BUCKET_INDEX:
+                continue
+            if not (member.startswith(HISTORY_BUCKET_DIR)
+                    and member.endswith(".bin")):
+                continue
+            stem = member[len(HISTORY_BUCKET_DIR):].split(".")[0]
+            try:
+                sealed[int(stem)] = data
+            except ValueError:
+                continue
+
+        # Records added during this session go into a fresh open bucket.
+        self._current_bucket_writer = BucketWriter()
+
+    def _seal_current_bucket(self) -> None:
+        """Seal the current bucket and start a new one.
+
+        Does nothing when the open bucket holds no records.  Otherwise the
+        bucket is LZMA-sealed under the next free sequence number, kept in
+        ``_sealed_buckets``, described in the bucket index, and replaced by
+        a fresh ``BucketWriter``.
+        """
+        from .constants import HIST_STATUS_FAIL
+        from .history_buckets import BucketWriter, BucketReader
+        w = self._current_bucket_writer
+        if w.num_records == 0:
+            return
+        seq = self._bucket_index.next_seq()
+        data = w.seal(use_lzma=True)
+        self._sealed_buckets[seq] = data
+
+        # Build bucket index entry from reader
+        all_recs = list(BucketReader(data).all_records())
+        timestamps = [r.ts for r in all_recs]
+        name_ids = [r.name_id for r in all_recs]
+        # Count only HIST_STATUS_FAIL records, the same rule get_v2_members
+        # applies to the open bucket, so a bucket gets the same fail_count
+        # whichever path writes its index entry.
+        # NOTE(review): previously this counted every status != 0; confirm
+        # that other non-OK statuses are not meant to be included.
+        fail_count = sum(1 for r in all_recs if r.status == HIST_STATUS_FAIL)
+        self._bucket_index.add_bucket(
+            seq, min(timestamps), max(timestamps),
+            num_records=w.num_records,
+            fail_count=fail_count,
+            min_name_id=min(name_ids),
+            max_name_id=max(name_ids),
+        )
+        self._current_bucket_writer = BucketWriter()
+
 
 def _load_history(db: MemUCIS, history_bytes: bytes) -> None:
     """Deserialize history.json and populate *db* with history nodes."""
diff --git a/src/ucis/ncdb/ncdb_writer.py b/src/ucis/ncdb/ncdb_writer.py
index 6db9733..83beac7 100644
--- a/src/ucis/ncdb/ncdb_writer.py
+++ b/src/ucis/ncdb/ncdb_writer.py
@@ -26,7 +26,8 @@
     MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES,
     MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, MEMBER_TOGGLE, MEMBER_FSM,
     MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_FORMAL,
-    MEMBER_COVERITEM_FLAGS,
+    MEMBER_COVERITEM_FLAGS, MEMBER_TESTPLAN, MEMBER_WAIVERS,
+    HISTORY_FORMAT_V2,
 )
 
 from ucis.history_node_kind import HistoryNodeKind
@@ -88,6 +89,14 @@ def write(self, db, path: str) -> None:
 
         # 7. Manifest
         manifest = Manifest.build(db, scope_tree_bytes, counts, all_nodes)
+
+        # Check for v2 binary history members (from NcdbUCIS.get_v2_members)
+        v2_members = {}
+        if hasattr(db, 'get_v2_members'):
+            v2_members = db.get_v2_members()
+        if v2_members:
+            manifest.history_format = HISTORY_FORMAT_V2
+
         manifest_bytes = manifest.serialize()
 
         # 8. Write ZIP
@@ -119,3 +128,15 @@ def write(self, db, path: str) -> None:
                 zf.writestr(MEMBER_FORMAL, formal_bytes)
             if ci_flags_bytes:
                 zf.writestr(MEMBER_COVERITEM_FLAGS, ci_flags_bytes)
+            # v2 binary history members (stored uncompressed — pre-compressed)
+            for member_name, member_bytes in v2_members.items():
+                zf.writestr(member_name, member_bytes,
+                            compress_type=zipfile.ZIP_STORED)
+            # Testplan (optional)
+            testplan = getattr(db, '_testplan', None)
+            if testplan is not None:
+                zf.writestr(MEMBER_TESTPLAN, testplan.serialize())
+            # Waivers (optional)
+            waivers = getattr(db, '_waivers', None)
+            if waivers is not None:
+                zf.writestr(MEMBER_WAIVERS, waivers.serialize())
diff --git a/src/ucis/ncdb/reports.py b/src/ucis/ncdb/reports.py
new file mode 100644
index 0000000..fe4ce95
--- /dev/null
+++ b/src/ucis/ncdb/reports.py
@@ -0,0 +1,1192 @@
+"""Structured reports for testplan closure, stage gates, and test history.
+
+Every report function returns a typed dataclass with a ``to_json()`` method.
+A companion ``format_*()`` function renders the dataclass to a human-readable
+string suitable for terminal output.  The CLI calls the formatter; automated
+consumers use the dataclass or ``to_json()``.
+"""
+
+from __future__ import annotations
+
+import json
+import math
+import time
+from dataclasses import dataclass, field, asdict
+from typing import Dict, List, Optional, Tuple
+
+from ucis.ncdb.testplan import Testplan, Testpoint, get_testplan
+from ucis.ncdb.testplan_closure import (
+    TPStatus,
+    TestpointResult,
+    compute_closure,
+    stage_gate_status,
+    _STAGE_ORDER,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_STATUS_ICON = {
+    TPStatus.CLOSED: "✓",
+    TPStatus.PARTIAL: "~",
+    TPStatus.FAILING: "✗",
+    TPStatus.NOT_RUN: "?",
+    TPStatus.NA: "N/A",
+    TPStatus.UNIMPLEMENTED: "-",
+}
+
+_STATUS_LABEL = {
+    TPStatus.CLOSED: "CLOSED",
+    TPStatus.PARTIAL: "PARTIAL",
+    TPStatus.FAILING: "FAILING",
+    TPStatus.NOT_RUN: "NOT_RUN",
+    TPStatus.NA: "N/A",
+    TPStatus.UNIMPLEMENTED: "UNIMP",
+}
+
+
+def _pct(num: int, den: int) -> float:
+    return round(100.0 * num / den, 1) if den else 0.0
+
+
+# ---------------------------------------------------------------------------
+# Report A — testpoint closure table
+# ---------------------------------------------------------------------------
+
+@dataclass
+class ClosureSummary:
+    """Result of ``report_testpoint_closure()``.
+
+    Args:
+        results: Per-testpoint closure results.
+        by_stage: Stage-level roll-up: stage → {closed, total, pct}.
+        total_closed: Number of testpoints with status CLOSED.
+        total_na: Number of testpoints with status N/A.
+        total: Total testpoint count.
+    """
+    results: List[TestpointResult]
+    by_stage: Dict[str, Dict]
+    total_closed: int
+    total_na: int
+    total: int
+
+    def to_json(self) -> str:
+        d = {
+            "total": self.total,
+            "total_closed": self.total_closed,
+            "total_na": self.total_na,
+            "by_stage": self.by_stage,
+            "testpoints": [
+                {
+                    "name": r.testpoint.name,
+                    "stage": r.testpoint.stage,
+                    "status": r.status.value,
+                    "pass_count": r.pass_count,
+                    "fail_count": r.fail_count,
+                    "matched_tests": r.matched_tests,
+                }
+                for r in self.results
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_testpoint_closure(results: List[TestpointResult]) -> ClosureSummary:
+    """Compute a closure summary from testpoint results.
+
+    Args:
+        results: List of :class:`~ucis.ncdb.testplan_closure.TestpointResult`
+            objects (output of :func:`~ucis.ncdb.testplan_closure.compute_closure`).
+
+    Returns:
+        :class:`ClosureSummary` with per-stage roll-up and totals.
+    """
+    by_stage: Dict[str, Dict] = {}
+    total_closed = 0
+    total_na = 0
+
+    for r in results:
+        stage = r.testpoint.stage or "unknown"
+        entry = by_stage.setdefault(stage, {"closed": 0, "total": 0, "pct": 0.0})
+        if r.status not in (TPStatus.NA, TPStatus.UNIMPLEMENTED):
+            entry["total"] += 1
+            if r.status == TPStatus.CLOSED:
+                entry["closed"] += 1
+        if r.status == TPStatus.CLOSED:
+            total_closed += 1
+        elif r.status == TPStatus.NA:
+            total_na += 1
+
+    for entry in by_stage.values():
+        entry["pct"] = _pct(entry["closed"], entry["total"])
+
+    return ClosureSummary(
+        results=results,
+        by_stage=by_stage,
+        total_closed=total_closed,
+        total_na=total_na,
+        total=len(results),
+    )
+
+
+def format_testpoint_closure(summary: ClosureSummary, *, show_all: bool = False) -> str:
+    """Render a :class:`ClosureSummary` as a terminal table.
+
+    Args:
+        summary: Output of :func:`report_testpoint_closure`.
+        show_all: If False (default), skip N/A and UNIMPLEMENTED rows.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    lines: List[str] = []
+    col_name = 34
+    col_stage = 6
+    col_status = 10
+    col_pass = 6
+    col_fail = 6
+
+    header = (
+        f"{'Testpoint':<{col_name}} "
+        f"{'Stage':<{col_stage}} "
+        f"{'Status':<{col_status}} "
+        f"{'Pass':>{col_pass}} "
+        f"{'Fail':>{col_fail}}"
+    )
+    sep = "-" * len(header)
+    lines.append(header)
+    lines.append(sep)
+
+    for r in summary.results:
+        if not show_all and r.status in (TPStatus.NA, TPStatus.UNIMPLEMENTED):
+            continue
+        icon = _STATUS_ICON[r.status]
+        label = _STATUS_LABEL[r.status]
+        lines.append(
+            f"{r.testpoint.name:<{col_name}} "
+            f"{r.testpoint.stage or '?':<{col_stage}} "
+            f"{icon} {label:<{col_status - 2}} "
+            f"{r.pass_count:>{col_pass}} "
+            f"{r.fail_count:>{col_fail}}"
+        )
+
+    lines.append(sep)
+    # Stage roll-up
+    lines.append("\nStage roll-up:")
+    ordered_stages = sorted(
+        summary.by_stage.items(),
+        key=lambda kv: _STAGE_ORDER.get(kv[0], 999),
+    )
+    for stage, entry in ordered_stages:
+        bar_len = 20
+        filled = round(bar_len * entry["pct"] / 100) if entry["total"] else 0
+        bar = "█" * filled + "░" * (bar_len - filled)
+        lines.append(
+            f"  {stage:<6} [{bar}] "
+            f"{entry['closed']}/{entry['total']} "
+            f"({entry['pct']:.1f}%)"
+        )
+
+    lines.append(
+        f"\nTotal: {summary.total_closed}/{summary.total} closed"
+        f"  ({summary.total_na} N/A)"
+    )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report B — stage gate
+# ---------------------------------------------------------------------------
+
+@dataclass
+class StageGateReport:
+    """Result of ``report_stage_gate()``.
+
+    Args:
+        stage: Target stage (e.g. ``"V2"``).
+        passed: Whether the gate passes.
+        blocking: Testpoints that are not yet CLOSED (and not N/A).
+        message: Human-readable verdict line.
+        gate_detail: Raw detail dict from
+            :func:`~ucis.ncdb.testplan_closure.stage_gate_status`.
+    """
+    stage: str
+    passed: bool
+    blocking: List[TestpointResult]
+    message: str
+    gate_detail: dict
+
+    def to_json(self) -> str:
+        d = {
+            "stage": self.stage,
+            "passed": self.passed,
+            "message": self.message,
+            "blocking": [
+                {
+                    "name": r.testpoint.name,
+                    "stage": r.testpoint.stage,
+                    "status": r.status.value,
+                }
+                for r in self.blocking
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_stage_gate(
+    results: List[TestpointResult],
+    stage: str,
+    testplan: Testplan,
+    require_flake_score_below: Optional[float] = None,
+    require_coverage_pct: Optional[float] = None,
+) -> StageGateReport:
+    """Evaluate a stage gate (go/no-go for advancing to next stage).
+
+    Args:
+        results: Output of :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        stage: Target stage to evaluate (``"V1"``, ``"V2"``, etc.).
+        testplan: The :class:`~ucis.ncdb.testplan.Testplan` being evaluated.
+        require_flake_score_below: Optional flake threshold (0–1).
+        require_coverage_pct: Optional minimum coverage percentage.
+
+    Returns:
+        :class:`StageGateReport`.
+    """
+    gate = stage_gate_status(
+        results,
+        stage,
+        testplan,
+        require_flake_score_below=require_flake_score_below,
+        require_coverage_pct=require_coverage_pct,
+    )
+    blocking = gate.get("blocking", [])
+    passed = gate.get("passed", False)
+    message = gate.get("message", "")
+    return StageGateReport(
+        stage=stage,
+        passed=passed,
+        blocking=blocking,
+        message=message,
+        gate_detail=gate,
+    )
+
+
+def format_stage_gate(report: StageGateReport) -> str:
+    """Render a :class:`StageGateReport` as a terminal summary.
+
+    Args:
+        report: Output of :func:`report_stage_gate`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    lines: List[str] = []
+    verdict = "✓ PASS" if report.passed else "✗ FAIL"
+    lines.append(f"Stage gate [{report.stage}]: {verdict}")
+    lines.append(f"  {report.message}")
+    if report.blocking:
+        lines.append(f"\n  Blocking testpoints ({len(report.blocking)}):")
+        for r in report.blocking:
+            lines.append(
+                f"    [{r.testpoint.stage}] {r.testpoint.name}  "
+                f"— {_STATUS_LABEL[r.status]}"
+            )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report C — coverage per testpoint
+# ---------------------------------------------------------------------------
+
+@dataclass
+class CoveragePerTestpoint:
+    """Per-testpoint coverage table linking matched tests to covergroups.
+
+    Args:
+        rows: List of (testpoint_name, covergroup_name, hit_pct) tuples.
+        unmatched_covergroups: Covergroups that could not be linked to any
+            testpoint via the testplan.
+    """
+    rows: List[Tuple[str, str, float]]
+    unmatched_covergroups: List[str]
+
+    def to_json(self) -> str:
+        d = {
+            "rows": [
+                {"testpoint": tp, "covergroup": cg, "hit_pct": pct}
+                for tp, cg, pct in self.rows
+            ],
+            "unmatched_covergroups": self.unmatched_covergroups,
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_coverage_per_testpoint(
+    results: List[TestpointResult],
+    db,
+    testplan: Testplan,
+) -> CoveragePerTestpoint:
+    """Build a testpoint × covergroup coverage table.
+
+    Each testpoint is paired with its own ``covergroups`` entries when it
+    has any, otherwise with every covergroup listed in the testplan.  Hit
+    percentages are computed from the covergroup scopes found in ``db``.
+
+    Args:
+        results: Output of :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        db: An open UCIS database (any type — MemUCIS, NcdbUCIS, etc.).
+        testplan: The active :class:`~ucis.ncdb.testplan.Testplan`.
+
+    Returns:
+        :class:`CoveragePerTestpoint`.
+    """
+    from ucis.scope_type_t import ScopeTypeT
+    from ucis.cover_type_t import CoverTypeT
+
+    # Build a quick map: covergroup name → hit% from the UCIS tree
+    cg_pct: Dict[str, float] = {}
+    try:
+        for scope in db.getScopes(ScopeTypeT.COVERGROUP):
+            name = scope.getScopeName()
+            total = 0
+            hit = 0
+            for cp in scope.getScopes(ScopeTypeT.COVERPOINT):
+                for b in cp.getCoverItems(CoverTypeT.CVGBIN):
+                    total += 1
+                    if b.getData()[0] > 0:
+                        hit += 1
+            if total:
+                cg_pct[name] = _pct(hit, total)
+            else:
+                cg_pct[name] = 0.0
+    except Exception:
+        pass  # db may not support scope iteration
+
+    rows: List[Tuple[str, str, float]] = []
+    matched_cgs: set = set()
+
+    for r in results:
+        cg_entries = getattr(r.testpoint, "covergroups", None) or []
+        # Use the testpoint's own covergroups; else fall back to all plan ones
+        linked_cgs = cg_entries or testplan.covergroups
+        for cg in linked_cgs:
+            cg_name = cg.name if hasattr(cg, "name") else str(cg)
+            pct = cg_pct.get(cg_name, 0.0)
+            rows.append((r.testpoint.name, cg_name, pct))
+            matched_cgs.add(cg_name)
+
+    unmatched = [cg for cg in cg_pct if cg not in matched_cgs]
+    return CoveragePerTestpoint(rows=rows, unmatched_covergroups=unmatched)
+
+
+def format_coverage_per_testpoint(report: CoveragePerTestpoint) -> str:
+    """Render a :class:`CoveragePerTestpoint` as a terminal table.
+
+    Args:
+        report: Output of :func:`report_coverage_per_testpoint`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    if not report.rows:
+        return "(no testpoint-covergroup links found)"
+
+    col_tp = max((len(r[0]) for r in report.rows), default=10) + 2
+    col_cg = max((len(r[1]) for r in report.rows), default=10) + 2
+    lines: List[str] = []
+    header = f"{'Testpoint':<{col_tp}} {'Covergroup':<{col_cg}} {'Hit%':>6}"
+    lines.append(header)
+    lines.append("-" * len(header))
+    prev_tp = None
+    for tp, cg, pct in report.rows:
+        tp_col = tp if tp != prev_tp else ""
+        prev_tp = tp
+        lines.append(f"{tp_col:<{col_tp}} {cg:<{col_cg}} {pct:>6.1f}%")
+    if report.unmatched_covergroups:
+        lines.append(
+            f"\nUnmatched covergroups: "
+            + ", ".join(report.unmatched_covergroups)
+        )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report D — regression delta
+# ---------------------------------------------------------------------------
+
+@dataclass
+class RegressionDelta:
+    """Delta between two closure result sets.
+
+    Args:
+        newly_closed: Testpoints that moved to CLOSED.
+        newly_failing: Testpoints that were not FAILING but are now.
+        unchanged_open: Testpoints that remain open/partial.
+        summary: One-line summary string.
+    """
+    newly_closed: List[TestpointResult]
+    newly_failing: List[TestpointResult]
+    unchanged_open: List[TestpointResult]
+    summary: str
+
+    def to_json(self) -> str:
+        def _names(lst):
+            return [r.testpoint.name for r in lst]
+
+        d = {
+            "newly_closed": _names(self.newly_closed),
+            "newly_failing": _names(self.newly_failing),
+            "unchanged_open": _names(self.unchanged_open),
+            "summary": self.summary,
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_regression_delta(
+    results_new: List[TestpointResult],
+    results_old: List[TestpointResult],
+) -> RegressionDelta:
+    """Compute the testplan closure delta between two regression runs.
+
+    Args:
+        results_new: Closure results for the current regression.
+        results_old: Closure results for the baseline regression.
+
+    Returns:
+        :class:`RegressionDelta`.
+    """
+    old_map = {r.testpoint.name: r.status for r in results_old}
+    newly_closed: List[TestpointResult] = []
+    newly_failing: List[TestpointResult] = []
+    unchanged_open: List[TestpointResult] = []
+
+    for r in results_new:
+        old_status = old_map.get(r.testpoint.name)
+        if r.status == TPStatus.CLOSED and old_status != TPStatus.CLOSED:
+            newly_closed.append(r)
+        elif r.status == TPStatus.FAILING and old_status not in (
+            TPStatus.FAILING,
+            None,
+        ):
+            newly_failing.append(r)
+        elif r.status not in (TPStatus.CLOSED, TPStatus.NA):
+            unchanged_open.append(r)
+
+    summary = (
+        f"+{len(newly_closed)} closed, "
+        f"-{len(newly_failing)} newly failing, "
+        f"{len(unchanged_open)} still open"
+    )
+    return RegressionDelta(
+        newly_closed=newly_closed,
+        newly_failing=newly_failing,
+        unchanged_open=unchanged_open,
+        summary=summary,
+    )
+
+
+def format_regression_delta(report: RegressionDelta) -> str:
+    """Render a :class:`RegressionDelta` as a terminal summary.
+
+    Args:
+        report: Output of :func:`report_regression_delta`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    lines: List[str] = [f"Regression delta: {report.summary}"]
+    if report.newly_closed:
+        lines.append(f"\n  Newly closed ({len(report.newly_closed)}):")
+        for r in report.newly_closed:
+            lines.append(f"    ✓ [{r.testpoint.stage}] {r.testpoint.name}")
+    if report.newly_failing:
+        lines.append(f"\n  Newly failing ({len(report.newly_failing)}):")
+        for r in report.newly_failing:
+            lines.append(f"    ✗ [{r.testpoint.stage}] {r.testpoint.name}")
+    if report.unchanged_open:
+        lines.append(f"\n  Still open ({len(report.unchanged_open)}):")
+        for r in report.unchanged_open:
+            lines.append(
+                f"    ~ [{r.testpoint.stage}] {r.testpoint.name}"
+                f"  — {_STATUS_LABEL[r.status]}"
+            )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report E — stage progression over time  (P1)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class StageProgression:
+    """Stage closure progression over time (requires v2 history).
+
+    Args:
+        stage: Target stage.
+        series: List of (timestamp, closed_count, total) tuples,
+            oldest first.
+        current_pct: Most-recent closure percentage.
+    """
+    stage: str
+    series: List[Tuple[int, int, int]]
+    current_pct: float
+
+    def to_json(self) -> str:
+        d = {
+            "stage": self.stage,
+            "current_pct": self.current_pct,
+            "series": [
+                {"ts": ts, "closed": c, "total": t} for ts, c, t in self.series
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_stage_progression(db, testplan: Testplan, stage: str) -> StageProgression:
+    """Compute stage closure percentage over time from v2 history buckets.
+
+    Uses the bucket index to sample closure state at bucket boundary
+    timestamps.  Falls back to an empty series when no v2 history is
+    available.  Each testpoint is counted at most once per sample.
+
+    Args:
+        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
+        testplan: The active testplan.
+        stage: Stage to evaluate (e.g. ``"V2"``).
+
+    Returns:
+        :class:`StageProgression`.
+    """
+    series: List[Tuple[int, int, int]] = []
+    stage_tps = list(testplan.testpointsForStage(stage, include_lower=True))
+    if not stage_tps:
+        return StageProgression(stage=stage, series=[], current_pct=0.0)
+
+    # Try to build a time-series from the bucket index
+    try:
+        db._ensure_v2_history()
+        bidx = db._bucket_index
+        if bidx is not None:
+            entries = list(bidx._entries)
+            # Accumulate by walking buckets in order
+            passed_names: set = set()
+            for entry in entries:
+                # Read bucket records for this time window
+                try:
+                    reader = db._get_bucket_reader(entry)
+                    for rec in reader.records():
+                        name = db._test_registry.lookup_name(rec.name_id)
+                        if rec.status == 0:  # HIST_STATUS_OK (pass)
+                            passed_names.add(name)
+                except Exception:
+                    pass
+                # A testpoint counts once when any of its tests has passed
+                closed = sum(
+                    1
+                    for tp in stage_tps
+                    if any(n.startswith(t.rstrip("*"))
+                           for t in tp.tests for n in passed_names)
+                )
+                series.append((entry.ts_end, closed, len(stage_tps)))
+    except Exception:
+        pass
+
+    current_pct = _pct(series[-1][1], series[-1][2]) if series else 0.0
+    return StageProgression(stage=stage, series=series, current_pct=current_pct)
+
+
+def format_stage_progression(report: StageProgression) -> str:
+    """Render a :class:`StageProgression` as an ASCII spark-line.
+
+    Args:
+        report: Output of :func:`report_stage_progression`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    if not report.series:
+        return f"Stage [{report.stage}]: no history data available"
+
+    lines = [f"Stage [{report.stage}] closure over time:"]
+    bars = "▁▂▃▄▅▆▇█"
+    spark = ""
+    for _, closed, total in report.series:
+        pct = closed / total if total else 0
+        idx = min(int(pct * len(bars)), len(bars) - 1)
+        spark += bars[idx]
+    lines.append(f"  {spark}")
+    first_ts = report.series[0][0]
+    last_ts = report.series[-1][0]
+    lines.append(
+        f"  {time.strftime('%Y-%m-%d', time.gmtime(first_ts))} → "
+        f"{time.strftime('%Y-%m-%d', time.gmtime(last_ts))}"
+    )
+    lines.append(f"  Current: {report.current_pct:.1f}%")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report F — testpoint reliability  (P1)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class TestpointReliability:
+    """Per-testpoint flake scores.
+
+    Args:
+        rows: List of (testpoint_name, flake_score, pass_count, fail_count).
+            Sorted by flake_score descending.
+        flaky_threshold: Score above which a testpoint is considered flaky.
+    """
+    rows: List[Tuple[str, float, int, int]]
+    flaky_threshold: float = 0.2
+
+    def to_json(self) -> str:
+        d = {
+            "flaky_threshold": self.flaky_threshold,
+            "rows": [
+                {
+                    "testpoint": tp,
+                    "flake_score": score,
+                    "pass": pc,
+                    "fail": fc,
+                }
+                for tp, score, pc, fc in self.rows
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_testpoint_reliability(
+    results: List[TestpointResult],
+    db,
+    flaky_threshold: float = 0.2,
+) -> TestpointReliability:
+    """Compute per-testpoint flake scores from v2 test_stats.
+
+    Args:
+        results: Output of :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
+        flaky_threshold: Flake score above which a testpoint is flagged.
+
+    Returns:
+        :class:`TestpointReliability`.
+    """
+    rows: List[Tuple[str, float, int, int]] = []
+    for r in results:
+        if not r.matched_tests:
+            rows.append((r.testpoint.name, 0.0, r.pass_count, r.fail_count))
+            continue
+        total_flake = 0.0
+        count = 0
+        pc_total = 0
+        fc_total = 0
+        for test_name in r.matched_tests:
+            try:
+                stats = db.get_test_stats(test_name)
+                if stats:
+                    total_flake += stats.flake_score
+                    pc_total += stats.pass_count
+                    fc_total += stats.fail_count
+                    count += 1
+            except Exception:
+                pass
+        avg_flake = total_flake / count if count else 0.0
+        rows.append((r.testpoint.name, avg_flake, pc_total, fc_total))
+
+    rows.sort(key=lambda x: x[1], reverse=True)
+    return TestpointReliability(rows=rows, flaky_threshold=flaky_threshold)
+
+
+def format_testpoint_reliability(report: TestpointReliability) -> str:
+    """Render a :class:`TestpointReliability` as a terminal table.
+
+    Args:
+        report: Output of :func:`report_testpoint_reliability`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    col_name = max((len(r[0]) for r in report.rows), default=10) + 2
+    lines: List[str] = []
+    header = f"{'Testpoint':<{col_name}} {'Flake':>7} {'Pass':>7} {'Fail':>7}"
+    lines.append(header)
+    lines.append("-" * len(header))
+    for tp, score, pc, fc in report.rows:
+        flag = " ⚠" if score >= report.flaky_threshold else ""
+        lines.append(f"{tp:<{col_name}} {score:>7.3f} {pc:>7} {fc:>7}{flag}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report G — unexercised covergroups  (P1)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class UnexercisedCovergroups:
+    """Covergroups with zero hits.
+
+    Args:
+        zero_hit: List of covergroup names with 0% coverage.
+        low_hit: List of (name, pct) tuples with 0 < pct < threshold.
+        threshold: Low-hit threshold used.
+    """
+    zero_hit: List[str]
+    low_hit: List[Tuple[str, float]]
+    threshold: float = 50.0
+
+    def to_json(self) -> str:
+        d = {
+            "threshold": self.threshold,
+            "zero_hit": self.zero_hit,
+            "low_hit": [{"name": n, "pct": p} for n, p in self.low_hit],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_unexercised_covergroups(
+    db,
+    testplan: Testplan,
+    low_threshold: float = 50.0,
+) -> UnexercisedCovergroups:
+    """Identify covergroups with zero or low coverage.
+
+    Args:
+        db: An open UCIS database.
+        testplan: The active testplan (used to filter to plan-tracked groups).
+        low_threshold: Percentage below which a covergroup is flagged as
+            low-hit (default 50%).
+
+    Returns:
+        :class:`UnexercisedCovergroups`.
+    """
+    from ucis.scope_type_t import ScopeTypeT
+    from ucis.cover_type_t import CoverTypeT
+
+    plan_cg_names = {c.name for c in testplan.covergroups}
+    zero_hit: List[str] = []
+    low_hit: List[Tuple[str, float]] = []
+
+    try:
+        for scope in db.getScopes(ScopeTypeT.COVERGROUP):
+            cg_name = scope.getScopeName()
+            if plan_cg_names and cg_name not in plan_cg_names:
+                continue
+            total = hit = 0
+            for cp in scope.getScopes(ScopeTypeT.COVERPOINT):
+                for b in cp.getCoverItems(CoverTypeT.CVGBIN):
+                    total += 1
+                    if b.getData()[0] > 0:
+                        hit += 1
+            # Classify on the raw hit count: _pct() rounds to one decimal, so
+            # a sparsely hit group would otherwise be misreported as zero-hit.
+            if hit == 0:
+                zero_hit.append(cg_name)
+            else:
+                pct = _pct(hit, total)
+                if pct < low_threshold:
+                    low_hit.append((cg_name, pct))
+    except Exception:
+        pass
+
+    low_hit.sort(key=lambda x: x[1])
+    return UnexercisedCovergroups(
+        zero_hit=zero_hit,
+        low_hit=low_hit,
+        threshold=low_threshold,
+    )
+
+
+def format_unexercised_covergroups(report: UnexercisedCovergroups) -> str:
+    """Render an :class:`UnexercisedCovergroups` report as terminal text.
+
+    Args:
+        report: Output of :func:`report_unexercised_covergroups`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    lines: List[str] = []
+    if report.zero_hit:
+        lines.append(f"Zero-hit covergroups ({len(report.zero_hit)}):")
+        for name in report.zero_hit:
+            lines.append(f"  ✗ {name}")
+    if report.low_hit:
+        lines.append(
+            f"\nLow-hit covergroups (< {report.threshold:.0f}%) "
+            f"({len(report.low_hit)}):"
+        )
+        for name, pct in report.low_hit:
+            lines.append(f"  ~ {name}  ({pct:.1f}%)")
+    if not lines:
+        lines.append("All tracked covergroups are fully hit.")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report I — coverage contribution  (P1)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class CoverageContribution:
+    """Per-test unique bin contribution.
+
+    Args:
+        rows: List of (test_name, unique_bins, total_hits) sorted by
+            unique_bins descending.
+        total_bins: Total covered bins in the database.
+    """
+    rows: List[Tuple[str, int, int]]
+    total_bins: int
+
+    def to_json(self) -> str:
+        d = {
+            "total_bins": self.total_bins,
+            "rows": [
+                {"test": t, "unique_bins": u, "total_hits": h}
+                for t, u, h in self.rows
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_coverage_contribution(db) -> CoverageContribution:
+    """Report per-test unique coverage bin contribution from v2 contrib data.
+
+    Args:
+        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
+
+    Returns:
+        :class:`CoverageContribution`.
+    """
+    rows: List[Tuple[str, int, int]] = []
+    total_bins = 0
+
+    try:
+        contrib_data = db.get_test_coverage_api()
+        if contrib_data:
+            for item in contrib_data:
+                test_name = item.get("test", "")
+                unique = item.get("unique_bins", 0)
+                hits = item.get("total_hits", 0)
+                rows.append((test_name, unique, hits))
+                total_bins = max(total_bins, item.get("total_bins", 0))
+    except Exception:
+        pass
+
+    rows.sort(key=lambda x: x[1], reverse=True)
+    return CoverageContribution(rows=rows, total_bins=total_bins)
+
+
+def format_coverage_contribution(report: CoverageContribution) -> str:
+    """Render a :class:`CoverageContribution` as a terminal table.
+
+    Args:
+        report: Output of :func:`report_coverage_contribution`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    if not report.rows:
+        return "(no contribution data available — v2 history required)"
+
+    col_name = max((len(r[0]) for r in report.rows), default=10) + 2
+    lines: List[str] = []
+    header = f"{'Test':<{col_name}} {'Unique':>8} {'Total hits':>12}"
+    lines.append(header)
+    lines.append("-" * len(header))
+    for name, unique, hits in report.rows:
+        lines.append(f"{name:<{col_name}} {unique:>8} {hits:>12}")
+    lines.append(f"\nTotal bins in database: {report.total_bins}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report H — test budget by stage  (P2)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class TestBudget:
+    """CPU time budget by stage.
+
+    Args:
+        rows: List of (stage, testpoint_name, mean_cpu_sec, total_runs)
+            sorted by stage rank then mean_cpu_sec descending.
+        stage_totals: Mapping of stage → total estimated CPU seconds.
+        missing_stats: Testpoint names for which no CPU stats are available.
+    """
+    rows: List[Tuple[str, str, float, int]]
+    stage_totals: Dict[str, float]
+    missing_stats: List[str]
+
+    def to_json(self) -> str:
+        d = {
+            "stage_totals": self.stage_totals,
+            "missing_stats": self.missing_stats,
+            "rows": [
+                {"stage": s, "testpoint": tp, "mean_cpu_sec": cpu, "total_runs": n}
+                for s, tp, cpu, n in self.rows
+            ],
+        }
+        return json.dumps(d, indent=2)
+
+
+def report_test_budget(testplan: Testplan, db) -> TestBudget:
+    """Estimate the CPU time budget per stage from v2 test_stats mean CPU times.
+
+    Each testpoint's value is the run-weighted mean CPU time of its mapped
+    tests; stage totals sum these means.  No stats → ``missing_stats``.
+
+    Args:
+        testplan: The active :class:`~ucis.ncdb.testplan.Testplan`.
+        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
+
+    Returns:
+        :class:`TestBudget`.
+    """
+    rows: List[Tuple[str, str, float, int]] = []
+    stage_totals: Dict[str, float] = {}
+    missing_stats: List[str] = []
+
+    for tp in testplan.testpoints:
+        if tp.na or not tp.tests:
+            continue  # N/A testpoints and testpoints without tests get no row
+        total_cpu = 0.0
+        total_runs = 0
+        found = False
+        for test_name in tp.tests:
+            try:
+                stats = db.get_test_stats(test_name)
+                if stats and stats.total_runs > 0:
+                    total_cpu += stats.mean_cpu_time * stats.total_runs
+                    total_runs += stats.total_runs
+                    found = True
+            except Exception:
+                pass
+        mean_cpu = total_cpu / total_runs if total_runs else 0.0
+        if not found:
+            missing_stats.append(tp.name)
+        stage = tp.stage or "unknown"
+        rows.append((stage, tp.name, mean_cpu, total_runs))  # row added even without stats
+        stage_totals[stage] = stage_totals.get(stage, 0.0) + mean_cpu
+
+    rows.sort(key=lambda r: (_STAGE_ORDER.get(r[0], 999), -r[2]))
+    return TestBudget(rows=rows, stage_totals=stage_totals, missing_stats=missing_stats)
+
+
+def format_test_budget(report: TestBudget) -> str:
+    """Render a :class:`TestBudget` as a terminal table.
+
+    Args:
+        report: Output of :func:`report_test_budget`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    # Widen the stage column beyond 6 so names like "unknown" stay aligned.
+    col_st = max(6, max((len(r[0]) for r in report.rows), default=6))
+    col_tp = max((len(r[1]) for r in report.rows), default=10) + 2
+    header = f"{'Stage':<{col_st}} {'Testpoint':<{col_tp}} {'Mean CPU':>10} {'Runs':>7}"
+    lines: List[str] = [header, "-" * len(header)]
+    prev_stage = None
+    for stage, tp, cpu, runs in report.rows:
+        if stage != prev_stage:
+            if prev_stage is not None:
+                total = report.stage_totals.get(prev_stage, 0.0)
+                lines.append(f"{'':{col_st}} {'Stage total':>{col_tp}} {total:>9.1f}s")
+                lines.append("")
+            prev_stage = stage
+        lines.append(f"{stage:<{col_st}} {tp:<{col_tp}} {cpu:>9.1f}s {runs:>7}")
+    if prev_stage is not None:
+        total = report.stage_totals.get(prev_stage, 0.0)
+        lines.append(f"{'':{col_st}} {'Stage total':>{col_tp}} {total:>9.1f}s")
+    if report.missing_stats:
+        lines.append(f"\nNo CPU stats for: {', '.join(report.missing_stats)}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Report L — safety traceability matrix  (P2)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class SafetyMatrix:
+    """Requirement x testpoint traceability matrix with waiver flags.
+
+    Args:
+        rows: List of (req_id, req_desc, testpoint_name, status, waived).
+        untested_requirements: Requirement IDs with no linked testpoints.
+    """
+    rows: List[Tuple[str, str, str, str, bool]]
+    untested_requirements: List[str]
+
+    def to_csv(self) -> str:
+        """Render as CSV via the ``csv`` module (fields quoted as needed, no trailing newline)."""
+        import csv
+        import io
+        buf = io.StringIO()
+        writer = csv.writer(buf, lineterminator="\n")
+        writer.writerow(["req_id", "req_desc", "testpoint", "status", "waived"])
+        writer.writerows(self.rows)
+        return buf.getvalue().rstrip("\n")
+
+    def to_json(self) -> str:
+        """Render the matrix as an indented JSON document."""
+        rows = [{"req_id": rid, "req_desc": rdesc, "testpoint": tp,
+                 "status": st, "waived": w}
+                for rid, rdesc, tp, st, w in self.rows]
+        d = {"untested_requirements": self.untested_requirements, "rows": rows}
+        return json.dumps(d, indent=2)
+
+
+def report_safety_matrix(
+    results: List[TestpointResult],
+    waivers=None,
+) -> SafetyMatrix:
+    """Build a requirement to testpoint traceability matrix.
+
+    The requirement ID is taken from ``item_id`` (the ``RequirementLink``
+    field), then ``id``, then ``str(req)``.  A testpoint without any
+    requirements yields a single row with "—" as its ID.
+
+    Args:
+        results: Output of :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        waivers: Optional :class:`~ucis.ncdb.waivers.WaiverSet`.
+
+    Returns:
+        :class:`SafetyMatrix`; ``untested_requirements`` is always empty.
+    """
+    rows: List[Tuple[str, str, str, str, bool]] = []
+    for r in results:
+        tp_name = r.testpoint.name
+        status = _STATUS_LABEL[r.status]
+        reqs = r.testpoint.requirements
+        if not reqs:
+            rows.append(("—", "", tp_name, status, False))
+            continue
+        waived = False
+        if waivers is not None:
+            try:
+                waived = waivers.matches_scope(tp_name, "")
+            except Exception:
+                pass  # best effort: a failing waiver lookup leaves the rows unwaived
+        for req in reqs:
+            req_id = getattr(req, "item_id", None) or getattr(req, "id", None) or str(req)
+            rows.append((req_id, getattr(req, "desc", ""), tp_name, status, waived))
+    return SafetyMatrix(rows=rows, untested_requirements=[])
+
+
+def format_safety_matrix(report: SafetyMatrix) -> str:
+    """Lay out a :class:`SafetyMatrix` as an aligned plain-text table.
+
+    Args:
+        report: Output of :func:`report_safety_matrix`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    status_w = 10
+    req_w = max((len(row[0]) for row in report.rows), default=6) + 2
+    tp_w = max((len(row[2]) for row in report.rows), default=10) + 2
+    title = f"{'Req ID':<{req_w}} {'Testpoint':<{tp_w}} {'Status':<{status_w}} {'Waived':>6}"
+    out: List[str] = [title, "-" * len(title)]
+    for rid, _desc, tp_name, state, is_waived in report.rows:
+        mark = "YES" if is_waived else ""
+        out.append(f"{rid:<{req_w}} {tp_name:<{tp_w}} {state:<{status_w}} {mark:>6}")
+    # Requirements without any testpoint are appended after a blank line.
+    untested = report.untested_requirements
+    if untested:
+        out.append(f"\nUntested: {', '.join(untested)}")
+    text = "\n".join(out)
+    return text
+
+
+# ---------------------------------------------------------------------------
+# Report M — seed reliability heat-map  (P2)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class SeedReliability:
+    """Pass/fail tallies per seed for a single test name.
+
+    Args:
+        test_name: The queried test name.
+        rows: ``(seed_id, pass_count, fail_count, flake_score)`` tuples,
+            ordered by descending fail_count.
+        total_seeds: Number of distinct seeds observed.
+    """
+    test_name: str
+    rows: List[Tuple[int, int, int, float]]
+    total_seeds: int
+
+    def to_json(self) -> str:
+        """Serialize the report as an indented JSON document."""
+        seed_rows = []
+        for seed, passes, fails, flake in self.rows:
+            seed_rows.append({"seed": seed, "pass": passes, "fail": fails, "flake": flake})
+        doc = {"test_name": self.test_name, "total_seeds": self.total_seeds,
+               "rows": seed_rows}
+        return json.dumps(doc, indent=2)
+
+
+def report_seed_reliability(db, test_name: str) -> SeedReliability:
+    """Tally passes and failures per seed from the v2 history buckets.
+
+    Errors raised while reading the history are swallowed, so the result
+    may be empty or reflect only the records read before the failure.
+
+    Args:
+        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
+        test_name: Name of the test to analyse.
+
+    Returns:
+        :class:`SeedReliability`.
+    """
+    from ucis.ncdb.constants import HIST_STATUS_OK
+
+    passes: Dict[int, int] = {}
+    failures: Dict[int, int] = {}
+
+    try:
+        for record in db.query_test_history(test_name):
+            seed = record.seed_id
+            # Every status other than OK is tallied as a failure for the seed.
+            tally = passes if record.status == HIST_STATUS_OK else failures
+            tally[seed] = tally.get(seed, 0) + 1
+    except Exception:
+        pass  # best effort: keep whatever was tallied before the error
+
+    seeds = set(passes) | set(failures)
+    rows: List[Tuple[int, int, int, float]] = []
+    for seed in seeds:
+        n_pass, n_fail = passes.get(seed, 0), failures.get(seed, 0)
+        runs = n_pass + n_fail
+        score = _pct(min(n_pass, n_fail), runs) / 100.0 if runs > 0 else 0.0
+        rows.append((seed, n_pass, n_fail, score))
+
+    rows.sort(key=lambda row: row[2], reverse=True)
+    return SeedReliability(test_name=test_name, rows=rows, total_seeds=len(seeds))
+
+
+def format_seed_reliability(report: SeedReliability) -> str:
+    """Format a :class:`SeedReliability` as a terminal heat-map table.
+
+    Args:
+        report: Output of :func:`report_seed_reliability`.
+
+    Returns:
+        Human-readable multiline string.
+    """
+    if not report.rows:
+        return f"No history found for test '{report.test_name}'"
+
+    columns = f"{'Seed':>12} {'Pass':>7} {'Fail':>7} {'Flake':>7}"
+    out: List[str] = [f"Seed reliability for '{report.test_name}':", columns]
+    out.append("-" * len(columns))
+    for seed, passed, failed, score in report.rows:
+        # Seeds with a flake score of 0.2 or more get a warning marker.
+        marker = " ⚠" if score >= 0.2 else ""
+        out.append(f"{seed:>12} {passed:>7} {failed:>7} {score:>7.3f}{marker}")
+    out.append(f"\nTotal unique seeds: {report.total_seeds}")
+    return "\n".join(out)
diff --git a/src/ucis/ncdb/squash_log.py b/src/ucis/ncdb/squash_log.py
new file mode 100644
index 0000000..fd1c631
--- /dev/null
+++ b/src/ucis/ncdb/squash_log.py
@@ -0,0 +1,126 @@
+"""
+squash_log.bin — append-only audit trail of squash operations.
+
+Each squash event is recorded permanently so coverage provenance can be
+reconstructed: "was counts.bin built from passing tests only?"
+
+Binary layout (little-endian)::
+
+    magic         u32   0x53514C47  ('SQLG')
+    version       u8    1
+    num_squashes  u32
+
+    entries[num_squashes]:
+      ts          u32   unix timestamp of squash
+      policy      u8    0=all  1=pass_only  2=exclude_error_and_rerun  3=strict
+      _pad        u8[3]
+      from_run    u32   first run_id included in squash
+      to_run      u32   new squash_watermark after this operation
+      num_runs    u32   total runs considered
+      pass_runs   u32   passing runs included in counts.bin contribution
+
+24 bytes per entry.
+"""
+
+from __future__ import annotations
+
+import struct
+from dataclasses import dataclass
+from typing import List
+
+MAGIC   = 0x53514C47   # 'SQLG'
+VERSION = 1
+
+_HDR   = struct.Struct("<IBI")          # magic, version, num_squashes
+_ENTRY = struct.Struct("<IB3xIIII")     # ts, policy, pad(3), from_run, to_run, num_runs, pass_runs
+assert _ENTRY.size == 24  # 4+1+3+4+4+4+4 = 24
+
+
+@dataclass
+class SquashLogEntry:
+    """One recorded squash operation."""
+    ts:        int   # unix timestamp
+    policy:    int   # POLICY_* constant from contrib_index
+    from_run:  int   # first run_id included
+    to_run:    int   # new squash_watermark
+    num_runs:  int   # total runs considered
+    pass_runs: int   # passing runs contributing to counts.bin
+
+
+class SquashLog:
+    """In-memory representation of ``squash_log.bin``.
+
+    Entries are append-only — never modified or deleted after writing.
+
+    Example::
+
+        log = SquashLog()
+        log.append(ts=1700000000, policy=1, from_run=0, to_run=99,
+                   num_runs=100, pass_runs=95)
+        data = log.serialize()
+        log2 = SquashLog.deserialize(data)
+    """
+
+    def __init__(self) -> None:
+        self._entries: List[SquashLogEntry] = []
+
+    def append(self, ts: int, policy: int, from_run: int, to_run: int,
+               num_runs: int, pass_runs: int) -> None:
+        """Record a squash event.
+
+        Args:
+            ts:        Unix timestamp of the squash.
+            policy:    Merge policy applied (POLICY_* from contrib_index).
+            from_run:  First run_id included in this squash.
+            to_run:    New squash_watermark after this operation.
+            num_runs:  Total run_ids considered during squash.
+            pass_runs: Number of passing runs whose contrib was included.
+        """
+        self._entries.append(SquashLogEntry(
+            ts=ts, policy=policy, from_run=from_run, to_run=to_run,
+            num_runs=num_runs, pass_runs=pass_runs,
+        ))
+
+    def entries(self) -> List[SquashLogEntry]:
+        """Return all squash log entries in chronological order."""
+        return list(self._entries)
+
+    @property
+    def num_squashes(self) -> int:
+        return len(self._entries)
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the log to bytes for storage in the ZIP archive."""
+        # Join once instead of repeated ``bytes +=`` (quadratic in entry count).
+        parts = [_HDR.pack(MAGIC, VERSION, len(self._entries))]
+        for e in self._entries:
+            parts.append(_ENTRY.pack(e.ts, e.policy, e.from_run, e.to_run,
+                                     e.num_runs, e.pass_runs))
+        return b"".join(parts)
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "SquashLog":
+        """Reconstruct a SquashLog from raw bytes.
+
+        Raises:
+            ValueError: if magic or version is wrong.
+            struct.error: if *data* is shorter than the header or the
+                ``num_squashes`` entries it announces.
+        """
+        magic, version, num_squashes = _HDR.unpack_from(data, 0)
+        if magic != MAGIC:
+            raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported squash_log version {version}")
+
+        log = cls()
+        # Entries are fixed-size (24 bytes) and packed back to back right
+        # after the header, so each one sits at a computable offset.
+        for index in range(num_squashes):
+            ts, policy, from_run, to_run, num_runs, pass_runs = _ENTRY.unpack_from(
+                data, _HDR.size + index * _ENTRY.size)
+            log.append(ts=ts, policy=policy, from_run=from_run, to_run=to_run,
+                       num_runs=num_runs, pass_runs=pass_runs)
+        return log
diff --git a/src/ucis/ncdb/test_registry.py b/src/ucis/ncdb/test_registry.py
new file mode 100644
index 0000000..0a9003b
--- /dev/null
+++ b/src/ucis/ncdb/test_registry.py
@@ -0,0 +1,217 @@
+"""
+test_registry.bin — global test-name / seed-id / run-id registry.
+
+Stores each unique test base-name and seed string exactly once and assigns
+stable integer IDs that survive ZIP rewrites and merges.  Also holds the
+global monotonically-increasing ``run_id`` counter.
+
+Binary layout (little-endian)::
+
+    magic            u32   0x54535452  ('TSTR')
+    version          u8    1
+    next_run_id      u32
+    num_names        u32
+    num_seeds        u32
+    name_offsets     u32[num_names]    byte offsets into name_heap
+    seed_offsets     u32[num_seeds]    byte offsets into seed_heap
+    name_heap        bytes             null-terminated UTF-8, in name_id order
+    seed_heap        bytes             null-terminated UTF-8, in seed_id order
+
+name_ids are assigned by **insertion order** (first seen = id 0) and are
+stable: inserting a new name never shifts existing name_ids.  This ensures
+that all bucket files and stats entries remain valid across incremental updates.
+Seeds behave identically.
+"""
+
+from __future__ import annotations
+
+import struct
+from typing import List, Optional
+
+MAGIC   = 0x54535452   # 'TSTR'
+VERSION = 1
+
+_HDR = struct.Struct("<IBIII")   # magic, version, next_run_id, num_names, num_seeds
+
+
+class TestRegistry:
+    """In-memory representation of ``test_registry.bin``.
+
+    name_ids and seed_ids are assigned by **insertion order** and are stable:
+    adding a new name never changes the id of an existing name.
+
+    Args:
+        next_run_id: Starting value for the run-id counter (default 0).
+
+    Example::
+
+        reg = TestRegistry()
+        name_id = reg.lookup_name_id("uart_smoke")
+        seed_id = reg.lookup_seed_id("12345")
+        run_id  = reg.assign_run_id()
+        data    = reg.serialize()
+        reg2    = TestRegistry.deserialize(data)
+    """
+
+    def __init__(self, next_run_id: int = 0) -> None:
+        self._next_run_id: int = next_run_id
+        # insertion-order list of name strings — index == name_id (STABLE)
+        self._names: List[str] = []
+        # insertion-order list of seed strings — index == seed_id
+        self._seeds: List[str] = []
+        # fast reverse-lookup dicts
+        self._name_to_id: dict = {}
+        self._seed_to_id: dict = {}
+
+    # ── run-id ──────────────────────────────────────────────────────────────
+
+    def assign_run_id(self) -> int:
+        """Return the next run_id and advance the counter."""
+        rid = self._next_run_id
+        self._next_run_id += 1
+        return rid
+
+    @property
+    def next_run_id(self) -> int:
+        return self._next_run_id
+
+    # ── name_id ─────────────────────────────────────────────────────────────
+
+    def lookup_name_id(self, name: str) -> int:
+        """Return the name_id for *name*, assigning one if this is a new name.
+
+        name_ids are assigned by insertion order and are stable — inserting a
+        new name never changes the id of any existing name.
+
+        Raises:
+            ValueError: if a new *name* contains a NUL character, which the
+                null-terminated string heap cannot represent.
+        """
+        if name in self._name_to_id:
+            return self._name_to_id[name]
+        if "\x00" in name:
+            raise ValueError("test name must not contain NUL characters")
+        nid = len(self._names)
+        self._names.append(name)
+        self._name_to_id[name] = nid
+        return nid
+
+    def name_for_id(self, name_id: int) -> str:
+        """Return the name string for *name_id*.
+
+        Raises:
+            IndexError: if *name_id* is out of range.
+        """
+        return self._names[name_id]
+
+    @property
+    def num_names(self) -> int:
+        return len(self._names)
+
+    # ── seed_id ─────────────────────────────────────────────────────────────
+
+    def lookup_seed_id(self, seed: str) -> int:
+        """Return the seed_id for *seed*, assigning one if this is a new seed.
+
+        Integer seeds should be passed as their decimal string representation.
+
+        Raises:
+            ValueError: if a new *seed* contains a NUL character.
+        """
+        if seed in self._seed_to_id:
+            return self._seed_to_id[seed]
+        if "\x00" in seed:
+            raise ValueError("seed must not contain NUL characters")
+        sid = len(self._seeds)
+        self._seeds.append(seed)
+        self._seed_to_id[seed] = sid
+        return sid
+
+    def seed_for_id(self, seed_id: int) -> str:
+        """Return the seed string for *seed_id*.
+
+        Raises:
+            IndexError: if *seed_id* is out of range.
+        """
+        return self._seeds[seed_id]
+
+    @property
+    def num_seeds(self) -> int:
+        return len(self._seeds)
+
+    # ── serialization ───────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the registry to bytes for storage in the ZIP archive."""
+        # Encode every string exactly once; offsets are derived from the
+        # encoded (UTF-8) lengths, including each trailing NUL terminator.
+        name_blobs = [n.encode() + b"\x00" for n in self._names]
+        seed_blobs = [s.encode() + b"\x00" for s in self._seeds]
+        name_offsets = self._heap_offsets(name_blobs)
+        seed_offsets = self._heap_offsets(seed_blobs)
+
+        header = _HDR.pack(MAGIC, VERSION, self._next_run_id,
+                           len(name_blobs), len(seed_blobs))
+        # A zero repeat count is valid, so empty tables pack to b"".
+        offsets = struct.pack(f"<{len(name_offsets)}I{len(seed_offsets)}I",
+                              *name_offsets, *seed_offsets)
+        return header + offsets + b"".join(name_blobs) + b"".join(seed_blobs)
+
+    @staticmethod
+    def _heap_offsets(blobs: List[bytes]) -> List[int]:
+        """Return the starting byte offset of each blob within its joined heap."""
+        offsets: List[int] = []
+        pos = 0
+        for blob in blobs:
+            offsets.append(pos)
+            pos += len(blob)
+        return offsets
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "TestRegistry":
+        """Reconstruct a TestRegistry from raw bytes.
+
+        Args:
+            data: Bytes previously produced by :meth:`serialize`.
+
+        Returns:
+            A fully populated TestRegistry instance.
+
+        Raises:
+            ValueError: if the magic number or version is wrong, or a heap
+                string is missing its NUL terminator.
+            struct.error: if *data* is too short for the header or tables.
+        """
+        magic, version, next_run_id, num_names, num_seeds = _HDR.unpack_from(data, 0)
+        if magic != MAGIC:
+            raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported test_registry version {version}")
+
+        offset = _HDR.size
+        name_offsets = struct.unpack_from(f"<{num_names}I", data, offset)
+        offset += 4 * num_names
+        seed_offsets = struct.unpack_from(f"<{num_seeds}I", data, offset)
+        name_heap_base = offset + 4 * num_seeds
+
+        def _read_heap(heap_base, str_offsets):
+            """Decode the strings at *str_offsets*; return (strings, heap size)."""
+            strings, size = [], 0
+            for str_offset in str_offsets:
+                start = heap_base + str_offset
+                end = data.index(b"\x00", start)
+                strings.append(data[start:end].decode())
+                size += end - start + 1   # string bytes plus the terminator
+            return strings, size
+
+        # The seed heap begins immediately after the last byte of the name heap.
+        names, name_heap_size = _read_heap(name_heap_base, name_offsets)
+        seeds, _ = _read_heap(name_heap_base + name_heap_size, seed_offsets)
+
+        reg = cls(next_run_id=next_run_id)
+        # Restore directly — names are in name_id (insertion) order
+        reg._names = names
+        reg._name_to_id = {n: i for i, n in enumerate(names)}
+        reg._seeds = seeds
+        reg._seed_to_id = {s: i for i, s in enumerate(seeds)}
+        return reg
diff --git a/src/ucis/ncdb/test_stats.py b/src/ucis/ncdb/test_stats.py
new file mode 100644
index 0000000..a0d9dd8
--- /dev/null
+++ b/src/ucis/ncdb/test_stats.py
@@ -0,0 +1,316 @@
+"""
+test_stats.bin — per-test aggregate metrics table.
+
+One fixed-size 72-byte record per unique test, indexed by ``name_id`` from
+``test_registry.bin``.  All fields are maintained incrementally — O(1) update
+per new test run — so aggregate queries (top flaky tests, fail rate, etc.)
+need only this file, never the per-bucket records.
+
+Binary layout of the file header (little-endian)::
+
+    magic       u32   0x54535441  ('TSTA')
+    version     u8    1
+    num_tests   u32
+
+Followed by ``num_tests`` 72-byte entries (indexed by name_id):
+
+    total_runs          u32
+    pass_count          u32
+    fail_count          u32
+    error_count         u32
+    first_ts            u32   unix timestamp of first ever run
+    last_ts             u32   unix timestamp of most recent run
+    last_green_ts       u32   unix timestamp of last passing run
+    transition_count    u32   number of run-to-run status changes (flake signal)
+    streak              i16   positive = consecutive passes, negative = fails
+    last_status         u8    most recent run status (HIST_STATUS_*)
+    _pad                u8
+    flake_score         f32   transition_count / max(total_runs-1, 1)  ∈ [0,1]
+    fail_rate           f32   fail_count / total_runs                  ∈ [0,1]
+    mean_cpu_time       f32   Welford online mean (seconds)
+    m2_cpu_time         f32   Welford M2 accumulator
+    cusum_value         f32   running CUSUM statistic
+    cusum_ref_mean      f32   μ₀ used for CUSUM
+    grade_score         f32   composite effectiveness score [0,1]
+    total_seeds_seen    u16   unique seeds ever seen for this test
+    _reserved           u8[6]
+
+Total: 72 bytes per entry.
+"""
+
+from __future__ import annotations
+
+import math
+import struct
+import time
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Sequence
+
+from ucis.ncdb.constants import (
+    HIST_STATUS_FAIL, HIST_STATUS_OK,
+)
+
+MAGIC   = 0x54535441   # 'TSTA' — file magic, verified by TestStatsTable.deserialize
+VERSION = 1
+
+# CUSUM parameters (consumed by TestStatsTable.update)
+_CUSUM_K = 0.5   # allowance (half the detectable shift in σ units)
+_CUSUM_H = 4.0   # decision threshold (triggers change-point detection)
+
+_HDR   = struct.Struct("<IBI")      # magic, version, num_tests
+_ENTRY = struct.Struct("<8I hBB 7f H6s")
+# Field breakdown: 8×u32=32, i16+u8+u8=4, 7×f32=28, u16=2, 6s=6  → 72 bytes per entry
+assert _ENTRY.size == 72, f"Entry size is {_ENTRY.size}, expected 72"
+
+
+@dataclass
+class TestStatsEntry:
+    """Running aggregate statistics for a single test, keyed by ``name_id``."""
+
+    name_id:          int   = 0
+    total_runs:       int   = 0
+    pass_count:       int   = 0
+    fail_count:       int   = 0
+    error_count:      int   = 0
+    first_ts:         int   = 0
+    last_ts:          int   = 0
+    last_green_ts:    int   = 0
+    transition_count: int   = 0
+    streak:           int   = 0   # signed: + = passes, - = fails
+    last_status:      int   = HIST_STATUS_OK
+    flake_score:      float = 0.0
+    fail_rate:        float = 0.0
+    mean_cpu_time:    float = 0.0
+    m2_cpu_time:      float = 0.0
+    cusum_value:      float = 0.0
+    cusum_ref_mean:   float = 0.0
+    grade_score:      float = 0.0
+    total_seeds_seen: int   = 0
+    _known_seeds:     set   = field(default_factory=set, repr=False, compare=False)
+
+    # ── Derived properties ───────────────────────────────────────────────
+
+    @property
+    def stddev_cpu_time(self) -> float:
+        """CPU-time standard deviation, ``sqrt(M2 / total_runs)``; 0.0 below two runs."""
+        if self.total_runs >= 2:
+            return math.sqrt(self.m2_cpu_time / self.total_runs)
+        return 0.0
+
+    def days_since_last_pass(self, now: Optional[int] = None) -> float:
+        """Days from ``last_green_ts`` to *now* (default: current time); ``inf`` if it is 0."""
+        if self.last_green_ts == 0:
+            return math.inf
+        return ((int(time.time()) if now is None else now) - self.last_green_ts) / 86400.0
+
+    def is_broken(self) -> bool:
+        """Consistently failing: flake_score < 0.1 with a streak of -5 or lower."""
+        return self.flake_score < 0.1 and self.streak <= -5
+
+    def is_flaky(self) -> bool:
+        """Probably flaky: flake_score > 0.3 while the current streak is shorter than 3."""
+        return self.flake_score > 0.3 and -3 < self.streak < 3
+
+
+class TestStatsTable:
+    """In-memory representation of ``test_stats.bin``.
+
+    Entries are indexed by ``name_id``; new entries are appended automatically
+    when :meth:`update` is called with a previously unseen *name_id*.
+
+    Example::
+
+        tbl = TestStatsTable()
+        tbl.update(name_id=0, status=HIST_STATUS_OK, ts=1700000000)
+        tbl.update(name_id=0, status=HIST_STATUS_FAIL, ts=1700086400)
+        print(tbl.get(0).flake_score)   # 1.0 (alternates every run)
+
+        data = tbl.serialize()
+        tbl2 = TestStatsTable.deserialize(data)
+    """
+
+    def __init__(self) -> None:
+        self._entries: List[TestStatsEntry] = []
+
+    # ── public API ───────────────────────────────────────────────────────────
+
+    def update(self, name_id: int, status: int, ts: int,
+               cpu_time: Optional[float] = None,
+               seed_id: Optional[int] = None) -> None:
+        """Incorporate one new test run into the aggregate statistics.
+
+        Args:
+            name_id:  Integer name_id from TestRegistry.
+            status:   HIST_STATUS_* constant.
+            ts:       Unix timestamp of this run.
+            cpu_time: Wall/CPU time in seconds (optional).
+            seed_id:  seed_id from TestRegistry (optional, for seed diversity).
+
+        Raises:
+            ValueError: if *name_id* is negative.
+        """
+        if name_id < 0:
+            raise ValueError(f"name_id must be non-negative, got {name_id}")
+        self._ensure(name_id)
+        e = self._entries[name_id]
+
+        e.total_runs += 1
+        if e.total_runs == 1:
+            e.first_ts = ts
+            e.cusum_ref_mean = 1.0 if status == HIST_STATUS_FAIL else 0.0
+
+        e.last_ts = max(e.last_ts, ts)
+
+        if status == HIST_STATUS_OK:
+            e.pass_count += 1
+            e.last_green_ts = max(e.last_green_ts, ts)
+        elif status == HIST_STATUS_FAIL:
+            e.fail_count += 1
+        else:
+            e.error_count += 1
+
+        # Streak tracking
+        if status == HIST_STATUS_OK:
+            e.streak = e.streak + 1 if e.streak >= 0 else 1
+        else:
+            e.streak = e.streak - 1 if e.streak <= 0 else -1
+
+        # Transition count (flake signal)
+        if e.total_runs > 1 and status != e.last_status:
+            e.transition_count += 1
+
+        e.last_status = status
+
+        # Derived rates
+        e.flake_score = e.transition_count / max(e.total_runs - 1, 1)
+        e.fail_rate   = e.fail_count / e.total_runs
+
+        # Welford mean/variance.  NOTE(review): total_runs doubles as the sample count.
+        if cpu_time is not None:
+            delta = cpu_time - e.mean_cpu_time
+            e.mean_cpu_time += delta / e.total_runs
+            e.m2_cpu_time   += delta * (cpu_time - e.mean_cpu_time)
+
+        # CUSUM update
+        x = 1.0 if status == HIST_STATUS_FAIL else 0.0
+        e.cusum_value = max(0.0, e.cusum_value + x - (e.cusum_ref_mean + _CUSUM_K))
+        # Change-point detected: reset (caller may log the timestamp)
+        if e.cusum_value > _CUSUM_H:
+            e.cusum_value = 0.0
+
+        # Seed diversity.  ``_known_seeds`` is not serialized, so it restarts
+        # empty after a reload; never let the persisted count shrink.
+        if seed_id is not None:
+            e._known_seeds.add(seed_id)
+            e.total_seeds_seen = max(e.total_seeds_seen, len(e._known_seeds))
+
+        # Composite grade: (pass_rate) × (stability) × (speed_factor)
+        pass_rate   = e.pass_count / e.total_runs
+        stability   = 1.0 - e.flake_score
+        # Speed factor: normalize by mean_cpu_time capped at 3600 s
+        if e.mean_cpu_time > 0:
+            speed = max(0.0, 1.0 - e.mean_cpu_time / 3600.0)
+        else:
+            speed = 1.0
+        e.grade_score = pass_rate * stability * speed
+
+    def get(self, name_id: int) -> Optional[TestStatsEntry]:
+        """Return the entry for *name_id*, or None if not present."""
+        # Negative ids are rejected rather than wrapping around the list.
+        if 0 <= name_id < len(self._entries):
+            return self._entries[name_id]
+        return None
+
+    def top_flaky(self, n: int = 20) -> List[TestStatsEntry]:
+        """Return the top-*n* entries sorted by ``flake_score`` descending."""
+        return sorted(self._entries, key=lambda e: e.flake_score, reverse=True)[:n]
+
+    def top_failing(self, n: int = 20,
+                    flake_threshold: float = 0.1) -> List[TestStatsEntry]:
+        """Return the top-*n* consistently-failing tests.
+
+        Filters to entries with ``fail_rate > 0`` and
+        ``flake_score < flake_threshold`` (distinguishes broken from flaky).
+        """
+        candidates = [e for e in self._entries
+                      if e.fail_rate > 0 and e.flake_score < flake_threshold]
+        return sorted(candidates, key=lambda e: e.fail_rate, reverse=True)[:n]
+
+    @property
+    def num_tests(self) -> int:
+        return len(self._entries)
+
+    # ── serialization ────────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the table to bytes for storage in the ZIP archive.
+
+        ``streak`` (i16) and ``total_seeds_seen`` (u16) saturate at the limits
+        of their on-disk fields instead of raising ``struct.error``.
+        """
+        chunks = [_HDR.pack(MAGIC, VERSION, len(self._entries))]
+        for e in self._entries:
+            chunks.append(_ENTRY.pack(
+                e.total_runs, e.pass_count, e.fail_count, e.error_count,
+                e.first_ts, e.last_ts, e.last_green_ts, e.transition_count,
+                max(-0x8000, min(0x7FFF, e.streak)), e.last_status, 0,   # 0 = _pad
+                e.flake_score, e.fail_rate,
+                e.mean_cpu_time, e.m2_cpu_time,
+                e.cusum_value, e.cusum_ref_mean,
+                e.grade_score,
+                min(0xFFFF, e.total_seeds_seen),
+                b"\x00" * 6,                           # _reserved
+            ))
+        return b"".join(chunks)
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "TestStatsTable":
+        """Reconstruct a TestStatsTable from raw bytes.
+
+        The per-entry seed sets are not stored, so ``_known_seeds`` starts
+        empty on every loaded entry.
+
+        Args:
+            data: Bytes previously produced by :meth:`serialize`.
+
+        Raises:
+            ValueError: if the magic number or version is wrong.
+            struct.error: if *data* is truncated.
+        """
+        magic, version, num_tests = _HDR.unpack_from(data, 0)
+        if magic != MAGIC:
+            raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}")
+        if version != VERSION:
+            raise ValueError(f"Unsupported test_stats version {version}")
+
+        tbl = cls()
+        # Entries follow the header back to back, in name_id order.
+        offset = _HDR.size
+        for name_id in range(num_tests):
+            (total_runs, pass_count, fail_count, error_count,
+             first_ts, last_ts, last_green_ts, transition_count,
+             streak, last_status, _pad,
+             flake_score, fail_rate, mean_cpu_time, m2_cpu_time,
+             cusum_value, cusum_ref_mean, grade_score,
+             total_seeds_seen, _reserved) = _ENTRY.unpack_from(data, offset)
+            offset += _ENTRY.size
+            tbl._entries.append(TestStatsEntry(
+                name_id=name_id,
+                total_runs=total_runs, pass_count=pass_count,
+                fail_count=fail_count, error_count=error_count,
+                first_ts=first_ts, last_ts=last_ts,
+                last_green_ts=last_green_ts, transition_count=transition_count,
+                streak=streak, last_status=last_status,
+                flake_score=flake_score, fail_rate=fail_rate,
+                mean_cpu_time=mean_cpu_time, m2_cpu_time=m2_cpu_time,
+                cusum_value=cusum_value, cusum_ref_mean=cusum_ref_mean,
+                grade_score=grade_score, total_seeds_seen=total_seeds_seen,
+            ))
+        return tbl
+
+    # ── internal ─────────────────────────────────────────────────────────────
+
+    def _ensure(self, name_id: int) -> None:
+        while len(self._entries) <= name_id:
+            nid = len(self._entries)
+            self._entries.append(TestStatsEntry(name_id=nid))
diff --git a/src/ucis/ncdb/testplan.py b/src/ucis/ncdb/testplan.py
new file mode 100644
index 0000000..553f306
--- /dev/null
+++ b/src/ucis/ncdb/testplan.py
@@ -0,0 +1,252 @@
+"""
+src/ucis/ncdb/testplan.py — Testplan data model for NCDB.
+
+A ``Testplan`` describes the structured set of verification tasks (testpoints)
+and functional-coverage groups expected for a design.  It may be embedded
+inside a ``.cdb`` file as ``testplan.json`` (Mode A) or kept as a standalone
+file (Mode B).  Either way the same ``Testplan`` object is used.
+"""
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import List, Optional
+
+
+# ── leaf types ────────────────────────────────────────────────────────────────
+
+@dataclass
+class RequirementLink:
+    """Pointer to one item in an external requirements system (e.g. ALM/JIRA)."""
+    system:  str = ""   # name of the external system, e.g. "ALM", "JIRA"
+    project: str = ""   # project key within that system, e.g. "PROJ-X"
+    item_id: str = ""   # requirement identifier, e.g. "REQ-42"
+    url:     str = ""   # optional direct URL to the item
+
+
+@dataclass
+class CovergroupEntry:
+    """One functional-coverage group the testplan expects to be exercised."""
+    name: str        # covergroup name (required)
+    desc: str = ""   # optional free-form description
+
+
+@dataclass
+class Testpoint:
+    """One verification task, mapped to zero or more test names via ``tests``."""
+    name:            str              # testpoint identifier (lookup key in Testplan)
+    stage:           str              # "V1" | "V2" | "V2S" | "V3" | custom
+    desc:            str = ""         # free-form description
+    tests:           List[str] = field(default_factory=list)  # exact names or "<prefix>_*" wildcards
+    tags:            List[str] = field(default_factory=list)  # free-form labels
+    na:              bool = False     # tests: ["N/A"] — intentionally unmapped
+    source_template: str = ""         # original wildcard template before expansion
+    requirements:    List[RequirementLink] = field(default_factory=list)  # external requirement links
+
+
+# ── main class ────────────────────────────────────────────────────────────────
+
+@dataclass
+class Testplan:
+    """Structured verification testplan.
+
+    Attributes:
+        format_version:   Schema version (currently 1).
+        source_file:      Path to the source .hjson (informational only).
+        import_timestamp: ISO-8601 UTC timestamp set when embedded in a .cdb.
+        testpoints:       Ordered list of :class:`Testpoint` objects.
+        covergroups:      Ordered list of :class:`CovergroupEntry` objects.
+    """
+    format_version:   int = 1
+    source_file:      str = ""
+    import_timestamp: str = ""
+
+    testpoints:  List[Testpoint]      = field(default_factory=list)
+    covergroups: List[CovergroupEntry] = field(default_factory=list)
+
+    # ── in-memory indices (built lazily) ──────────────────────────────────
+    _tp_by_name: dict = field(default_factory=dict, repr=False, compare=False)
+    _tp_by_test: dict = field(default_factory=dict, repr=False, compare=False)
+    _indexed:    bool = field(default=False,        repr=False, compare=False)
+
+    # ── index building ────────────────────────────────────────────────────
+
+    def _build_indices(self) -> None:
+        """Rebuild the name → testpoint and test → testpoint lookup dicts."""
+        self._tp_by_name.clear()
+        self._tp_by_test.clear()
+        for tp in self.testpoints:
+            self._tp_by_name[tp.name] = tp
+            for t in tp.tests:
+                self._tp_by_test[t] = tp   # last testpoint listing t wins
+        self._indexed = True
+
+    def _ensure_indexed(self) -> None:
+        """Build the lookup dicts on first use or after an invalidation."""
+        if not self._indexed:
+            self._build_indices()
+
+    def _invalidate_index(self) -> None:
+        """Mark the lookup dicts stale; the next query rebuilds them."""
+        self._indexed = False
+
+    # ── public query API ──────────────────────────────────────────────────
+
+    def getTestpoint(self, name: str) -> Optional[Testpoint]:
+        """Return the testpoint with *name*, or ``None``."""
+        self._ensure_indexed()
+        return self._tp_by_name.get(name)
+
+    def testpointForTest(self, test_name: str) -> Optional[Testpoint]:
+        """Return the testpoint that owns *test_name*.
+
+        Match order:
+
+        1. **Exact** — ``test_name`` appears literally in ``testpoint.tests``.
+        2. **Seed-suffix strip** — strip a trailing ``_\\d+`` (e.g.
+           ``uart_smoke_42`` → ``uart_smoke``) and retry exact match.
+        3. **Wildcard** — any ``testpoint.tests`` entry ending in ``_*``
+           whose prefix matches ``test_name``.
+
+        Returns ``None`` if no testpoint matches.
+        """
+        self._ensure_indexed()
+        tp = self._tp_by_test.get(test_name)
+        if tp is not None:
+            return tp
+        stripped = re.sub(r'_\d+$', '', test_name)
+        if stripped != test_name:
+            tp = self._tp_by_test.get(stripped)
+            if tp is not None:
+                return tp
+        for pattern, candidate in self._tp_by_test.items():
+            if pattern.endswith('_*') and test_name.startswith(pattern[:-1]):
+                return candidate
+        return None
+
+    def testpointsForStage(self, stage: str) -> List[Testpoint]:
+        """Return all testpoints targeting *stage* (e.g. ``"V2"``)."""
+        return [tp for tp in self.testpoints if tp.stage == stage]
+
+    def stages(self) -> List[str]:
+        """Return the ordered unique stages present in the testplan."""
+        _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}
+        seen = dict.fromkeys(tp.stage for tp in self.testpoints)
+        return sorted(seen, key=lambda s: _ORDER.get(s, 99))
+
+    def add_testpoint(self, tp: Testpoint) -> None:
+        """Append *tp* and invalidate the lookup indices."""
+        self.testpoints.append(tp)
+        self._invalidate_index()
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def to_dict(self) -> dict:
+        """Return a JSON-serialisable dict; list values are copies, safe to mutate."""
+        return {
+            "format_version":   self.format_version,
+            "source_file":      self.source_file,
+            "import_timestamp": self.import_timestamp,
+            "testpoints": [
+                {
+                    "name":            tp.name,
+                    "stage":           tp.stage,
+                    "desc":            tp.desc,
+                    "tests":           list(tp.tests),
+                    "tags":            list(tp.tags),
+                    "na":              tp.na,
+                    "source_template": tp.source_template,
+                    "requirements":    [
+                        {"system": r.system, "project": r.project,
+                         "item_id": r.item_id, "url": r.url}
+                        for r in tp.requirements
+                    ],
+                }
+                for tp in self.testpoints
+            ],
+            "covergroups": [
+                {"name": cg.name, "desc": cg.desc}
+                for cg in self.covergroups
+            ],
+        }
+
+    def serialize(self) -> bytes:
+        """Serialise to compact JSON bytes (for ZIP embedding)."""
+        return json.dumps(self.to_dict(), separators=(',', ':')).encode()
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Testplan":
+        """Reconstruct a :class:`Testplan` from a plain dict (lists are copied, not shared)."""
+        obj = cls(
+            format_version=d.get("format_version", 1),
+            source_file=d.get("source_file", ""),
+            import_timestamp=d.get("import_timestamp", ""),
+        )
+        for rec in d.get("testpoints", []):
+            reqs = [
+                RequirementLink(**{k: r.get(k, "") for k in
+                                   ("system", "project", "item_id", "url")})
+                for r in rec.get("requirements", [])
+            ]
+            obj.testpoints.append(Testpoint(
+                name=rec["name"],
+                stage=rec.get("stage", ""),
+                desc=rec.get("desc", ""),
+                tests=list(rec.get("tests", [])),
+                tags=list(rec.get("tags", [])),
+                na=rec.get("na", False),
+                source_template=rec.get("source_template", ""),
+                requirements=reqs,
+            ))
+        for rec in d.get("covergroups", []):
+            obj.covergroups.append(
+                CovergroupEntry(name=rec["name"], desc=rec.get("desc", "")))
+        return obj
+
+    @classmethod
+    def from_bytes(cls, data: bytes) -> "Testplan":
+        """Reconstruct from JSON bytes (inverse of :meth:`serialize`)."""
+        return cls.from_dict(json.loads(data.decode()))
+
+    @classmethod
+    def load(cls, path: str) -> "Testplan":
+        """Load a testplan from a standalone JSON file (Mode B); Hjson is not parsed here."""
+        with open(path, "rb") as f:
+            return cls.from_bytes(f.read())
+
+    def save(self, path: str) -> None:
+        """Write this testplan to a standalone JSON file (Mode B)."""
+        with open(path, "wb") as f:
+            f.write(self.serialize())
+
+    def stamp_import_time(self) -> None:
+        """Set :attr:`import_timestamp` to the current UTC time."""
+        self.import_timestamp = datetime.now(timezone.utc).isoformat()
+
+
+# ── module-level helpers ──────────────────────────────────────────────────────
+
+def get_testplan(db) -> Optional[Testplan]:
+    """Return the testplan attached to *db*, or ``None`` when there is none.
+
+    A ``getTestplan()`` method on *db* takes precedence; otherwise the
+    ``_testplan`` attribute is read, defaulting to ``None`` if it is absent.
+    """
+    try:
+        getter = db.getTestplan
+    except AttributeError:
+        return getattr(db, "_testplan", None)
+    return getter()
+
+
+def set_testplan(db, tp: Testplan) -> None:
+    """Attach *tp* to *db* through the database's own ``setTestplan()`` method.
+
+    Raises:
+        TypeError: if *db* has no ``setTestplan`` attribute.
+    """
+    if not hasattr(db, "setTestplan"):
+        raise TypeError(f"{type(db).__name__} does not support setTestplan()")
+    db.setTestplan(tp)
diff --git a/src/ucis/ncdb/testplan_closure.py b/src/ucis/ncdb/testplan_closure.py
new file mode 100644
index 0000000..0e3f22f
--- /dev/null
+++ b/src/ucis/ncdb/testplan_closure.py
@@ -0,0 +1,190 @@
+"""
+src/ucis/ncdb/testplan_closure.py — Testpoint closure computation.
+
+Given a :class:`~ucis.ncdb.testplan.Testplan` and a UCIS database this module
+computes the pass/fail *closure* status for each testpoint and evaluates
+stage-level gate conditions.
+"""
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Optional
+
+from .testplan import Testplan, Testpoint
+
+
+class TPStatus(Enum):
+    """Closure status assigned to a testpoint by :func:`compute_closure`."""
+    CLOSED        = "CLOSED"         # matched tests recorded no failures
+    PARTIAL       = "PARTIAL"        # matched tests recorded both passes and failures
+    FAILING       = "FAILING"        # matched tests recorded failures and no passes
+    NOT_RUN       = "NOT_RUN"        # none of the mapped tests appear in the DB
+    NA            = "N/A"            # testpoint intentionally unmapped (na=True)
+    UNIMPLEMENTED = "UNIMPLEMENTED"  # tests list is empty
+
+
+# Standard stage ordering for gate evaluation
+_STAGE_ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}
+
+
+@dataclass
+class TestpointResult:
+    """Closure result for one testpoint, as produced by :func:`compute_closure`."""
+    testpoint:     Testpoint        # the testplan entry this result describes
+    status:        TPStatus         # computed closure status
+    matched_tests: List[str]        # DB test names matched by the testpoint's ``tests``
+    pass_count:    int = 0          # passes summed over the matched tests
+    fail_count:    int = 0          # failures summed over the matched tests
+
+
+def compute_closure(testplan: Testplan, db,
+                    waivers=None) -> List[TestpointResult]:
+    """Compute pass/fail closure for every testpoint against *db*.
+
+    Counts come from the v2 binary history when attached, otherwise from
+    ``db.historyNodes()``.  Each ``tests`` entry matches, in order: the exact
+    DB name plus its seed-suffixed variants (``name_<digits>``); else the
+    entry with its own seed suffix stripped; else, for ``prefix_*``, every DB
+    name starting with ``prefix_``.  A test counts once per testpoint, errored
+    runs count as failures, and zero recorded runs yield ``NOT_RUN``.
+
+    Args:
+        testplan: The testplan to evaluate.
+        db:       Any UCIS database object (must expose ``historyNodes()``).
+        waivers:  Optional :class:`~ucis.ncdb.waivers.WaiverSet`; reserved
+                  for future use (currently ignored).
+
+    Returns:
+        One :class:`TestpointResult` per testpoint, in testplan order.
+    """
+    test_pass: Dict[str, int] = {}
+    test_fail: Dict[str, int] = {}
+
+    # Trigger lazy v2 history load for NcdbUCIS
+    if hasattr(db, '_ensure_v2_history'):
+        db._ensure_v2_history()
+
+    if getattr(db, '_test_registry', None) is not None:
+        stats = db._test_stats
+        for nid, name in enumerate(db._test_registry._names):
+            entry = stats.get(nid)
+            if entry is not None:
+                test_pass[name] = entry.pass_count
+                # An errored run did not pass, so it must not look clean.
+                test_fail[name] = entry.fail_count + getattr(entry, 'error_count', 0)
+    else:
+        # Best-effort fallback: unreadable history leaves testpoints NOT_RUN.
+        try:
+            from ucis.history_node_kind import HistoryNodeKind
+            for node in db.historyNodes(HistoryNodeKind.TEST):
+                name = node.getLogicalName()
+                try:
+                    from ucis.test_status_t import TestStatusT
+                    if node.getTestStatus() == TestStatusT.OK:
+                        test_pass[name] = test_pass.get(name, 0) + 1
+                    else:
+                        test_fail[name] = test_fail.get(name, 0) + 1
+                except Exception:
+                    test_pass[name] = test_pass.get(name, 0) + 1
+        except Exception:
+            pass
+
+    # DB test names in first-seen order, plus base name → seed-suffixed names
+    known = list(dict.fromkeys([*test_pass, *test_fail]))
+    by_base: Dict[str, List[str]] = {}
+    for tname in known:
+        base = re.sub(r'_\d+$', '', tname)
+        if base != tname:
+            by_base.setdefault(base, []).append(tname)
+
+    results: List[TestpointResult] = []
+    for tp in testplan.testpoints:
+        if tp.na:
+            results.append(TestpointResult(tp, TPStatus.NA, []))
+            continue
+        if not tp.tests:
+            results.append(TestpointResult(tp, TPStatus.UNIMPLEMENTED, []))
+            continue
+
+        matched: List[str] = []
+        for pattern in tp.tests:
+            hits = [pattern] if pattern in test_pass or pattern in test_fail else []
+            hits += by_base.get(pattern, [])
+            stripped = re.sub(r'_\d+$', '', pattern)
+            if not hits and (stripped in test_pass or stripped in test_fail):
+                hits = [stripped]
+            elif not hits and pattern.endswith('_*'):
+                hits = [t for t in known if t.startswith(pattern[:-1])]
+            for tname in hits:      # dedupe: overlapping entries count once
+                if tname not in matched:
+                    matched.append(tname)
+
+        passes = sum(test_pass.get(t, 0) for t in matched)
+        fails = sum(test_fail.get(t, 0) for t in matched)
+        if not (passes or fails):
+            status = TPStatus.NOT_RUN
+        elif not fails:
+            status = TPStatus.CLOSED
+        else:
+            status = TPStatus.PARTIAL if passes else TPStatus.FAILING
+
+        results.append(TestpointResult(tp, status, matched, passes, fails))
+
+    return results
+
+
+def stage_gate_status(results: List[TestpointResult],
+                      stage: str,
+                      testplan: Testplan,
+                      require_flake_score_below: Optional[float] = None,
+                      require_coverage_pct: Optional[float] = None) -> dict:
+    """Determine whether the gate for *stage* is met.
+
+    The gate passes when no testpoint at *stage*, or at any stage ranked at
+    or below it in the standard V1 < V2 < V2S < V3 order, is blocking.  A
+    testpoint blocks unless it is CLOSED, N/A or UNIMPLEMENTED.  Stages
+    outside the standard order rank last (99).
+
+    Args:
+        results:                   Output of :func:`compute_closure`.
+        stage:                     Stage to evaluate (e.g. ``"V2"``).
+        testplan:                  The testplan (used for stage ordering).
+        require_flake_score_below: Reserved — flakiness threshold (future).
+        require_coverage_pct:      Reserved — coverage threshold (future).
+
+    Returns:
+        Dict with keys ``passed`` (bool), ``stage``, ``blocking``
+        (list of :class:`TestpointResult` that prevent the gate from passing),
+        and ``message`` (human-readable summary string).
+    """
+    target_rank = _STAGE_ORDER.get(stage, 99)
+
+    # Collect all stages that must pass (≤ target rank)
+    stages_required = {s for s in testplan.stages()
+                       if _STAGE_ORDER.get(s, 99) <= target_rank}
+
+    blocking: List[TestpointResult] = []
+    for r in results:
+        if r.testpoint.stage not in stages_required:
+            continue
+        # CLOSED, N/A and UNIMPLEMENTED testpoints never block the gate
+        if r.status in (TPStatus.CLOSED, TPStatus.NA, TPStatus.UNIMPLEMENTED):
+            continue
+        blocking.append(r)
+
+    passed = len(blocking) == 0
+    if passed:
+        message = f"Stage {stage} gate PASSED"
+    else:
+        names = ", ".join(r.testpoint.name for r in blocking[:5])
+        extra = f" (+{len(blocking)-5} more)" if len(blocking) > 5 else ""
+        message = f"Stage {stage} gate FAILED — blocking: {names}{extra}"
+
+    return {
+        "passed":   passed,
+        "stage":    stage,
+        "blocking": blocking,
+        "message":  message,
+    }
diff --git a/src/ucis/ncdb/testplan_export.py b/src/ucis/ncdb/testplan_export.py
new file mode 100644
index 0000000..d8abc9a
--- /dev/null
+++ b/src/ucis/ncdb/testplan_export.py
@@ -0,0 +1,256 @@
+"""CI/CD export utilities for testplan closure results.
+
+Provides three output formats:
+
+* **JUnit XML** — standard ``<testsuite>`` / ``<testcase>`` format for CI
+  systems (Jenkins, GitHub Actions, GitLab CI, etc.)
+* **GitHub Annotations** — ``::error::`` / ``::warning::`` lines written to
+  stdout for GitHub Actions step annotations.
+* **Summary Markdown** — GitHub Actions ``$GITHUB_STEP_SUMMARY`` compatible
+  markdown table with stage gate verdict.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import time
+from typing import List, Optional
+from xml.etree import ElementTree as ET
+
+from ucis.ncdb.testplan_closure import TPStatus, TestpointResult
+from ucis.ncdb.reports import (
+    ClosureSummary,
+    StageGateReport,
+    report_testpoint_closure,
+    _STATUS_LABEL,
+    _pct,
+    _STAGE_ORDER,
+)
+
+
+# ---------------------------------------------------------------------------
+# JUnit XML export
+# ---------------------------------------------------------------------------
+
+def export_junit_xml(
+    results: List[TestpointResult],
+    output_path: str,
+    suite_name: str = "testplan_closure",
+) -> None:
+    """Write closure results as a JUnit XML file.
+
+    Each testpoint becomes a ``<testcase>`` with its stage as ``classname``.
+    FAILING and PARTIAL testpoints get a ``<failure>`` element, NOT_RUN gets
+    ``<skipped>``, and every other status is a plain pass.  The testpoint
+    description, if any, follows as ``<system-out>``; the JUnit schema
+    requires it after the outcome element.
+
+    N/A and UNIMPLEMENTED testpoints are not failures or skips here, so they
+    appear as passing testcases and count only toward the ``tests`` total on
+    ``<testsuite>``.
+
+    Args:
+        results: Output of
+            :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        output_path: Destination ``.xml`` file path.
+        suite_name: Value of the ``name`` attribute on ``<testsuite>``.
+
+    Example::
+
+        from ucis.ncdb.testplan_export import export_junit_xml
+        export_junit_xml(results, "closure_results.xml")
+    """
+    # status → (label used in the failure message, JUnit ``type`` attribute)
+    failure_kinds = {
+        TPStatus.FAILING: ("FAILING", "TestpointFailure"),
+        TPStatus.PARTIAL: ("PARTIAL", "TestpointPartial"),
+    }
+    failures = sum(1 for r in results if r.status in failure_kinds)
+    skipped = sum(1 for r in results if r.status == TPStatus.NOT_RUN)
+    total = len(results)
+
+    suite = ET.Element(
+        "testsuite",
+        name=suite_name,
+        tests=str(total),
+        failures=str(failures),
+        skipped=str(skipped),
+        timestamp=time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
+    )
+
+    # One <testcase> per testpoint, grouped by stage through ``classname``
+    for r in results:
+        tc = ET.SubElement(
+            suite,
+            "testcase",
+            name=r.testpoint.name,
+            classname=r.testpoint.stage or "unknown",
+        )
+        if r.status in failure_kinds:
+            label, kind = failure_kinds[r.status]
+            ET.SubElement(
+                tc,
+                "failure",
+                message=f"Testpoint {label}: "
+                        f"pass={r.pass_count} fail={r.fail_count}",
+                type=kind,
+            ).text = (
+                f"Matched tests: {', '.join(r.matched_tests) or 'none'}\n"
+                f"Pass: {r.pass_count}  Fail: {r.fail_count}"
+            )
+        elif r.status == TPStatus.NOT_RUN:
+            ET.SubElement(tc, "skipped", message="Testpoint not run")
+        # <system-out> must come after <failure>/<skipped> to validate
+        if r.testpoint.desc:
+            ET.SubElement(tc, "system-out").text = r.testpoint.desc
+
+    # Pretty-print, then write with an XML declaration
+    tree = ET.ElementTree(suite)
+    ET.indent(tree, space="  ")
+    tree.write(output_path, encoding="utf-8", xml_declaration=True)
+
+
+# ---------------------------------------------------------------------------
+# GitHub Annotations export
+# ---------------------------------------------------------------------------
+
+def export_github_annotations(
+    results: List[TestpointResult],
+    file: str = "testplan",
+    *,
+    output=None,
+) -> None:
+    """Write GitHub Actions workflow command annotations to *output*.
+
+    FAILING testpoints emit ``::error`` lines; CLOSED and N/A produce no
+    output; every other status emits a ``::warning`` line.  The message and
+    the ``file``/``title`` properties are percent-escaped as the workflow
+    command syntax requires, so names with ``,``, ``:`` or newlines are safe.
+
+    Args:
+        results: Output of
+            :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        file: Value used in the ``file=`` annotation field (defaults to
+            ``"testplan"``).
+        output: File-like object to write to (defaults to ``sys.stdout``).
+
+    Example::
+
+        from ucis.ncdb.testplan_export import export_github_annotations
+        export_github_annotations(results)  # writes to stdout
+    """
+    def _esc(text: str, prop: bool = False) -> str:
+        # '%', CR and LF are escaped everywhere; ':' and ',' only in properties
+        text = text.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")
+        return text.replace(":", "%3A").replace(",", "%2C") if prop else text
+
+    if output is None:
+        output = sys.stdout
+
+    for r in results:
+        if r.status in (TPStatus.CLOSED, TPStatus.NA):
+            continue
+        level = "error" if r.status == TPStatus.FAILING else "warning"
+        title = _esc(f"[{r.testpoint.stage}] {r.testpoint.name}", prop=True)
+        msg = _esc(f"status={_STATUS_LABEL[r.status]} "
+                   f"pass={r.pass_count} fail={r.fail_count}")
+        location = _esc(str(file), prop=True)
+        output.write(f"::{level} file={location},title={title}::{msg}\n")
+
+
+# ---------------------------------------------------------------------------
+# Markdown summary export
+# ---------------------------------------------------------------------------
+
+def export_summary_markdown(
+    results: List[TestpointResult],
+    stage_gate: Optional[StageGateReport] = None,
+    history_db=None,
+) -> str:
+    """Render closure results as Markdown for a GitHub Actions job summary.
+
+    Sections, in order: headline counts, optional stage-gate verdict,
+    per-stage roll-up table, per-testpoint table (N/A and UNIMPLEMENTED
+    testpoints omitted) and, if the gate has any, the blocking testpoints.
+
+    Args:
+        results: Output of
+            :func:`~ucis.ncdb.testplan_closure.compute_closure`.
+        stage_gate: Optional :class:`~ucis.ncdb.reports.StageGateReport`
+            to include a gate verdict section.
+        history_db: Unused; reserved for future trend lines.
+
+    Returns:
+        A markdown string suitable for appending to
+        ``$GITHUB_STEP_SUMMARY``.
+
+    Example::
+
+        from ucis.ncdb.testplan_export import export_summary_markdown
+        md = export_summary_markdown(results, stage_gate=gate)
+        with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
+            f.write(md)
+    """
+    summary = report_testpoint_closure(results)
+    n_total = summary.total
+    n_closed = summary.total_closed
+    n_na = summary.total_na
+
+    # Headline, then the optional gate verdict
+    md: List[str] = [
+        "## Testplan Closure Report\n",
+        f"**{n_closed}/{n_total}** testpoints closed "
+        f"({n_na} N/A, {n_total - n_closed - n_na} open)\n",
+    ]
+    if stage_gate is not None:
+        verdict = "❌ FAIL" if not stage_gate.passed else "✅ PASS"
+        md.append(f"**Stage gate [{stage_gate.stage}]:** {verdict}\n")
+
+    # Per-stage roll-up, standard stages first
+    by_stage = sorted(
+        summary.by_stage.items(),
+        key=lambda item: _STAGE_ORDER.get(item[0], 999),
+    )
+    if by_stage:
+        md += ["### By stage\n",
+               "| Stage | Closed | Total | % |",
+               "|-------|-------:|------:|--:|"]
+        md += [
+            f"| {stage} | {entry['closed']} | {entry['total']} "
+            f"| {entry['pct']:.1f}% |"
+            for stage, entry in by_stage
+        ]
+        md.append("")
+
+    # Per-testpoint detail, skipping N/A and UNIMPLEMENTED rows
+    hidden = (TPStatus.NA, TPStatus.UNIMPLEMENTED)
+    shown = [r for r in results if r.status not in hidden]
+    if shown:
+        icons = {
+            TPStatus.CLOSED: "✅",
+            TPStatus.PARTIAL: "⚠️",
+            TPStatus.FAILING: "❌",
+            TPStatus.NOT_RUN: "⬜",
+        }
+        md += ["### Testpoints\n",
+               "| Testpoint | Stage | Status | Pass | Fail |",
+               "|-----------|-------|--------|-----:|-----:|"]
+        md += [
+            f"| {r.testpoint.name} | {r.testpoint.stage or '?'} "
+            f"| {icons.get(r.status, '')} {_STATUS_LABEL[r.status]} "
+            f"| {r.pass_count} | {r.fail_count} |"
+            for r in shown
+        ]
+        md.append("")
+
+    # Testpoints that keep the gate from passing
+    if stage_gate is not None and stage_gate.blocking:
+        md.append("### Blocking testpoints\n")
+        md += [
+            f"- ❌ **[{r.testpoint.stage}] {r.testpoint.name}** "
+            f"— {_STATUS_LABEL[r.status]}"
+            for r in stage_gate.blocking
+        ]
+        md.append("")
+    return "\n".join(md)
diff --git a/src/ucis/ncdb/testplan_hjson.py b/src/ucis/ncdb/testplan_hjson.py
new file mode 100644
index 0000000..380ed0e
--- /dev/null
+++ b/src/ucis/ncdb/testplan_hjson.py
@@ -0,0 +1,147 @@
+"""
+src/ucis/ncdb/testplan_hjson.py — Import OpenTitan-style Hjson testplans.
+
+The OpenTitan testplan format is a Hjson (human JSON) file with a ``testpoints``
+list.  Each testpoint can have a ``tests`` list that uses ``{key}`` wildcards
+expanded by cartesian product with a ``substitutions`` dict.  ``tests: ["N/A"]``
+marks a testpoint as intentionally unmapped.
+
+Falls back to standard ``json`` if the ``hjson`` package is not installed
+(works for files that happen to be valid JSON or JSON-subset Hjson).
+"""
+from __future__ import annotations
+
+import itertools
+import os
+import re
+from typing import Dict, List, Optional
+
+from .testplan import CovergroupEntry, Testplan, Testpoint
+
+try:
+    import hjson as _hjson
+    _HJSON_AVAILABLE = True
+except ImportError:
+    import json as _hjson  # type: ignore[no-redef]
+    _HJSON_AVAILABLE = False
+
+
+# ── public API ────────────────────────────────────────────────────────────────
+
+def import_hjson(hjson_path: str,
+                 substitutions: Optional[Dict[str, object]] = None) -> Testplan:
+    """Parse an OpenTitan-style Hjson testplan and return a :class:`~ucis.ncdb.testplan.Testplan`.
+
+    A testpoint whose ``tests`` is exactly ``["N/A"]`` is imported with
+    ``na=True`` and an empty test list.  Otherwise every template in
+    ``tests`` is expanded with *substitutions*, and the templates that
+    expansion actually changed are kept, comma-separated, in
+    ``source_template``.
+
+    Without the ``hjson`` package the file is parsed with the standard
+    ``json`` module, so it must then be valid JSON.
+
+    Args:
+        hjson_path:    Path to the ``.hjson`` (or ``.json``) file.
+        substitutions: Optional dict of ``{key: value_or_list}`` pairs used
+                       for wildcard expansion in test names.
+
+    Returns:
+        A fully expanded :class:`~ucis.ncdb.testplan.Testplan` with all
+        ``{key}`` templates replaced.
+    """
+    subs = substitutions or {}
+    with open(hjson_path, "r", encoding="utf-8") as fh:
+        raw = fh.read()
+
+    # ``_hjson`` is the stdlib ``json`` module when the hjson package is not
+    # installed, so one call covers both cases.
+    data = _hjson.loads(raw)
+
+    plan = Testplan(source_file=os.path.abspath(hjson_path))
+
+    for rec in data.get("testpoints", []):
+        raw_tests = rec.get("tests", [])
+        is_na = raw_tests == ["N/A"]
+
+        expanded: List[str] = []
+        templates: List[str] = []
+        for tmpl in ([] if is_na else raw_tests):
+            results = _expand_template(tmpl, subs)
+            expanded.extend(results)
+            # An empty substitution list expands to nothing, so compare the
+            # whole list rather than indexing results[0].
+            if results != [tmpl]:
+                templates.append(tmpl)
+
+        plan.add_testpoint(Testpoint(
+            name=rec.get("name", ""),
+            stage=rec.get("stage", ""),
+            desc=rec.get("desc", ""),
+            tags=rec.get("tags", []),
+            na=is_na,
+            tests=expanded,
+            source_template=", ".join(templates),
+        ))
+
+    # Covergroups are copied through unchanged (name and desc only)
+    for rec in data.get("covergroups", []):
+        plan.covergroups.append(CovergroupEntry(
+            name=rec.get("name", ""),
+            desc=rec.get("desc", ""),
+        ))
+
+    return plan
+
+
+# ── internal helpers ──────────────────────────────────────────────────────────
+
+def _expand_template(template: str,
+                     subs: Dict[str, object]) -> List[str]:
+    """Expand ``{key}`` placeholders in *template* using *subs*.
+
+    Each ``{key}`` whose value in *subs* is a list or tuple produces multiple
+    output strings (cartesian product); an empty one produces none.  Scalar
+    values are substituted directly.  Keys absent from *subs* are left
+    as-is.  Substitution is single-pass: braces that arrive inside a
+    substituted value are never expanded again.
+
+    Examples::
+
+        _expand_template("uart_{baud}_test", {"baud": ["9600", "115200"]})
+        # → ["uart_9600_test", "uart_115200_test"]
+
+        _expand_template("{mod}_{type}", {"mod": ["a", "b"], "type": "x"})
+        # → ["a_x", "b_x"]
+    """
+    placeholder = re.compile(r'\{(\w+)\}')
+    keys = list(dict.fromkeys(placeholder.findall(template)))  # ordered, unique
+    if not keys:
+        return [template]
+
+    # One list of candidate strings per distinct placeholder
+    choices: List[List[str]] = []
+    for key in keys:
+        val = subs.get(key)
+        if val is None:
+            choices.append([f"{{{key}}}"])    # unknown key left verbatim
+        elif isinstance(val, (list, tuple)):
+            choices.append([str(v) for v in val])
+        else:
+            choices.append([str(val)])
+
+    results: List[str] = []
+    for combo in itertools.product(*choices):
+        chosen = dict(zip(keys, combo))
+        # a single regex pass never rescans text produced by another key
+        results.append(placeholder.sub(lambda m: chosen[m.group(1)], template))
+    return results
+
+
+def _expand_tests(test_list: List[str],
+                  subs: Dict[str, object]) -> List[str]:
+    """Flatten the expansions of every template in *test_list*, in order."""
+    return [
+        name for tmpl in test_list
+        for name in _expand_template(tmpl, subs)
+    ]
diff --git a/src/ucis/ncdb/waivers.py b/src/ucis/ncdb/waivers.py
new file mode 100644
index 0000000..349dd79
--- /dev/null
+++ b/src/ucis/ncdb/waivers.py
@@ -0,0 +1,195 @@
+"""
+src/ucis/ncdb/waivers.py — Coverage and test-failure waivers.
+
+A :class:`WaiverSet` contains zero or more :class:`Waiver` objects.  Each
+waiver suppresses a known failure or uncovered bin so that reports distinguish
+*known* issues from new regressions.
+
+Waivers are stored as ``waivers.json`` inside the NCDB ZIP (optional member)
+or as a standalone JSON file.
+
+Expiry enforcement is the **caller's responsibility** — :meth:`Waiver.matches`
+and :meth:`WaiverSet.matches_scope` perform only pattern matching.  To filter
+out expired or revoked waivers call :meth:`WaiverSet.active_at` first.
+"""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+@dataclass
+class Waiver:
+    """One waiver record: a scope/bin pattern pair plus approval metadata.
+
+    Attributes:
+        id:            Unique identifier (e.g. ``"W-001"``).
+        scope_pattern: Glob-style pattern matched against UCIS scope paths.
+                       ``*`` matches any single path segment; ``**`` matches
+                       any number of segments.
+        bin_pattern:   Glob-style pattern matched against bin names within the
+                       matched scope.  Use ``"*"`` to waive the entire scope.
+        rationale:     Human-readable explanation.
+        approver:      Name/username of approver.
+        approved_at:   ISO-8601 UTC timestamp of approval.
+        expires_at:    ISO-8601 UTC timestamp after which this waiver expires.
+                       Empty string means "never expires".
+        status:        ``"active"`` | ``"expired"`` | ``"revoked"``.
+    """
+    id:            str
+    scope_pattern: str
+    bin_pattern:   str = "*"
+    rationale:     str = ""
+    approver:      str = ""
+    approved_at:   str = ""
+    expires_at:    str = ""
+    status:        str = "active"
+
+    def matches(self, scope_path: str, bin_name: str = "") -> bool:
+        """Tell whether *scope_path* / *bin_name* falls under this waiver.
+
+        The scope must match ``scope_pattern``.  The bin is tested against
+        ``bin_pattern`` only when *bin_name* is non-empty and the pattern is
+        not the catch-all ``"*"``; otherwise a scope match alone suffices.
+
+        Neither ``status`` nor ``expires_at`` is consulted here — filter with
+        :meth:`WaiverSet.active_at` beforehand to drop inactive waivers.
+        """
+        scope_ok = _glob_match(self.scope_pattern, scope_path)
+        bin_constrained = bool(bin_name) and self.bin_pattern != "*"
+        if scope_ok and bin_constrained:
+            return _glob_match(self.bin_pattern, bin_name)
+        return scope_ok
+
+
+class WaiverSet:
+    """Collection of :class:`Waiver` objects.
+
+    Attributes:
+        waivers: Ordered list of waivers.
+    """
+
+    def __init__(self, waivers: Optional[List[Waiver]] = None) -> None:
+        self.waivers: List[Waiver] = waivers or []
+
+    def add(self, waiver: Waiver) -> None:
+        """Append *waiver* to the set."""
+        self.waivers.append(waiver)
+
+    def matches_scope(self, scope_path: str, bin_name: str = "") -> bool:
+        """Return True if any waiver covers *scope_path* / *bin_name*."""
+        return any(w.matches(scope_path, bin_name) for w in self.waivers)
+
+    def active_at(self, timestamp: str) -> "WaiverSet":
+        """Return a new :class:`WaiverSet` containing only waivers that are
+        active at *timestamp* (ISO-8601 string).
+
+        A waiver is active when:
+
+        * ``status == "active"``
+        * ``expires_at`` is empty OR ``expires_at > timestamp``
+
+        Values are compared as instants (offsets and ``Z`` honoured, no
+        offset = UTC); unparseable input falls back to string ordering.
+        """
+        from datetime import datetime, timezone
+        def _instant(text: str):
+            try:
+                # fromisoformat() rejects a trailing "Z" before Python 3.11.
+                iso = text[:-1] + "+00:00" if text[-1:] in ("Z", "z") else text
+                stamp = datetime.fromisoformat(iso)
+            except (TypeError, ValueError):
+                return None
+            return stamp if stamp.tzinfo else stamp.replace(tzinfo=timezone.utc)
+
+        now = _instant(timestamp)
+        def _live(expiry: str) -> bool:
+            end = _instant(expiry)
+            return expiry > timestamp if now is None or end is None else end > now
+        return WaiverSet([w for w in self.waivers if w.status == "active"
+                          and (not w.expires_at or _live(w.expires_at))])
+
+    def get(self, waiver_id: str) -> Optional[Waiver]:
+        """Return the waiver with *waiver_id*, or ``None``."""
+        return next((w for w in self.waivers if w.id == waiver_id), None)
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def to_dict(self) -> dict:  # JSON-ready: format_version + one record per waiver
+        keys = ("id", "scope_pattern", "bin_pattern", "rationale",
+                "approver", "approved_at", "expires_at", "status")
+        return {
+            "format_version": 1,
+            "waivers": [{k: getattr(w, k) for k in keys} for w in self.waivers],
+        }
+
+    def serialize(self) -> bytes:
+        """Serialise to compact JSON bytes (for ZIP embedding)."""
+        return json.dumps(self.to_dict(), separators=(',', ':')).encode()
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "WaiverSet":  # only "id" is mandatory per record
+        ws = cls()
+        for rec in d.get("waivers", []):
+            ws.add(Waiver(
+                id=rec["id"],
+                scope_pattern=rec.get("scope_pattern", "**"),
+                bin_pattern=rec.get("bin_pattern", "*"),
+                rationale=rec.get("rationale", ""),
+                approver=rec.get("approver", ""),
+                approved_at=rec.get("approved_at", ""),
+                expires_at=rec.get("expires_at", ""),
+                status=rec.get("status", "active"),
+            ))
+        return ws
+
+    @classmethod
+    def from_bytes(cls, data: bytes) -> "WaiverSet":  # inverse of serialize()
+        return cls.from_dict(json.loads(data.decode()))
+
+    @classmethod
+    def load(cls, path: str) -> "WaiverSet":
+        """Load from a standalone JSON file."""
+        with open(path, "rb") as f:
+            return cls.from_bytes(f.read())
+
+    def save(self, path: str) -> None:
+        """Write to a standalone JSON file."""
+        with open(path, "wb") as f:
+            f.write(self.serialize())
+
+
+# ── glob matching helper ──────────────────────────────────────────────────────
+
+def _glob_match(pattern: str, text: str) -> bool:
+    """Return True when *text* matches glob *pattern* in its entirety.
+
+    ``*`` matches within a single ``/``-delimited segment; ``**`` may span
+    several.  The pattern is translated by :func:`_glob_to_regex` and must
+    cover the whole of *text* (``re.fullmatch``), not merely a prefix.
+    """
+    import re
+    return bool(re.fullmatch(_glob_to_regex(pattern), text))
+
+
+def _glob_to_regex(pattern: str) -> str:
+    """Translate glob *pattern* into an equivalent regular-expression string.
+
+    Tokens are recognised left to right, longest first:
+
+    * ``**/`` → ``(?:.+/)?``  (optional run of leading segments)
+    * ``**``  → ``.*``        (anything, separators included)
+    * ``*``   → ``[^/]*``     (anything except a ``/``)
+    * any other character is escaped and matched literally
+    """
+    import re
+    wildcards = {'**/': '(?:.+/)?', '**': '.*', '*': '[^/]*'}
+
+    def _emit(tok) -> str:
+        piece = tok.group(0)
+        return wildcards[piece] if piece in wildcards else re.escape(piece)
+
+    # Alternation order mirrors the priority listed in the docstring.
+    return ''.join(_emit(t) for t in
+                   re.finditer(r'\*\*/|\*\*|\*|.', pattern, re.DOTALL))
diff --git a/src/ucis/report/coverage_metrics.py b/src/ucis/report/coverage_metrics.py
new file mode 100644
index 0000000..3148fe7
--- /dev/null
+++ b/src/ucis/report/coverage_metrics.py
@@ -0,0 +1,762 @@
+"""
+Common coverage metrics layer.
+
+``CoverageMetrics`` is the single source of truth for all coverage number
+computation in pyucis.  Every consumer — TUI views, CLI ``show`` commands,
+and report formatters — should obtain aggregated numbers from this class
+rather than implementing their own UCIS tree walks or SQL queries.
+
+Design principles
+-----------------
+* **Correct bin semantics**: a bin is *covered* when ``cover_data >= at_least``
+  (UCIS LRM §5.3).  Using ``cover_data > 0`` is wrong when ``at_least > 1``.
+* **Correct traversal**: functional coverage is derived from
+  ``CoverageReportBuilder``, which walks ``INSTANCE → COVERGROUP → COVERPOINT``,
+  preventing double-counting of type-level vs instance-level covergroup scopes.
+* **SQLite fast paths** are used for performance but must produce results
+  identical to the API path.
+* **Caching** is simple dict-based; call ``invalidate()`` whenever the
+  database filter changes.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ucis.ucis import UCIS
+    from ucis.report.coverage_report import CoverageReport
+
+
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+
+@dataclass
+class BinStats:
+    """Total / covered bin tallies for one coverage scope or type."""
+    total: int = 0
+    covered: int = 0
+
+    @property
+    def uncovered(self) -> int:
+        """Bins not yet covered (``total - covered``)."""
+        return self.total - self.covered
+
+    @property
+    def coverage_pct(self) -> float:
+        """Covered share as a percentage; ``0.0`` when there are no bins."""
+        return 0.0 if self.total == 0 else self.covered / self.total * 100.0
+
+    def __add__(self, other: 'BinStats') -> 'BinStats':
+        return BinStats(total=self.total + other.total, covered=self.covered + other.covered)
+
+
+@dataclass
+class BinDetail:
+    """Per-bin record (hit count and goal) for detail / bin-listing views."""
+    name: str
+    count: int                 # hit count compared against at_least
+    at_least: int              # threshold the count must reach to be covered
+    is_ignore: bool = False    # marks an ignore bin
+    is_illegal: bool = False   # marks an illegal bin
+
+    @property
+    def covered(self) -> bool:
+        return self.at_least <= self.count
+
+
+@dataclass
+class CoverpointStats:
+    """Bin tallies, optional per-bin details and weight for one coverpoint."""
+    name: str
+    path: str                    # slash-joined covergroup name(s) + coverpoint name
+    bins: BinStats = field(default_factory=BinStats)
+    bin_details: List[BinDetail] = field(default_factory=list)  # empty unless requested
+    weight: int = 1
+
+    @property
+    def coverage_pct(self) -> float:
+        return self.bins.coverage_pct  # delegates to BinStats
+
+
+@dataclass
+class CrossStats:
+    """Bin tallies and weight for one cross."""
+    name: str
+    path: str                    # slash-joined covergroup name(s) + cross name
+    bins: BinStats = field(default_factory=BinStats)
+    weight: int = 1
+
+    @property
+    def coverage_pct(self) -> float:
+        return self.bins.coverage_pct  # delegates to BinStats
+
+
+@dataclass
+class CovergroupStats:
+    """Coverage summary for a covergroup (type-level or instance-level)."""
+    name: str
+    path: str                   # CoverageMetrics fills this from the report covergroup's instname
+    coverage_pct: float = 0.0   # weighted over child coverpoints/crosses
+    bins: BinStats = field(default_factory=BinStats)   # plain (unweighted) bin tallies
+    weight: int = 1
+
+
+@dataclass
+class FileCoverageStats:
+    """Code-coverage tallies for one source file, split by coverage type."""
+    file_id: int
+    file_path: str
+    line:   BinStats = field(default_factory=BinStats)
+    branch: BinStats = field(default_factory=BinStats)
+    toggle: BinStats = field(default_factory=BinStats)
+    expr:   BinStats = field(default_factory=BinStats)
+    cond:   BinStats = field(default_factory=BinStats)
+    fsm:    BinStats = field(default_factory=BinStats)
+    block:  BinStats = field(default_factory=BinStats)
+
+    @property
+    def overall(self) -> BinStats:
+        """Sum of all seven per-type tallies."""
+        parts = (self.line, self.branch, self.toggle, self.expr,
+                 self.cond, self.fsm, self.block)
+        return sum(parts, BinStats())
+
+
+@dataclass
+class TestInfo:
+    """Identity, status and contribution metadata for one test history node."""
+    history_id: int        # internal DB id (-1 for non-SQLite backends)
+    name: str              # logical name; 'Unknown' when the node has none
+    status: str            # "PASSED" | "FAILED" | "UNKNOWN"
+    date: str              # str() of the node's date, or 'Unknown'
+    total_items: int = 0   # bins hit by this test  (SQLite only, else 0)
+    unique_items: int = 0  # bins *only* hit by this test (SQLite only, else 0)
+
+
+# ---------------------------------------------------------------------------
+# CoverageMetrics
+# ---------------------------------------------------------------------------
+
+class CoverageMetrics:
+    """
+    Single source of truth for all coverage metric computation.
+
+    Instantiate once per database and pass the instance to every consumer
+    (TUI CoverageModel, show commands, report formatters).
+
+    Parameters
+    ----------
+    db:
+        Any object implementing the ``UCIS`` interface (MemUCIS, SqliteUCIS,
+        XmlUCIS, …).
+    """
+
+    def __init__(self, db: 'UCIS') -> None:
+        self._db = db
+        self._cache: Dict[str, object] = {}  # memo store used by _cached(); emptied by invalidate()
+
+    # ------------------------------------------------------------------ cache
+
+    def invalidate(self) -> None:
+        """Drop every memoised result so the next query recomputes it (e.g. after a test-filter change)."""
+        self._cache.clear()
+
+    def _cached(self, key: str, compute):  # compute: zero-argument callable
+        if key not in self._cache:
+            self._cache[key] = compute()   # runs only on the first request for this key
+        return self._cache[key]
+
+    # --------------------------------------------------------------- hierarchy
+
+    @property
+    def report(self) -> 'CoverageReport':
+        """
+        ``CoverageReport`` built via ``CoverageReportBuilder`` (memoised).
+
+        This is the canonical hierarchical representation of functional
+        coverage.  All functional-coverage numbers in this class are derived
+        from this object to guarantee consistency with the text / JSON / HTML
+        report formatters.  Built on first access; ``invalidate()`` drops it.
+        """
+        return self._cached('report', self._build_report)
+
+    def _build_report(self) -> 'CoverageReport':
+        from ucis.report.coverage_report_builder import CoverageReportBuilder  # imported on first use
+        return CoverageReportBuilder.build(self._db)
+
+    # ------------------------------------------ functional coverage (CVGBIN)
+
+    def functional_bins(self) -> BinStats:
+        """
+        Aggregate ``BinStats`` for all functional-coverage bins (CVGBIN).
+
+        A bin is counted as *covered* when the report marks it ``hit``
+        (presumably ``count >= at_least`` — set by the report builder).  Memoised.
+        """
+        return self._cached('functional_bins', self._compute_functional_bins)
+
+    def _compute_functional_bins(self) -> BinStats:
+        """Add up (total, covered) bin counts over the report's top-level covergroups.
+
+        Working from the cached ``CoverageReport`` keeps the traversal
+        (INSTANCE → COVERGROUP → COVERPOINT) identical to the text / JSON
+        reports, so no separate SQLite fast path is used here.
+        """
+        # One (total, covered) pair per top-level covergroup.
+        per_group = [self._count_bins_in_cg(cg) for cg in self.report.covergroups]
+        return BinStats(
+            total=sum(pair[0] for pair in per_group),
+            covered=sum(pair[1] for pair in per_group),
+        )
+
+    def _count_bins_in_cg(self, cg) -> tuple:
+        """
+        Return ``(total, covered)`` bin counts for one report covergroup.
+
+        Follows CoverageReportBuilder.build_covergroup() semantics: if the
+        covergroup has type-level coverpoints or crosses, only their bins
+        are counted, because the sub-instance COVERINSTANCE groups hold the
+        same bins and visiting them too would double-count.  Otherwise the
+        counts of the sub-covergroups are summed recursively.
+
+        A bin counts as covered when its ``hit`` attribute is truthy.
+        """
+        if not (cg.coverpoints or cg.crosses):
+            # Nothing at type level — fold the sub-covergroup tallies together.
+            total = 0
+            covered = 0
+            for child in cg.covergroups:
+                sub_total, sub_covered = self._count_bins_in_cg(child)
+                total += sub_total
+                covered += sub_covered
+            return total, covered
+
+        # Type-level items are present — count their bins and nothing else.
+        type_level_bins = [
+            b
+            for item in (*cg.coverpoints, *cg.crosses)
+            for b in item.bins
+        ]
+        n_hit = sum(1 for b in type_level_bins if b.hit)
+        return len(type_level_bins), n_hit
+
+    def covergroup_stats(self) -> List[CovergroupStats]:
+        """One ``CovergroupStats`` per top-level covergroup of the report (memoised)."""
+        return self._cached('covergroup_stats', self._compute_covergroup_stats)
+
+    def _compute_covergroup_stats(self) -> List[CovergroupStats]:
+        """Summarise each top-level covergroup of the report, in report order."""
+        def _summarise(cg) -> CovergroupStats:
+            n_total, n_covered = self._count_bins_in_cg(cg)
+            return CovergroupStats(
+                name=cg.name,
+                path=cg.instname,      # the report's instance name is used as the path
+                coverage_pct=cg.coverage,
+                bins=BinStats(total=n_total, covered=n_covered),
+                weight=cg.weight,
+            )
+        return [_summarise(cg) for cg in self.report.covergroups]
+
+    def coverpoint_stats(self, include_bins: bool = False) -> List[CoverpointStats]:
+        """
+        Flat list of ``CoverpointStats`` for the coverpoints the report walk visits.
+
+        Parameters
+        ----------
+        include_bins:
+            When ``True``, populate ``CoverpointStats.bin_details`` with
+            per-bin data.  Slightly more expensive; not needed for summary views.
+        """
+        cache_key = f'coverpoint_stats_{include_bins}'  # separate cache slot per flag value
+        return self._cached(cache_key,
+                            lambda: self._compute_coverpoint_stats(include_bins))
+
+    def _compute_coverpoint_stats(self, include_bins: bool) -> List[CoverpointStats]:
+        """Walk the report covergroups and emit one entry per coverpoint.
+
+        A covergroup with type-level coverpoints or crosses contributes its
+        own coverpoints and is not descended into; a covergroup with neither
+        is only descended into.  Paths are slash-joined covergroup names
+        followed by the coverpoint name.
+        """
+        collected: List[CoverpointStats] = []
+
+        def _details(cp) -> List[BinDetail]:
+            # Regular bins first, then ignore bins, then illegal bins.
+            tagged = (
+                (cp.bins, {}),
+                (cp.ignore_bins, {'is_ignore': True}),
+                (cp.illegal_bins, {'is_illegal': True}),
+            )
+            return [BinDetail(name=b.name, count=b.count, at_least=b.goal, **flags)
+                    for group, flags in tagged for b in group]
+
+        def _descend(cg, parent_path: str) -> None:
+            here = f'{parent_path}/{cg.name}' if parent_path else cg.name
+            if not (cg.coverpoints or cg.crosses):
+                for child in cg.covergroups:
+                    _descend(child, here)
+                return
+            for cp in cg.coverpoints:
+                collected.append(CoverpointStats(
+                    name=cp.name,
+                    path=f'{here}/{cp.name}',
+                    bins=BinStats(total=len(cp.bins),
+                                  covered=sum(1 for b in cp.bins if b.hit)),
+                    bin_details=_details(cp) if include_bins else [],
+                    weight=cp.weight,
+                ))
+        for top in self.report.covergroups:
+            _descend(top, '')
+        return collected
+
+    def cross_stats(self) -> List[CrossStats]:
+        """Flat list of ``CrossStats`` for the crosses found by the report walk (memoised)."""
+        return self._cached('cross_stats', self._compute_cross_stats)
+
+    def _compute_cross_stats(self) -> List[CrossStats]:
+        """Collect one ``CrossStats`` per cross without double-counting.
+
+        Same rule as ``_count_bins_in_cg`` / ``_compute_coverpoint_stats``:
+        a covergroup with type-level coverpoints or crosses reports its own
+        crosses and its sub-covergroups are skipped; otherwise descend.
+        """
+        result: List[CrossStats] = []
+        def _walk_cg(cg, path_prefix: str) -> None:
+            cg_path = f'{path_prefix}/{cg.name}' if path_prefix else cg.name
+            if not (cg.coverpoints or cg.crosses):
+                for sub in cg.covergroups:
+                    _walk_cg(sub, cg_path)
+                return
+            for cr in cg.crosses:
+                covered = sum(1 for b in cr.bins if b.hit)
+                result.append(CrossStats(
+                    name=cr.name, path=f'{cg_path}/{cr.name}', weight=cr.weight,
+                    bins=BinStats(total=len(cr.bins), covered=covered)))
+        for cg in self.report.covergroups:
+            _walk_cg(cg, '')
+        return result
+
+    # --------------------------------------- code coverage (STMT/BRANCH/etc.)
+
+    def coverage_types_present(self) -> List:
+        """``CoverTypeT`` values that have at least one item in the database (memoised)."""
+        return self._cached('coverage_types', self._compute_coverage_types)
+
+    def _compute_coverage_types(self) -> List:
+        from ucis.cover_type_t import CoverTypeT
+
+        # SQLite fast path: every distinct cover_type stored in coveritems
+        if hasattr(self._db, 'conn'):
+            try:
+                rows = self._db.conn.execute(
+                    'SELECT DISTINCT cover_type FROM coveritems ORDER BY cover_type'
+                ).fetchall()
+                result = []
+                for r in rows:
+                    if r[0] is None:
+                        continue
+                    try:
+                        result.append(CoverTypeT(r[0]))
+                    except ValueError:
+                        pass  # stored value is not a CoverTypeT member — skipped
+                return result
+            except Exception:
+                pass  # query failed — fall through to the API walk
+
+        # API fallback: probe each scope for the eight listed types only
+        from ucis.scope_type_t import ScopeTypeT
+        found = set()
+        all_types = [
+            CoverTypeT.CVGBIN, CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN,
+            CoverTypeT.TOGGLEBIN, CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN,
+            CoverTypeT.FSMBIN, CoverTypeT.BLOCKBIN,
+        ]
+
+        def _visit(scope):
+            for ct in all_types:
+                if ct not in found:  # stop probing a type once it has been seen
+                    try:
+                        if next(iter(scope.coverItems(ct)), None) is not None:
+                            found.add(ct)
+                    except Exception:
+                        pass
+            for child in scope.scopes(ScopeTypeT.ALL):
+                _visit(child)
+
+        for scope in self._db.scopes(ScopeTypeT.ALL):
+            _visit(scope)
+
+        return sorted(found, key=lambda t: int(t))  # ascending numeric value
+
+    def bins_by_type(self, cov_type, test_filter: Optional[str] = None) -> BinStats:
+        """
+        ``BinStats`` for a single ``CoverTypeT`` (memoised per type and filter).
+
+        For functional bins (``CVGBIN``), prefer ``functional_bins()`` which
+        derives from the canonical traversal.  This method uses direct DB
+        queries and is primarily intended for code-coverage types.
+
+        Parameters
+        ----------
+        test_filter:
+            Logical name of a test; when given, only items contributed by
+            that test are counted.  SQLite only; ignored on other backends.
+        """
+        from ucis.cover_type_t import CoverTypeT
+
+        # Unfiltered CVGBIN requests are routed to functional_bins().
+        if cov_type == CoverTypeT.CVGBIN and test_filter is None:
+            return self.functional_bins()
+
+        cache_key = f'bins_by_type_{int(cov_type)}_{test_filter}'  # one slot per (type, filter)
+        return self._cached(cache_key,
+                            lambda: self._query_bins_by_type(cov_type, test_filter))
+
+    def _query_bins_by_type(self, cov_type, test_filter: Optional[str]) -> BinStats:
+        """Count total / covered cover items whose type matches *cov_type*.
+
+        Backends exposing ``conn`` are answered with one aggregate SQLite
+        query (``cov_type`` is matched as a bit mask); if that fails, or for
+        other backends, the scope tree is walked through the UCIS API.
+        *test_filter* restricts the SQLite count to items linked to that
+        logical test name — each item once; the API walk ignores it.
+        """
+        # SQLite fast path
+        if hasattr(self._db, 'conn'):
+            sql = """SELECT COUNT(*),
+                            SUM(CASE WHEN ci.cover_data >= ci.at_least THEN 1 ELSE 0 END)
+                     FROM coveritems ci
+                     WHERE (ci.cover_type & ?) != 0"""
+            if test_filter:
+                # IN (sub-select) rather than JOIN: a join emits one row per
+                # matching history node and would count shared items repeatedly.
+                sql += """
+                       AND ci.cover_id IN (
+                           SELECT ct.cover_id
+                           FROM coveritem_tests ct
+                           JOIN history_nodes hn ON hn.history_id = ct.history_id
+                           WHERE hn.logical_name = ?)"""
+            try:
+                params = (int(cov_type), test_filter) if test_filter else (int(cov_type),)
+                row = self._db.conn.execute(sql, params).fetchone()
+                return BinStats(total=row[0] or 0, covered=row[1] or 0)
+            except Exception:
+                pass    # best effort: fall back to the API walk below
+
+        # API fallback
+        from ucis.scope_type_t import ScopeTypeT
+        total = 0
+        covered = 0
+        def _visit(scope):
+            nonlocal total, covered
+            try:
+                for item in scope.coverItems(cov_type):
+                    total += 1
+                    cd = item.getCoverData()
+                    if cd and cd.data >= cd.at_least:
+                        covered += 1
+            except Exception:
+                pass
+            try:
+                for child in scope.scopes(ScopeTypeT.ALL):
+                    _visit(child)
+            except Exception:
+                pass
+
+        for scope in self._db.scopes(ScopeTypeT.ALL):
+            _visit(scope)
+        return BinStats(total=total, covered=covered)
+
+    def code_coverage_by_type(self) -> Dict:
+        """
+        Per-type ``BinStats`` (keyed by ``CoverTypeT``, memoised) for the code-coverage
+        item types STMTBIN, BRANCHBIN, TOGGLEBIN, EXPRBIN, CONDBIN, FSMBIN, BLOCKBIN.
+        """
+        return self._cached('code_coverage_by_type', self._compute_code_coverage)
+
+    def _compute_code_coverage(self) -> Dict:
+        from ucis.cover_type_t import CoverTypeT
+        code_types = [
+            CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN,
+            CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN, CoverTypeT.FSMBIN,
+            CoverTypeT.BLOCKBIN,
+        ]
+
+        # SQLite fast path — single query for all types (exact cover_type match per group)
+        if hasattr(self._db, 'conn'):
+            try:
+                rows = self._db.conn.execute(
+                    """SELECT cover_type,
+                              COUNT(*) AS total,
+                              SUM(CASE WHEN cover_data >= at_least THEN 1 ELSE 0 END) AS covered
+                       FROM coveritems
+                       GROUP BY cover_type"""
+                ).fetchall()
+                int_to_type = {int(ct): ct for ct in code_types}
+                result = {ct: BinStats() for ct in code_types}  # types with no rows stay 0/0
+                for row in rows:
+                    ct = int_to_type.get(row[0])
+                    if ct is not None:  # rows for other cover types are ignored
+                        result[ct] = BinStats(total=row[1] or 0, covered=row[2] or 0)
+                return result
+            except Exception:
+                pass  # query failed — fall through to the per-type path
+
+        # API fallback — NOTE(review): _query_bins_by_type tries SQLite again, with a bit-mask match
+        result = {}
+        for ct in code_types:
+            result[ct] = self._query_bins_by_type(ct, test_filter=None)
+        return result
+
+    def file_coverage(self, test_filter: Optional[str] = None) -> List[FileCoverageStats]:
+        """
+        Per-source-file code-coverage statistics (memoised per filter value).
+
+        Requires a SQLite backend; returns an empty list for other backends.
+
+        Parameters
+        ----------
+        test_filter:
+            Restrict counts to items contributed by the named test.
+        """
+        cache_key = f'file_coverage_{test_filter}'  # one cache slot per filter value
+        return self._cached(cache_key, lambda: self._compute_file_coverage(test_filter))
+
+    def _compute_file_coverage(self, test_filter: Optional[str]) -> List[FileCoverageStats]:
+        """Build per-file code-coverage tallies from one grouped SQLite query.
+
+        Returns ``[]`` when the backend exposes no ``conn`` attribute, when
+        the query or result handling fails, or when nothing matches.
+
+        Parameters
+        ----------
+        test_filter:
+            Logical test name.  When given, only cover items linked to that
+            name through ``coveritem_tests`` / ``history_nodes`` are counted.
+            The restriction is a bound-parameter sub-select, so the SQL text
+            stays the same size however many items the test touched.
+
+        Returns
+        -------
+        One ``FileCoverageStats`` per file that has at least one matching
+        cover item, sorted by ``file_path``.
+        """
+        if not hasattr(self._db, 'conn'):
+            return []
+
+        from ucis.cover_type_t import CoverTypeT
+        # FileCoverageStats field name → integer cover-type code.
+        type_codes = {
+            'line':   int(CoverTypeT.STMTBIN),
+            'branch': int(CoverTypeT.BRANCHBIN),
+            'toggle': int(CoverTypeT.TOGGLEBIN),
+            'expr':   int(CoverTypeT.EXPRBIN),
+            'cond':   int(CoverTypeT.CONDBIN),
+            'fsm':    int(CoverTypeT.FSMBIN),
+            'block':  int(CoverTypeT.BLOCKBIN),
+        }
+        params: list = list(type_codes.values())
+        filter_clause = ''
+        if test_filter:
+            filter_clause = """AND ci.cover_id IN (
+                    SELECT ct.cover_id
+                    FROM coveritem_tests ct
+                    JOIN history_nodes hn ON hn.history_id = ct.history_id
+                    WHERE hn.logical_name = ?
+                )"""
+            params.append(test_filter)
+
+        # One query: group by (file_id, cover_type)
+        sql = f"""
+            SELECT
+                f.file_id,
+                f.file_path,
+                ci.cover_type,
+                COUNT(*) AS total,
+                SUM(CASE WHEN ci.cover_data >= ci.at_least THEN 1 ELSE 0 END) AS covered
+            FROM files f
+            JOIN coveritems ci ON f.file_id = ci.source_file_id
+            WHERE ci.cover_type IN (?,?,?,?,?,?,?)
+            {filter_clause}
+            GROUP BY f.file_id, f.file_path, ci.cover_type
+            ORDER BY f.file_path
+        """
+        try:
+            rows = self._db.conn.execute(sql, params).fetchall()
+
+            file_paths: Dict[int, str] = {}
+            by_file_type: Dict[tuple, tuple] = {}
+            for row in rows:
+                file_paths[row[0]] = row[1]
+                by_file_type[(row[0], row[2])] = (row[3], row[4])
+
+            stats = [
+                FileCoverageStats(
+                    file_id=fid,
+                    file_path=fpath,
+                    **{name: BinStats(*by_file_type.get((fid, code), (0, 0)))
+                       for name, code in type_codes.items()},
+                )
+                for fid, fpath in file_paths.items()
+            ]
+            return sorted(stats, key=lambda f: f.file_path)
+        except Exception:
+            # Best-effort by design: any failure yields "no file data".
+            return []
+
+    # ------------------------------------------------------------------ tests
+
+    def tests(self) -> List[TestInfo]:
+        """
+        All tests with identity and (where available) contribution metadata.
+
+        ``total_items`` and ``unique_items`` are populated only for SQLite
+        backends; they are 0 for XML / memory backends.  The list is memoised.
+        """
+        return self._cached('tests', self._compute_tests)
+
+    def _compute_tests(self) -> List[TestInfo]:
+        """Build one ``TestInfo`` per TEST history node.
+
+        On backends exposing ``conn`` the per-test contribution counts from
+        ``SqliteTestCoverage`` are attached.  If that path fails for any
+        reason its partial output is discarded and the plain history-node
+        walk is used instead, so a test is never listed twice.
+
+        The plain walk reports ``history_id=-1`` and zero contribution
+        counts, and returns whatever it gathered if iteration fails part-way.
+
+        Per entry: the name falls back to ``'Unknown'``; the status is
+        ``'PASSED'`` / ``'FAILED'``, or ``'UNKNOWN'`` if the accessor raises;
+        the date is ``str()`` of the node date, or ``'Unknown'``.
+
+        Entries appear in history-node iteration order.
+        """
+        from ucis.history_node_kind import HistoryNodeKind
+        try:
+            from ucis import UCIS_TESTSTATUS_OK
+        except ImportError:
+            # Fallback value used when the constant cannot be imported.
+            UCIS_TESTSTATUS_OK = 1
+
+        # Accessor failures are tolerated per node rather than aborting the walk.
+        def _status_of(node) -> str:
+            try:
+                raw = node.getTestStatus()
+                return 'PASSED' if raw == UCIS_TESTSTATUS_OK else 'FAILED'
+            except Exception:
+                return 'UNKNOWN'
+
+        # An empty / missing date is reported as 'Unknown'.
+        def _date_of(node) -> str:
+            try:
+                d = node.getDate()
+                return str(d) if d else 'Unknown'
+            except Exception:
+                return 'Unknown'
+
+        def _collect(out: List[TestInfo], contrib_map) -> None:
+            # Appends to *out* so the caller decides what happens to partial
+            # results.  contrib_map is None on the plain path (no ids, no counts).
+            with_contrib = contrib_map is not None
+            for node in self._db.historyNodes(HistoryNodeKind.TEST):
+                name = node.getLogicalName() or 'Unknown'
+                contrib = contrib_map.get(name) if with_contrib else None
+                out.append(TestInfo(
+                    history_id=getattr(node, 'history_id', -1) if with_contrib else -1,
+                    name=name,
+                    status=_status_of(node),
+                    date=_date_of(node),
+                    total_items=contrib.total_items if contrib else 0,
+                    unique_items=contrib.unique_items if contrib else 0,
+                ))
+
+        # SQLite path: use test-coverage API for contribution data
+        if hasattr(self._db, 'conn'):
+            try:
+                from ucis.sqlite.sqlite_test_coverage import SqliteTestCoverage
+                api = SqliteTestCoverage(self._db)
+                contribs = api.get_all_test_contributions()
+
+                enriched: List[TestInfo] = []
+                _collect(enriched, {c.test_name: c for c in contribs})
+                return enriched
+            except Exception:
+                # Fall through to the plain walk; ``enriched`` (possibly
+                # partial) is dropped rather than mixed into its output.
+                pass
+
+        # API fallback (no contribution data)
+        result: List[TestInfo] = []
+        try:
+            _collect(result, None)
+        except Exception:
+            pass    # keep the entries gathered before the failure
+        return result
+
+    # -------------------------------------------------------- summary helpers
+
+    def summary(self) -> Dict:
+        """
+        High-level summary dict — backward-compatible replacement for
+        ``CoverageModel.get_summary()``.
+
+        When the database contains functional coverage (CVGBIN),
+        ``overall_coverage``, ``total_bins`` and ``covered_bins`` are all
+        derived from functional bins only (to preserve UCIS semantics);
+        otherwise they aggregate the code-coverage bin types (statement,
+        branch, toggle, expression, condition, FSM and block).
+        """
+        return self._cached('summary', self._compute_summary)
+
+    def _compute_summary(self) -> Dict:
+        """
+        Build the dict served by :meth:`summary`.
+
+        Functional bins are the primary metric when any exist; otherwise the
+        code-coverage bin types are aggregated.  The covergroup / coverpoint
+        counts always come from ``covergroup_stats()`` / ``coverpoint_stats()``
+        rather than being forced to 0 when there are no functional bins.
+        """
+        fb = self.functional_bins()
+        cg_stats = self.covergroup_stats()
+        cp_stats = self.coverpoint_stats()
+
+        if fb.total > 0:
+            # Use functional coverage as the primary metric
+            total, covered, pct = fb.total, fb.covered, fb.coverage_pct
+        else:
+            # No functional coverage — aggregate code coverage items
+            from ucis.cover_type_t import CoverTypeT
+            code_stats = [self.bins_by_type(ct) for ct in (
+                CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN,
+                CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN, CoverTypeT.FSMBIN,
+                CoverTypeT.BLOCKBIN,
+            )]
+            total = sum(bs.total for bs in code_stats)
+            covered = sum(bs.covered for bs in code_stats)
+            pct = (covered / total * 100.0) if total > 0 else 0.0
+
+        # Same keys in both branches; only the source of the bin totals differs.
+        return {
+            'overall_coverage': pct,
+            'total_bins': total,
+            'covered_bins': covered,
+            'covergroups': len(cg_stats),
+            'coverpoints': len(cp_stats),
+        }
+
+    def database_info(self) -> Dict:
+        """
+        Path / format / test-count metadata — backward-compatible replacement
+        for ``CoverageModel.get_database_info()``.
+        """
+        fallback_path = getattr(self._db, '_db_path', '')
+        return {
+            'path': getattr(self._db, 'db_path', fallback_path),
+            'format': 'UCIS',
+            'test_count': len(self.tests()),
+        }
diff --git a/src/ucis/tui/app.py b/src/ucis/tui/app.py
index b0becaa..9a866b8 100644
--- a/src/ucis/tui/app.py
+++ b/src/ucis/tui/app.py
@@ -92,6 +92,7 @@ def _initialize_views(self):
         from ucis.tui.views.metrics_view import MetricsView
         from ucis.tui.views.code_coverage_view import CodeCoverageView
         from ucis.tui.views.test_history_view import TestHistoryView
+        from ucis.tui.views.testplan_view import TestplanView
         
         # Create views and register with controller
         views = {
@@ -102,6 +103,7 @@ def _initialize_views(self):
             "metrics": MetricsView(self),
             "code_coverage": CodeCoverageView(self),
             "test_history": TestHistoryView(self),
+            "testplan": TestplanView(self),
         }
         
         for name, view in views.items():
diff --git a/src/ucis/tui/components/help_overlay.py b/src/ucis/tui/components/help_overlay.py
index c55f66f..234912d 100644
--- a/src/ucis/tui/components/help_overlay.py
+++ b/src/ucis/tui/components/help_overlay.py
@@ -53,6 +53,7 @@ def _create_help_content(self):
         view_table.add_row("5", "Metrics - Statistics (coming soon)")
         view_table.add_row("6", "Code Coverage - File-level code coverage")
         view_table.add_row("7", "Test History - Test contribution analysis")
+        view_table.add_row("8", "Testplan - Closure status and stage gates")
         
         # Navigation
         nav_table = Table(show_header=True, header_style="bold yellow", box=None, padding=(0, 2))
@@ -78,6 +79,7 @@ def _create_help_content(self):
         view_specific_table.add_row("Hierarchy", "Navigate design structure (E=expand all, C=collapse all, /=search)")
         view_specific_table.add_row("Gaps", "Find uncovered bins, sort by coverage %")
         view_specific_table.add_row("Test History", "View tests (N/D/C/U=sort, F=filter)")
+        view_specific_table.add_row("Testplan", "Testpoint closure (↑↓=navigate, r=refresh)")
         
         # Color coding
         color_table = Table(show_header=True, header_style="bold yellow", box=None, padding=(0, 2))
diff --git a/src/ucis/tui/controller.py b/src/ucis/tui/controller.py
index 03f189e..401671b 100644
--- a/src/ucis/tui/controller.py
+++ b/src/ucis/tui/controller.py
@@ -47,6 +47,7 @@ def __init__(self, coverage_model, on_quit: Optional[Callable] = None):
             "metrics",
             "code_coverage",
             "test_history",
+            "testplan",
         ]
     
     def register_view(self, name: str, view):
@@ -187,6 +188,7 @@ def _handle_global_key(self, key: str) -> bool:
             '5': 'metrics',
             '6': 'code_coverage',
             '7': 'test_history',
+            '8': 'testplan',
         }
         
         if key in view_map:
diff --git a/src/ucis/tui/keybindings.py b/src/ucis/tui/keybindings.py
index 4e50b73..b8cb020 100644
--- a/src/ucis/tui/keybindings.py
+++ b/src/ucis/tui/keybindings.py
@@ -50,6 +50,7 @@ def handle_global_key(self, key: str) -> bool:
             '5': 'metrics',
             '6': 'code_coverage',
             '7': 'test_history',
+            '8': 'testplan',
         }
         
         if key in view_map:
diff --git a/src/ucis/tui/models/coverage_model.py b/src/ucis/tui/models/coverage_model.py
index 9da7aee..4f9e4fb 100644
--- a/src/ucis/tui/models/coverage_model.py
+++ b/src/ucis/tui/models/coverage_model.py
@@ -2,6 +2,8 @@
 Coverage data model wrapper.
 
 Provides a convenient interface to PyUCIS API with caching.
+All metric computation is delegated to ``CoverageMetrics`` — the single
+source of truth — to ensure consistency across TUI, CLI and report output.
 """
 from typing import Dict, Any, Optional, List, Set
 from ucis.rgy.format_rgy import FormatRgy
@@ -12,6 +14,8 @@
 class CoverageModel:
     """
     Wraps PyUCIS API with caching and convenience methods.
+
+    Delegates all metric computation to :class:`~ucis.report.coverage_metrics.CoverageMetrics`.
     """
     
     def __init__(self, db_path: str, input_format: Optional[str] = None):
@@ -26,6 +30,7 @@ def __init__(self, db_path: str, input_format: Optional[str] = None):
         self.db = None
         self._cache: Dict[str, Any] = {}
         self.test_filter: Optional[str] = None  # Current test filter
+        self._metrics = None
         self._load_database(input_format)
     
     def _load_database(self, input_format: Optional[str] = None):
@@ -46,6 +51,18 @@ def _load_database(self, input_format: Optional[str] = None):
         input_desc = rgy.getDatabaseDesc(input_format)
         input_if = input_desc.fmt_if()
         self.db = input_if.read(self.db_path)
+        self._metrics = None  # reset on reload
+
+    @property
+    def metrics(self):
+        """Shared ``CoverageMetrics`` instance, or ``None`` if no database is loaded."""
+        # Built on first access after a database is loaded; reused afterwards.
+        if self._metrics is not None or self.db is None:
+            return self._metrics
+        # Deferred import: only needed once a metrics object is actually built.
+        from ucis.report.coverage_metrics import CoverageMetrics
+        self._metrics = CoverageMetrics(self.db)
+        return self._metrics
     
     def get_summary(self) -> Dict[str, Any]:
         """
@@ -56,86 +73,13 @@ def get_summary(self) -> Dict[str, Any]:
         """
         if 'summary' in self._cache:
             return self._cache['summary']
-        
-        # Compute summary
-        summary = {
-            'overall_coverage': 0.0,
-            'total_bins': 0,
-            'covered_bins': 0,
-            'covergroups': 0,
-            'coverpoints': 0,
-            'by_type': {}
-        }
-        
-        # Fast path for SQLite backends
-        if self.db and hasattr(self.db, 'conn'):
-            from ucis.cover_type_t import CoverTypeT
-            from ucis.scope_type_t import ScopeTypeT
-            try:
-                conn = self.db.conn
-                summary['covergroups'] = conn.execute(
-                    "SELECT COUNT(*) FROM scopes WHERE (scope_type & ?) != 0",
-                    (int(ScopeTypeT.COVERGROUP),)
-                ).fetchone()[0]
-                summary['coverpoints'] = conn.execute(
-                    "SELECT COUNT(*) FROM scopes WHERE (scope_type & ?) != 0",
-                    (int(ScopeTypeT.COVERPOINT),)
-                ).fetchone()[0]
-                row = conn.execute(
-                    """SELECT COUNT(*),
-                              SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END)
-                       FROM coveritems
-                       WHERE (cover_type & ?) != 0""",
-                    (int(CoverTypeT.CVGBIN),)
-                ).fetchone()
-                summary['total_bins'] = row[0] or 0
-                summary['covered_bins'] = row[1] or 0
-                if summary['total_bins'] > 0:
-                    summary['overall_coverage'] = (summary['covered_bins'] / summary['total_bins']) * 100
-                self._cache['summary'] = summary
-                return summary
-            except Exception:
-                pass
 
-        # Walk through database to compute statistics (fallback)
-        def visit_scope(scope, depth=0):
-            from ucis.scope_type_t import ScopeTypeT
-            from ucis.cover_type_t import CoverTypeT
-            
-            scope_type = scope.getScopeType()
-            if scope_type in (ScopeTypeT.COVERGROUP,):
-                summary['covergroups'] += 1
-                # Covergroups have coverpoints
-                for cp in scope.scopes(ScopeTypeT.COVERPOINT):
-                    summary['coverpoints'] += 1
-                    # Coverpoints have bins
-                    try:
-                        for bin_idx in cp.coverItems(CoverTypeT.CVGBIN):
-                            summary['total_bins'] += 1
-                            cover_data = bin_idx.getCoverData()
-                            if cover_data:
-                                # Check if bin has been hit (data > 0 or data >= goal)
-                                if cover_data.data > 0:
-                                    summary['covered_bins'] += 1
-                    except Exception as e:
-                        pass
-            
-            # Visit children recursively
-            try:
-                for child in scope.scopes(ScopeTypeT.ALL):
-                    visit_scope(child, depth + 1)
-            except:
-                pass
-        
-        if self.db:
-            from ucis.scope_type_t import ScopeTypeT
-            for scope in self.db.scopes(ScopeTypeT.ALL):
-                visit_scope(scope)
-        
-        # Calculate percentage
-        if summary['total_bins'] > 0:
-            summary['overall_coverage'] = (summary['covered_bins'] / summary['total_bins']) * 100
-        
+        summary = {'overall_coverage': 0.0, 'total_bins': 0, 'covered_bins': 0,
+                   'covergroups': 0, 'coverpoints': 0, 'by_type': {}}
+
+        if self.metrics is not None:
+            summary.update(self.metrics.summary())
+
         self._cache['summary'] = summary
         return summary
     
@@ -146,27 +90,10 @@ def get_database_info(self) -> Dict[str, Any]:
         Returns:
             Dictionary with database information
         """
-        info = {
-            'path': self.db_path,
-            'format': 'UCIS',
-            'test_count': 0,
-        }
-        
-        # Get test data if available
-        if self.db:
-            try:
-                if hasattr(self.db, 'conn'):
-                    from ucis.history_node_kind import HistoryNodeKind
-                    info['test_count'] = self.db.conn.execute(
-                        "SELECT COUNT(*) FROM history_nodes WHERE history_kind = ?",
-                        (int(HistoryNodeKind.TEST),)
-                    ).fetchone()[0]
-                else:
-                    tests = self.get_all_tests()
-                    info['test_count'] = len(tests)
-            except:
-                pass
-        
+        info = {'path': self.db_path, 'format': 'UCIS', 'test_count': 0}
+        if self.metrics is not None:
+            m_info = self.metrics.database_info()
+            info['test_count'] = m_info.get('test_count', 0)
         return info
     
     def close(self):
@@ -182,56 +109,9 @@ def get_coverage_types(self) -> List[CoverTypeT]:
         Returns:
             List of CoverTypeT values found in the database
         """
-        if 'coverage_types' in self._cache:
-            return self._cache['coverage_types']
-
-        # Fast path for SQLite backend
-        if self.db and hasattr(self.db, 'conn'):
-            try:
-                rows = self.db.conn.execute(
-                    "SELECT DISTINCT cover_type FROM coveritems ORDER BY cover_type"
-                ).fetchall()
-                types_list = []
-                for r in rows:
-                    if r[0] is None:
-                        continue
-                    try:
-                        types_list.append(CoverTypeT(r[0]))
-                    except Exception:
-                        pass
-                self._cache['coverage_types'] = types_list
-                return types_list
-            except Exception:
-                pass
-        
-        types_found: Set[CoverTypeT] = set()
-        
-        def visit_scope(scope):
-            # Check all coverage item types in this scope
-            for cov_type in [CoverTypeT.CVGBIN, CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, 
-                            CoverTypeT.TOGGLEBIN, CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN,
-                            CoverTypeT.FSMBIN, CoverTypeT.BLOCKBIN]:
-                try:
-                    items = list(scope.coverItems(cov_type))
-                    if items:
-                        types_found.add(cov_type)
-                except:
-                    pass
-            
-            # Visit children recursively
-            try:
-                for child in scope.scopes(ScopeTypeT.ALL):
-                    visit_scope(child)
-            except:
-                pass
-        
-        if self.db:
-            for scope in self.db.scopes(ScopeTypeT.ALL):
-                visit_scope(scope)
-        
-        types_list = sorted(list(types_found), key=lambda t: int(t))
-        self._cache['coverage_types'] = types_list
-        return types_list
+        if self.metrics is not None:
+            return self.metrics.coverage_types_present()
+        return []
     
     def get_code_coverage_summary(self) -> Dict[str, Any]:
         """
@@ -242,78 +122,35 @@ def get_code_coverage_summary(self) -> Dict[str, Any]:
         """
         if 'code_coverage_summary' in self._cache:
             return self._cache['code_coverage_summary']
-        
+
         summary = {
-            'line': {'total': 0, 'covered': 0, 'coverage': 0.0},
-            'branch': {'total': 0, 'covered': 0, 'coverage': 0.0},
-            'toggle': {'total': 0, 'covered': 0, 'coverage': 0.0},
+            'line':       {'total': 0, 'covered': 0, 'coverage': 0.0},
+            'branch':     {'total': 0, 'covered': 0, 'coverage': 0.0},
+            'toggle':     {'total': 0, 'covered': 0, 'coverage': 0.0},
             'expression': {'total': 0, 'covered': 0, 'coverage': 0.0},
-            'condition': {'total': 0, 'covered': 0, 'coverage': 0.0},
-            'fsm': {'total': 0, 'covered': 0, 'coverage': 0.0},
-            'block': {'total': 0, 'covered': 0, 'coverage': 0.0},
-        }
-        
-        type_map = {
-            CoverTypeT.STMTBIN: 'line',
-            CoverTypeT.BRANCHBIN: 'branch',
-            CoverTypeT.TOGGLEBIN: 'toggle',
-            CoverTypeT.EXPRBIN: 'expression',
-            CoverTypeT.CONDBIN: 'condition',
-            CoverTypeT.FSMBIN: 'fsm',
-            CoverTypeT.BLOCKBIN: 'block',
+            'condition':  {'total': 0, 'covered': 0, 'coverage': 0.0},
+            'fsm':        {'total': 0, 'covered': 0, 'coverage': 0.0},
+            'block':      {'total': 0, 'covered': 0, 'coverage': 0.0},
         }
 
-        # Fast path for SQLite backend
-        if self.db and hasattr(self.db, 'conn'):
-            try:
-                rows = self.db.conn.execute(
-                    """SELECT cover_type,
-                              COUNT(*) AS total,
-                              SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END) AS covered
-                       FROM coveritems
-                       GROUP BY cover_type"""
-                ).fetchall()
-                int_type_map = {int(k): v for k, v in type_map.items()}
-                for row in rows:
-                    key = int_type_map.get(row[0])
-                    if key is not None:
-                        summary[key]['total'] = row[1] or 0
-                        summary[key]['covered'] = row[2] or 0
-                for key in summary:
-                    if summary[key]['total'] > 0:
-                        summary[key]['coverage'] = (summary[key]['covered'] / summary[key]['total']) * 100
-                self._cache['code_coverage_summary'] = summary
-                return summary
-            except Exception:
-                pass
-        
-        def visit_scope(scope):
-            for cov_type, key in type_map.items():
-                try:
-                    for item in scope.coverItems(cov_type):
-                        summary[key]['total'] += 1
-                        cover_data = item.getCoverData()
-                        if cover_data and cover_data.data > 0:
-                            summary[key]['covered'] += 1
-                except:
-                    pass
-            
-            # Visit children
-            try:
-                for child in scope.scopes(ScopeTypeT.ALL):
-                    visit_scope(child)
-            except:
-                pass
-        
-        if self.db:
-            for scope in self.db.scopes(ScopeTypeT.ALL):
-                visit_scope(scope)
-        
-        # Calculate percentages
-        for key in summary:
-            if summary[key]['total'] > 0:
-                summary[key]['coverage'] = (summary[key]['covered'] / summary[key]['total']) * 100
-        
+        if self.metrics is not None:
+            type_map = {
+                CoverTypeT.STMTBIN:   'line',
+                CoverTypeT.BRANCHBIN: 'branch',
+                CoverTypeT.TOGGLEBIN: 'toggle',
+                CoverTypeT.EXPRBIN:   'expression',
+                CoverTypeT.CONDBIN:   'condition',
+                CoverTypeT.FSMBIN:    'fsm',
+                CoverTypeT.BLOCKBIN:  'block',
+            }
+            by_type = self.metrics.code_coverage_by_type()
+            for ct, key in type_map.items():
+                bs = by_type.get(ct)
+                if bs:
+                    summary[key]['total']    = bs.total
+                    summary[key]['covered']  = bs.covered
+                    summary[key]['coverage'] = bs.coverage_pct
+
         self._cache['code_coverage_summary'] = summary
         return summary
     
@@ -328,90 +165,17 @@ def get_coverage_by_type(self, cov_type: CoverTypeT, filtered: bool = True) -> D
         Returns:
             Dictionary with total, covered, and percentage
         """
-        # Check if filtering is needed
-        filter_active = filtered and self.test_filter is not None
-        cache_key = f'coverage_type_{int(cov_type)}'
-        if filter_active:
-            cache_key += f'_filter_{self.test_filter}'
-        
+        test_filter = self.test_filter if filtered else None
+        cache_key = f'coverage_type_{int(cov_type)}_{test_filter}'
         if cache_key in self._cache:
             return self._cache[cache_key]
-        
-        # Get filtered coveritem IDs if needed
-        filtered_ids = None
-        if filter_active:
-            filtered_ids = self.get_coveritems_for_test(self.test_filter)
-        
-        result = {
-            'type': cov_type,
-            'total': 0,
-            'covered': 0,
-            'coverage': 0.0
-        }
 
-        # Fast path for SQLite backend
-        if self.db and hasattr(self.db, 'conn'):
-            try:
-                if filter_active:
-                    row = self.db.conn.execute(
-                        """SELECT COUNT(*),
-                                  SUM(CASE WHEN ci.cover_data > 0 THEN 1 ELSE 0 END)
-                           FROM coveritems ci
-                           JOIN coveritem_tests ct ON ct.cover_id = ci.cover_id
-                           JOIN history_nodes hn ON hn.history_id = ct.history_id
-                           WHERE (ci.cover_type & ?) != 0
-                             AND hn.logical_name = ?""",
-                        (int(cov_type), self.test_filter)
-                    ).fetchone()
-                else:
-                    row = self.db.conn.execute(
-                        """SELECT COUNT(*),
-                                  SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END)
-                           FROM coveritems
-                           WHERE (cover_type & ?) != 0""",
-                        (int(cov_type),)
-                    ).fetchone()
-                result['total'] = row[0] or 0
-                result['covered'] = row[1] or 0
-                if result['total'] > 0:
-                    result['coverage'] = (result['covered'] / result['total']) * 100
-                self._cache[cache_key] = result
-                return result
-            except Exception:
-                pass
-        
-        def visit_scope(scope):
-            try:
-                for item in scope.coverItems(cov_type):
-                    # If filtering, check if this item is in the filtered set
-                    if filter_active:
-                        # Get the coveritem ID
-                        item_id = item.cover_id if hasattr(item, 'cover_id') else item.getKey()
-                        if item_id not in filtered_ids:
-                            continue
-                    
-                    result['total'] += 1
-                    cover_data = item.getCoverData()
-                    if cover_data and cover_data.data > 0:
-                        result['covered'] += 1
-            except:
-                pass
-            
-            # Visit children
-            try:
-                for child in scope.scopes(ScopeTypeT.ALL):
-                    visit_scope(child)
-            except:
-                pass
-        
-        if self.db:
-            for scope in self.db.scopes(ScopeTypeT.ALL):
-                visit_scope(scope)
-        
-        # Calculate percentage
-        if result['total'] > 0:
-            result['coverage'] = (result['covered'] / result['total']) * 100
-        
+        result = {'type': cov_type, 'total': 0, 'covered': 0, 'coverage': 0.0}
+        if self.metrics is not None:
+            bs = self.metrics.bins_by_type(cov_type, test_filter=test_filter)
+            result['total']    = bs.total
+            result['covered']  = bs.covered
+            result['coverage'] = bs.coverage_pct
         self._cache[cache_key] = result
         return result
     
@@ -424,97 +188,17 @@ def get_all_tests(self) -> List[Dict[str, Any]]:
         """
         if 'all_tests' in self._cache:
             return self._cache['all_tests']
-        
+
         tests = []
-        
-        if not self.db:
-            return tests
-        
-        # Try to get test coverage API if available
-        try:
-            from ucis.sqlite.sqlite_test_coverage import SqliteTestCoverage
-            
-            # Check if this is a SQLite database with test coverage support
-            if hasattr(self.db, 'conn'):
-                api = SqliteTestCoverage(self.db)  # Pass the SqliteUCIS object, not just conn
-                
-                # Get all tests and their contributions
-                all_contribs = api.get_all_test_contributions()
-                
-                # Create test dictionary for each test
-                # all_contribs is a list of TestCoverageInfo objects
-                for contrib in all_contribs:
-                    test_info = {
-                        'name': contrib.test_name,
-                        'status': 'PASSED',  # Default, will try to get from history
-                        'date': 'Unknown',
-                        'total_items': contrib.total_items,
-                        'unique_items': contrib.unique_items,
-                    }
-                    
-                    # Try to get additional info from history node
-                    try:
-                        for history_node in self.db.historyNodes():
-                            if history_node.getLogicalName() == contrib.test_name:
-                                # Get status (UCIS_TESTSTATUS_OK = 1, anything else is failure)
-                                try:
-                                    from ucis import UCIS_TESTSTATUS_OK
-                                    status = history_node.getTestStatus()
-                                    if status == UCIS_TESTSTATUS_OK:
-                                        test_info['status'] = 'PASSED'
-                                    else:
-                                        test_info['status'] = 'FAILED'
-                                except:
-                                    pass
-                                
-                                # Get date
-                                try:
-                                    date = history_node.getDate()
-                                    if date:
-                                        test_info['date'] = date
-                                except:
-                                    pass
-                                
-                                break
-                    except:
-                        pass
-                    
-                    tests.append(test_info)
-        except:
-            # Fallback: just enumerate history nodes without contribution data
-            try:
-                from ucis.history_node_kind import HistoryNodeKind
-                from ucis import UCIS_TESTSTATUS_OK
-                
-                for history_node in self.db.historyNodes(HistoryNodeKind.TEST):
-                    test_info = {
-                        'name': history_node.getLogicalName() or 'Unknown',
-                        'status': 'UNKNOWN',
-                        'date': 'Unknown',
-                        'total_items': 0,
-                        'unique_items': 0,
-                    }
-                    
-                    try:
-                        status = history_node.getTestStatus()
-                        if status == UCIS_TESTSTATUS_OK:
-                            test_info['status'] = 'PASSED'
-                        else:
-                            test_info['status'] = 'FAILED'
-                    except:
-                        pass
-                    
-                    try:
-                        date = history_node.getDate()
-                        if date:
-                            test_info['date'] = date
-                    except:
-                        pass
-                    
-                    tests.append(test_info)
-            except:
-                pass
-        
+        if self.metrics is not None:
+            for ti in self.metrics.tests():
+                tests.append({
+                    'name':         ti.name,
+                    'status':       ti.status,
+                    'date':         ti.date,
+                    'total_items':  ti.total_items,
+                    'unique_items': ti.unique_items,
+                })
         self._cache['all_tests'] = tests
         return tests
     
@@ -526,10 +210,10 @@ def set_test_filter(self, test_name: Optional[str]):
             test_name: Name of test to filter by, or None to clear filter
         """
         self.test_filter = test_name
-        
-        # Clear relevant caches when filter changes
-        if 'code_coverage_summary' in self._cache:
-            del self._cache['code_coverage_summary']
+        # Invalidate all caches — metrics and coverage-type caches all depend on filter
+        self._cache.clear()
+        if self._metrics is not None:
+            self._metrics.invalidate()
     
     def get_test_filter(self) -> Optional[str]:
         """
@@ -582,3 +266,66 @@ def get_coveritems_for_test(self, test_name: str) -> Set[int]:
             import traceback
             traceback.print_exc()
             return set()
+
+    def get_testplan_closure(self) -> dict:
+        """Compute testplan closure using the embedded testplan (if any).
+
+        Returns a dict with keys ``results`` (list of row dicts) and
+        ``summary`` (dict: total, total_closed, total_na, by_stage).  With no
+        testplan, or on any error, it is ``{"results": [], "summary": None}``.
+
+        Each row dict contains: testpoint, stage, status, pass_count,
+        fail_count, matched_tests, desc.
+        """
+        if 'testplan_closure' in self._cache:
+            return self._cache['testplan_closure']
+
+        result = {"results": [], "summary": None}
+        try:
+            from ucis.ncdb.testplan import get_testplan
+            from ucis.ncdb.testplan_closure import compute_closure
+            from ucis.ncdb.reports import report_testpoint_closure, _STATUS_LABEL
+
+            plan = get_testplan(self.db)
+            if plan is not None:
+                tp_results = compute_closure(plan, self.db)
+                summary = report_testpoint_closure(tp_results)
+
+                rows = [{
+                    "testpoint":    r.testpoint.name,
+                    "stage":        r.testpoint.stage or "?",
+                    "status":       _STATUS_LABEL[r.status],
+                    "pass_count":   r.pass_count,
+                    "fail_count":   r.fail_count,
+                    "matched_tests": r.matched_tests,
+                    "desc":         r.testpoint.desc or "",
+                } for r in tp_results]
+                # Summary flattened to a dict for the view's header.
+                result = {
+                    "results": rows,
+                    "summary": {
+                        "total":        summary.total,
+                        "total_closed": summary.total_closed,
+                        "total_na":     summary.total_na,
+                        "by_stage":     summary.by_stage,
+                    },
+                }
+        except Exception:
+            # Best effort: the TUI must still work when testplan support is
+            # missing or broken, but keep a trace so failures are diagnosable.
+            import logging
+            logging.getLogger(__name__).debug(
+                "testplan closure unavailable", exc_info=True)
+
+        self._cache['testplan_closure'] = result
+        return result
+
+    def get_v2_test_stats(self, test_name: str):
+        """Return v2 TestStatsEntry for *test_name* when db is NcdbUCIS.
+
+        Returns None if ``self.db.get_test_stats`` raises for any reason or the test is unknown.
+        """
+        try:
+            return self.db.get_test_stats(test_name)
+        except Exception:  # best effort: any failure of the lookup is reported as "no stats"
+            return None
diff --git a/src/ucis/tui/views/gaps_view.py b/src/ucis/tui/views/gaps_view.py
index b443753..e75374b 100644
--- a/src/ucis/tui/views/gaps_view.py
+++ b/src/ucis/tui/views/gaps_view.py
@@ -48,56 +48,24 @@ def on_enter(self):
             self._collect_gaps()
     
     def _collect_gaps(self):
-        """Collect all gaps from the database."""
-        from ucis.scope_type_t import ScopeTypeT
-        
+        """Collect coverpoints below ``self.threshold`` via the common metrics layer."""
         self.gaps = []
-        
-        def visit_scope(scope, path=""):
-            scope_type = scope.getScopeType()
-            scope_name = scope.getScopeName()
-            current_path = f"{path}/{scope_name}" if path else scope_name
-            
-            # Check coverpoints for gaps
-            if scope_type == ScopeTypeT.COVERPOINT:
-                total_bins = 0
-                covered_bins = 0
-                
-                try:
-                    for bin_idx in scope.coverItems(CoverTypeT.CVGBIN):
-                        total_bins += 1
-                        cover_data = bin_idx.getCoverData()
-                        if cover_data and cover_data.data > 0:
-                            covered_bins += 1
-                except:
-                    pass
-                
-                if total_bins > 0:
-                    coverage = (covered_bins / total_bins) * 100
-                    if coverage < self.threshold:
-                        gap = GapItem(
-                            name=scope_name,
-                            scope_type="Coverpoint",
-                            coverage=coverage,
-                            hits=covered_bins,
-                            goal=total_bins,
-                            path=current_path
-                        )
-                        self.gaps.append(gap)
-            
-            # Recurse into children
-            try:
-                for child in scope.scopes(ScopeTypeT.ALL):
-                    visit_scope(child, current_path)
-            except:
-                pass
-        
+
         try:
-            for scope in self.model.db.scopes(ScopeTypeT.ALL):
-                visit_scope(scope)
-        except:
+            cp_stats = self.model.metrics.coverpoint_stats()
+            for cp in cp_stats:
+                if cp.coverage_pct < self.threshold:  # strictly below: at-threshold is not a gap
+                    self.gaps.append(GapItem(
+                        name=cp.name,
+                        scope_type="Coverpoint",
+                        coverage=cp.coverage_pct,
+                        hits=cp.bins.covered,
+                        goal=cp.bins.total,
+                        path=cp.path,
+                    ))
+        except Exception:  # best effort: on failure, keep whatever gaps were collected so far
             pass
-        
+
         # Sort by coverage (lowest first)
         self.gaps.sort(key=lambda g: g.coverage)
     
diff --git a/src/ucis/tui/views/test_history_view.py b/src/ucis/tui/views/test_history_view.py
index 408d1fb..c88851a 100644
--- a/src/ucis/tui/views/test_history_view.py
+++ b/src/ucis/tui/views/test_history_view.py
@@ -194,6 +194,22 @@ def _render_test_details(self) -> Text:
         if total > 0:
             unique_pct = (unique / total) * 100
             details.append(f"  Unique %: {unique_pct:.1f}%\n")
+
+        # v2 history stats (NcdbUCIS only — fails gracefully)
+        try:
+            v2stats = self.model.get_v2_test_stats(test.get('name', ''))
+            if v2stats is not None and v2stats.total_runs > 0:
+                details.append("\nV2 History:\n", style="bold")
+                details.append(f"  Total runs: {v2stats.total_runs}\n")
+                details.append(f"  Pass/Fail:  {v2stats.pass_count}/{v2stats.fail_count}\n")
+                flake = v2stats.flake_score
+                flake_style = "red" if flake >= 0.3 else ("yellow" if flake > 0.1 else "green")
+                details.append(f"  Flake score: ", style="bold")
+                details.append(f"{flake:.3f}\n", style=flake_style)
+                if v2stats.mean_cpu_time > 0:
+                    details.append(f"  Mean CPU:    {v2stats.mean_cpu_time:.1f}s\n")
+        except Exception:
+            pass
         
         details.append("\n")
         
diff --git a/src/ucis/tui/views/testplan_view.py b/src/ucis/tui/views/testplan_view.py
new file mode 100644
index 0000000..56ed47f
--- /dev/null
+++ b/src/ucis/tui/views/testplan_view.py
@@ -0,0 +1,235 @@
+"""Testplan Closure View — TUI view showing testpoint closure status.
+
+Shows all testpoints with their stage, closure status, and pass/fail
+counts.  Includes a per-stage closure summary header and supports
+scrolling, refresh, and a detail panel for the selected testpoint.
+"""
+
+from rich.align import Align
+from rich.layout import Layout
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from ucis.tui.views.base_view import BaseView
+
+
+_STATUS_STYLE = {  # status label -> rich style used for that testpoint's table row
+    "CLOSED":    "green",
+    "PARTIAL":   "yellow",
+    "FAILING":   "red",
+    "NOT_RUN":   "dim",
+    "N/A":       "dim",
+    "UNIMP":     "dim",
+}
+
+_STATUS_ICON = {  # status label -> one-character marker printed before the label
+    "CLOSED":  "✓",
+    "PARTIAL": "~",
+    "FAILING": "✗",
+    "NOT_RUN": "?",
+    "N/A":     "—",
+    "UNIMP":   "-",
+}
+
+
+class TestplanView(BaseView):
+    """TUI view for testplan closure status (key '8')."""
+
+    def __init__(self, app):
+        super().__init__(app)
+        self.results = []          # row dicts from model.get_testplan_closure()
+        self.summary = None        # summary dict, or None when unavailable
+        self.selected_index = 0
+        self.scroll_offset = 0
+        self.visible_rows = 20
+        self._loaded = False
+
+    def on_enter(self):
+        super().on_enter()
+        if not self._loaded:
+            self._load_closure()
+
+    def _load_closure(self):
+        """Load testplan closure results from the model."""
+        self.results = []
+        self.summary = None
+        try:
+            data = self.model.get_testplan_closure()
+            self.results = data.get("results", [])
+            self.summary = data.get("summary", None)
+        except Exception:  # best effort: on failure the view shows "No testplan found"
+            pass
+        self._loaded = True
+        self.selected_index = 0
+        self.scroll_offset = 0
+
+    def _adjust_scroll(self):
+        """Move the scroll window just enough to keep the selected row visible."""
+        if self.selected_index < self.scroll_offset:
+            self.scroll_offset = self.selected_index
+        elif self.selected_index >= self.scroll_offset + self.visible_rows:
+            self.scroll_offset = self.selected_index - self.visible_rows + 1
+
+    # ------------------------------------------------------------------
+    # Rendering
+    # ------------------------------------------------------------------
+
+    def render(self):
+        """Return the "no testplan" panel, or a header/body/detail layout."""
+        if not self.results:
+            return Panel(
+                Align.center(
+                    Text(
+                        "No testplan found.\n\n"
+                        "Embed one with:\n"
+                        "  pyucis testplan import coverage.cdb uart.hjson",
+                        style="dim",
+                    ),
+                    vertical="middle",
+                ),
+                title="[bold]Testplan Closure[/bold]",
+            )
+
+        layout = Layout()
+        has_summary = self.summary is not None
+        header_size = 7 if has_summary else 3
+        layout.split_column(
+            Layout(name="header", size=header_size),
+            Layout(name="body", ratio=1),
+            Layout(name="detail", size=8),
+        )
+
+        layout["header"].update(self._render_header())
+        layout["body"].update(self._render_table())
+        layout["detail"].update(self._render_detail())
+        return layout
+
+    def _render_header(self):
+        """Build the header panel: overall closure, per-stage counts, key hints."""
+        lines = []
+        if self.summary is not None:
+            total = self.summary.get("total", 0)
+            closed = self.summary.get("total_closed", 0)
+            na = self.summary.get("total_na", 0)
+            pct = round(100.0 * closed / (total - na), 1) if (total - na) > 0 else 0.0
+            lines.append(
+                f"[bold]Testplan Closure[/bold]  "
+                f"{closed}/{total - na} testpoints closed ({pct:.1f}%)  "
+                f"[dim]{na} N/A[/dim]"
+            )
+            by_stage = self.summary.get("by_stage", {})
+            stage_parts = []
+            for stage, entry in sorted(by_stage.items()):
+                c, t = entry.get("closed", 0), entry.get("total", 0)
+                p = entry.get("pct", 0.0)
+                colour = "green" if p >= 100 else ("yellow" if p > 0 else "red")
+                stage_parts.append(f"[{colour}]{stage}: {c}/{t}[/{colour}]")
+            lines.append("  " + "  |  ".join(stage_parts))
+        else:
+            lines.append("[bold]Testplan Closure[/bold]")
+        lines.append("[dim]↑↓ navigate  r refresh  q back[/dim]")
+        return Panel("\n".join(lines), style="bold")
+
+    def _render_table(self):
+        """Build the testpoint table for the currently visible window of rows."""
+        table = Table(
+            show_header=True,
+            header_style="bold cyan",
+            expand=True,
+            show_lines=False,
+        )
+        table.add_column("", width=2, no_wrap=True)
+        table.add_column("Testpoint", ratio=3, no_wrap=True)
+        table.add_column("Stage", width=6, no_wrap=True)
+        table.add_column("Status", width=12, no_wrap=True)
+        table.add_column("Pass", width=6, justify="right", no_wrap=True)
+        table.add_column("Fail", width=6, justify="right", no_wrap=True)
+
+        visible_end = min(self.scroll_offset + self.visible_rows, len(self.results))
+        for i in range(self.scroll_offset, visible_end):
+            r = self.results[i]
+            is_sel = i == self.selected_index
+            sel_marker = "▶" if is_sel else " "
+            tp_name = r.get("testpoint", "?")
+            stage = r.get("stage", "?")
+            status = r.get("status", "NOT_RUN")
+            pc = str(r.get("pass_count", 0))
+            fc = str(r.get("fail_count", 0))
+
+            icon = _STATUS_ICON.get(status, "?")
+            style = _STATUS_STYLE.get(status, "")
+            if is_sel:
+                style = f"reverse {style}".strip()  # keep the status colour, inverted
+
+            table.add_row(
+                sel_marker,
+                tp_name,
+                stage,
+                f"{icon} {status}",
+                pc,
+                fc,
+                style=style,
+            )
+
+        if len(self.results) > self.visible_rows:
+            scroll_info = f"[dim] {visible_end}/{len(self.results)} shown[/dim]"
+            return Panel(table, subtitle=scroll_info)
+        return Panel(table)
+
+    def _render_detail(self):
+        """Build the detail panel for the currently selected testpoint."""
+        if not self.results or self.selected_index >= len(self.results):
+            return Panel(Text("No testpoint selected", style="dim"), title="Detail")
+
+        r = self.results[self.selected_index]
+        tp_name = r.get("testpoint", "?")
+        stage   = r.get("stage", "?")
+        status  = r.get("status", "NOT_RUN")
+        tests   = r.get("matched_tests", [])
+        desc    = r.get("desc", "")
+
+        lines = [
+            f"[bold]{tp_name}[/bold]  [{stage}]  {_STATUS_ICON.get(status, '?')} {status}",
+        ]
+        if desc:
+            lines.append(f"[dim]{desc}[/dim]")
+        if tests:
+            lines.append("Tests: " + ", ".join(tests[:6])
+                         + ("…" if len(tests) > 6 else ""))
+        else:
+            lines.append("[dim]No tests matched[/dim]")
+
+        return Panel("\n".join(lines), title="[dim]Detail[/dim]")
+
+    # ------------------------------------------------------------------
+    # Key handling
+    # ------------------------------------------------------------------
+
+    def handle_key(self, key: str) -> bool:
+        """Handle a key press; return True when the key was consumed."""
+        if key in ("up", "k") and self.results:
+            if self.selected_index > 0:
+                self.selected_index -= 1
+                self._adjust_scroll()
+            return True
+        if key in ("down", "j") and self.results:
+            if self.selected_index < len(self.results) - 1:
+                self.selected_index += 1
+                self._adjust_scroll()
+            return True
+        if key == "home":
+            self.selected_index = 0
+            self.scroll_offset = 0
+            return True
+        if key == "end":
+            self.selected_index = max(0, len(self.results) - 1)
+            self._adjust_scroll()
+            return True
+        if key in ("r", "R"):
+            # The model memoises the closure; drop its entry or refresh is a no-op.
+            getattr(self.model, "_cache", {}).pop("testplan_closure", None)
+            self._loaded = False
+            self._load_closure()
+            return True
+        return False
diff --git a/tests/integration/test_ci_export.py b/tests/integration/test_ci_export.py
new file mode 100644
index 0000000..127b81c
--- /dev/null
+++ b/tests/integration/test_ci_export.py
@@ -0,0 +1,312 @@
+"""Integration tests for Phase 3: reports, exports, and CI helpers."""
+
+from __future__ import annotations
+
+import io
+import json
+import os
+import shutil
+import tempfile
+from xml.etree import ElementTree as ET
+
+import pytest
+
+from ucis.ncdb.constants import HIST_STATUS_FAIL, HIST_STATUS_OK
+from ucis.ncdb.ncdb_ucis import NcdbUCIS
+from ucis.ncdb.ncdb_writer import NcdbWriter
+from ucis.ncdb.testplan import CovergroupEntry, Testplan, Testpoint
+from ucis.ncdb.testplan_closure import TPStatus, compute_closure
+from ucis.ncdb.testplan_export import (
+    export_github_annotations,
+    export_junit_xml,
+    export_summary_markdown,
+)
+from ucis.ncdb.reports import (
+    report_testpoint_closure,
+    report_stage_gate,
+    report_regression_delta,
+    format_testpoint_closure,
+    format_stage_gate,
+    format_regression_delta,
+)
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+@pytest.fixture()
+def tmpdir_path():
+    scratch_dir = tempfile.mkdtemp()  # created fresh for every requesting test
+    yield scratch_dir
+    shutil.rmtree(scratch_dir, ignore_errors=True)  # teardown: best-effort cleanup
+
+
+def _make_db(path: str, runs: list) -> NcdbUCIS:
+    """Write an empty database to *path*, reopen it, and record *runs*.
+
+    *runs* is a list of ``(test_name, status)`` pairs.  Runs are stamped one
+    minute apart, the first one ``len(runs)`` minutes before the current time.
+    """
+    import time
+
+    from ucis.mem.mem_ucis import MemUCIS
+
+    NcdbWriter().write(MemUCIS(), path)
+    history_db = NcdbUCIS(path)
+    first_ts = int(time.time()) - 60 * len(runs)
+    for offset, (test_name, outcome) in enumerate(runs):
+        history_db.add_test_run(
+            name=test_name, seed=1,
+            status=outcome, ts=first_ts + 60 * offset,
+        )
+
+    return history_db
+
+
+def _make_plan() -> Testplan:
+    """Return a UART plan: V1 reset, V2 loopback, a V2 N/A point, one covergroup."""
+    uart_plan = Testplan(source_file="uart.hjson")
+    testpoints = [
+        Testpoint(name="uart_reset", stage="V1", tests=["uart_smoke"]),
+        Testpoint(name="uart_loopback", stage="V2", tests=["uart_loopback"]),
+        Testpoint(name="uart_na", stage="V2", na=True),
+    ]
+    for testpoint in testpoints:
+        uart_plan.add_testpoint(testpoint)
+
+    uart_plan.covergroups.append(CovergroupEntry(name="cg_reset"))
+    return uart_plan
+
+
+def _save_and_reopen(db: NcdbUCIS, path: str) -> NcdbUCIS:
+    staging = f"{path}.tmp"  # write beside the target, then swap it into place
+    NcdbWriter().write(db, staging)
+    os.replace(staging, path)
+    return NcdbUCIS(path)
+
+
+# ── JUnit XML export ──────────────────────────────────────────────────────────
+
+class TestExportJunitXml:  # export_junit_xml(): closure results written as JUnit XML
+    def test_creates_valid_junit_xml(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [
+            ("uart_smoke", HIST_STATUS_OK),
+            ("uart_loopback", HIST_STATUS_FAIL),
+        ])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        xml_path = os.path.join(tmpdir_path, "results.xml")
+        export_junit_xml(results, xml_path)
+
+        assert os.path.exists(xml_path)
+        tree = ET.parse(xml_path)  # parsing fails the test if the XML is malformed
+        root = tree.getroot()
+        assert root.tag == "testsuite"
+        cases = root.findall("testcase")
+        assert len(cases) == len(results)  # one <testcase> per closure result
+
+    def test_closed_testpoint_has_no_failure(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])  # uart_reset's only test passes
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        xml_path = os.path.join(tmpdir_path, "results.xml")
+        export_junit_xml(results, xml_path)
+
+        tree = ET.parse(xml_path)
+        reset_tc = next(
+            tc for tc in tree.findall(".//testcase")
+            if tc.attrib["name"] == "uart_reset"
+        )
+        assert reset_tc.find("failure") is None
+
+    def test_failing_testpoint_has_failure_element(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_loopback", HIST_STATUS_FAIL)])  # loopback's only run fails
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        xml_path = os.path.join(tmpdir_path, "results.xml")
+        export_junit_xml(results, xml_path)
+
+        tree = ET.parse(xml_path)
+        loop_tc = next(
+            tc for tc in tree.findall(".//testcase")
+            if tc.attrib["name"] == "uart_loopback"
+        )
+        assert loop_tc.find("failure") is not None
+
+    def test_testpoint_names_appear_as_testcases(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        xml_path = os.path.join(tmpdir_path, "results.xml")
+        export_junit_xml(results, xml_path)
+
+        tree = ET.parse(xml_path)
+        names = {tc.attrib["name"] for tc in tree.findall(".//testcase")}
+        assert "uart_reset" in names
+        assert "uart_loopback" in names
+        assert "uart_na" in names  # the N/A testpoint is still listed as a testcase
+
+
+# ── GitHub Annotations export ─────────────────────────────────────────────────
+
+class TestExportGithubAnnotations:
+    def test_error_lines_for_failing(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_loopback", HIST_STATUS_FAIL)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        buf = io.StringIO()
+        export_github_annotations(results, output=buf)
+        lines = buf.getvalue().splitlines()
+
+        error_lines = [line for line in lines if line.startswith("::error")]
+        assert len(error_lines) >= 1
+        assert any("uart_loopback" in line for line in error_lines)
+
+    def test_warning_for_not_run_testpoint(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        # No runs at all, so the plan's only testpoint is NOT_RUN → warning
+        db = _make_db(path, [])
+        plan = Testplan(source_file="test.hjson")
+        plan.add_testpoint(
+            Testpoint(name="tp_not_run", stage="V1", tests=["tp_not_run"])
+        )
+        results = compute_closure(plan, db)
+
+        buf = io.StringIO()
+        export_github_annotations(results, output=buf)
+        lines = buf.getvalue().splitlines()
+
+        warning_lines = [line for line in lines if line.startswith("::warning")]
+        assert len(warning_lines) >= 1
+
+    def test_no_output_for_closed(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = Testplan(source_file="test.hjson")
+        plan.add_testpoint(
+            Testpoint(name="uart_reset", stage="V1", tests=["uart_smoke"])
+        )
+        results = compute_closure(plan, db)
+
+        buf = io.StringIO()
+        export_github_annotations(results, output=buf)
+        text = buf.getvalue()
+        assert text.strip() == ""  # a fully closed plan emits no annotations
+
+
+# ── Markdown summary ──────────────────────────────────────────────────────────
+
+class TestExportSummaryMarkdown:  # export_summary_markdown(): closure report as Markdown text
+    def test_returns_valid_markdown(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [
+            ("uart_smoke", HIST_STATUS_OK),
+            ("uart_loopback", HIST_STATUS_FAIL),
+        ])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        md = export_summary_markdown(results)
+        assert "## Testplan Closure Report" in md  # report heading
+        assert "uart_reset" in md
+
+    def test_stage_gate_in_markdown(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        gate = report_stage_gate(results, "V1", plan)
+        md = export_summary_markdown(results, stage_gate=gate)  # optional gate section
+        assert "Stage gate" in md
+        assert "V1" in md
+
+
+# ── Structured reports end-to-end ────────────────────────────────────────────
+
+class TestReportsEndToEnd:  # history -> compute_closure -> report_* / format_* helpers
+    def test_closure_report_all_closed(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [
+            ("uart_smoke", HIST_STATUS_OK),
+            ("uart_loopback", HIST_STATUS_OK),
+        ])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+        summary = report_testpoint_closure(results)
+
+        assert summary.total == 3
+        assert summary.total_na == 1
+        # uart_reset (closed) + uart_loopback (closed) + uart_na (N/A)
+        assert summary.total_closed == 2  # the N/A testpoint is not counted as closed
+
+    def test_stage_gate_v1_passes_when_closed(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        gate = report_stage_gate(results, "V1", plan)
+        assert gate.passed is True
+        text = format_stage_gate(gate)
+        assert "PASS" in text
+
+    def test_stage_gate_v2_fails_when_loopback_not_run(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        # Only uart_smoke runs, no uart_loopback
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+
+        gate = report_stage_gate(results, "V2", plan)
+        assert gate.passed is False
+        blocking_names = [r.testpoint.name for r in gate.blocking]
+        assert "uart_loopback" in blocking_names  # the never-run testpoint blocks the gate
+
+    def test_regression_delta_detects_newly_closed(self, tmpdir_path):
+        path_old = os.path.join(tmpdir_path, "old.cdb")
+        path_new = os.path.join(tmpdir_path, "new.cdb")
+        plan = _make_plan()
+
+        db_old = _make_db(path_old, [])  # nothing passes
+        db_new = _make_db(path_new, [
+            ("uart_smoke", HIST_STATUS_OK),
+        ])
+
+        results_old = compute_closure(plan, db_old)
+        results_new = compute_closure(plan, db_new)
+        delta = report_regression_delta(results_new, results_old)  # argument order: new, then old
+
+        newly_closed_names = [r.testpoint.name for r in delta.newly_closed]
+        assert "uart_reset" in newly_closed_names
+
+    def test_closure_to_json_roundtrip(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+        summary = report_testpoint_closure(results)
+
+        d = json.loads(summary.to_json())  # to_json() must emit parseable JSON
+        assert d["total"] == 3
+        assert any(r["name"] == "uart_reset" for r in d["testpoints"])
+
+    def test_format_closure_text_output(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "cov.cdb")
+        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+        summary = report_testpoint_closure(results)
+
+        text = format_testpoint_closure(summary)
+        assert "uart_reset" in text
+        assert "CLOSED" in text
diff --git a/tests/integration/test_history_workflow.py b/tests/integration/test_history_workflow.py
new file mode 100644
index 0000000..06b4394
--- /dev/null
+++ b/tests/integration/test_history_workflow.py
@@ -0,0 +1,267 @@
+"""Integration tests for Phase 1 binary test history.
+
+These tests exercise the full stack: NcdbUCIS API → NcdbWriter → NcdbReader
+→ NcdbMerger, using temporary .cdb files on disk.
+"""
+from __future__ import annotations
+
+import os
+import shutil
+import tempfile
+
+import pytest
+
+from ucis.mem.mem_ucis import MemUCIS
+from ucis.ncdb.constants import HIST_STATUS_FAIL, HIST_STATUS_OK
+from ucis.ncdb.ncdb_merger import NcdbMerger
+from ucis.ncdb.ncdb_reader import NcdbReader
+from ucis.ncdb.ncdb_ucis import NcdbUCIS
+from ucis.ncdb.ncdb_writer import NcdbWriter
+
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+def _write_v2_cdb(path: str, test_runs: list) -> None:
+    """Persist *test_runs*, given as (name, seed, status, ts), to a .cdb at *path*."""
+    NcdbWriter().write(MemUCIS(), path)
+    history_db = NcdbUCIS(path)
+    for run in test_runs:
+        test_name, seed, status, ts = run
+        passed = status == HIST_STATUS_OK  # only passing runs are flagged has_coverage
+        history_db.add_test_run(
+            test_name, seed=seed, status=status, ts=ts, has_coverage=passed)
+    staging = f"{path}.tmp"
+    NcdbWriter().write(history_db, staging)
+    os.replace(staging, path)
+
+
+@pytest.fixture()
+def tmpdir_path():
+    scratch_dir = tempfile.mkdtemp()  # created fresh for every requesting test
+    yield scratch_dir
+    shutil.rmtree(scratch_dir, ignore_errors=True)  # teardown: best-effort cleanup
+
+
+# ── tests ─────────────────────────────────────────────────────────────────────
+
+class TestAddTestRunUpdatesStats:
+    """add_test_run() must update test_stats immediately."""
+
+    def test_single_pass_creates_entry(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)  # start from an empty database on disk
+        db = NcdbUCIS(path)
+        db.add_test_run("my_test", seed="1", status=HIST_STATUS_OK,
+                        ts=1700000000, has_coverage=True)
+        entry = db.get_test_stats("my_test")  # queried without saving/reopening the file
+        assert entry is not None
+        assert entry.total_runs == 1
+        assert entry.pass_count == 1
+        assert entry.fail_count == 0
+
+    def test_pass_and_fail_accumulate(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        for i, st in enumerate([HIST_STATUS_OK, HIST_STATUS_FAIL,
+                                 HIST_STATUS_OK, HIST_STATUS_OK]):
+            db.add_test_run("my_test", seed=str(i), status=st,
+                            ts=1700000000 + i * 3600, has_coverage=(st == HIST_STATUS_OK))
+        entry = db.get_test_stats("my_test")
+        assert entry.total_runs == 4
+        assert entry.pass_count == 3
+        assert entry.fail_count == 1
+
+    def test_unknown_test_returns_none(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        assert db.get_test_stats("nonexistent") is None
+
+    def test_run_id_monotonically_increments(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        for i in range(5):
+            db.add_test_run("t", seed=str(i), status=HIST_STATUS_OK,
+                            ts=1700000000 + i)
+        assert db._test_registry.next_run_id == 5  # one id consumed per run, counted from zero
+
+
+class TestQueryTestHistoryRange:
+    """query_test_history() must filter by time range and return correct records."""
+
+    def _build_db(self, path):
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        # Two tests, multiple runs spread over a day
+        for i in range(6):  # "alpha": six runs, one hour apart; runs 0 and 3 fail
+            db.add_test_run("alpha", seed=str(i),
+                            status=HIST_STATUS_OK if i % 3 else HIST_STATUS_FAIL,
+                            ts=1700000000 + i * 3600,
+                            has_coverage=True)
+        for i in range(3):  # "beta": three passing runs, two hours apart
+            db.add_test_run("beta", seed=str(i), status=HIST_STATUS_OK,
+                            ts=1700000000 + i * 7200, has_coverage=True)
+        return db
+
+    def test_all_records_returned_without_filter(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        db = self._build_db(path)
+        recs = db.query_test_history("alpha")
+        assert len(recs) == 6
+
+    def test_time_range_filter_lower_bound(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        db = self._build_db(path)
+        # Request only the last 3 records (ts >= 1700000000 + 3*3600)
+        ts_start = 1700000000 + 3 * 3600
+        recs = db.query_test_history("alpha", ts_from=ts_start)
+        assert all(r.ts >= ts_start for r in recs)
+        assert len(recs) == 3  # the run stamped exactly ts_start is included
+
+    def test_time_range_filter_upper_bound(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        db = self._build_db(path)
+        ts_end = 1700000000 + 2 * 3600 + 1  # one second past the third run
+        recs = db.query_test_history("alpha", ts_to=ts_end)
+        assert all(r.ts <= ts_end for r in recs)
+        assert len(recs) == 3
+
+    def test_nonexistent_name_returns_empty(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        db = self._build_db(path)
+        assert db.query_test_history("no_such_test") == []
+
+    def test_separate_test_independent(self, tmpdir_path):
+        path = os.path.join(tmpdir_path, "a.cdb")
+        db = self._build_db(path)
+        recs = db.query_test_history("beta")
+        assert len(recs) == 3  # "alpha" runs must not leak into "beta" results
+
+
+class TestRoundTripV2Cdb:
+    """A v2 .cdb written to disk must read back with its history state intact."""
+
+    def test_stats_survive_roundtrip(self, tmpdir_path):
+        """Per-test run and fail counters are the same after write + read."""
+        path = os.path.join(tmpdir_path, "rt.cdb")
+        _write_v2_cdb(path, [
+            ("foo", "1", HIST_STATUS_OK,   1700000000),
+            ("foo", "2", HIST_STATUS_FAIL, 1700003600),
+            ("bar", "1", HIST_STATUS_OK,   1700000100),
+        ])
+        db = NcdbReader().read(path)
+        registry = db._test_registry
+        assert registry.num_names == 2
+        foo_stats = db._test_stats.get(registry._name_to_id["foo"])
+        assert foo_stats.total_runs == 2
+        assert foo_stats.fail_count == 1
+        bar_stats = db._test_stats.get(registry._name_to_id["bar"])
+        assert bar_stats.total_runs == 1
+
+    def test_bucket_data_survives_roundtrip(self, tmpdir_path):
+        """Ten recorded runs leave at least one sealed bucket after reload."""
+        path = os.path.join(tmpdir_path, "rt.cdb")
+        runs = [("my_test", str(i), HIST_STATUS_OK, 1700000000 + i * 60)
+                for i in range(10)]
+        _write_v2_cdb(path, runs)
+        db = NcdbReader().read(path)
+        assert len(db._sealed_buckets) >= 1
+
+    def test_manifest_history_format_is_v2(self, tmpdir_path):
+        import json
+        import zipfile
+        path = os.path.join(tmpdir_path, "rt.cdb")
+        _write_v2_cdb(path, [("t", "1", HIST_STATUS_OK, 1700000000)])
+        with zipfile.ZipFile(path, "r") as zf:  # the .cdb is opened as a ZIP archive
+            manifest = json.loads(zf.read("manifest.json"))
+        assert manifest.get("history_format") == "v2"
+
+    def test_query_history_after_roundtrip(self, tmpdir_path):
+        """All 8 alternating OK/FAIL runs are queryable after reopening."""
+        path = os.path.join(tmpdir_path, "rt.cdb")
+        runs = [("my_test", str(i),
+                 HIST_STATUS_FAIL if i % 2 else HIST_STATUS_OK,
+                 1700000000 + i * 3600) for i in range(8)]
+        _write_v2_cdb(path, runs)
+        db2 = NcdbUCIS(path)
+        recs = db2.query_test_history("my_test")
+        assert len(recs) == 8
+        assert all(hasattr(r, "ts") for r in recs)
+
+
+class TestMergeTwoV2Sources:
+    """Merging two v2 .cdb files must yield one consistent history store.
+
+    Every test merges the same two fixture files through ``_merge``.
+    """
+
+    def _make_src_a(self, d):
+        """Write a.cdb in *d*: 3 uart_smoke and 2 gpio_test runs, one FAIL each."""
+        path = os.path.join(d, "a.cdb")
+        _write_v2_cdb(path, [
+            ("uart_smoke", "1", HIST_STATUS_OK,   1700000000),
+            ("uart_smoke", "2", HIST_STATUS_FAIL, 1700086400),
+            ("uart_smoke", "3", HIST_STATUS_OK,   1700172800),
+            ("gpio_test",  "1", HIST_STATUS_OK,   1700000100),
+            ("gpio_test",  "2", HIST_STATUS_FAIL, 1700086500),
+        ])
+        return path
+
+    def _make_src_b(self, d):
+        """Write b.cdb in *d*: 2 uart_smoke runs and 1 spi_test run, all OK."""
+        path = os.path.join(d, "b.cdb")
+        _write_v2_cdb(path, [
+            ("uart_smoke", "4", HIST_STATUS_OK,   1700259200),
+            ("uart_smoke", "5", HIST_STATUS_OK,   1700345600),
+            ("spi_test",   "1", HIST_STATUS_OK,   1700259300),
+        ])
+        return path
+
+    def _merge(self, d):
+        """Merge both sources into merged.cdb inside *d* and return its path."""
+        merged = os.path.join(d, "merged.cdb")
+        sources = [self._make_src_a(d), self._make_src_b(d)]
+        NcdbMerger().merge(sources, merged)
+        return merged
+
+    def test_merged_registry_contains_all_names(self, tmpdir_path):
+        """The merged registry holds the three names used across both sources."""
+        db = NcdbReader().read(self._merge(tmpdir_path))
+        registry = db._test_registry
+        assert registry.num_names == 3
+        for name in ("uart_smoke", "gpio_test", "spi_test"):
+            assert name in registry._name_to_id
+
+    def test_merged_stats_are_combined(self, tmpdir_path):
+        """uart_smoke counters add up across sources (3 + 2 runs, 1 FAIL)."""
+        db = NcdbReader().read(self._merge(tmpdir_path))
+        uart_id = db._test_registry._name_to_id["uart_smoke"]
+        uart_stats = db._test_stats.get(uart_id)
+        assert uart_stats.total_runs == 5
+        assert uart_stats.fail_count == 1
+
+    def test_merged_run_id_is_sum(self, tmpdir_path):
+        """next_run_id after the merge equals the run count of both sources."""
+        db = NcdbReader().read(self._merge(tmpdir_path))
+        # src_a has 5 runs, src_b has 3 → next_run_id = 8
+        assert db._test_registry.next_run_id == 8
+
+    def test_merged_history_queryable(self, tmpdir_path):
+        """query_test_history() on the merged file sees all 5 uart_smoke runs."""
+        db2 = NcdbUCIS(self._merge(tmpdir_path))
+        recs = db2.query_test_history("uart_smoke")
+        assert len(recs) == 5
+
+    def test_merged_buckets_present(self, tmpdir_path):
+        """The merged file carries at least two sealed buckets."""
+        db = NcdbReader().read(self._merge(tmpdir_path))
+        assert len(db._sealed_buckets) >= 2
+
+    def test_top_flaky_after_merge(self, tmpdir_path):
+        """top_flaky_tests(5) returns a non-empty result on the merged file."""
+        db2 = NcdbUCIS(self._merge(tmpdir_path))
+        flaky = db2.top_flaky_tests(5)
+        # At least one test (gpio_test) has failures making it flaky
+        assert len(flaky) > 0
diff --git a/tests/integration/test_testplan_workflow.py b/tests/integration/test_testplan_workflow.py
new file mode 100644
index 0000000..1f0b1ca
--- /dev/null
+++ b/tests/integration/test_testplan_workflow.py
@@ -0,0 +1,312 @@
+"""Integration tests for Phase 2: testplan embedding, closure, and waivers."""
+from __future__ import annotations
+
+import json
+import os
+import shutil
+import tempfile
+
+import pytest
+
+from ucis.mem.mem_ucis import MemUCIS
+from ucis.ncdb.constants import HIST_STATUS_FAIL, HIST_STATUS_OK
+from ucis.ncdb.ncdb_merger import NcdbMerger
+from ucis.ncdb.ncdb_reader import NcdbReader
+from ucis.ncdb.ncdb_ucis import NcdbUCIS
+from ucis.ncdb.ncdb_writer import NcdbWriter
+from ucis.ncdb.testplan import CovergroupEntry, Testplan, Testpoint, get_testplan
+from ucis.ncdb.testplan_closure import TPStatus, compute_closure, stage_gate_status
+from ucis.ncdb.testplan_hjson import import_hjson
+from ucis.ncdb.waivers import Waiver, WaiverSet
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+@pytest.fixture()
+def tmpdir_path():  # yields the path of a fresh tempfile.mkdtemp() directory
+    d = tempfile.mkdtemp()
+    yield d
+    shutil.rmtree(d, ignore_errors=True)  # teardown; removal errors are ignored
+
+
+def _make_plan():
+    """Sample plan: V1 uart_reset, V2 uart_loopback, N/A uart_na + cg_reset."""
+    plan = Testplan(source_file="uart.hjson")
+    reset_tests = ["uart_smoke", "uart_init_*"]
+    plan.add_testpoint(Testpoint(name="uart_reset", stage="V1", tests=reset_tests))
+    plan.add_testpoint(Testpoint(name="uart_loopback", stage="V2", tests=["uart_loopback"]))
+    plan.add_testpoint(Testpoint(name="uart_na", stage="V2", na=True))
+    plan.covergroups.append(CovergroupEntry(name="cg_reset"))
+    return plan
+
+
+def _make_cdb(path, test_runs=None):
+    """Create a .cdb at *path* from (name, seed, status, ts) tuples; return *path*."""
+    NcdbWriter().write(MemUCIS(), path)
+    db = NcdbUCIS(path)
+    for name, seed, status, ts in test_runs or ():
+        passed = status == HIST_STATUS_OK  # only OK runs are flagged has_coverage
+        db.add_test_run(name, seed=seed, status=status, ts=ts, has_coverage=passed)
+    NcdbWriter().write(db, path + ".tmp")  # write beside the target, then swap in
+    os.replace(path + ".tmp", path)
+    return path
+
+
+# ── TestTestplanRoundTrip ─────────────────────────────────────────────────────
+
+class TestTestplanRoundTrip:
+    def test_set_and_get_testplan(self, tmpdir_path):
+        """getTestplan() after setTestplan() exposes the plan's contents."""
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        db.setTestplan(_make_plan())
+        tp = db.getTestplan()
+        assert tp is not None
+        assert tp.source_file == "uart.hjson"
+        assert len(tp.testpoints) == 3
+
+    def test_testplan_survives_write_read(self, tmpdir_path):
+        """A plan attached before saving is readable from the rewritten file."""
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        db.setTestplan(_make_plan())
+        NcdbWriter().write(db, path + ".tmp")
+        os.replace(path + ".tmp", path)
+        db2 = NcdbReader().read(path)
+        plan2 = get_testplan(db2)
+        assert plan2 is not None
+        assert plan2.source_file == "uart.hjson"
+        assert len(plan2.testpoints) == 3
+
+    def test_testplan_member_in_zip(self, tmpdir_path):
+        """Saving a db that has a plan adds a testplan.json ZIP member."""
+        import zipfile
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        db.setTestplan(_make_plan())
+        NcdbWriter().write(db, path + ".tmp")
+        os.replace(path + ".tmp", path)
+        with zipfile.ZipFile(path, "r") as zf:
+            assert "testplan.json" in zf.namelist()
+
+    def test_no_testplan_no_member(self, tmpdir_path):
+        """Without a plan, the written ZIP has no testplan.json member."""
+        import zipfile
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        with zipfile.ZipFile(path, "r") as zf:
+            assert "testplan.json" not in zf.namelist()
+
+    def test_stamp_import_time_set_on_setTestplan(self, tmpdir_path):
+        """setTestplan() fills in the plan's initially empty import_timestamp."""
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        plan = _make_plan()
+        assert plan.import_timestamp == ""
+        NcdbUCIS(path).setTestplan(plan)
+        assert plan.import_timestamp != ""
+
+
+# ── TestHjsonImport ───────────────────────────────────────────────────────────
+
+class TestHjsonImport:
+    def _write_hjson(self, d, data):
+        """Dump *data* as JSON to plan.json inside *d* and return the path."""
+        path = os.path.join(d, "plan.json")
+        with open(path, "w") as f:
+            f.write(json.dumps(data))
+        return path
+
+    def test_import_and_embed(self, tmpdir_path):
+        """An imported plan embedded in a .cdb is recovered after save + read."""
+        testpoint = {"name": "uart_reset", "stage": "V1", "tests": ["uart_smoke"]}
+        hjson_path = self._write_hjson(tmpdir_path, {
+            "testpoints": [testpoint],
+        })
+        cdb = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), cdb)
+        db = NcdbUCIS(cdb)
+        plan = import_hjson(hjson_path)
+        db.setTestplan(plan)
+        NcdbWriter().write(db, cdb + ".tmp")
+        os.replace(cdb + ".tmp", cdb)
+
+        db2 = NcdbReader().read(cdb)
+        plan2 = get_testplan(db2)
+        assert plan2 is not None
+        assert plan2.testpoints[0].name == "uart_reset"
+
+    def test_wildcard_expansion_preserved(self, tmpdir_path):
+        """A "{baud}" placeholder expands to one test name per supplied value."""
+        testpoint = {"name": "tp", "stage": "V1", "tests": ["{baud}_test"]}
+        hjson_path = self._write_hjson(tmpdir_path, {"testpoints": [testpoint]})
+        substitutions = {"baud": ["9600", "115200"]}
+        plan = import_hjson(hjson_path, substitutions)
+        expanded = plan.testpoints[0].tests
+        assert "9600_test" in expanded
+        assert "115200_test" in expanded
+
+
+# ── TestComputeClosureIntegration ─────────────────────────────────────────────
+
+class TestComputeClosureIntegration:
+    """compute_closure() / stage_gate_status() against history saved in a .cdb."""
+
+    def test_closure_against_v2_history(self, tmpdir_path):
+        """Each testpoint's status follows the recorded pass/fail history."""
+        path = _make_cdb(os.path.join(tmpdir_path, "a.cdb"), [
+            ("uart_smoke", "1", HIST_STATUS_OK,   1700000000),
+            ("uart_smoke", "2", HIST_STATUS_OK,   1700003600),
+            ("uart_loopback", "1", HIST_STATUS_FAIL, 1700007200),
+        ])
+        results = compute_closure(_make_plan(), NcdbUCIS(path))
+        by_name = {r.testpoint.name: r for r in results}
+        assert by_name["uart_reset"].status == TPStatus.CLOSED
+        assert by_name["uart_loopback"].status == TPStatus.FAILING
+        assert by_name["uart_na"].status == TPStatus.NA
+
+    def test_stage_gate_passes_when_v1_closed(self, tmpdir_path):
+        """A passing uart_smoke run is enough for the V1 gate to pass."""
+        runs = [("uart_smoke", "1", HIST_STATUS_OK, 1700000000)]
+        db = NcdbUCIS(_make_cdb(os.path.join(tmpdir_path, "a.cdb"), runs))
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+        gate = stage_gate_status(results, "V1", plan)
+        assert gate["passed"] is True
+
+    def test_stage_gate_blocked_by_failure(self, tmpdir_path):
+        """A failing uart_smoke run keeps the V1 gate from passing."""
+        runs = [("uart_smoke", "1", HIST_STATUS_FAIL, 1700000000)]
+        db = NcdbUCIS(_make_cdb(os.path.join(tmpdir_path, "a.cdb"), runs))
+        plan = _make_plan()
+        results = compute_closure(plan, db)
+        gate = stage_gate_status(results, "V1", plan)
+        assert gate["passed"] is False
+
+    def test_not_run_testpoint(self, tmpdir_path):
+        """With no recorded runs, every non-NA testpoint is NOT_RUN."""
+        db = NcdbUCIS(_make_cdb(os.path.join(tmpdir_path, "a.cdb"), []))
+        results = compute_closure(_make_plan(), db)
+        applicable = [r for r in results if not r.testpoint.na]
+        # The sample plan has non-NA testpoints, so an empty list is a failure.
+        assert applicable, "compute_closure() returned no applicable testpoints"
+        for r in applicable:
+            assert r.status == TPStatus.NOT_RUN
+
+
+# ── TestWaiversRoundTrip ──────────────────────────────────────────────────────
+
+class TestWaiversRoundTrip:
+    def test_set_get_waivers(self, tmpdir_path):
+        """After setWaivers(), getWaivers() returns a set holding the one waiver."""
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        db.setWaivers(WaiverSet([Waiver(id="W1", scope_pattern="top/uart")]))
+        ws2 = db.getWaivers()
+        assert ws2 is not None
+        assert len(ws2.waivers) == 1
+
+    def test_waivers_survive_write_read(self, tmpdir_path):
+        """Both waivers, and W1's rationale text, are present after save + read."""
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        db = NcdbUCIS(path)
+        known = Waiver(id="W1", scope_pattern="top/uart", rationale="Known issue")
+        other = Waiver(id="W2", scope_pattern="top/spi")
+        db.setWaivers(WaiverSet([known, other]))
+        tmp = path + ".tmp"
+        NcdbWriter().write(db, tmp)
+        os.replace(tmp, path)
+
+        db2 = NcdbReader().read(path)
+        ws2 = getattr(db2, "_waivers", None)
+        assert ws2 is not None
+        assert len(ws2.waivers) == 2
+        assert ws2.get("W1").rationale == "Known issue"
+
+    def test_no_waivers_no_member(self, tmpdir_path):
+        """Without waivers, the written ZIP has no waivers.json member."""
+        import zipfile
+        path = os.path.join(tmpdir_path, "a.cdb")
+        NcdbWriter().write(MemUCIS(), path)
+        with zipfile.ZipFile(path, "r") as zf:
+            assert "waivers.json" not in zf.namelist()
+
+
+# ── TestMergeTestplan ─────────────────────────────────────────────────────────
+
+class TestMergeTestplan:
+    """NcdbMerger handling of embedded testplans and waivers.
+
+    Each test saves two source files (a.cdb, b.cdb) and merges them.
+    """
+
+    def test_same_testplan_propagated_to_merged(self, tmpdir_path):
+        """When both inputs embed the same plan, the merged file carries it."""
+        plan = _make_plan()
+        sources = [os.path.join(tmpdir_path, n) for n in ("a.cdb", "b.cdb")]
+        # Build the two merge inputs; both embed the very same plan object.
+        for path in sources:
+            NcdbWriter().write(MemUCIS(), path)
+            db = NcdbUCIS(path)
+            db.setTestplan(plan)
+            tmp = path + ".tmp"
+            NcdbWriter().write(db, tmp)
+            os.replace(tmp, path)
+
+        merged = os.path.join(tmpdir_path, "merged.cdb")
+        NcdbMerger().merge(sources, merged)
+        db_m = NcdbReader().read(merged)
+        plan_m = get_testplan(db_m)
+        assert plan_m is not None
+        assert plan_m.source_file == "uart.hjson"
+
+    def test_different_testplans_warning(self, tmpdir_path):
+        """Differing plans produce a "testplan" warning and no merged plan."""
+        import warnings
+        sources = [os.path.join(tmpdir_path, n) for n in ("a.cdb", "b.cdb")]
+        # Build the two merge inputs; each plan gets a distinct source_file.
+        for i, path in enumerate(sources):
+            NcdbWriter().write(MemUCIS(), path)
+            db = NcdbUCIS(path)
+            plan = _make_plan()
+            plan.source_file = f"plan_{i}.hjson"
+            db.setTestplan(plan)
+            tmp = path + ".tmp"
+            NcdbWriter().write(db, tmp)
+            os.replace(tmp, path)
+
+        merged = os.path.join(tmpdir_path, "merged.cdb")
+        # record=True collects the emitted warnings in a list for inspection.
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            NcdbMerger().merge(sources, merged)
+        messages = [str(item.message).lower() for item in caught]
+        assert any("testplan" in text for text in messages)
+        db_m = NcdbReader().read(merged)
+        assert get_testplan(db_m) is None
+
+    def test_waivers_merged_union(self, tmpdir_path):
+        """The merged file holds exactly the waiver ids of both inputs."""
+        sources = [os.path.join(tmpdir_path, n) for n in ("a.cdb", "b.cdb")]
+        # Build the two merge inputs; input i carries the single waiver W<i>.
+        for i, path in enumerate(sources):
+            NcdbWriter().write(MemUCIS(), path)
+            db = NcdbUCIS(path)
+            db.setWaivers(WaiverSet([
+                Waiver(id=f"W{i}", scope_pattern=f"scope_{i}"),
+            ]))
+            tmp = path + ".tmp"
+            NcdbWriter().write(db, tmp)
+            os.replace(tmp, path)
+
+        merged = os.path.join(tmpdir_path, "merged.cdb")
+        NcdbMerger().merge(sources, merged)
+        db_m = NcdbReader().read(merged)
+        ws = getattr(db_m, "_waivers", None)
+        assert ws is not None
+        assert {w.id for w in ws.waivers} == {"W0", "W1"}
diff --git a/tests/test_coverage_metrics.py b/tests/test_coverage_metrics.py
new file mode 100644
index 0000000..bff6bbf
--- /dev/null
+++ b/tests/test_coverage_metrics.py
@@ -0,0 +1,770 @@
+"""
+Unit tests for :class:`ucis.report.coverage_metrics.CoverageMetrics`.
+
+Each public method of the API is tested:
+  * BinStats / BinDetail dataclasses
+  * functional_bins()
+  * covergroup_stats()
+  * coverpoint_stats() / coverpoint_stats(include_bins=True)
+  * cross_stats()
+  * coverage_types_present()
+  * bins_by_type()
+  * code_coverage_by_type()
+  * file_coverage()
+  * tests()
+  * summary()
+  * database_info()
+  * invalidate()  (cache invalidation)
+  * Parity: functional_bins() must agree with CoverageReportBuilder
+
+Tests run against both the XML/API path and the SQLite path where applicable.
+"""
+import os
+import pytest
+import tempfile
+
+from ucis.mem.mem_factory import MemFactory
+from ucis.cover_type_t import CoverTypeT
+from ucis.source_info import SourceInfo
+from ucis import (
+    UCIS_HISTORYNODE_TEST, UCIS_TESTSTATUS_OK, UCIS_OTHER,
+    UCIS_DU_MODULE, UCIS_ENABLED_STMT, UCIS_ENABLED_BRANCH,
+    UCIS_INST_ONCE, UCIS_SCOPE_UNDER_DU, UCIS_INSTANCE, UCIS_VLOG,
+)
+from ucis.test_data import TestData
+from ucis.report.coverage_metrics import (
+    BinStats, BinDetail, CoverpointStats, CovergroupStats, TestInfo,
+    CoverageMetrics,
+)
+
+
+# ---------------------------------------------------------------------------
+# Internal DB builder helpers (same conventions as tui_fixtures)
+# ---------------------------------------------------------------------------
+
+def _add_test(db, logical_name="test1"):
+    """Create a test history node with OK status named *logical_name*; return it."""
+    test_data = TestData(teststatus=UCIS_TESTSTATUS_OK, toolcategory="test",
+                         date="20240101000000")
+    # The same string is passed for both name arguments of createHistoryNode.
+    node = db.createHistoryNode(None, logical_name, logical_name, UCIS_HISTORYNODE_TEST)
+    node.setTestData(test_data)
+    return node
+
+
+def _add_instance(db):
+    """Create DU "work.tb" plus instance "tb"; return (instance, file handle)."""
+    file_h = db.createFileHandle("tb.sv", "/rtl")
+    du = db.createScope("work.tb", SourceInfo(file_h, 1, 0), 1, UCIS_OTHER, UCIS_DU_MODULE,
+                        UCIS_ENABLED_STMT | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU)
+    inst = db.createInstance("tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE)
+    return inst, file_h
+
+
+# ---------------------------------------------------------------------------
+# Backend fixture factory
+# ---------------------------------------------------------------------------
+
+def _db_for_backend(backend: str, builder_fn, tmp_path):
+    """
+    Build a database with *builder_fn* and reopen it through *backend*.
+
+    "sqlite" populates a SqliteUCIS file, closes it and opens it again; any
+    other value populates a MemFactory db, writes it as XML and reads that
+    back.  The returned db is left open -- the caller must close it.
+    """
+    if backend != "sqlite":
+        from ucis.xml.xml_factory import XmlFactory
+        from ucis.rgy.format_rgy import FormatRgy
+        mem_db = MemFactory.create()
+        builder_fn(mem_db)
+        xml_path = str(tmp_path / "test.xml")
+        XmlFactory.write(mem_db, xml_path)
+        return FormatRgy.inst().getDatabaseDesc("xml").fmt_if().read(xml_path)
+    from ucis.sqlite.sqlite_ucis import SqliteUCIS
+    db_path = str(tmp_path / "test.db")
+    writer_db = SqliteUCIS(db_path)
+    builder_fn(writer_db)
+    writer_db.close()
+    return SqliteUCIS(db_path)
+
+
+def _metrics(backend: str, builder_fn, tmp_path) -> CoverageMetrics:
+    """Wrap the db produced by _db_for_backend() in a CoverageMetrics."""
+    return CoverageMetrics(_db_for_backend(backend, builder_fn, tmp_path))
+
+
+# ---------------------------------------------------------------------------
+# DB builder functions
+# ---------------------------------------------------------------------------
+
+def _build_partial(db):
+    """2 covergroups, 2 coverpoints, 6 bins, 3 covered (50%)."""
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+    # One row per covergroup: (covergroup, coverpoint, ((bin, count), ...)).
+    # A count of 0 leaves the bin uncovered.
+    layout = (
+        ("cg1", "cp1", (("a", 5), ("b", 3), ("c", 0), ("d", 0))),
+        ("cg2", "cp2", (("x", 10), ("y", 0))),
+    )
+    for cg_name, cp_name, bins in layout:
+        cg = inst.createCovergroup(cg_name, src, 1, UCIS_OTHER)
+        cp = cg.createCoverpoint(cp_name, src, 1, UCIS_VLOG)
+        for bin_name, count in bins:
+            cp.createBin(bin_name, src, 1, count, bin_name)
+
+
+def _build_zero(db):
+    """1 covergroup (cg_zero), 1 coverpoint (cp_zero), 3 bins, all uncovered."""
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+    cg = inst.createCovergroup("cg_zero", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_zero", src, 1, UCIS_VLOG)
+    # Every bin is created with a count of 0.
+    for bin_name in ("b0", "b1", "b2"):
+        cp.createBin(bin_name, src, 1, 0, bin_name)
+
+
+def _build_full(db):
+    """1 covergroup (cg_full), 1 coverpoint (cp_full), 3 bins, all hit."""
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+    cg = inst.createCovergroup("cg_full", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_full", src, 1, UCIS_VLOG)
+    # Every bin is created with a non-zero count.
+    for bin_name, count in (("b0", 1), ("b1", 2), ("b2", 7)):
+        cp.createBin(bin_name, src, 1, count, bin_name)
+
+
+def _build_multi_test(db):
+    """3 tests; cg_mt/cp_mt with 6 bins; 4 covered (≈66.7%)."""
+    for test_name in ("test_a", "test_b", "test_c"):
+        _add_test(db, test_name)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+    cg = inst.createCovergroup("cg_mt", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_mt", src, 1, UCIS_VLOG)
+    hit_counts = (
+        ("b0", 1), ("b1", 1), ("b2", 1), ("b3", 1),  # covered
+        ("b4", 0), ("b5", 0),                        # uncovered
+    )
+    for bin_name, hits in hit_counts:
+        cp.createBin(bin_name, src, 1, hits, bin_name)
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(params=["xml", "sqlite"])
+def partial_m(request, tmp_path):  # CoverageMetrics over _build_partial, per backend
+    return _metrics(request.param, _build_partial, tmp_path)
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def zero_m(request, tmp_path):  # CoverageMetrics over _build_zero, per backend
+    return _metrics(request.param, _build_zero, tmp_path)
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def full_m(request, tmp_path):  # CoverageMetrics over _build_full, per backend
+    return _metrics(request.param, _build_full, tmp_path)
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def multi_m(request, tmp_path):  # CoverageMetrics over _build_multi_test, per backend
+    return _metrics(request.param, _build_multi_test, tmp_path)
+
+
+@pytest.fixture
+def vlt_metrics():
+    """Yield CoverageMetrics over test_vlt.cdb (code-coverage only); skip if absent."""
+    here = os.path.dirname(__file__)
+    vlt_path = os.path.join(here, "..", "test_vlt.cdb")
+    if not os.path.exists(vlt_path):
+        pytest.skip("test_vlt.cdb not found")
+    from ucis.sqlite.sqlite_ucis import SqliteUCIS
+    db = SqliteUCIS(vlt_path)
+    yield CoverageMetrics(db)
+    try:
+        db.close()
+    except Exception:  # best-effort teardown
+        pass
+
+
+# ===========================================================================
+# 1. Dataclass unit tests
+# ===========================================================================
+
+class TestBinStats:
+    """Arithmetic of the BinStats dataclass."""
+
+    def test_uncovered_property(self):
+        """10 total with 3 covered leaves 7 uncovered."""
+        assert BinStats(total=10, covered=3).uncovered == 7
+
+    def test_coverage_pct_normal(self):
+        """5 of 10 bins covered gives 50%."""
+        assert abs(BinStats(total=10, covered=5).coverage_pct - 50.0) < 0.01
+
+    def test_coverage_pct_zero_total(self):
+        """total == 0 yields a coverage_pct of exactly 0.0."""
+        assert BinStats(total=0, covered=0).coverage_pct == 0.0
+
+    def test_coverage_pct_full(self):
+        """4 of 4 bins covered gives 100%."""
+        assert abs(BinStats(total=4, covered=4).coverage_pct - 100.0) < 0.01
+
+    def test_add(self):
+        """Adding two BinStats sums their total and covered counts."""
+        c = BinStats(total=4, covered=2) + BinStats(total=6, covered=4)
+        assert c.total == 10
+        assert c.covered == 6
+        assert abs(c.coverage_pct - 60.0) < 0.01
+
+
+class TestBinDetail:
+
+    def test_covered_when_count_gte_at_least(self):
+        """A count above at_least counts as covered."""
+        assert BinDetail(name="b", count=5, at_least=1).covered is True
+
+    def test_not_covered_when_count_lt_at_least(self):
+        """A count below at_least is not covered."""
+        assert BinDetail(name="b", count=2, at_least=5).covered is False
+
+    def test_covered_exactly_at_least(self):
+        """A count equal to at_least counts as covered."""
+        assert BinDetail(name="b", count=3, at_least=3).covered is True
+
+    def test_not_covered_zero_count(self):
+        """A zero count is not covered when at_least is 1."""
+        assert BinDetail(name="b", count=0, at_least=1).covered is False
+
+    def test_is_ignore_flag(self):
+        """is_ignore=True sets the ignore flag and leaves is_illegal False."""
+        detail = BinDetail(name="ign", count=0, at_least=1, is_ignore=True)
+        assert detail.is_ignore is True and detail.is_illegal is False
+
+    def test_is_illegal_flag(self):
+        """is_illegal=True sets the illegal flag and leaves is_ignore False."""
+        detail = BinDetail(name="ill", count=0, at_least=1, is_illegal=True)
+        assert detail.is_illegal is True and detail.is_ignore is False
+
+
+# ===========================================================================
+# 2. functional_bins()
+# ===========================================================================
+
+class TestFunctionalBins:
+    """functional_bins() totals for the partial, zero and full databases."""
+
+    def test_partial_total(self, partial_m):
+        """The partial db exposes all 6 functional bins."""
+        assert partial_m.functional_bins().total == 6
+
+    def test_partial_covered(self, partial_m):
+        """Exactly 3 of the partial db's bins are covered."""
+        assert partial_m.functional_bins().covered == 3
+
+    def test_partial_pct(self, partial_m):
+        """3 of 6 bins covered is reported as 50%."""
+        assert abs(partial_m.functional_bins().coverage_pct - 50.0) < 0.01
+
+    def test_zero_coverage(self, zero_m):
+        stats = zero_m.functional_bins()
+        assert stats.total == 3
+        assert stats.covered == 0
+        assert stats.coverage_pct == 0.0
+
+    def test_full_coverage(self, full_m):
+        stats = full_m.functional_bins()
+        assert stats.total == 3
+        assert stats.covered == 3
+        assert abs(stats.coverage_pct - 100.0) < 0.01
+
+    def test_no_double_counting_xml(self, tmp_path):
+        """XML backend must not double-count type-level and instance-level CG bins."""
+        xml_metrics = _metrics("xml", _build_partial, tmp_path)
+        assert xml_metrics.functional_bins().total == 6, "double-counting detected"
+
+    def test_parity_with_report_builder(self, partial_m):
+        """functional_bins() must agree with CoverageReportBuilder's bin totals."""
+        from ucis.report.coverage_report_builder import CoverageReportBuilder
+        report = CoverageReportBuilder.build(partial_m._db)
+        bins = [b for cg in report.covergroups for cp in cg.coverpoints for b in cp.bins]
+        hit_bins = [b for b in bins if b.hit]
+        fb = partial_m.functional_bins()
+        assert fb.total == len(bins)
+        assert fb.covered == len(hit_bins)
+
+
+# ===========================================================================
+# 3. covergroup_stats()
+# ===========================================================================
+
+class TestCovergroupStats:
+
+    def test_count_partial(self, partial_m):
+        assert len(partial_m.covergroup_stats()) == 2  # cg1 and cg2
+
+    def test_names_present(self, partial_m):
+        names = {cg.name for cg in partial_m.covergroup_stats()}
+        assert {"cg1", "cg2"} <= names  # both must appear; extras are tolerated
+
+    def test_coverage_pct_approx(self, partial_m):
+        """Every covergroup percentage lies within [0, 100]."""
+        assert all(0.0 <= cg.coverage_pct <= 100.0 for cg in partial_m.covergroup_stats())
+
+    def test_bins_non_zero(self, partial_m):
+        """Every covergroup reports at least one bin."""
+        assert all(cg.bins.total > 0 for cg in partial_m.covergroup_stats())
+
+    def test_zero_db_zero_covered(self, zero_m):
+        stats = zero_m.covergroup_stats()
+        assert stats, "expected at least one covergroup"  # no vacuous pass
+        assert all(cg.bins.covered == 0 for cg in stats)
+
+    def test_full_db_all_covered(self, full_m):
+        stats = full_m.covergroup_stats()
+        assert stats, "expected at least one covergroup"  # no vacuous pass
+        assert all(cg.bins.covered == cg.bins.total for cg in stats)
+
+
+# ===========================================================================
+# 4. coverpoint_stats()
+# ===========================================================================
+
+class TestCoverpointStats:
+
+    def test_count_partial(self, partial_m):
+        assert len(partial_m.coverpoint_stats()) == 2
+
+    def test_names_correct(self, partial_m):
+        """Exactly cp1 and cp2 are reported."""
+        assert {cp.name for cp in partial_m.coverpoint_stats()} == {"cp1", "cp2"}
+
+    def test_bins_partial(self, partial_m):
+        """cp1 has 2 of 4 bins covered; cp2 has 1 of 2."""
+        bins_by_cp = {cp.name: cp.bins for cp in partial_m.coverpoint_stats()}
+        assert (bins_by_cp["cp1"].total, bins_by_cp["cp1"].covered) == (4, 2)
+        assert (bins_by_cp["cp2"].total, bins_by_cp["cp2"].covered) == (2, 1)
+
+    def test_path_contains_name(self, partial_m):
+        """Each coverpoint's path contains its own name."""
+        assert all(cp.name in cp.path for cp in partial_m.coverpoint_stats())
+
+    def test_include_bins_false_no_details(self, partial_m):
+        summaries = partial_m.coverpoint_stats(include_bins=False)
+        assert all(cp.bin_details == [] for cp in summaries)  # details omitted
+
+    def test_include_bins_true_has_details(self, partial_m):
+        """With include_bins=True each coverpoint lists one detail per bin."""
+        for cp in partial_m.coverpoint_stats(include_bins=True):
+            msg = f"bin_details length should match total bins for {cp.name}"
+            assert len(cp.bin_details) == cp.bins.total, msg
+
+    def test_bin_detail_semantics(self, partial_m):
+        """BinDetail.covered matches count >= at_least."""
+        by_name = {cp.name: cp for cp in partial_m.coverpoint_stats(include_bins=True)}
+        covered_details = [d for d in by_name["cp1"].bin_details if d.covered]
+        assert len(covered_details) == 2   # bins a, b
+
+    def test_coverage_pct_matches_bins(self, partial_m):
+        """A coverpoint's coverage_pct equals that of its BinStats."""
+        for cp in partial_m.coverpoint_stats():
+            assert abs(cp.coverage_pct - cp.bins.coverage_pct) < 0.001
+
+    def test_zero_db(self, zero_m):
+        """The zero db has one coverpoint with no covered bins."""
+        cps = zero_m.coverpoint_stats()
+        assert len(cps) == 1
+        assert cps[0].bins.covered == 0
+
+    def test_full_db(self, full_m):
+        """The full db's first coverpoint has every bin covered."""
+        cps = full_m.coverpoint_stats()
+        assert cps[0].bins.covered == cps[0].bins.total
+
+
+# ===========================================================================
+# 5. coverage_types_present()
+# ===========================================================================
+
+class TestCoverageTypesPresent:
+
+    def test_functional_db_has_cvgbin(self, partial_m):
+        """The covergroup-only partial db reports the CVGBIN type."""
+        assert CoverTypeT.CVGBIN in partial_m.coverage_types_present()
+
+    def test_functional_db_no_code_types(self, partial_m):
+        """The covergroup-only partial db reports neither STMTBIN nor BRANCHBIN."""
+        types = partial_m.coverage_types_present()
+        assert not {CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN} & set(types)
+
+    def test_vlt_has_code_types(self, vlt_metrics):
+        """vlt.cdb has statement, branch, toggle coverage: expect at least one."""
+        types = vlt_metrics.coverage_types_present()
+        code_types = {CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN}
+        assert code_types & set(types), "vlt.cdb should have code coverage types"
+
+    def test_vlt_no_cvgbin(self, vlt_metrics):
+        """The vlt fixture db reports no CVGBIN type."""
+        assert CoverTypeT.CVGBIN not in vlt_metrics.coverage_types_present()
+
+    def test_returns_list(self, partial_m):
+        assert isinstance(partial_m.coverage_types_present(), list)
+
+
+# ===========================================================================
+# 6. bins_by_type()
+# ===========================================================================
+
+class TestBinsByType:
+
+    def test_cvgbin_delegates_to_functional_bins(self, partial_m):
+        """bins_by_type(CVGBIN) must return the same result as functional_bins()."""
+        fb = partial_m.functional_bins()
+        bt = partial_m.bins_by_type(CoverTypeT.CVGBIN)
+        assert bt.total == fb.total
+        assert bt.covered == fb.covered
+
+    def test_non_cvgbin_type_with_no_items_returns_zero(self, partial_m):
+        bt = partial_m.bins_by_type(CoverTypeT.STMTBIN)
+        assert bt.total == 0
+        assert bt.covered == 0
+
+    def test_vlt_stmtbin_non_zero(self, vlt_metrics):
+        bt = vlt_metrics.bins_by_type(CoverTypeT.STMTBIN)
+        assert bt.total > 0
+
+    def test_vlt_branchbin_non_zero(self, vlt_metrics):
+        bt = vlt_metrics.bins_by_type(CoverTypeT.BRANCHBIN)
+        assert bt.total > 0
+
+    def test_returns_bin_stats(self, partial_m):
+        result = partial_m.bins_by_type(CoverTypeT.CVGBIN)
+        assert isinstance(result, BinStats)
+
+    def test_covered_lte_total(self, partial_m):
+        bt = partial_m.bins_by_type(CoverTypeT.CVGBIN)
+        assert bt.covered <= bt.total
+
+
+# ===========================================================================
+# 7. code_coverage_by_type()
+# ===========================================================================
+
+class TestCodeCoverageByType:
+
+    def test_returns_dict(self, vlt_metrics):
+        result = vlt_metrics.code_coverage_by_type()
+        assert isinstance(result, dict)
+
+    def test_stmtbin_in_result(self, vlt_metrics):
+        result = vlt_metrics.code_coverage_by_type()
+        assert CoverTypeT.STMTBIN in result
+
+    def test_bin_stats_type(self, vlt_metrics):
+        result = vlt_metrics.code_coverage_by_type()
+        for ct, bs in result.items():
+            assert isinstance(bs, BinStats), f"{ct} should map to BinStats"
+
+    def test_agrees_with_bins_by_type(self, vlt_metrics):
+        """code_coverage_by_type() must agree with bins_by_type() per type."""
+        by_type = vlt_metrics.code_coverage_by_type()
+        for ct, bs in by_type.items():
+            individual = vlt_metrics.bins_by_type(ct)
+            assert bs.total == individual.total, f"total mismatch for {ct}"
+            assert bs.covered == individual.covered, f"covered mismatch for {ct}"
+
+    def test_functional_db_code_types_zero(self, partial_m):
+        result = partial_m.code_coverage_by_type()
+        for ct in (CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN):
+            assert result.get(ct, BinStats()).total == 0
+
+
+# ===========================================================================
+# 8. file_coverage()
+# ===========================================================================
+
+class TestFileCoverage:
+
+    def test_empty_for_xml_backend(self, tmp_path):
+        """file_coverage() requires SQLite; returns [] for XML backends."""
+        m = _metrics("xml", _build_partial, tmp_path)
+        assert m.file_coverage() == []
+
+    def test_returns_list_for_sqlite(self, vlt_metrics):
+        result = vlt_metrics.file_coverage()
+        assert isinstance(result, list)
+
+    def test_non_empty_for_vlt(self, vlt_metrics):
+        result = vlt_metrics.file_coverage()
+        assert len(result) > 0, "vlt.cdb should have file-level coverage data"
+
+    def test_file_paths_non_empty(self, vlt_metrics):
+        for fcs in vlt_metrics.file_coverage():
+            assert fcs.file_path, "file_path should not be empty"
+
+    def test_overall_bins_non_zero(self, vlt_metrics):
+        for fcs in vlt_metrics.file_coverage():
+            assert fcs.overall.total > 0, f"{fcs.file_path}: no bins reported"
+
+    def test_sorted_by_path(self, vlt_metrics):
+        paths = [fcs.file_path for fcs in vlt_metrics.file_coverage()]
+        assert paths == sorted(paths), "file_coverage() should be sorted by path"
+
+    def test_covered_lte_total_per_file(self, vlt_metrics):
+        for fcs in vlt_metrics.file_coverage():
+            ov = fcs.overall
+            assert ov.covered <= ov.total
+
+
+# ===========================================================================
+# 9. tests()
+# ===========================================================================
+
+class TestTests:
+
+    def test_returns_list(self, partial_m):
+        assert isinstance(partial_m.tests(), list)
+
+    def test_single_test_db(self, partial_m):
+        tests = partial_m.tests()
+        assert len(tests) >= 1
+
+    def test_test_has_name(self, partial_m):
+        for t in partial_m.tests():
+            assert isinstance(t, TestInfo)
+            assert t.name
+
+    def test_test_has_status(self, partial_m):
+        for t in partial_m.tests():
+            assert t.status in ("PASSED", "FAILED", "UNKNOWN")
+
+    def test_test_has_date(self, partial_m):
+        for t in partial_m.tests():
+            assert t.date  # non-empty
+
+    def test_multi_test_names(self, multi_m):
+        names = {t.name for t in multi_m.tests()}
+        assert "test_a" in names
+        assert "test_b" in names
+        assert "test_c" in names
+
+    def test_multi_test_count(self, multi_m):
+        assert len(multi_m.tests()) == 3
+
+    def test_all_passed(self, partial_m):
+        """Fixture only adds passing tests."""
+        for t in partial_m.tests():
+            assert t.status == "PASSED"
+
+
+# ===========================================================================
+# 10. summary()
+# ===========================================================================
+
+class TestSummary:
+
+    def test_returns_dict(self, partial_m):
+        assert isinstance(partial_m.summary(), dict)
+
+    def test_required_keys(self, partial_m):
+        s = partial_m.summary()
+        for k in ("overall_coverage", "total_bins", "covered_bins", "covergroups", "coverpoints"):
+            assert k in s, f"key '{k}' missing from summary"
+
+    def test_partial_values(self, partial_m):
+        s = partial_m.summary()
+        assert s["total_bins"] == 6
+        assert s["covered_bins"] == 3
+        assert abs(s["overall_coverage"] - 50.0) < 0.01
+        assert s["covergroups"] == 2
+        assert s["coverpoints"] == 2
+
+    def test_zero_coverage(self, zero_m):
+        s = zero_m.summary()
+        assert s["covered_bins"] == 0
+        assert s["overall_coverage"] == 0.0
+
+    def test_full_coverage(self, full_m):
+        s = full_m.summary()
+        assert s["covered_bins"] == s["total_bins"]
+        assert abs(s["overall_coverage"] - 100.0) < 0.01
+
+    def test_vlt_total_bins_nonzero(self, vlt_metrics):
+        """Code-coverage-only DB should report total_bins > 0."""
+        s = vlt_metrics.summary()
+        assert s["total_bins"] > 0
+
+    def test_vlt_no_functional_covergroups(self, vlt_metrics):
+        """vlt.cdb has no functional coverage so covergroups = 0."""
+        s = vlt_metrics.summary()
+        assert s["covergroups"] == 0
+
+    def test_consistent_with_functional_bins(self, partial_m):
+        """summary() total_bins and covered_bins must agree with functional_bins()."""
+        s = partial_m.summary()
+        fb = partial_m.functional_bins()
+        assert s["total_bins"] == fb.total
+        assert s["covered_bins"] == fb.covered
+
+
+# ===========================================================================
+# 11. database_info()
+# ===========================================================================
+
+class TestDatabaseInfo:
+
+    def test_returns_dict(self, partial_m):
+        assert isinstance(partial_m.database_info(), dict)
+
+    def test_required_keys(self, partial_m):
+        info = partial_m.database_info()
+        for k in ("path", "format", "test_count"):
+            assert k in info
+
+    def test_test_count_matches_tests(self, partial_m):
+        info = partial_m.database_info()
+        tests = partial_m.tests()
+        assert info["test_count"] == len(tests)
+
+    def test_multi_test_count(self, multi_m):
+        assert multi_m.database_info()["test_count"] == 3
+
+
+# ===========================================================================
+# 12. invalidate() — cache invalidation
+# ===========================================================================
+
+class TestInvalidate:
+
+    def test_invalidate_clears_cache(self, partial_m):
+        """Calling invalidate() should force recomputation."""
+        fb1 = partial_m.functional_bins()
+        partial_m.invalidate()
+        fb2 = partial_m.functional_bins()
+        assert fb1.total == fb2.total
+        assert fb1.covered == fb2.covered
+
+    def test_cached_result_is_same_object(self, partial_m):
+        """Without invalidate(), successive calls return the same cached object."""
+        fb1 = partial_m.functional_bins()
+        fb2 = partial_m.functional_bins()
+        assert fb1 is fb2
+
+    def test_after_invalidate_new_object(self, partial_m):
+        fb1 = partial_m.functional_bins()
+        partial_m.invalidate()
+        fb2 = partial_m.functional_bins()
+        assert fb1 is not fb2
+
+    def test_summary_cached(self, partial_m):
+        s1 = partial_m.summary()
+        s2 = partial_m.summary()
+        # summary() is cached — returns the exact same dict object
+        assert s1 is s2
+
+    def test_summary_refreshed_after_invalidate(self, partial_m):
+        s1 = partial_m.summary()
+        partial_m.invalidate()
+        s2 = partial_m.summary()
+        # After invalidation a fresh dict is built — different object, same values
+        assert s1 is not s2
+        assert s1 == s2
+
+
+# ===========================================================================
+# 13. Parity: functional_bins agrees with CoverageReportBuilder
+# ===========================================================================
+
+class TestParityWithReportBuilder:
+    """
+    CoverageMetrics.functional_bins() MUST produce the same numbers as the
+    CoverageReportBuilder, which is the canonical oracle for functional coverage.
+    """
+
+    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
+    def test_partial_parity(self, tmp_path, backend):
+        m = _metrics(backend, _build_partial, tmp_path)
+        self._assert_parity(m)
+
+    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
+    def test_zero_parity(self, tmp_path, backend):
+        m = _metrics(backend, _build_zero, tmp_path)
+        self._assert_parity(m)
+
+    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
+    def test_full_parity(self, tmp_path, backend):
+        m = _metrics(backend, _build_full, tmp_path)
+        self._assert_parity(m)
+
+    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
+    def test_multi_test_parity(self, tmp_path, backend):
+        m = _metrics(backend, _build_multi_test, tmp_path)
+        self._assert_parity(m)
+
+    def _assert_parity(self, m: CoverageMetrics):
+        from ucis.report.coverage_report_builder import CoverageReportBuilder
+        report = CoverageReportBuilder.build(m._db)
+
+        def _report_bins(report):
+            total = 0
+            covered = 0
+            for cg in report.covergroups:
+                total += sum(len(cp.bins) for cp in cg.coverpoints)
+                covered += sum(1 for cp in cg.coverpoints for b in cp.bins if b.hit)
+            return total, covered
+
+        r_total, r_covered = _report_bins(report)
+        fb = m.functional_bins()
+        assert fb.total == r_total, (
+            f"total mismatch: metrics={fb.total}, report={r_total}"
+        )
+        assert fb.covered == r_covered, (
+            f"covered mismatch: metrics={fb.covered}, report={r_covered}"
+        )
+
+    def test_coverpoint_stats_parity(self, tmp_path):
+        """coverpoint_stats() should agree with direct CoverageReportBuilder traversal."""
+        m = _metrics("xml", _build_partial, tmp_path)
+        from ucis.report.coverage_report_builder import CoverageReportBuilder
+        report = CoverageReportBuilder.build(m._db)
+        report_cps = {cp.name: cp for cg in report.covergroups for cp in cg.coverpoints}
+        metrics_cps = {cp.name: cp for cp in m.coverpoint_stats()}
+        assert set(report_cps.keys()) == set(metrics_cps.keys()), "coverpoint names mismatch"
+        for name, rcp in report_cps.items():
+            mcp = metrics_cps[name]
+            assert mcp.bins.total == len(rcp.bins), f"total mismatch for {name}"
+            assert mcp.bins.covered == sum(1 for b in rcp.bins if b.hit), \
+                f"covered mismatch for {name}"
+
+
+# ===========================================================================
+# 14. VLT regression — real SQLite file
+# ===========================================================================
+
+class TestVltRegression:
+    """Smoke tests against the real vlt.cdb fixture."""
+
+    def test_summary_total_nonzero(self, vlt_metrics):
+        assert vlt_metrics.summary()["total_bins"] > 0
+
+    def test_coverage_types_include_branch(self, vlt_metrics):
+        assert CoverTypeT.BRANCHBIN in vlt_metrics.coverage_types_present()
+
+    def test_stmtbin_covered_lte_total(self, vlt_metrics):
+        bt = vlt_metrics.bins_by_type(CoverTypeT.STMTBIN)
+        assert bt.covered <= bt.total
+
+    def test_file_coverage_non_empty(self, vlt_metrics):
+        assert len(vlt_metrics.file_coverage()) > 0
+
+    def test_database_info_test_count_non_negative(self, vlt_metrics):
+        assert vlt_metrics.database_info()["test_count"] >= 0
diff --git a/tests/test_tui_model_fidelity.py b/tests/test_tui_model_fidelity.py
new file mode 100644
index 0000000..ce685b8
--- /dev/null
+++ b/tests/test_tui_model_fidelity.py
@@ -0,0 +1,240 @@
+"""
+Layer 1: CoverageModel unit tests.
+
+These tests verify that every public method of CoverageModel returns values
+that agree with the raw UCIS API (the ground truth).  Both the API/XML path
+and the SQLite fast path are exercised through the parametrized fixtures in
+tui_fixtures.py.
+"""
+import pytest
+from ucis.cover_type_t import CoverTypeT
+from ucis.scope_type_t import ScopeTypeT
+
+from tests.tui_fixtures import (
+    partial_coverage, zero_coverage, full_coverage, multi_test, vlt_model,
+)
+
+
+# ---------------------------------------------------------------------------
+# get_summary()
+# ---------------------------------------------------------------------------
+
+class TestGetSummary:
+
+    def test_total_bins(self, partial_coverage):
+        model, expected = partial_coverage
+        summary = model.get_summary()
+        assert summary["total_bins"] == expected["total_bins"], (
+            f"total_bins: got {summary['total_bins']}, want {expected['total_bins']}"
+        )
+
+    def test_covered_bins(self, partial_coverage):
+        model, expected = partial_coverage
+        summary = model.get_summary()
+        assert summary["covered_bins"] == expected["covered_bins"]
+
+    def test_overall_coverage_percentage(self, partial_coverage):
+        model, expected = partial_coverage
+        summary = model.get_summary()
+        assert abs(summary["overall_coverage"] - expected["overall_coverage"]) < 0.01
+
+    def test_covergroup_count(self, partial_coverage):
+        model, expected = partial_coverage
+        summary = model.get_summary()
+        assert summary["covergroups"] == expected["covergroups"]
+
+    def test_summary_zero_coverage(self, zero_coverage):
+        model, expected = zero_coverage
+        summary = model.get_summary()
+        assert summary["covered_bins"] == 0
+        assert summary["overall_coverage"] == 0.0
+        assert summary["total_bins"] == expected["total_bins"]
+
+    def test_summary_full_coverage(self, full_coverage):
+        model, expected = full_coverage
+        summary = model.get_summary()
+        assert summary["covered_bins"] == expected["covered_bins"]
+        assert abs(summary["overall_coverage"] - 100.0) < 0.01
+
+    def test_summary_is_cached(self, partial_coverage):
+        model, _ = partial_coverage
+        s1 = model.get_summary()
+        s2 = model.get_summary()
+        assert s1 is s2, "get_summary() should return the cached object on repeated calls"
+
+
+# ---------------------------------------------------------------------------
+# get_coverage_types()
+# ---------------------------------------------------------------------------
+
+class TestGetCoverageTypes:
+
+    def test_cvgbin_present_in_partial(self, partial_coverage):
+        model, _ = partial_coverage
+        types = model.get_coverage_types()
+        assert CoverTypeT.CVGBIN in types
+
+    def test_types_non_empty(self, partial_coverage):
+        model, _ = partial_coverage
+        assert len(model.get_coverage_types()) >= 1
+
+    def test_types_cached(self, partial_coverage):
+        model, _ = partial_coverage
+        t1 = model.get_coverage_types()
+        t2 = model.get_coverage_types()
+        assert t1 is t2
+
+
+# ---------------------------------------------------------------------------
+# get_coverage_by_type()
+# ---------------------------------------------------------------------------
+
+class TestGetCoverageByType:
+
+    def test_cvgbin_totals_match_summary(self, partial_coverage):
+        model, expected = partial_coverage
+        result = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        assert result["total"]   == expected["total_bins"]
+        assert result["covered"] == expected["covered_bins"]
+
+    def test_coverage_percentage_derived_correctly(self, partial_coverage):
+        model, _ = partial_coverage
+        result = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        if result["total"] > 0:
+            expected_pct = result["covered"] / result["total"] * 100
+            assert abs(result["coverage"] - expected_pct) < 0.01
+
+    def test_zero_coverage(self, zero_coverage):
+        model, _ = zero_coverage
+        result = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        assert result["covered"] == 0
+        assert result["coverage"] == 0.0
+
+    def test_full_coverage(self, full_coverage):
+        model, _ = full_coverage
+        result = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        assert result["covered"] == result["total"]
+        assert abs(result["coverage"] - 100.0) < 0.01
+
+    def test_result_cached(self, partial_coverage):
+        model, _ = partial_coverage
+        r1 = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        r2 = model.get_coverage_by_type(CoverTypeT.CVGBIN)
+        assert r1 is r2
+
+
+# ---------------------------------------------------------------------------
+# get_all_tests()
+# ---------------------------------------------------------------------------
+
+class TestGetAllTests:
+
+    def test_test_count(self, partial_coverage):
+        """Partial-coverage fixture has exactly one test."""
+        model, _ = partial_coverage
+        tests = model.get_all_tests()
+        assert len(tests) == 1
+
+    def test_test_name(self, partial_coverage):
+        model, _ = partial_coverage
+        tests = model.get_all_tests()
+        assert tests[0]["name"] == "test1"
+
+    def test_multi_test_count(self, multi_test):
+        model, expected = multi_test
+        tests = model.get_all_tests()
+        assert len(tests) == len(expected["test_names"])
+
+    def test_multi_test_names_present(self, multi_test):
+        model, expected = multi_test
+        tests = model.get_all_tests()
+        found_names = {t["name"] for t in tests}
+        for name in expected["test_names"]:
+            assert name in found_names, f"Expected test '{name}' not in {found_names}"
+
+    def test_tests_cached(self, partial_coverage):
+        model, _ = partial_coverage
+        t1 = model.get_all_tests()
+        t2 = model.get_all_tests()
+        assert t1 is t2
+
+
+# ---------------------------------------------------------------------------
+# get_database_info()
+# ---------------------------------------------------------------------------
+
+class TestGetDatabaseInfo:
+
+    def test_path_preserved(self, partial_coverage):
+        model, _ = partial_coverage
+        info = model.get_database_info()
+        assert info["path"] == model.db_path
+
+    def test_test_count_matches(self, partial_coverage):
+        model, _ = partial_coverage
+        info = model.get_database_info()
+        assert info["test_count"] == 1
+
+    def test_multi_test_count(self, multi_test):
+        model, expected = multi_test
+        info = model.get_database_info()
+        assert info["test_count"] == len(expected["test_names"])
+
+
+# ---------------------------------------------------------------------------
+# Test filter / cache invalidation
+# ---------------------------------------------------------------------------
+
+class TestTestFilter:
+
+    def test_set_and_get_filter(self, partial_coverage):
+        model, _ = partial_coverage
+        assert model.get_test_filter() is None
+        model.set_test_filter("test1")
+        assert model.get_test_filter() == "test1"
+
+    def test_clear_filter(self, partial_coverage):
+        model, _ = partial_coverage
+        model.set_test_filter("test1")
+        model.clear_test_filter()
+        assert model.get_test_filter() is None
+
+    def test_filter_invalidates_code_coverage_cache(self, partial_coverage):
+        """Setting a filter must bust the code_coverage_summary cache."""
+        model, _ = partial_coverage
+        _ = model.get_summary()              # populate cache
+        model.set_test_filter("test1")
+        # code_coverage_summary cache key must be gone after filter change
+        assert "code_coverage_summary" not in model._cache
+
+    def test_unfiltered_and_filtered_differ_when_partial(self, multi_test):
+        """With a test filter the per-type count should be <= the unfiltered total.
+        (Filtering by test is SQLite-only; on other backends this test is skipped.)
+        """
+        model, expected = multi_test
+        unfiltered = model.get_coverage_by_type(CoverTypeT.CVGBIN, filtered=False)
+        # Filtering is only meaningful for the SQLite backend; skip for XML
+        if not hasattr(model.db, "conn"):
+            pytest.skip("filter-by-test is SQLite-only")
+        model.set_test_filter(expected["test_names"][0])
+        filtered = model.get_coverage_by_type(CoverTypeT.CVGBIN, filtered=True)
+        assert filtered["covered"] <= unfiltered["covered"]
+
+
+# ---------------------------------------------------------------------------
+# Regression: real VLT SQLite database
+# ---------------------------------------------------------------------------
+
+class TestVltModel:
+
+    def test_summary_non_empty(self, vlt_model):
+        summary = vlt_model.get_summary()
+        assert summary["total_bins"] > 0
+
+    def test_coverage_types_non_empty(self, vlt_model):
+        types = vlt_model.get_coverage_types()
+        assert len(types) > 0
+
+    def test_database_info_has_path(self, vlt_model):
+        info = vlt_model.get_database_info()
+        assert info["path"] != ""
diff --git a/tests/test_tui_report_parity.py b/tests/test_tui_report_parity.py
new file mode 100644
index 0000000..81f0ad3
--- /dev/null
+++ b/tests/test_tui_report_parity.py
@@ -0,0 +1,324 @@
+"""
+Layer 3: Report-parity tests.
+
+These tests load the same database in both:
+  - CoverageModel (the TUI data layer)
+  - CoverageReportBuilder (the text/HTML report builder, which is our oracle)
+
+and assert they agree on every significant metric.  Disagreements would mean
+the TUI is showing different numbers than the CLI text report.
+
+IMPORTANT NOTE – bin "hit" semantics
+-------------------------------------
+CoverageModel counts a bin as covered when cover_data > 0.
+CoverageReportBuilder counts a bin as covered when data >= at_least.
+
+These differ when at_least > 1 (e.g. at_least=5 but only 3 hits: model
+says covered, builder says not covered).  The parity tests below use
+at_least=1 so both agree; a dedicated test documents the known divergence.
+"""
+import pytest
+from ucis.cover_type_t import CoverTypeT
+
+from tests.tui_fixtures import (
+    partial_coverage, zero_coverage, full_coverage, multi_test, vlt_model,
+    make_model_and_expected,
+    _make_partial_coverage_db,
+)
+
+
+def _build_report(model):
+    """Build CoverageReport from model.db (the shared oracle)."""
+    from ucis.report.coverage_report_builder import CoverageReportBuilder
+    return CoverageReportBuilder.build(model.db)
+
+
+def _collect_coverpoints(cg, result=None):
+    """Flatten all coverpoints (across nested covergroups) into result dict."""
+    if result is None:
+        result = {}
+    for cp in cg.coverpoints:
+        result[cp.name] = cp
+    for sub in getattr(cg, "covergroups", []):
+        _collect_coverpoints(sub, result)
+    return result
+
+
+def _report_total_bins(report):
+    """Count total bins across all coverpoints in a CoverageReport."""
+    total = 0
+    for cg in report.covergroups:
+        for _, cp in _collect_coverpoints(cg).items():
+            total += len(cp.bins)
+    return total
+
+
+def _report_covered_bins(report):
+    """Count hit bins (data >= at_least) across a CoverageReport."""
+    covered = 0
+    for cg in report.covergroups:
+        for _, cp in _collect_coverpoints(cg).items():
+            for b in cp.bins:
+                if b.hit:
+                    covered += 1
+    return covered
+
+
+# ---------------------------------------------------------------------------
+# Overall totals
+# ---------------------------------------------------------------------------
+
+class TestBinCountParity:
+
+    def test_total_bins_match(self, partial_coverage):
+        model, _ = partial_coverage
+        report = _build_report(model)
+        report_total = _report_total_bins(report)
+        model_summary = model.get_summary()
+        assert model_summary["total_bins"] == report_total, (
+            f"total_bins: model={model_summary['total_bins']}, report={report_total}"
+        )
+
+    def test_covered_bins_match_when_at_least_1(self, partial_coverage):
+        """When all bins have at_least=1, both sources must agree on covered count."""
+        model, _ = partial_coverage
+        report = _build_report(model)
+        report_covered = _report_covered_bins(report)
+        model_summary = model.get_summary()
+        assert model_summary["covered_bins"] == report_covered, (
+            f"covered_bins: model={model_summary['covered_bins']}, report={report_covered}"
+        )
+
+    def test_total_bins_zero_db(self, zero_coverage):
+        model, _ = zero_coverage
+        report = _build_report(model)
+        assert _report_total_bins(report) == model.get_summary()["total_bins"]
+
+    def test_covered_bins_zero_db(self, zero_coverage):
+        model, _ = zero_coverage
+        report = _build_report(model)
+        assert _report_covered_bins(report) == 0
+        assert model.get_summary()["covered_bins"] == 0
+
+    def test_covered_bins_full_db(self, full_coverage):
+        model, _ = full_coverage
+        report = _build_report(model)
+        report_total = _report_total_bins(report)
+        report_covered = _report_covered_bins(report)
+        model_summary = model.get_summary()
+        assert model_summary["covered_bins"] == report_covered
+        assert model_summary["total_bins"] == report_total
+
+
+# ---------------------------------------------------------------------------
+# Per-coverpoint coverage % parity
+# ---------------------------------------------------------------------------
+
+class TestPerCoverpointParity:
+
+    def test_coverpoint_coverage_pct(self, partial_coverage):
+        """Each coverpoint's coverage % in the TUI gaps view matches the report."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        report = _build_report(model)
+
+        # Collect oracle: {name → report coverpoint (its .coverage is the expected %)}
+        oracle = {}
+        for cg in report.covergroups:
+            oracle.update(_collect_coverpoints(cg))
+
+        view = GapsView(StubApp(model))
+        for gap in view.gaps:
+            if gap.name in oracle:
+                report_cp = oracle[gap.name]
+                assert abs(gap.coverage - report_cp.coverage) < 0.01, (
+                    f"Coverpoint '{gap.name}': TUI={gap.coverage:.2f}%, "
+                    f"report={report_cp.coverage:.2f}%"
+                )
+
+    def test_per_bin_hit_count_matches_report(self, partial_coverage):
+        """The hits/goal shown in the gaps view must match the report bin data."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        report = _build_report(model)
+
+        oracle_cp = {}
+        for cg in report.covergroups:
+            oracle_cp.update(_collect_coverpoints(cg))
+
+        view = GapsView(StubApp(model))
+        for gap in view.gaps:
+            if gap.name not in oracle_cp:
+                continue
+            cp = oracle_cp[gap.name]
+            report_covered = sum(1 for b in cp.bins if b.hit)
+            report_total   = len(cp.bins)
+            assert gap.hits == report_covered, (
+                f"'{gap.name}' hits: TUI={gap.hits}, report={report_covered}"
+            )
+            assert gap.goal == report_total, (
+                f"'{gap.name}' goal: TUI={gap.goal}, report={report_total}"
+            )
+
+
+# ---------------------------------------------------------------------------
+# Covergroup count parity
+# ---------------------------------------------------------------------------
+
+class TestCovergroupCountParity:
+
+    def test_covergroup_count(self, partial_coverage):
+        model, expected = partial_coverage
+        report = _build_report(model)
+        # CoverageReportBuilder only counts type-level (non-instance) groups
+        # at the top level; the model counts all groups including COVERINSTANCE.
+        # We assert the minimum – the report count must be <= model count.
+        assert len(report.covergroups) <= model.get_summary()["covergroups"]
+
+    def test_covergroup_count_multi_test(self, multi_test):
+        model, _ = multi_test
+        report = _build_report(model)
+        assert len(report.covergroups) >= 1
+
+
+# ---------------------------------------------------------------------------
+# Overall coverage % parity
+# ---------------------------------------------------------------------------
+
+class TestOverallCoverageParity:
+
+    def test_overall_coverage_pct(self, partial_coverage):
+        """
+        The overall coverage shown by the TUI (CoverageModel.get_summary())
+        must agree with what the text report would show, within 0.1 %.
+        """
+        model, _ = partial_coverage
+        report = _build_report(model)
+
+        report_total   = _report_total_bins(report)
+        report_covered = _report_covered_bins(report)
+        report_pct = (report_covered / report_total * 100) if report_total else 0.0
+
+        model_pct = model.get_summary()["overall_coverage"]
+        assert abs(model_pct - report_pct) < 0.1, (
+            f"Overall coverage: model={model_pct:.2f}%, report={report_pct:.2f}%"
+        )
+
+    def test_zero_db_both_zero(self, zero_coverage):
+        model, _ = zero_coverage
+        report = _build_report(model)
+        assert _report_covered_bins(report) == 0
+        assert model.get_summary()["overall_coverage"] == 0.0
+
+    def test_full_db_both_100(self, full_coverage):
+        model, _ = full_coverage
+        report = _build_report(model)
+        report_total   = _report_total_bins(report)
+        report_covered = _report_covered_bins(report)
+        assert report_covered == report_total
+        assert abs(model.get_summary()["overall_coverage"] - 100.0) < 0.01
+
+
+# ---------------------------------------------------------------------------
+# Document known divergence: at_least > 1
+# ---------------------------------------------------------------------------
+
+class TestKnownDivergenceAtLeastGt1:
+    """
+    When at_least > 1 the two counters diverge:
+      CoverageModel:       bin is "covered" if data > 0
+      CoverageReportBuilder: bin is "covered" if data >= at_least
+    This test documents and verifies that divergence.
+    """
+
+    def _make_at_least_db(self, tmp_path, backend):
+        """
+        Create a DB with one coverpoint whose bin is intended to have at_least=5
+        and data=2.
+
+        NOTE: Both the XML and SQLite backends currently store ``at_least`` as
+        the ``goal`` field of ``CoverData`` (which defaults to 1), so the
+        requested ``at_least=5`` is silently stored as 1.  As a result the
+        bin is seen as covered by *both* the model and the report (2 >= 1).
+        This is a known backend limitation; it does not affect the correctness
+        of the common-metrics layer design (which correctly uses
+        ``data >= at_least`` once backends properly preserve the value).
+        """
+        from tests.tui_fixtures import make_model_and_expected
+        from ucis.mem.mem_factory import MemFactory
+        from ucis import (
+            UCIS_HISTORYNODE_TEST, UCIS_TESTSTATUS_OK, UCIS_OTHER,
+            UCIS_DU_MODULE, UCIS_ENABLED_STMT, UCIS_ENABLED_BRANCH,
+            UCIS_INST_ONCE, UCIS_SCOPE_UNDER_DU, UCIS_INSTANCE, UCIS_VLOG,
+        )
+        from ucis.source_info import SourceInfo
+        from ucis.test_data import TestData
+
+        def builder(db):
+            node = db.createHistoryNode(None, "t1", "t1", UCIS_HISTORYNODE_TEST)
+            node.setTestData(TestData(
+                teststatus=UCIS_TESTSTATUS_OK,
+                toolcategory="test",
+                date="20240101000000",
+            ))
+            file_h = db.createFileHandle("d.sv", "/rtl")
+            src = SourceInfo(file_h, 1, 0)
+            du = db.createScope("work.m", src, 1, UCIS_OTHER, UCIS_DU_MODULE,
+                                 UCIS_ENABLED_STMT | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU)
+            inst = db.createInstance("tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE)
+            cg = inst.createCovergroup("cg", src, 1, UCIS_OTHER)
+            cp = cg.createCoverpoint("cp_al5", src, 1, UCIS_VLOG)
+            # Intended: at_least=5, data=2.  Both backends store at_least as 1.
+            cp.createBin("bin_partial", src, 5, 2, "bin_partial")
+            return {"at_least": 5, "data": 2}
+
+        if backend == "sqlite":
+            from ucis.sqlite.sqlite_ucis import SqliteUCIS
+            from ucis.tui.models.coverage_model import CoverageModel
+            db_path = str(tmp_path / "al5.db")
+            db = SqliteUCIS(db_path)
+            expected = builder(db)
+            db.close()
+            return CoverageModel(db_path), expected
+        else:
+            db = MemFactory.create()
+            expected = builder(db)
+            from ucis.xml.xml_factory import XmlFactory
+            from ucis.tui.models.coverage_model import CoverageModel
+            xml_path = str(tmp_path / "al5.xml")
+            XmlFactory.write(db, xml_path)
+            return CoverageModel(xml_path), expected
+
+    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
+    def test_model_and_report_agree(self, tmp_path, backend):
+        """Model and report must agree — the divergence (data>0 vs data>=at_least) is fixed."""
+        from ucis.report.coverage_report_builder import CoverageReportBuilder
+        model, _ = self._make_at_least_db(tmp_path, backend)
+        model_covered = model.get_summary()["covered_bins"]
+        report_covered = _report_covered_bins(CoverageReportBuilder.build(model.db))
+        assert model_covered == report_covered, (
+            f"Model ({model_covered}) and report ({report_covered}) disagree"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Regression: real VLT SQLite database – basic parity smoke test
+# ---------------------------------------------------------------------------
+
+class TestVltReportParity:
+
+    def test_bin_totals_consistent(self, vlt_model):
+        """Report bin total must not exceed model bin total."""
+        report = _build_report(vlt_model)
+        report_total = _report_total_bins(report)
+        model_total  = vlt_model.get_summary()["total_bins"]
+        # Report only walks INSTANCE → COVERGROUP; model counts everything.
+        # They may differ but report total should be <= model total.
+        assert report_total <= model_total or model_total == 0
+
+
+# ---------------------------------------------------------------------------
+# Helper import needed in test body
+# ---------------------------------------------------------------------------
+
+from tests.tui_fixtures import StubApp  # noqa: E402 (after class defs)
diff --git a/tests/test_tui_view_data.py b/tests/test_tui_view_data.py
new file mode 100644
index 0000000..1af6dde
--- /dev/null
+++ b/tests/test_tui_view_data.py
@@ -0,0 +1,370 @@
+"""
+Layer 2: View data-fidelity tests (headless).
+
+Each view is instantiated with a real CoverageModel but without any
+terminal I/O or Rich rendering.  We inspect the Python data structures
+that the view would use to render, asserting their correctness without
+triggering any display code.
+"""
+import pytest
+from unittest.mock import MagicMock
+
+from tests.tui_fixtures import (
+    StubApp,
+    partial_coverage, zero_coverage, full_coverage, multi_test, vlt_model,
+)
+
+
+# ---------------------------------------------------------------------------
+# GapsView
+# ---------------------------------------------------------------------------
+
+class TestGapsViewData:
+
+    def test_all_gaps_below_threshold(self, partial_coverage):
+        """Every GapItem must have coverage < threshold."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+        for gap in view.gaps:
+            assert gap.coverage < view.threshold, (
+                f"Gap '{gap.name}' has {gap.coverage}% which is >= threshold {view.threshold}%"
+            )
+
+    def test_gap_count_matches_uncovered_coverpoints(self, partial_coverage):
+        """There are 2 coverpoints in the partial fixture, both at 50 % → 2 gaps."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, expected = partial_coverage
+        view = GapsView(StubApp(model))
+        assert len(view.gaps) == len(expected["gaps"]), (
+            f"Expected {len(expected['gaps'])} gaps, got {len(view.gaps)}"
+        )
+
+    def test_gaps_sorted_ascending(self, partial_coverage):
+        """GapsView sorts by coverage ascending."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+        coverages = [g.coverage for g in view.gaps]
+        assert coverages == sorted(coverages), f"Gaps not sorted: {coverages}"
+
+    def test_gap_hits_and_goal_consistent(self, partial_coverage):
+        """gap.coverage should equal hits/goal * 100."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+        for gap in view.gaps:
+            if gap.goal > 0:
+                expected_pct = gap.hits / gap.goal * 100
+                assert abs(gap.coverage - expected_pct) < 0.01, (
+                    f"Gap '{gap.name}': coverage={gap.coverage} but hits/goal={expected_pct}"
+                )
+
+    def test_no_gaps_when_fully_covered(self, full_coverage):
+        """A fully-covered database should have zero gaps."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = full_coverage
+        view = GapsView(StubApp(model))
+        assert len(view.gaps) == 0, f"Expected 0 gaps but got {len(view.gaps)}"
+
+    def test_all_bins_are_gaps_when_zero_coverage(self, zero_coverage):
+        """With zero coverage every coverpoint is a gap."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = zero_coverage
+        view = GapsView(StubApp(model))
+        assert len(view.gaps) >= 1
+        for gap in view.gaps:
+            assert gap.coverage == 0.0
+
+    def test_gaps_coverage_values_correct(self, partial_coverage):
+        """Coverage percentages must match what the text report would show."""
+        from ucis.tui.views.gaps_view import GapsView
+        from ucis.report.coverage_report_builder import CoverageReportBuilder
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+
+        # Build oracle from same underlying db
+        report = CoverageReportBuilder.build(model.db)
+        report_coverages = {}
+        for cg in report.covergroups:
+            _collect_coverpoint_coverages(cg, report_coverages)
+
+        for gap in view.gaps:
+            if gap.name in report_coverages:
+                oracle_pct = report_coverages[gap.name]
+                assert abs(gap.coverage - oracle_pct) < 0.01, (
+                    f"Gap '{gap.name}': TUI={gap.coverage:.2f}%, report={oracle_pct:.2f}%"
+                )
+
+    def test_gap_navigation_keys(self, partial_coverage):
+        """Arrow keys must update selected_index correctly."""
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+        if not view.gaps:
+            pytest.skip("No gaps to navigate")
+        view.selected_index = 0
+        view.handle_key("down")
+        assert view.selected_index == 1
+        view.handle_key("up")
+        assert view.selected_index == 0
+
+    def test_gap_navigation_does_not_go_negative(self, partial_coverage):
+        from ucis.tui.views.gaps_view import GapsView
+        model, _ = partial_coverage
+        view = GapsView(StubApp(model))
+        view.selected_index = 0
+        view.handle_key("up")
+        assert view.selected_index == 0
+
+
+# ---------------------------------------------------------------------------
+# HierarchyView
+# ---------------------------------------------------------------------------
+
+class TestHierarchyViewData:
+
+    def test_root_nodes_non_empty(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        assert len(view.root_nodes) >= 1
+
+    def test_selected_node_is_set(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        assert view.selected_node is not None
+
+    def test_all_nodes_list_non_empty(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        assert len(view._all_nodes) >= 1
+
+    def test_node_coverage_total_non_negative(self, partial_coverage):
+        """Every node's total count must be >= 0."""
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        for node in view._all_nodes:
+            assert node.total >= 0
+            assert node.covered >= 0
+
+    def test_node_covered_lte_total(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        for node in view._all_nodes:
+            assert node.covered <= node.total, (
+                f"Node '{node.name}': covered={node.covered} > total={node.total}"
+            )
+
+    def test_coverage_percent_calculation(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        for node in view._all_nodes:
+            pct = node.get_coverage_percent()
+            if node.total == 0:
+                assert pct == 0.0
+            else:
+                expected = node.covered / node.total * 100
+                assert abs(pct - expected) < 0.01
+
+    def test_down_navigation_changes_selection(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        initial = view.selected_node
+        view.handle_key("down")
+        assert view.selected_node != initial or len(view._all_nodes) == 1
+
+    def test_up_after_down_returns_to_original(self, partial_coverage):
+        from ucis.tui.views.hierarchy_view import HierarchyView
+        model, _ = partial_coverage
+        view = HierarchyView(StubApp(model))
+        initial = view.selected_node
+        view.handle_key("down")
+        view.handle_key("up")
+        assert view.selected_node == initial
+
+
+# ---------------------------------------------------------------------------
+# MetricsView
+# ---------------------------------------------------------------------------
+
+class TestMetricsViewData:
+
+    def test_covergroup_count(self, partial_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["total_covergroups"] == expected["covergroups"]
+
+    def test_coverpoint_count(self, partial_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["total_coverpoints"] == expected["coverpoints"]
+
+    def test_total_bins(self, partial_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["total_bins"] == expected["total_bins"]
+
+    def test_covered_bins(self, partial_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["covered_bins"] == expected["covered_bins"]
+
+    def test_bin_distribution_sums_to_total(self, partial_coverage):
+        """The four histogram buckets must sum to total_bins."""
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        dist = view.metrics["bin_hit_distribution"]
+        bucket_total = sum(dist.values())
+        assert bucket_total == expected["total_bins"], (
+            f"Distribution sum {bucket_total} != total_bins {expected['total_bins']}: {dist}"
+        )
+
+    def test_zero_hit_bucket_correct(self, partial_coverage):
+        """Partial fixture: 3 uncovered bins → bucket '0' must be 3."""
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = partial_coverage
+        view = MetricsView(StubApp(model))
+        uncovered = expected["total_bins"] - expected["covered_bins"]
+        assert view.metrics["bin_hit_distribution"]["0"] == uncovered
+
+    def test_zero_coverage_all_in_zero_bucket(self, zero_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, expected = zero_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["bin_hit_distribution"]["0"] == expected["total_bins"]
+
+    def test_full_coverage_zero_in_zero_bucket(self, full_coverage):
+        from ucis.tui.views.metrics_view import MetricsView
+        model, _ = full_coverage
+        view = MetricsView(StubApp(model))
+        assert view.metrics["bin_hit_distribution"]["0"] == 0
+
+
+# ---------------------------------------------------------------------------
+# TestHistoryView
+# ---------------------------------------------------------------------------
+
+class TestTestHistoryViewData:
+
+    def _make_view(self, model):
+        from ucis.tui.views.test_history_view import TestHistoryView
+        view = TestHistoryView(StubApp(model))
+        view.on_enter()   # triggers _load_tests()
+        return view
+
+    def test_test_count(self, partial_coverage):
+        model, _ = partial_coverage
+        view = self._make_view(model)
+        assert len(view.tests) == 1
+
+    def test_test_name(self, partial_coverage):
+        model, _ = partial_coverage
+        view = self._make_view(model)
+        assert view.tests[0]["name"] == "test1"
+
+    def test_multi_test_count(self, multi_test):
+        model, expected = multi_test
+        view = self._make_view(model)
+        assert len(view.tests) == len(expected["test_names"])
+
+    def test_all_multi_test_names_present(self, multi_test):
+        model, expected = multi_test
+        view = self._make_view(model)
+        names = {t["name"] for t in view.tests}
+        for name in expected["test_names"]:
+            assert name in names
+
+    def test_sort_by_name_ascending(self, multi_test):
+        model, _ = multi_test
+        view = self._make_view(model)
+        view.handle_key("d")   # switch to date sort first
+        view.handle_key("n")   # switch back to name → ascending
+        names = [t["name"] for t in view.tests]
+        assert names == sorted(names)
+
+    def test_sort_by_name_toggle_descending(self, multi_test):
+        model, _ = multi_test
+        view = self._make_view(model)
+        view.handle_key("d")   # switch to date sort first
+        view.handle_key("n")   # sort by name ascending
+        view.handle_key("n")   # toggle → descending
+        names = [t["name"] for t in view.tests]
+        assert names == sorted(names, reverse=True)
+
+    def test_navigation_changes_selection(self, multi_test):
+        model, _ = multi_test
+        view = self._make_view(model)
+        view.selected_index = 0
+        view.handle_key("down")
+        assert view.selected_index == 1
+
+    def test_navigation_clamped_at_zero(self, multi_test):
+        model, _ = multi_test
+        view = self._make_view(model)
+        view.selected_index = 0
+        view.handle_key("up")
+        assert view.selected_index == 0
+
+    def test_filter_by_test_sets_model_filter(self, multi_test):
+        """Pressing 'f' should set the model test filter."""
+        model, expected = multi_test
+        view = self._make_view(model)
+        view.selected_index = 0
+        selected_name = view.tests[0]["name"]
+        view.handle_key("f")
+        assert model.get_test_filter() == selected_name
+
+
+# ---------------------------------------------------------------------------
+# CodeCoverageView
+# ---------------------------------------------------------------------------
+
+class TestCodeCoverageViewData:
+
+    def test_file_coverage_list_type(self, vlt_model):
+        """file_coverage must be a list (possibly empty for non-code dbs)."""
+        from ucis.tui.views.code_coverage_view import CodeCoverageView
+        view = CodeCoverageView(StubApp(vlt_model))
+        assert isinstance(view.file_coverage, list)
+
+    def test_covered_lte_total_per_file(self, vlt_model):
+        """For every file, line_covered <= line_total."""
+        from ucis.tui.views.code_coverage_view import CodeCoverageView
+        view = CodeCoverageView(StubApp(vlt_model))
+        for fi in view.file_coverage:
+            assert fi.line_covered <= fi.line_total, (
+                f"{fi.file_path}: covered={fi.line_covered} > total={fi.line_total}"
+            )
+
+    def test_coverage_percentage_property(self, vlt_model):
+        """line_coverage property must be arithmetically correct."""
+        from ucis.tui.views.code_coverage_view import CodeCoverageView
+        view = CodeCoverageView(StubApp(vlt_model))
+        for fi in view.file_coverage:
+            if fi.line_total > 0:
+                expected = fi.line_covered / fi.line_total * 100
+                assert abs(fi.line_coverage - expected) < 0.01
+
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+def _collect_coverpoint_coverages(cg_node, result: dict):
+    """Recursively collect {coverpoint_name: coverage_pct} from a Covergroup."""
+    for cp in cg_node.coverpoints:
+        result[cp.name] = cp.coverage
+    for sub in getattr(cg_node, "covergroups", []):
+        _collect_coverpoint_coverages(sub, result)
diff --git a/tests/tui_fixtures.py b/tests/tui_fixtures.py
new file mode 100644
index 0000000..b88d56c
--- /dev/null
+++ b/tests/tui_fixtures.py
@@ -0,0 +1,305 @@
+"""
+Shared fixtures and database builders for TUI automated tests.
+
+The backend-parametrized fixtures yield a ``(model, expected)`` pair:
+  - model     : CoverageModel wrapping the database (UCIS db at ``model.db``)
+  - expected  : dict of known-correct values for assertions
+The file-based ``vlt_model`` fixture yields only the model.
+
+The StubApp bridges BaseView.__init__ without requiring a real TUI.
+"""
+import os
+import pytest
+from unittest.mock import MagicMock
+
+from ucis import (
+    UCIS_HISTORYNODE_TEST, UCIS_TESTSTATUS_OK, UCIS_OTHER,
+    UCIS_DU_MODULE, UCIS_ENABLED_STMT, UCIS_ENABLED_BRANCH,
+    UCIS_INST_ONCE, UCIS_SCOPE_UNDER_DU, UCIS_INSTANCE, UCIS_VLOG,
+)
+from ucis.mem.mem_factory import MemFactory
+from ucis.source_info import SourceInfo
+from ucis.test_data import TestData
+from ucis.tui.models.coverage_model import CoverageModel
+
+
+# ---------------------------------------------------------------------------
+# StubApp – minimal app object required by BaseView
+# ---------------------------------------------------------------------------
+
+class StubApp:
+    """Minimal app stub for instantiating views without a running TUI."""
+
+    def __init__(self, model: CoverageModel):
+        self.coverage_model = model
+        self.status_bar = MagicMock()
+        self.controller = MagicMock()
+
+
+# ---------------------------------------------------------------------------
+# Low-level database builders
+# ---------------------------------------------------------------------------
+
+def _add_test(db, logical_name="test1"):
+    """Add a passing test history node to *db*."""
+    node = db.createHistoryNode(None, logical_name, logical_name, UCIS_HISTORYNODE_TEST)
+    node.setTestData(TestData(
+        teststatus=UCIS_TESTSTATUS_OK,
+        toolcategory="UCIS:simulator",
+        date="20240101000000",
+    ))
+    return node
+
+
+def _add_instance(db):
+    """Add a minimal DU + instance and return the instance scope."""
+    file_h = db.createFileHandle("design.sv", "/rtl")
+    du = db.createScope(
+        "work.top", SourceInfo(file_h, 1, 0), 1,
+        UCIS_OTHER, UCIS_DU_MODULE,
+        UCIS_ENABLED_STMT | UCIS_ENABLED_BRANCH | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU,
+    )
+    inst = db.createInstance(
+        "tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE
+    )
+    return inst, file_h
+
+
+def _make_partial_coverage_db(db):
+    """
+    Build a database with known 50 % functional coverage.
+
+    Structure
+    ---------
+    tb (instance)
+      cg1 (covergroup)
+        cp1 (coverpoint)  bins: a(hit), b(hit), c(miss), d(miss) → 2/4 = 50 %
+      cg2 (covergroup)
+        cp2 (coverpoint)  bins: x(hit), y(miss) → 1/2 = 50 %
+
+    Overall: 3 covered / 6 total = 50 %
+    """
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+
+    cg1 = inst.createCovergroup("cg1", src, 1, UCIS_OTHER)
+    cp1 = cg1.createCoverpoint("cp1", src, 1, UCIS_VLOG)
+    cp1.createBin("a", src, 1, 5, "a")   # hit  (5 >= 1)
+    cp1.createBin("b", src, 1, 3, "b")   # hit  (3 >= 1)
+    cp1.createBin("c", src, 1, 0, "c")   # miss
+    cp1.createBin("d", src, 1, 0, "d")   # miss
+
+    cg2 = inst.createCovergroup("cg2", src, 1, UCIS_OTHER)
+    cp2 = cg2.createCoverpoint("cp2", src, 1, UCIS_VLOG)
+    cp2.createBin("x", src, 1, 10, "x")  # hit
+    cp2.createBin("y", src, 1, 0,  "y")  # miss
+
+    expected = {
+        "total_bins": 6,
+        "covered_bins": 3,
+        "overall_coverage": 50.0,
+        "covergroups": 2,
+        "coverpoints": 2,
+        "gaps": [
+            # (name, coverage_pct)
+            ("cp1", 50.0),
+            ("cp2", 50.0),
+        ],
+    }
+    return expected
+
+
+def _make_zero_coverage_db(db):
+    """Database where no bins have been hit."""
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+
+    cg = inst.createCovergroup("cg_zero", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_zero", src, 1, UCIS_VLOG)
+    cp.createBin("b0", src, 1, 0, "b0")
+    cp.createBin("b1", src, 1, 0, "b1")
+    cp.createBin("b2", src, 1, 0, "b2")
+
+    expected = {
+        "total_bins": 3,
+        "covered_bins": 0,
+        "overall_coverage": 0.0,
+        "covergroups": 1,
+        "coverpoints": 1,
+    }
+    return expected
+
+
+def _make_full_coverage_db(db):
+    """Database where every bin has been hit."""
+    _add_test(db)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+
+    cg = inst.createCovergroup("cg_full", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_full", src, 1, UCIS_VLOG)
+    cp.createBin("b0", src, 1, 1, "b0")
+    cp.createBin("b1", src, 1, 2, "b1")
+    cp.createBin("b2", src, 1, 7, "b2")
+
+    expected = {
+        "total_bins": 3,
+        "covered_bins": 3,
+        "overall_coverage": 100.0,
+        "covergroups": 1,
+        "coverpoints": 1,
+    }
+    return expected
+
+
+def _make_multi_test_db(db):
+    """
+    Three tests each contributing unique bins.
+
+    tb / cg_mt / cp_mt  bins b0..b5
+      test_a hits b0, b1
+      test_b hits b2, b3
+      test_c hits b4, b5  (not added → miss)
+
+    Overall: 4 covered / 6 total  ≈ 66.7 %
+    """
+    for name in ("test_a", "test_b", "test_c"):
+        _add_test(db, name)
+    inst, file_h = _add_instance(db)
+    src = SourceInfo(file_h, 3, 0)
+
+    cg = inst.createCovergroup("cg_mt", src, 1, UCIS_OTHER)
+    cp = cg.createCoverpoint("cp_mt", src, 1, UCIS_VLOG)
+    cp.createBin("b0", src, 1, 1, "b0")
+    cp.createBin("b1", src, 1, 1, "b1")
+    cp.createBin("b2", src, 1, 1, "b2")
+    cp.createBin("b3", src, 1, 1, "b3")
+    cp.createBin("b4", src, 1, 0, "b4")
+    cp.createBin("b5", src, 1, 0, "b5")
+
+    expected = {
+        "total_bins": 6,
+        "covered_bins": 4,
+        "covergroups": 1,
+        "coverpoints": 1,
+        "test_names": ["test_a", "test_b", "test_c"],
+    }
+    return expected
+
+
+# ---------------------------------------------------------------------------
+# CoverageModel factory helpers (API path via XML, SQLite path)
+# ---------------------------------------------------------------------------
+
+def _model_from_mem_db(db, tmp_path):
+    """Write *db* to XML then load via CoverageModel (exercises API/XML path)."""
+    from ucis.xml.xml_factory import XmlFactory
+    xml_path = str(tmp_path / "test.xml")
+    XmlFactory.write(db, xml_path)
+    return CoverageModel(xml_path)
+
+
+def _model_from_sqlite(builder_fn, tmp_path):
+    """Create a fresh SQLiteUCIS, populate it, return CoverageModel (exercises SQLite path)."""
+    from ucis.sqlite.sqlite_ucis import SqliteUCIS
+    db_path = str(tmp_path / "test.db")
+    db = SqliteUCIS(db_path)
+    expected = builder_fn(db)
+    db.close()
+    model = CoverageModel(db_path)
+    return model, expected
+
+
+# ---------------------------------------------------------------------------
+# Pytest fixtures
+# ---------------------------------------------------------------------------
+
+# Parametrize over (backend_label, builder_fn, extra_info) pairs so the same
+# test body exercises both the UCIS-API path (XML) and the SQLite fast path.
+
+PARTIAL_BUILDERS = [
+    pytest.param("xml",    _make_partial_coverage_db, id="xml"),
+    pytest.param("sqlite", _make_partial_coverage_db, id="sqlite"),
+]
+
+ZERO_BUILDERS = [
+    pytest.param("xml",    _make_zero_coverage_db, id="xml"),
+    pytest.param("sqlite", _make_zero_coverage_db, id="sqlite"),
+]
+
+FULL_BUILDERS = [
+    pytest.param("xml",    _make_full_coverage_db, id="xml"),
+    pytest.param("sqlite", _make_full_coverage_db, id="sqlite"),
+]
+
+MULTI_TEST_BUILDERS = [
+    pytest.param("xml",    _make_multi_test_db, id="xml"),
+    pytest.param("sqlite", _make_multi_test_db, id="sqlite"),
+]
+
+
+def make_model_and_expected(backend: str, builder_fn, tmp_path):
+    """Create (CoverageModel, expected_dict) for the given backend and builder."""
+    if backend == "sqlite":
+        return _model_from_sqlite(builder_fn, tmp_path)
+    else:  # xml / mem
+        db = MemFactory.create()
+        expected = builder_fn(db)
+        model = _model_from_mem_db(db, tmp_path)
+        return model, expected
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def partial_coverage(request, tmp_path):
+    """(model, expected) for a 50% coverage database."""
+    model, expected = make_model_and_expected(
+        request.param, _make_partial_coverage_db, tmp_path
+    )
+    yield model, expected
+    model.close()
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def zero_coverage(request, tmp_path):
+    """(model, expected) for a zero-coverage database."""
+    model, expected = make_model_and_expected(
+        request.param, _make_zero_coverage_db, tmp_path
+    )
+    yield model, expected
+    model.close()
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def full_coverage(request, tmp_path):
+    """(model, expected) for a 100% coverage database."""
+    model, expected = make_model_and_expected(
+        request.param, _make_full_coverage_db, tmp_path
+    )
+    yield model, expected
+    model.close()
+
+
+@pytest.fixture(params=["xml", "sqlite"])
+def multi_test(request, tmp_path):
+    """(model, expected) for a multi-test database."""
+    model, expected = make_model_and_expected(
+        request.param, _make_multi_test_db, tmp_path
+    )
+    yield model, expected
+    model.close()
+
+
+# Path of the committed test_vlt.cdb SQLite regression database, one directory above this file's folder.
+VLT_CDB = os.path.join(os.path.dirname(__file__), "..", "test_vlt.cdb")
+
+
+@pytest.fixture
+def vlt_model():
+    """CoverageModel loaded from the committed test_vlt.cdb SQLite fixture."""
+    if not os.path.exists(VLT_CDB):
+        pytest.skip(f"test_vlt.cdb not found at {VLT_CDB}")
+    model = CoverageModel(VLT_CDB)
+    yield model
+    model.close()
diff --git a/tests/unit/ncdb/test_bucket_index.py b/tests/unit/ncdb/test_bucket_index.py
new file mode 100644
index 0000000..4db626a
--- /dev/null
+++ b/tests/unit/ncdb/test_bucket_index.py
@@ -0,0 +1,113 @@
+"""Unit tests for BucketIndex (history/bucket_index.bin)."""
+import pytest
+from ucis.ncdb.bucket_index import BucketIndex
+
+
+def _idx(*buckets):
+    """Build a BucketIndex from (seq, ts_start, ts_end, records, fails, min_nid, max_nid)."""
+    idx = BucketIndex()
+    for seq, ts_start, ts_end, records, fails, min_nid, max_nid in buckets:
+        idx.add_bucket(seq, ts_start, ts_end, records, fails, min_nid, max_nid)
+    return idx
+
+
+def test_add_and_query_range():
+    idx = _idx(
+        (0, 1000, 1999, 100, 10, 0, 5),
+        (1, 2000, 2999, 200,  5, 0, 7),
+        (2, 3000, 3999,  50,  0, 3, 9),
+    )
+    hits = idx.buckets_in_range(1500, 2500)
+    seqs = [e.bucket_seq for e in hits]
+    assert 0 in seqs and 1 in seqs and 2 not in seqs
+
+
+def test_buckets_for_name():
+    idx = _idx(
+        (0, 1000, 1999, 100, 10, 0, 5),
+        (1, 2000, 2999, 200,  5, 6, 9),
+    )
+    # name_id=3 is in bucket 0 only
+    hits = idx.buckets_for_name(3)
+    assert len(hits) == 1 and hits[0].bucket_seq == 0
+
+    # name_id=7 is in bucket 1 only
+    hits = idx.buckets_for_name(7)
+    assert len(hits) == 1 and hits[0].bucket_seq == 1
+
+    # name_id=10 is in neither
+    assert idx.buckets_for_name(10) == []
+
+
+def test_buckets_for_name_with_time_filter():
+    idx = _idx(
+        (0, 1000, 1999, 100, 0, 0, 9),
+        (1, 2000, 2999, 100, 0, 0, 9),
+    )
+    hits = idx.buckets_for_name(5, ts_from=2000)
+    assert len(hits) == 1 and hits[0].bucket_seq == 1
+
+
+def test_pass_rate_series():
+    idx = _idx(
+        (0, 1000, 1999, 100, 10, 0, 5),
+        (1, 2000, 2999, 200,  0, 0, 5),
+    )
+    series = idx.pass_rate_series()
+    assert len(series) == 2
+    ts0, rate0 = series[0]
+    ts1, rate1 = series[1]
+    assert ts0 == 1000
+    assert abs(rate0 - 0.90) < 1e-6
+    assert abs(rate1 - 1.00) < 1e-6
+
+
+def test_serialize_deserialize_empty():
+    idx = BucketIndex()
+    idx2 = BucketIndex.deserialize(idx.serialize())
+    assert idx2.num_buckets == 0
+
+
+def test_serialize_deserialize_multiple():
+    idx = _idx(
+        (0, 1000, 1999, 100, 10, 0, 5),
+        (1, 2000, 2999, 200,  5, 0, 7),
+    )
+    data = idx.serialize()
+    idx2 = BucketIndex.deserialize(data)
+    assert idx2.num_buckets == 2
+    e = idx2.buckets_in_range(1000, 1999)
+    assert len(e) == 1 and e[0].fail_count == 10
+
+
+def test_serialize_3650_entries_size():
+    """10 years of buckets (one per day) should be well under 200 KB."""
+    idx = BucketIndex()
+    for i in range(3650):
+        idx.add_bucket(i, 1700000000 + i * 86400, 1700000000 + (i + 1) * 86400 - 1,
+                       10000, 100, 0, 999)
+    data = idx.serialize()
+    assert len(data) < 200 * 1024, f"Index too large: {len(data)} bytes"
+
+
+def test_next_seq():
+    idx = BucketIndex()
+    assert idx.next_seq() == 0
+    idx.add_bucket(0, 1000, 1999, 100, 0, 0, 0)
+    assert idx.next_seq() == 1
+    idx.add_bucket(1, 2000, 2999, 100, 0, 0, 0)
+    assert idx.next_seq() == 2
+
+
+def test_add_bucket_replaces_existing():
+    idx = BucketIndex()
+    idx.add_bucket(0, 1000, 1999, 100, 10, 0, 5)
+    idx.add_bucket(0, 1000, 1999, 200, 20, 0, 5)   # update same seq
+    assert idx.num_buckets == 1
+    assert idx._entries[0].num_records == 200
+
+
+def test_bad_magic_raises():
+    data = b"\x00\x00\x00\x00" + b"\x00" * 8
+    with pytest.raises(ValueError, match="Bad magic"):
+        BucketIndex.deserialize(data)
diff --git a/tests/unit/ncdb/test_contrib_index.py b/tests/unit/ncdb/test_contrib_index.py
new file mode 100644
index 0000000..092b902
--- /dev/null
+++ b/tests/unit/ncdb/test_contrib_index.py
@@ -0,0 +1,94 @@
+"""Unit tests for ContribIndex (contrib_index.bin)."""
+import pytest
+from ucis.ncdb.contrib_index import (
+    ContribIndex,
+    POLICY_ALL, POLICY_PASS_ONLY, POLICY_EXCLUDE_ERROR_RERUN, POLICY_STRICT,
+    FLAG_IS_RERUN, FLAG_FIRST_ATTEMPT_PASSED,
+)
+from ucis.ncdb.constants import HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_ERROR
+
+
+def test_add_and_passing_run_ids_pass_only():
+    ci = ContribIndex()
+    ci.add_entry(0, 0, HIST_STATUS_OK,   0)
+    ci.add_entry(1, 1, HIST_STATUS_FAIL, 0)
+    ci.add_entry(2, 0, HIST_STATUS_OK,   0)
+    assert ci.passing_run_ids(POLICY_PASS_ONLY) == [0, 2]
+
+
+def test_policy_all():
+    ci = ContribIndex()
+    ci.add_entry(0, 0, HIST_STATUS_OK,   0)
+    ci.add_entry(1, 0, HIST_STATUS_FAIL, 0)
+    assert ci.passing_run_ids(POLICY_ALL) == [0, 1]
+
+
+def test_policy_strict_excludes_rerun_without_first_pass():
+    ci = ContribIndex()
+    ci.add_entry(0, 0, HIST_STATUS_OK, 0)                          # normal pass → included
+    ci.add_entry(1, 0, HIST_STATUS_OK, FLAG_IS_RERUN)              # rerun, first attempt failed → excluded
+    ci.add_entry(2, 0, HIST_STATUS_OK,
+                 FLAG_IS_RERUN | FLAG_FIRST_ATTEMPT_PASSED)        # rerun, first also passed → included
+    assert ci.passing_run_ids(POLICY_STRICT) == [0, 2]
+
+
+def test_policy_exclude_error_rerun_same_as_pass_only():
+    ci = ContribIndex()
+    ci.add_entry(0, 0, HIST_STATUS_OK,    0)
+    ci.add_entry(1, 0, HIST_STATUS_ERROR, 0)
+    assert ci.passing_run_ids(POLICY_EXCLUDE_ERROR_RERUN) == [0]
+
+
+def test_squash_watermark_update():
+    ci = ContribIndex(squash_watermark=0)
+    ci.set_squash_watermark(99)
+    assert ci.squash_watermark == 99
+
+
+def test_remove_entries_after_squash():
+    ci = ContribIndex()
+    for run_id in range(10):
+        ci.add_entry(run_id, 0, HIST_STATUS_OK, 0)
+    ci.remove_entries_up_to(4)
+    assert ci.num_active == 5
+    remaining = [e.run_id for e in ci._entries]
+    assert remaining == [5, 6, 7, 8, 9]
+
+
+def test_max_run_id_from_entries():
+    ci = ContribIndex()
+    ci.add_entry(0, 0, HIST_STATUS_OK, 0)
+    ci.add_entry(7, 0, HIST_STATUS_OK, 0)
+    assert ci.max_run_id() == 7
+
+
+def test_max_run_id_falls_back_to_watermark():
+    ci = ContribIndex(squash_watermark=42)
+    assert ci.max_run_id() == 42
+
+
+def test_serialize_deserialize_empty():
+    ci = ContribIndex()
+    ci2 = ContribIndex.deserialize(ci.serialize())
+    assert ci2.num_active == 0
+    assert ci2.merge_policy == POLICY_PASS_ONLY
+
+
+def test_serialize_deserialize_with_entries():
+    ci = ContribIndex(merge_policy=POLICY_STRICT, squash_watermark=10)
+    ci.add_entry(11, 0, HIST_STATUS_OK,   FLAG_IS_RERUN)
+    ci.add_entry(12, 1, HIST_STATUS_FAIL, 0)
+    data = ci.serialize()
+    ci2 = ContribIndex.deserialize(data)
+    assert ci2.merge_policy    == POLICY_STRICT
+    assert ci2.squash_watermark == 10
+    assert ci2.num_active == 2
+    assert ci2._entries[0].run_id == 11
+    assert ci2._entries[0].is_rerun is True
+    assert ci2._entries[1].status  == HIST_STATUS_FAIL
+
+
+def test_bad_magic_raises():
+    data = b"\x00\x00\x00\x00" + b"\x00" * 20
+    with pytest.raises(ValueError, match="Bad magic"):
+        ContribIndex.deserialize(data)
diff --git a/tests/unit/ncdb/test_history_buckets.py b/tests/unit/ncdb/test_history_buckets.py
new file mode 100644
index 0000000..6206683
--- /dev/null
+++ b/tests/unit/ncdb/test_history_buckets.py
@@ -0,0 +1,144 @@
+"""Unit tests for BucketWriter / BucketReader (history/NNNNNN.bin)."""
+import pytest
+from ucis.ncdb.history_buckets import BucketWriter, BucketReader
+from ucis.ncdb.constants import (
+    HIST_STATUS_OK, HIST_STATUS_FAIL,
+    HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE,
+    HISTORY_BUCKET_MAX_RECORDS,
+)
+
+
+def _bucket(*records):
+    """Write records and return a BucketReader over them.
+
+    Each record is a (name_id, seed_id, ts, status, flags) tuple.
+    """
+    w = BucketWriter()
+    for name_id, seed_id, ts, status, flags in records:
+        w.add(name_id, seed_id, ts, status, flags)
+    return BucketReader(w.seal_fast())
+
+
+def test_write_read_single_record():
+    r = _bucket((0, 0, 1700000000, HIST_STATUS_OK, 0))
+    assert r.num_records == 1
+    recs = r.records_for_name(0)
+    assert len(recs) == 1
+    assert recs[0].ts == 1700000000
+    assert recs[0].status == HIST_STATUS_OK
+    assert recs[0].flags == 0
+
+
+def test_name_index_binary_search():
+    records = []
+    for nid in range(20):
+        for i in range(5):
+            records.append((nid, 0, 1700000000 + nid * 1000 + i * 100, HIST_STATUS_OK, 0))
+    r = _bucket(*records)
+    for nid in range(20):
+        found = r.records_for_name(nid)
+        assert len(found) == 5, f"name_id {nid}: expected 5, got {len(found)}"
+
+
+def test_records_for_name_not_present():
+    r = _bucket((0, 0, 1700000000, HIST_STATUS_OK, 0))
+    assert r.records_for_name(99) == []
+
+
+def test_seed_dict_mapping():
+    # Two different global seed_ids should map back correctly
+    w = BucketWriter()
+    w.add(0, 42, 1700000000, HIST_STATUS_OK, 0)
+    w.add(0, 99, 1700000001, HIST_STATUS_OK, 0)
+    r = BucketReader(w.seal_fast())
+    recs = r.records_for_name(0)
+    seed_ids = {rec.seed_id for rec in recs}
+    assert seed_ids == {42, 99}
+
+
+def test_ts_delta_encoding():
+    base = 1700000000
+    timestamps = [base, base + 100, base + 250, base + 1000]
+    records = [(0, 0, ts, HIST_STATUS_OK, 0) for ts in timestamps]
+    r = _bucket(*records)
+    recs = r.records_for_name(0)
+    recovered = sorted(rec.ts for rec in recs)
+    assert recovered == timestamps
+
+
+def test_status_flags_pack_unpack():
+    """All status × flag combinations round-trip through the nibble-packed byte."""
+    statuses = [HIST_STATUS_OK, HIST_STATUS_FAIL]
+    flags_list = [0, HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE,
+                  HIST_FLAG_IS_RERUN | HIST_FLAG_HAS_COVERAGE]
+    records = []
+    ts = 1700000000
+    for nid, (status, flags) in enumerate(
+            (s, f) for s in statuses for f in flags_list):
+        records.append((nid, 0, ts + nid * 100, status, flags))
+    r = _bucket(*records)
+    for nid, (status, flags) in enumerate(
+            (s, f) for s in statuses for f in flags_list):
+        recs = r.records_for_name(nid)
+        assert len(recs) == 1
+        assert recs[0].status == status, f"nid={nid} status mismatch"
+        assert recs[0].flags  == flags,  f"nid={nid} flags mismatch"
+
+
+def test_multiple_names_correct_counts():
+    w = BucketWriter()
+    for i in range(10):
+        w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0)
+    for i in range(5):
+        w.add(1, 0, 1700001000 + i * 100, HIST_STATUS_FAIL, 0)
+    r = BucketReader(w.seal_fast())
+    assert r.num_records == 15
+    assert len(r.records_for_name(0)) == 10
+    assert len(r.records_for_name(1)) == 5
+
+
+def test_seal_deflate():
+    w = BucketWriter()
+    for i in range(100):
+        w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0)
+    data = w.seal_fast()
+    r = BucketReader(data)
+    assert r.num_records == 100
+
+
+def test_seal_lzma_or_fallback():
+    """seal() should succeed regardless of liblzma availability."""
+    w = BucketWriter()
+    for i in range(100):
+        w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0)
+    data = w.seal(use_lzma=True)   # lzma or deflate fallback
+    r = BucketReader(data)
+    assert r.num_records == 100
+
+
+def test_10k_records_compressed_size():
+    """10K records should compress to ≤ 50 KB (well under 5 MB design target)."""
+    w = BucketWriter()
+    for i in range(HISTORY_BUCKET_MAX_RECORDS):
+        name_id = i % 100
+        w.add(name_id, 0, 1700000000 + i * 10, HIST_STATUS_OK, 0)
+    data = w.seal_fast()
+    assert len(data) < 50 * 1024, f"Bucket too large: {len(data)} bytes"
+
+
+def test_all_records_iteration():
+    w = BucketWriter()
+    for nid in range(3):
+        for i in range(4):
+            w.add(nid, 0, 1700000000 + nid * 10000 + i * 100, HIST_STATUS_OK, 0)
+    r = BucketReader(w.seal_fast())
+    all_recs = list(r.all_records())
+    assert len(all_recs) == 12
+
+
+def test_is_full():
+    w = BucketWriter()
+    assert not w.is_full()
+    for i in range(HISTORY_BUCKET_MAX_RECORDS):
+        w.add(0, 0, 1700000000 + i, HIST_STATUS_OK, 0)
+    assert w.is_full()
diff --git a/tests/unit/ncdb/test_reports.py b/tests/unit/ncdb/test_reports.py
new file mode 100644
index 0000000..f7b0e99
--- /dev/null
+++ b/tests/unit/ncdb/test_reports.py
@@ -0,0 +1,705 @@
+"""Unit tests for ucis.ncdb.reports."""
+
+import json
+import pytest
+from unittest.mock import MagicMock
+
+from ucis.ncdb.testplan import Testplan, Testpoint
+from ucis.ncdb.testplan_closure import TPStatus, TestpointResult
+from ucis.ncdb.reports import (
+    ClosureSummary,
+    StageGateReport,
+    CoveragePerTestpoint,
+    RegressionDelta,
+    StageProgression,
+    TestpointReliability,
+    UnexercisedCovergroups,
+    CoverageContribution,
+    TestBudget,
+    SafetyMatrix,
+    SeedReliability,
+    report_testpoint_closure,
+    format_testpoint_closure,
+    report_stage_gate,
+    format_stage_gate,
+    report_coverage_per_testpoint,
+    format_coverage_per_testpoint,
+    report_regression_delta,
+    format_regression_delta,
+    report_stage_progression,
+    format_stage_progression,
+    report_testpoint_reliability,
+    format_testpoint_reliability,
+    report_unexercised_covergroups,
+    format_unexercised_covergroups,
+    report_coverage_contribution,
+    format_coverage_contribution,
+    report_test_budget,
+    format_test_budget,
+    report_safety_matrix,
+    format_safety_matrix,
+    report_seed_reliability,
+    format_seed_reliability,
+)
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+def _make_tp(name, stage="V1", tests=None, na=False):
+    return Testpoint(name=name, stage=stage, tests=tests or [name], na=na)  # falsy tests -> [name]
+
+
+def _make_result(tp, status, pass_count=0, fail_count=0, matched=None):
+    return TestpointResult(
+        testpoint=tp,
+        status=status,
+        matched_tests=matched or [],
+        pass_count=pass_count,
+        fail_count=fail_count,
+    )
+
+
+@pytest.fixture
+def simple_results():
+    tps = [
+        _make_tp("tp_alpha", "V1"),
+        _make_tp("tp_beta", "V1"),
+        _make_tp("tp_gamma", "V2"),
+        _make_tp("tp_delta", "V2"),
+        _make_tp("tp_na", "V1", na=True),
+    ]
+    return [
+        _make_result(tps[0], TPStatus.CLOSED, pass_count=5, matched=["tp_alpha"]),
+        _make_result(tps[1], TPStatus.FAILING, fail_count=3, matched=["tp_beta"]),
+        _make_result(tps[2], TPStatus.PARTIAL, pass_count=2, fail_count=1,
+                     matched=["tp_gamma"]),
+        _make_result(tps[3], TPStatus.NOT_RUN),
+        _make_result(tps[4], TPStatus.NA),
+    ]
+
+
+@pytest.fixture
+def simple_plan():
+    plan = Testplan(source_file="test.hjson")
+    plan.add_testpoint(Testpoint(name="tp_alpha", stage="V1", tests=["tp_alpha"]))
+    plan.add_testpoint(Testpoint(name="tp_beta", stage="V1", tests=["tp_beta"]))
+    plan.add_testpoint(Testpoint(name="tp_gamma", stage="V2", tests=["tp_gamma"]))
+    plan.add_testpoint(Testpoint(name="tp_delta", stage="V2", tests=["tp_delta"]))
+    return plan
+
+
+# ---------------------------------------------------------------------------
+# Report A — testpoint closure
+# ---------------------------------------------------------------------------
+
+class TestReportTestpointClosure:
+    def test_returns_closure_summary(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        assert isinstance(summary, ClosureSummary)
+
+    def test_total_counts(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        assert summary.total == 5
+        assert summary.total_closed == 1
+        assert summary.total_na == 1
+
+    def test_by_stage_keys(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        assert "V1" in summary.by_stage
+        assert "V2" in summary.by_stage
+
+    def test_by_stage_counts(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        # V1: tp_alpha (closed) + tp_beta (failing) — tp_na is NA, excluded
+        assert summary.by_stage["V1"]["total"] == 2
+        assert summary.by_stage["V1"]["closed"] == 1
+        # V2: tp_gamma (partial) + tp_delta (not_run)
+        assert summary.by_stage["V2"]["total"] == 2
+        assert summary.by_stage["V2"]["closed"] == 0
+
+    def test_stage_pct_calculation(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        assert summary.by_stage["V1"]["pct"] == 50.0
+
+    def test_to_json_valid(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        d = json.loads(summary.to_json())
+        assert d["total"] == 5
+        assert len(d["testpoints"]) == 5
+
+    def test_format_renders_header(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        text = format_testpoint_closure(summary)
+        assert "Testpoint" in text
+        assert "Stage" in text
+        assert "Status" in text
+
+    def test_format_contains_testpoint_name(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        text = format_testpoint_closure(summary)
+        assert "tp_alpha" in text
+        assert "tp_beta" in text
+
+    def test_format_skips_na_by_default(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        text = format_testpoint_closure(summary)
+        # tp_na has TPStatus.NA — should not appear by default
+        assert "tp_na" not in text
+
+    def test_format_show_all_includes_na(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        text = format_testpoint_closure(summary, show_all=True)
+        assert "tp_na" in text
+
+    def test_format_stage_rollup_present(self, simple_results):
+        summary = report_testpoint_closure(simple_results)
+        text = format_testpoint_closure(summary)
+        assert "Stage roll-up" in text
+        assert "V1" in text
+        assert "V2" in text
+
+
+# ---------------------------------------------------------------------------
+# Report B — stage gate
+# ---------------------------------------------------------------------------
+
+class TestReportStageGate:
+    def test_pass_when_all_v1_closed(self):
+        tp = _make_tp("tp1", "V1")
+        results = [_make_result(tp, TPStatus.CLOSED, pass_count=3)]
+        plan = Testplan(source_file="x.hjson")
+        plan.add_testpoint(Testpoint(name="tp1", stage="V1", tests=["tp1"]))
+        gate = report_stage_gate(results, "V1", plan)
+        assert isinstance(gate, StageGateReport)
+        assert gate.passed is True
+        assert gate.blocking == []
+
+    def test_fail_when_v1_failing(self, simple_results, simple_plan):
+        gate = report_stage_gate(simple_results, "V1", simple_plan)
+        assert gate.passed is False
+        assert any(r.testpoint.name == "tp_beta" for r in gate.blocking)
+
+    def test_to_json_valid(self, simple_results, simple_plan):
+        gate = report_stage_gate(simple_results, "V1", simple_plan)
+        d = json.loads(gate.to_json())
+        assert "passed" in d
+        assert "blocking" in d
+
+    def test_format_shows_verdict(self, simple_results, simple_plan):
+        gate = report_stage_gate(simple_results, "V1", simple_plan)
+        text = format_stage_gate(gate)
+        assert "V1" in text
+        assert "FAIL" in text or "PASS" in text
+
+    def test_format_lists_blocking(self, simple_results, simple_plan):
+        gate = report_stage_gate(simple_results, "V2", simple_plan)
+        text = format_stage_gate(gate)
+        assert "tp_beta" in text or "tp_gamma" in text or "tp_delta" in text
+
+
+# ---------------------------------------------------------------------------
+# Report D — regression delta
+# ---------------------------------------------------------------------------
+
+class TestReportRegressionDelta:
+    def test_returns_delta(self, simple_results):
+        # All results "old" → same results "new" → no change
+        delta = report_regression_delta(simple_results, simple_results)
+        assert isinstance(delta, RegressionDelta)
+        assert delta.newly_closed == []
+        assert delta.newly_failing == []
+
+    def test_detects_newly_closed(self, simple_results):
+        old = [_make_result(_make_tp("tp_beta", "V1"), TPStatus.NOT_RUN)]
+        new = [_make_result(_make_tp("tp_beta", "V1"), TPStatus.CLOSED,
+                            pass_count=1)]
+        delta = report_regression_delta(new, old)
+        assert len(delta.newly_closed) == 1
+        assert delta.newly_closed[0].testpoint.name == "tp_beta"
+
+    def test_detects_newly_failing(self):
+        old = [_make_result(_make_tp("tp_a", "V1"), TPStatus.PARTIAL,
+                            pass_count=1, fail_count=1)]
+        new = [_make_result(_make_tp("tp_a", "V1"), TPStatus.FAILING,
+                            fail_count=5)]
+        delta = report_regression_delta(new, old)
+        assert len(delta.newly_failing) == 1
+
+    def test_to_json_valid(self, simple_results):
+        delta = report_regression_delta(simple_results, simple_results)
+        d = json.loads(delta.to_json())
+        assert "summary" in d
+        assert "newly_closed" in d
+
+    def test_format_shows_summary(self, simple_results):
+        delta = report_regression_delta(simple_results, simple_results)
+        text = format_regression_delta(delta)
+        assert "delta" in text.lower()
+
+
+# ---------------------------------------------------------------------------
+# Report F — testpoint reliability
+# ---------------------------------------------------------------------------
+
+class TestReportTestpointReliability:
+    def test_returns_dataclass(self, simple_results):
+        db = MagicMock()
+        db.get_test_stats.return_value = None
+        report = report_testpoint_reliability(simple_results, db)
+        assert isinstance(report, TestpointReliability)
+
+    def test_uses_stats_when_available(self, simple_results):
+        stats = MagicMock()
+        stats.flake_score = 0.75
+        stats.pass_count = 3
+        stats.fail_count = 2
+
+        db = MagicMock()
+        db.get_test_stats.return_value = stats
+
+        report = report_testpoint_reliability(simple_results, db)
+        # tp_alpha has matched_tests=["tp_alpha"] — should get flake 0.75
+        alpha_row = next(r for r in report.rows if r[0] == "tp_alpha")
+        assert alpha_row[1] == pytest.approx(0.75)
+
+    def test_sorted_by_flake_desc(self, simple_results):
+        stats_high = MagicMock()
+        stats_high.flake_score = 0.9
+        stats_high.pass_count = 1
+        stats_high.fail_count = 5
+
+        stats_low = MagicMock()
+        stats_low.flake_score = 0.1
+        stats_low.pass_count = 9
+        stats_low.fail_count = 1
+
+        db = MagicMock()
+        def _get(name):
+            return stats_high if "beta" in name else stats_low
+        db.get_test_stats.side_effect = _get
+
+        report = report_testpoint_reliability(simple_results, db)
+        scores = [r[1] for r in report.rows]
+        assert scores == sorted(scores, reverse=True)
+
+    def test_to_json_valid(self, simple_results):
+        db = MagicMock()
+        db.get_test_stats.return_value = None
+        report = report_testpoint_reliability(simple_results, db)
+        d = json.loads(report.to_json())
+        assert "rows" in d
+
+    def test_format_renders_table(self, simple_results):
+        db = MagicMock()
+        db.get_test_stats.return_value = None
+        report = report_testpoint_reliability(simple_results, db)
+        text = format_testpoint_reliability(report)
+        assert "Testpoint" in text
+        assert "Flake" in text
+
+
+# ---------------------------------------------------------------------------
+# Report G — unexercised covergroups
+# ---------------------------------------------------------------------------
+
+class TestReportUnexercisedCovergroups:
+    def _make_db_with_cg(self, cg_hit_pct: dict):
+        """Build a mock db whose covergroup scopes reflect cg_hit_pct."""
+        from ucis.scope_type_t import ScopeTypeT
+        from ucis.cover_type_t import CoverTypeT
+
+        def make_scope(name, pct):
+            scope = MagicMock()
+            scope.getScopeName.return_value = name
+            n_bins = 10
+            hit_bins = int(n_bins * pct / 100)
+            cp = MagicMock()
+            bins = []
+            for i in range(n_bins):
+                b = MagicMock()
+                b.getData.return_value = (1 if i < hit_bins else 0,)
+                bins.append(b)
+            cp.getCoverItems.return_value = bins
+            scope.getScopes.return_value = [cp]
+            return scope
+
+        scopes = [make_scope(name, pct) for name, pct in cg_hit_pct.items()]
+        db = MagicMock()
+        db.getScopes.return_value = scopes
+        return db
+
+    def test_zero_hit_detected(self):
+        from ucis.ncdb.testplan import CovergroupEntry
+        plan = Testplan(source_file="x.hjson")
+        plan.covergroups.append(CovergroupEntry(name="cg_reset"))
+        db = self._make_db_with_cg({"cg_reset": 0})
+        report = report_unexercised_covergroups(db, plan)
+        assert "cg_reset" in report.zero_hit
+
+    def test_low_hit_detected(self):
+        from ucis.ncdb.testplan import CovergroupEntry
+        plan = Testplan(source_file="x.hjson")
+        plan.covergroups.append(CovergroupEntry(name="cg_x"))
+        db = self._make_db_with_cg({"cg_x": 30})
+        report = report_unexercised_covergroups(db, plan, low_threshold=50.0)
+        assert any(n == "cg_x" for n, _ in report.low_hit)
+
+    def test_fully_hit_not_reported(self):
+        from ucis.ncdb.testplan import CovergroupEntry
+        plan = Testplan(source_file="x.hjson")
+        plan.covergroups.append(CovergroupEntry(name="cg_full"))
+        db = self._make_db_with_cg({"cg_full": 100})
+        report = report_unexercised_covergroups(db, plan)
+        assert "cg_full" not in report.zero_hit
+        assert not any(n == "cg_full" for n, _ in report.low_hit)
+
+    def test_to_json_valid(self):
+        plan = Testplan(source_file="x.hjson")
+        db = MagicMock()
+        db.getScopes.return_value = []
+        report = report_unexercised_covergroups(db, plan)
+        d = json.loads(report.to_json())
+        assert "zero_hit" in d
+
+    def test_format_shows_message(self):
+        plan = Testplan(source_file="x.hjson")
+        db = MagicMock()
+        db.getScopes.return_value = []
+        report = report_unexercised_covergroups(db, plan)
+        text = format_unexercised_covergroups(report)
+        assert len(text) > 0
+
+
+# ---------------------------------------------------------------------------
+# Report I — coverage contribution
+# ---------------------------------------------------------------------------
+
+class TestReportCoverageContribution:
+    def test_returns_dataclass_empty_on_no_data(self):
+        db = MagicMock()
+        db.get_test_coverage_api.return_value = []
+        report = report_coverage_contribution(db)
+        assert isinstance(report, CoverageContribution)
+        assert report.rows == []
+
+    def test_rows_sorted_by_unique_desc(self):
+        db = MagicMock()
+        db.get_test_coverage_api.return_value = [
+            {"test": "t1", "unique_bins": 10, "total_hits": 20, "total_bins": 100},
+            {"test": "t2", "unique_bins": 50, "total_hits": 80, "total_bins": 100},
+            {"test": "t3", "unique_bins": 30, "total_hits": 40, "total_bins": 100},
+        ]
+        report = report_coverage_contribution(db)
+        names = [r[0] for r in report.rows]
+        assert names == ["t2", "t3", "t1"]
+
+    def test_to_json_valid(self):
+        db = MagicMock()
+        db.get_test_coverage_api.return_value = [
+            {"test": "t1", "unique_bins": 5, "total_hits": 10, "total_bins": 50},
+        ]
+        report = report_coverage_contribution(db)
+        d = json.loads(report.to_json())
+        assert d["rows"][0]["test"] == "t1"
+
+    def test_format_no_data_message(self):
+        db = MagicMock()
+        db.get_test_coverage_api.return_value = []
+        report = report_coverage_contribution(db)
+        text = format_coverage_contribution(report)
+        assert "no contribution data" in text.lower()
+
+    def test_format_renders_table(self):
+        db = MagicMock()
+        db.get_test_coverage_api.return_value = [
+            {"test": "uart_smoke", "unique_bins": 42, "total_hits": 100,
+             "total_bins": 200},
+        ]
+        report = report_coverage_contribution(db)
+        text = format_coverage_contribution(report)
+        assert "uart_smoke" in text
+        assert "42" in text
+
+
+# ---------------------------------------------------------------------------
+# Report H — test budget (P2)
+# ---------------------------------------------------------------------------
+
+class TestReportTestBudget:
+    """Tests for report_test_budget / format_test_budget."""
+
+    def _make_testplan(self, stages):
+        """Build a Testplan with one testpoint per stage entry."""
+        testpoints = []
+        for i, (stage, tests) in enumerate(stages):
+            tp = MagicMock(spec=Testpoint)
+            tp.name = f"tp_{stage}_{i}"
+            tp.stage = stage
+            tp.na = False
+            tp.tests = tests
+            testpoints.append(tp)
+        tp_obj = MagicMock(spec=Testplan)
+        tp_obj.testpoints = testpoints
+        return tp_obj
+
+    def _make_db_with_stats(self, stats_map):
+        """Build a mock db that returns stats from stats_map by test name."""
+        db = MagicMock()
+        def get_stats(name):
+            if name in stats_map:
+                m = MagicMock()
+                m.total_runs, m.mean_cpu_time = stats_map[name]
+                return m
+            return None
+        db.get_test_stats.side_effect = get_stats
+        return db
+
+    def test_empty_testplan_returns_empty_budget(self):
+        tp = MagicMock(spec=Testplan)
+        tp.testpoints = []
+        db = MagicMock()
+        report = report_test_budget(tp, db)
+        assert isinstance(report, TestBudget)
+        assert report.rows == []
+        assert report.stage_totals == {}
+        assert report.missing_stats == []
+
+    def test_single_testpoint_with_stats(self):
+        tp = self._make_testplan([("V1", ["smoke"])])
+        db = self._make_db_with_stats({"smoke": (10, 30.0)})
+        report = report_test_budget(tp, db)
+        assert len(report.rows) == 1
+        stage, name, cpu, runs = report.rows[0]
+        assert stage == "V1"
+        assert runs == 10
+        assert abs(cpu - 30.0) < 0.01
+        assert "V1" in report.stage_totals
+
+    def test_missing_stats_tracked(self):
+        tp = self._make_testplan([("V2", ["unknown_test"])])
+        db = self._make_db_with_stats({})
+        report = report_test_budget(tp, db)
+        assert "tp_V2_0" in report.missing_stats
+
+    def test_na_testpoints_skipped(self):
+        tp_obj = MagicMock(spec=Testpoint)
+        tp_obj.name = "tp_na"
+        tp_obj.stage = "V1"
+        tp_obj.na = True
+        tp_obj.tests = ["some_test"]
+        plan = MagicMock(spec=Testplan)
+        plan.testpoints = [tp_obj]
+        db = MagicMock()
+        report = report_test_budget(plan, db)
+        assert report.rows == []
+
+    def test_stage_sorting_order(self):
+        tp = self._make_testplan([("V3", ["t3"]), ("V1", ["t1"]), ("V2", ["t2"])])
+        stats = {"t1": (5, 10.0), "t2": (5, 20.0), "t3": (5, 30.0)}
+        db = self._make_db_with_stats(stats)
+        report = report_test_budget(tp, db)
+        stages = [r[0] for r in report.rows]
+        assert stages.index("V1") < stages.index("V2") < stages.index("V3")
+
+    def test_to_json_valid(self):
+        tp = self._make_testplan([("V1", ["s1"])])
+        db = self._make_db_with_stats({"s1": (3, 15.0)})
+        report = report_test_budget(tp, db)
+        data = json.loads(report.to_json())
+        assert "rows" in data
+        assert "stage_totals" in data
+
+    def test_format_shows_stage_and_testpoint(self):
+        tp = self._make_testplan([("V1", ["t1"])])
+        db = self._make_db_with_stats({"t1": (2, 45.0)})
+        report = report_test_budget(tp, db)
+        text = format_test_budget(report)
+        assert "V1" in text
+        assert "tp_V1_0" in text
+
+    def test_format_empty_budget(self):
+        report = TestBudget(rows=[], stage_totals={}, missing_stats=[])
+        text = format_test_budget(report)
+        assert "no" in text.lower() or text == "" or isinstance(text, str)
+
+
+# ---------------------------------------------------------------------------
+# Report L — safety matrix (P2)
+# ---------------------------------------------------------------------------
+
+class TestReportSafetyMatrix:
+    """Tests for report_safety_matrix / format_safety_matrix."""
+
+    def _make_result(self, tp_name, status=TPStatus.CLOSED, reqs=None):
+        tp = MagicMock(spec=Testpoint)
+        tp.name = tp_name
+        if reqs is not None:
+            req_mocks = []
+            for r in reqs:
+                rm = MagicMock()
+                rm.id = r
+                rm.desc = f"Requirement {r}"
+                req_mocks.append(rm)
+            tp.requirements = req_mocks
+        else:
+            tp.requirements = []
+        result = MagicMock(spec=TestpointResult)
+        result.testpoint = tp
+        result.status = status
+        return result
+
+    def test_empty_results_returns_empty_matrix(self):
+        report = report_safety_matrix([])
+        assert isinstance(report, SafetyMatrix)
+        assert report.rows == []
+
+    def test_result_without_requirements_has_dash_req_id(self):
+        r = self._make_result("tp_uart", status=TPStatus.CLOSED)
+        report = report_safety_matrix([r])
+        assert len(report.rows) == 1
+        req_id, _, tp, status, waived = report.rows[0]
+        assert req_id == "—"
+        assert tp == "tp_uart"
+        assert "CLOSED" in status
+
+    def test_result_with_requirements_expands_rows(self):
+        r = self._make_result("tp_dma", status=TPStatus.PARTIAL, reqs=["REQ-001", "REQ-002"])
+        report = report_safety_matrix([r])
+        assert len(report.rows) == 2
+        req_ids = {row[0] for row in report.rows}
+        assert "REQ-001" in req_ids
+        assert "REQ-002" in req_ids
+
+    def test_waived_flag_false_without_waivers(self):
+        r = self._make_result("tp_x", reqs=["R1"])
+        report = report_safety_matrix([r])
+        assert report.rows[0][4] is False
+
+    def test_to_json_valid(self):
+        r = self._make_result("tp_y", reqs=["R-A"])
+        report = report_safety_matrix([r])
+        data = json.loads(report.to_json())
+        assert "rows" in data
+        assert data["rows"][0]["req_id"] == "R-A"
+
+    def test_to_csv_header(self):
+        report = report_safety_matrix([])
+        csv = report.to_csv()
+        assert csv.startswith("req_id,")
+
+    def test_format_shows_req_and_testpoint(self):
+        r = self._make_result("tp_bus", reqs=["REQ-007"])
+        report = report_safety_matrix([r])
+        text = format_safety_matrix(report)
+        assert "REQ-007" in text
+        assert "tp_bus" in text
+
+    def test_format_multiple_results(self):
+        results = [
+            self._make_result("tp_a", reqs=["R1"]),
+            self._make_result("tp_b", status=TPStatus.FAILING, reqs=["R2"]),
+        ]
+        report = report_safety_matrix(results)
+        text = format_safety_matrix(report)
+        assert "R1" in text
+        assert "R2" in text
+        assert "tp_a" in text
+        assert "tp_b" in text
+
+
+# ---------------------------------------------------------------------------
+# Report M — seed reliability (P2)
+# ---------------------------------------------------------------------------
+
+class TestReportSeedReliability:
+    """Tests for report_seed_reliability / format_seed_reliability."""
+
+    def _make_db_with_history(self, records):
+        """Build a mock db returning history records."""
+        db = MagicMock()
+        db.query_test_history.return_value = records
+        return db
+
+    def _rec(self, seed_id, status):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        rec = MagicMock()
+        rec.seed_id = seed_id
+        rec.status = status
+        return rec
+
+    def test_empty_history_returns_empty_rows(self):
+        db = self._make_db_with_history([])
+        report = report_seed_reliability(db, "uart_smoke")
+        assert isinstance(report, SeedReliability)
+        assert report.rows == []
+        assert report.total_seeds == 0
+
+    def test_single_seed_all_pass(self):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        recs = [self._rec(42, HIST_STATUS_OK), self._rec(42, HIST_STATUS_OK)]
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "t1")
+        assert len(report.rows) == 1
+        sid, pc, fc, flake = report.rows[0]
+        assert sid == 42
+        assert pc == 2
+        assert fc == 0
+        assert flake == 0.0
+
+    def test_single_seed_all_fail(self):
+        recs = [self._rec(7, 1), self._rec(7, 1)]  # status != HIST_STATUS_OK
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "t2")
+        assert len(report.rows) == 1
+        sid, pc, fc, flake = report.rows[0]
+        assert fc == 2
+        assert pc == 0
+
+    def test_flaky_seed_has_nonzero_flake_score(self):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        recs = [self._rec(1, HIST_STATUS_OK), self._rec(1, 1),
+                self._rec(1, HIST_STATUS_OK), self._rec(1, 1)]
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "flaky")
+        assert report.rows[0][3] > 0.0
+
+    def test_multiple_seeds_sorted_by_fail_count(self):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        recs = [
+            self._rec(1, HIST_STATUS_OK),
+            self._rec(2, 1), self._rec(2, 1), self._rec(2, 1),
+        ]
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "t")
+        assert report.rows[0][0] == 2  # seed 2 has 3 failures, comes first
+
+    def test_db_exception_returns_empty(self):
+        db = MagicMock()
+        db.query_test_history.side_effect = Exception("no history table")
+        report = report_seed_reliability(db, "t")
+        assert report.rows == []
+
+    def test_to_json_valid(self):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        recs = [self._rec(10, HIST_STATUS_OK)]
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "uart_smoke")
+        data = json.loads(report.to_json())
+        assert data["test_name"] == "uart_smoke"
+        assert "rows" in data
+
+    def test_format_shows_seed_id(self):
+        from ucis.ncdb.constants import HIST_STATUS_OK
+        recs = [self._rec(99, HIST_STATUS_OK)]
+        db = self._make_db_with_history(recs)
+        report = report_seed_reliability(db, "uart_smoke")
+        text = format_seed_reliability(report)
+        assert "99" in text
+
+    def test_format_empty_shows_no_history_message(self):
+        db = self._make_db_with_history([])
+        report = report_seed_reliability(db, "absent_test")
+        text = format_seed_reliability(report)
+        assert "absent_test" in text
diff --git a/tests/unit/ncdb/test_squash_log.py b/tests/unit/ncdb/test_squash_log.py
new file mode 100644
index 0000000..8da8f27
--- /dev/null
+++ b/tests/unit/ncdb/test_squash_log.py
@@ -0,0 +1,64 @@
+"""Unit tests for SquashLog (squash_log.bin)."""
+import pytest
+from ucis.ncdb.squash_log import SquashLog
+
+
+def test_append_single_entry():
+    log = SquashLog()
+    log.append(ts=1700000000, policy=1, from_run=0, to_run=9,
+               num_runs=10, pass_runs=9)
+    assert log.num_squashes == 1
+    entries = log.entries()
+    e = entries[0]
+    assert e.ts        == 1700000000
+    assert e.policy    == 1
+    assert e.from_run  == 0
+    assert e.to_run    == 9
+    assert e.num_runs  == 10
+    assert e.pass_runs == 9
+
+
+def test_append_multiple():
+    log = SquashLog()
+    for i in range(5):
+        log.append(ts=1700000000 + i * 86400, policy=1,
+                   from_run=i * 10, to_run=i * 10 + 9,
+                   num_runs=10, pass_runs=10)
+    assert log.num_squashes == 5
+    entries = log.entries()
+    assert entries[4].from_run == 40
+
+
+def test_serialize_deserialize_empty():
+    log = SquashLog()
+    log2 = SquashLog.deserialize(log.serialize())
+    assert log2.num_squashes == 0
+
+
+def test_serialize_deserialize_multiple():
+    log = SquashLog()
+    log.append(ts=1700000000, policy=1, from_run=0,  to_run=9,  num_runs=10, pass_runs=9)
+    log.append(ts=1700086400, policy=1, from_run=10, to_run=19, num_runs=10, pass_runs=8)
+    log.append(ts=1700172800, policy=1, from_run=20, to_run=29, num_runs=10, pass_runs=7)
+    data = log.serialize()
+    log2 = SquashLog.deserialize(data)
+    assert log2.num_squashes == 3
+    entries = log2.entries()
+    assert entries[2].to_run    == 29
+    assert entries[2].pass_runs == 7
+
+
+def test_all_policy_values():
+    log = SquashLog()
+    for policy in range(4):
+        log.append(ts=1700000000 + policy * 86400, policy=policy,
+                   from_run=0, to_run=9, num_runs=10, pass_runs=10 - policy)
+    entries = log.entries()
+    policies = [e.policy for e in entries]
+    assert policies == [0, 1, 2, 3]
+
+
+def test_bad_magic_raises():
+    data = b"\x00\x00\x00\x00" + b"\x00" * 8
+    with pytest.raises(ValueError, match="Bad magic"):
+        SquashLog.deserialize(data)
diff --git a/tests/unit/ncdb/test_test_registry.py b/tests/unit/ncdb/test_test_registry.py
new file mode 100644
index 0000000..5479083
--- /dev/null
+++ b/tests/unit/ncdb/test_test_registry.py
@@ -0,0 +1,130 @@
+"""Unit tests for TestRegistry (test_registry.bin)."""
+import pytest
+from ucis.ncdb.test_registry import TestRegistry
+
+
+def test_assign_run_id_increments():
+    reg = TestRegistry()
+    assert reg.assign_run_id() == 0
+    assert reg.assign_run_id() == 1
+    assert reg.assign_run_id() == 2
+
+
+def test_assign_run_id_survives_roundtrip():
+    reg = TestRegistry()
+    reg.assign_run_id(); reg.assign_run_id()
+    reg2 = TestRegistry.deserialize(reg.serialize())
+    assert reg2.assign_run_id() == 2
+
+
+def test_lookup_name_id_new():
+    reg = TestRegistry()
+    nid = reg.lookup_name_id("uart_smoke")
+    assert nid == 0
+    assert reg.num_names == 1
+
+
+def test_lookup_name_id_existing():
+    reg = TestRegistry()
+    nid1 = reg.lookup_name_id("uart_smoke")
+    nid2 = reg.lookup_name_id("uart_smoke")
+    assert nid1 == nid2
+
+
+def test_name_heap_insertion_order():
+    """name_ids are assigned by insertion order and never shift."""
+    reg = TestRegistry()
+    reg.lookup_name_id("zebra")
+    reg.lookup_name_id("apple")
+    reg.lookup_name_id("mango")
+    assert reg.name_for_id(0) == "zebra"
+    assert reg.name_for_id(1) == "apple"
+    assert reg.name_for_id(2) == "mango"
+
+
+def test_name_id_stable_after_insert():
+    """Inserting a new name does NOT shift any existing name_id."""
+    reg = TestRegistry()
+    id_mango = reg.lookup_name_id("mango")    # id 0
+    id_apple = reg.lookup_name_id("apple")    # id 1 (insertion order)
+    assert id_mango == 0
+    assert id_apple == 1
+    # Looking them up again returns the SAME ids
+    assert reg.lookup_name_id("mango") == 0
+    assert reg.lookup_name_id("apple") == 1
+
+
+def test_lookup_seed_id_new():
+    reg = TestRegistry()
+    sid = reg.lookup_seed_id("12345")
+    assert sid == 0
+    assert reg.num_seeds == 1
+
+
+def test_lookup_seed_id_existing():
+    reg = TestRegistry()
+    sid1 = reg.lookup_seed_id("99999")
+    sid2 = reg.lookup_seed_id("99999")
+    assert sid1 == sid2
+
+
+def test_seed_id_insertion_order():
+    """Seeds are stored in insertion order (not sorted)."""
+    reg = TestRegistry()
+    reg.lookup_seed_id("zzz")
+    reg.lookup_seed_id("aaa")
+    assert reg.seed_for_id(0) == "zzz"
+    assert reg.seed_for_id(1) == "aaa"
+
+
+def test_seed_id_roundtrip():
+    reg = TestRegistry()
+    reg.lookup_seed_id("abc123")
+    reg2 = TestRegistry.deserialize(reg.serialize())
+    assert reg2.seed_for_id(0) == "abc123"
+
+
+def test_serialize_deserialize_empty():
+    reg = TestRegistry()
+    reg2 = TestRegistry.deserialize(reg.serialize())
+    assert reg2.num_names == 0
+    assert reg2.num_seeds == 0
+    assert reg2.next_run_id == 0
+
+
+def test_serialize_deserialize_names_and_seeds():
+    reg = TestRegistry(next_run_id=5)
+    names = ["test_z", "test_a", "test_m"]
+    for n in names:
+        reg.lookup_name_id(n)
+    reg.lookup_seed_id("1"); reg.lookup_seed_id("2")
+    data = reg.serialize()
+    reg2 = TestRegistry.deserialize(data)
+    assert reg2.next_run_id == 5
+    assert reg2.num_names == 3
+    assert reg2.num_seeds == 2
+    # Names and seeds preserved in insertion order
+    assert reg2.name_for_id(0) == "test_z"   # insertion order
+    assert reg2.name_for_id(1) == "test_a"
+    assert reg2.name_for_id(2) == "test_m"
+    assert reg2.seed_for_id(0) == "1"
+    assert reg2.seed_for_id(1) == "2"
+
+
+def test_serialize_deserialize_1000_names():
+    reg = TestRegistry()
+    for i in range(1000):
+        reg.lookup_name_id(f"test_{i:04d}")
+    data = reg.serialize()
+    reg2 = TestRegistry.deserialize(data)
+    assert reg2.num_names == 1000
+    # Spot-check a few
+    for i in range(1000):
+        name = f"test_{i:04d}"
+        assert reg2.name_for_id(reg2.lookup_name_id(name)) == name
+
+
+def test_bad_magic_raises():
+    data = b"\x00\x00\x00\x00" + b"\x00" * 20   # ≥ header size (17 bytes)
+    with pytest.raises(ValueError, match="Bad magic"):
+        TestRegistry.deserialize(data)
diff --git a/tests/unit/ncdb/test_test_stats.py b/tests/unit/ncdb/test_test_stats.py
new file mode 100644
index 0000000..3b2b070
--- /dev/null
+++ b/tests/unit/ncdb/test_test_stats.py
@@ -0,0 +1,178 @@
+"""Unit tests for TestStatsTable (test_stats.bin)."""
+import math
+import pytest
+from ucis.ncdb.test_stats import TestStatsTable, TestStatsEntry
+from ucis.ncdb.constants import HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_ERROR
+
+
+def _make_table(*statuses, cpu_times=None):
+    tbl = TestStatsTable()
+    for i, s in enumerate(statuses):
+        cpu = cpu_times[i] if cpu_times else None
+        tbl.update(0, s, 1700000000 + i * 86400, cpu_time=cpu)
+    return tbl
+
+
+def test_update_pass():
+    tbl = _make_table(HIST_STATUS_OK)
+    e = tbl.get(0)
+    assert e.total_runs == 1
+    assert e.pass_count == 1
+    assert e.fail_count == 0
+    assert e.last_green_ts == 1700000000
+
+
+def test_update_fail():
+    tbl = _make_table(HIST_STATUS_FAIL)
+    e = tbl.get(0)
+    assert e.fail_count == 1
+    assert e.pass_count == 0
+    assert e.last_green_ts == 0
+
+
+def test_update_error():
+    tbl = _make_table(HIST_STATUS_ERROR)
+    e = tbl.get(0)
+    assert e.error_count == 1
+
+
+def test_streak_consecutive_passes():
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_OK, HIST_STATUS_OK)
+    assert tbl.get(0).streak == 3
+
+
+def test_streak_consecutive_fails():
+    tbl = _make_table(HIST_STATUS_FAIL, HIST_STATUS_FAIL)
+    assert tbl.get(0).streak == -2
+
+
+def test_streak_resets_on_change():
+    tbl = _make_table(HIST_STATUS_FAIL, HIST_STATUS_FAIL, HIST_STATUS_OK)
+    assert tbl.get(0).streak == 1
+
+
+def test_transition_count():
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_OK)
+    e = tbl.get(0)
+    assert e.transition_count == 2
+
+
+def test_flake_score_alternating():
+    statuses = [HIST_STATUS_OK, HIST_STATUS_FAIL] * 50
+    tbl = _make_table(*statuses)
+    e = tbl.get(0)
+    # 99 transitions over 99 intervals → score = 1.0
+    assert abs(e.flake_score - 1.0) < 0.02
+
+
+def test_flake_score_stable_all_pass():
+    tbl = _make_table(*([HIST_STATUS_OK] * 10))
+    assert tbl.get(0).flake_score == 0.0
+
+
+def test_fail_rate():
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_FAIL)
+    e = tbl.get(0)
+    assert abs(e.fail_rate - 2/3) < 1e-6
+
+
+def test_welford_mean():
+    cpu = [1.0, 2.0, 3.0]
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_OK, HIST_STATUS_OK, cpu_times=cpu)
+    assert abs(tbl.get(0).mean_cpu_time - 2.0) < 1e-6
+
+
+def test_welford_stddev():
+    # known variance: [1,2,3] → mean=2, var=2/3, std=sqrt(2/3)
+    cpu = [1.0, 2.0, 3.0]
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_OK, HIST_STATUS_OK, cpu_times=cpu)
+    e = tbl.get(0)
+    expected_std = math.sqrt(2/3)
+    assert abs(e.stddev_cpu_time - expected_std) < 1e-5
+
+
+def test_cusum_detects_change_point():
+    """Sustained failures should drive CUSUM past the h=4.0 threshold."""
+    tbl = TestStatsTable()
+    # Start with passes to establish baseline mean ≈ 0
+    for i in range(10):
+        tbl.update(0, HIST_STATUS_OK, 1700000000 + i * 86400)
+    # Then many consecutive failures
+    triggered = False
+    for i in range(10, 30):
+        tbl.update(0, HIST_STATUS_FAIL, 1700000000 + i * 86400)
+        # After reset, CUSUM can rise again — just check it doesn't blow up
+    e = tbl.get(0)
+    assert e.fail_count == 20
+    assert e.cusum_value >= 0.0   # always non-negative
+
+
+def test_grade_score_range():
+    statuses = [HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_OK]
+    tbl = _make_table(*statuses)
+    score = tbl.get(0).grade_score
+    assert 0.0 <= score <= 1.0
+
+
+def test_is_broken():
+    tbl = _make_table(*([HIST_STATUS_FAIL] * 10))
+    assert tbl.get(0).is_broken()
+
+
+def test_is_flaky():
+    # Alternating → flake_score close to 1, abs(streak) < 3
+    tbl = _make_table(HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_OK)
+    assert tbl.get(0).is_flaky()
+
+
+def test_top_flaky():
+    tbl = TestStatsTable()
+    # name_id 0: alternates (high flake)
+    for i in range(10):
+        s = HIST_STATUS_OK if i % 2 == 0 else HIST_STATUS_FAIL
+        tbl.update(0, s, 1700000000 + i * 86400)
+    # name_id 1: always passes (zero flake)
+    for i in range(10):
+        tbl.update(1, HIST_STATUS_OK, 1700000000 + i * 86400)
+    top = tbl.top_flaky(1)
+    assert top[0].name_id == 0
+
+
+def test_top_failing():
+    tbl = TestStatsTable()
+    for i in range(10):
+        tbl.update(0, HIST_STATUS_FAIL, 1700000000 + i * 86400)  # 100% fail
+    for i in range(10):
+        tbl.update(1, HIST_STATUS_OK, 1700000000 + i * 86400)    # 0% fail
+    top = tbl.top_failing(1)
+    assert top[0].name_id == 0
+
+
+def test_multiple_name_ids():
+    tbl = TestStatsTable()
+    tbl.update(0, HIST_STATUS_OK, 1700000000)
+    tbl.update(3, HIST_STATUS_FAIL, 1700000001)
+    assert tbl.get(0).pass_count == 1
+    assert tbl.get(1) is not None   # auto-created empty
+    assert tbl.get(3).fail_count == 1
+
+
+def test_serialize_deserialize():
+    tbl = TestStatsTable()
+    for i in range(5):
+        s = HIST_STATUS_OK if i % 2 == 0 else HIST_STATUS_FAIL
+        tbl.update(0, s, 1700000000 + i * 86400, cpu_time=float(i + 1))
+    data = tbl.serialize()
+    tbl2 = TestStatsTable.deserialize(data)
+    e  = tbl.get(0)
+    e2 = tbl2.get(0)
+    assert e2.total_runs == e.total_runs
+    assert e2.pass_count == e.pass_count
+    assert abs(e2.flake_score - e.flake_score) < 1e-5
+    assert abs(e2.mean_cpu_time - e.mean_cpu_time) < 1e-4
+
+
+def test_bad_magic_raises():
+    data = b"\x00\x00\x00\x00" + b"\x00" * 8
+    with pytest.raises(ValueError, match="Bad magic"):
+        TestStatsTable.deserialize(data)
diff --git a/tests/unit/ncdb/test_testplan.py b/tests/unit/ncdb/test_testplan.py
new file mode 100644
index 0000000..a926f5e
--- /dev/null
+++ b/tests/unit/ncdb/test_testplan.py
@@ -0,0 +1,228 @@
+"""Unit tests for src/ucis/ncdb/testplan.py."""
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from ucis.ncdb.testplan import (
+    CovergroupEntry,
+    RequirementLink,
+    Testplan,
+    Testpoint,
+    get_testplan,
+    set_testplan,
+)
+
+
+# ── construction helpers ──────────────────────────────────────────────────────
+
+def _make_plan() -> Testplan:
+    tp = Testplan(source_file="uart.hjson")
+    tp.add_testpoint(Testpoint(name="uart_reset",   stage="V1",
+                               tests=["uart_smoke", "uart_init_*"]))
+    tp.add_testpoint(Testpoint(name="uart_loopback", stage="V2",
+                               tests=["uart_loopback_42", "uart_loopback_99"]))
+    tp.add_testpoint(Testpoint(name="uart_na",       stage="V2",
+                               na=True, tests=[]))
+    tp.covergroups.append(CovergroupEntry(name="cg_uart_reset",
+                                          desc="Reset coverage"))
+    return tp
+
+
+# ── basic construction ────────────────────────────────────────────────────────
+
+class TestTestplanConstruction:
+    def test_empty_plan(self):
+        plan = Testplan()
+        assert plan.format_version == 1
+        assert plan.testpoints == []
+        assert plan.covergroups == []
+
+    def test_add_testpoint_invalidates_index(self):
+        plan = Testplan()
+        plan._indexed = True          # simulate already indexed
+        plan.add_testpoint(Testpoint(name="t1", stage="V1"))
+        assert plan._indexed is False
+
+    def test_stages_ordered(self):
+        plan = _make_plan()
+        assert plan.stages() == ["V1", "V2"]
+
+    def test_stages_custom_sorted_last(self):
+        plan = Testplan()
+        plan.add_testpoint(Testpoint(name="a", stage="V3"))
+        plan.add_testpoint(Testpoint(name="b", stage="V1"))
+        plan.add_testpoint(Testpoint(name="c", stage="CUSTOM"))
+        assert plan.stages() == ["V1", "V3", "CUSTOM"]
+
+    def test_testpoints_for_stage(self):
+        plan = _make_plan()
+        v1 = plan.testpointsForStage("V1")
+        assert len(v1) == 1
+        assert v1[0].name == "uart_reset"
+
+
+# ── lookup ────────────────────────────────────────────────────────────────────
+
+class TestTestpointLookup:
+    def test_get_testpoint_by_name(self):
+        plan = _make_plan()
+        tp = plan.getTestpoint("uart_reset")
+        assert tp is not None
+        assert tp.name == "uart_reset"
+
+    def test_get_testpoint_unknown(self):
+        plan = _make_plan()
+        assert plan.getTestpoint("nonexistent") is None
+
+    def test_testpoint_for_test_exact(self):
+        plan = _make_plan()
+        tp = plan.testpointForTest("uart_smoke")
+        assert tp is not None
+        assert tp.name == "uart_reset"
+
+    def test_testpoint_for_test_seed_strip(self):
+        plan = _make_plan()
+        # "uart_smoke_12345" → strip → "uart_smoke" → exact
+        tp = plan.testpointForTest("uart_smoke_12345")
+        assert tp is not None
+        assert tp.name == "uart_reset"
+
+    def test_testpoint_for_test_wildcard(self):
+        plan = _make_plan()
+        # "uart_init_*" matches "uart_init_fast"
+        tp = plan.testpointForTest("uart_init_fast")
+        assert tp is not None
+        assert tp.name == "uart_reset"
+
+    def test_testpoint_for_test_no_match(self):
+        plan = _make_plan()
+        assert plan.testpointForTest("spi_whatever") is None
+
+    def test_testpoint_for_test_na_testpoint(self):
+        plan = _make_plan()
+        # na testpoint has no tests so nothing maps to it
+        tp = plan.getTestpoint("uart_na")
+        assert tp is not None
+        assert tp.na is True
+        assert plan.testpointForTest("uart_na") is None
+
+    def test_wildcard_does_not_match_seed_strip_candidate(self):
+        # Seed-strip (strategy 2) has higher priority than wildcard (strategy 3)
+        plan = Testplan()
+        plan.add_testpoint(Testpoint(name="exact", stage="V1",
+                                     tests=["foo_bar"]))         # exact of stripped
+        plan.add_testpoint(Testpoint(name="wild",  stage="V1",
+                                     tests=["foo_*"]))           # wildcard
+        tp = plan.testpointForTest("foo_bar_42")  # strip→foo_bar wins
+        assert tp.name == "exact"
+
+
+# ── serialization round-trip ──────────────────────────────────────────────────
+
+class TestTestplanSerialization:
+    def test_to_dict_keys(self):
+        plan = _make_plan()
+        d = plan.to_dict()
+        assert "format_version" in d
+        assert "testpoints" in d
+        assert "covergroups" in d
+
+    def test_serialize_is_compact_json(self):
+        plan = _make_plan()
+        data = plan.serialize()
+        assert isinstance(data, bytes)
+        # compact separators: no space after ',' or ':'
+        text = data.decode()
+        assert ", " not in text
+        assert ": " not in text
+
+    def test_roundtrip_all_fields(self):
+        plan = Testplan(format_version=1, source_file="x.hjson",
+                        import_timestamp="2024-01-01T00:00:00+00:00")
+        plan.add_testpoint(Testpoint(
+            name="tp1", stage="V2", desc="desc",
+            tests=["t1", "t_*"], tags=["tag1"],
+            na=False, source_template="t_{x}",
+            requirements=[RequirementLink(system="ALM", project="P",
+                                          item_id="REQ-1", url="http://x")],
+        ))
+        plan.covergroups.append(CovergroupEntry(name="cg1", desc="cg desc"))
+        data = plan.serialize()
+        plan2 = Testplan.from_bytes(data)
+        assert plan2.format_version == 1
+        assert plan2.source_file == "x.hjson"
+        assert plan2.import_timestamp == "2024-01-01T00:00:00+00:00"
+        assert len(plan2.testpoints) == 1
+        tp2 = plan2.testpoints[0]
+        assert tp2.name == "tp1"
+        assert tp2.stage == "V2"
+        assert tp2.tests == ["t1", "t_*"]
+        assert tp2.tags == ["tag1"]
+        assert tp2.source_template == "t_{x}"
+        assert len(tp2.requirements) == 1
+        req = tp2.requirements[0]
+        assert req.system == "ALM"
+        assert req.item_id == "REQ-1"
+        assert len(plan2.covergroups) == 1
+
+    def test_from_dict_missing_optional_fields(self):
+        d = {"testpoints": [{"name": "tp", "stage": "V1"}]}
+        plan = Testplan.from_dict(d)
+        assert plan.format_version == 1
+        assert plan.source_file == ""
+        tp = plan.testpoints[0]
+        assert tp.desc == ""
+        assert tp.tests == []
+        assert tp.na is False
+
+    def test_from_bytes_roundtrip(self):
+        plan = _make_plan()
+        plan2 = Testplan.from_bytes(plan.serialize())
+        assert len(plan2.testpoints) == len(plan.testpoints)
+        assert plan2.covergroups[0].name == "cg_uart_reset"
+
+    def test_save_and_load(self, tmp_path):
+        plan = _make_plan()
+        path = str(tmp_path / "plan.json")
+        plan.save(path)
+        plan2 = Testplan.load(path)
+        assert plan2.source_file == "uart.hjson"
+        assert len(plan2.testpoints) == 3
+
+
+# ── stamp_import_time ─────────────────────────────────────────────────────────
+
+class TestStampImportTime:
+    def test_sets_non_empty_timestamp(self):
+        plan = Testplan()
+        assert plan.import_timestamp == ""
+        plan.stamp_import_time()
+        assert plan.import_timestamp != ""
+        assert "T" in plan.import_timestamp  # ISO-8601 format
+
+
+# ── module-level helpers ──────────────────────────────────────────────────────
+
+class TestModuleHelpers:
+    def test_get_testplan_from_duck_typed_db(self):
+        class FakeDB:
+            def getTestplan(self):
+                return "my_plan"
+        assert get_testplan(FakeDB()) == "my_plan"
+
+    def test_get_testplan_returns_none_without_method(self):
+        assert get_testplan(object()) is None
+
+    def test_set_testplan_duck_typed(self):
+        stored = []
+        class FakeDB:
+            def setTestplan(self, tp):
+                stored.append(tp)
+        set_testplan(FakeDB(), "plan_obj")
+        assert stored == ["plan_obj"]
+
+    def test_set_testplan_raises_without_method(self):
+        with pytest.raises(TypeError):
+            set_testplan(object(), "plan")
diff --git a/tests/unit/ncdb/test_testplan_closure.py b/tests/unit/ncdb/test_testplan_closure.py
new file mode 100644
index 0000000..8cf953b
--- /dev/null
+++ b/tests/unit/ncdb/test_testplan_closure.py
@@ -0,0 +1,206 @@
+"""Unit tests for src/ucis/ncdb/testplan_closure.py."""
+from __future__ import annotations
+
+import pytest
+
+from ucis.ncdb.testplan import Testplan, Testpoint
+from ucis.ncdb.testplan_closure import (
+    TPStatus,
+    TestpointResult,
+    compute_closure,
+    stage_gate_status,
+)
+
+
+# ── stub DB ───────────────────────────────────────────────────────────────────
+
+class _FakeStats:
+    def __init__(self, pass_count, fail_count):
+        # stub stats entry: only the two counters are modelled
+        self.pass_count, self.fail_count = pass_count, fail_count
+
+
+class _FakeRegistry:
+    def __init__(self, names):
+        self._names = names  # stored as given; _FakeDB passes the list of run names
+
+
+class _FakeDB:
+    """Tiny NcdbUCIS stand-in exposing the v2 history attributes."""
+
+    def __init__(self, runs: dict):
+        """runs maps test name -> (pass_count, fail_count)."""
+        # registry and stats table are both derived from the same mapping
+        self._test_stats = _FakeStatsTable(runs)
+        self._test_registry = _FakeRegistry(list(runs))
+
+    def historyNodes(self, _kind):
+        return []  # this stub never reports history nodes
+
+
+class _FakeStatsTable:
+    def __init__(self, runs):
+        self._names = list(runs)  # list position doubles as the id passed to get()
+        self._runs = runs
+
+    def get(self, nid):
+        # id -> name -> (pass, fail) pair
+        passed, failed = self._runs[self._names[nid]]
+        return _FakeStats(passed, failed)
+
+
+def _db_with(**kwargs):
+    """Build a _FakeDB from keywords, e.g. _db_with(uart_smoke=(3, 1)); values are (pass_count, fail_count)."""
+    return _FakeDB(kwargs)
+
+
+# ── plan helpers ──────────────────────────────────────────────────────────────
+
+def _make_plan(*testpoints) -> Testplan:
+    built = Testplan()
+    for point in testpoints:  # added in the order given
+        built.add_testpoint(point)
+    return built
+
+
+# ── compute_closure ───────────────────────────────────────────────────────────
+
+class TestComputeClosure:
+    def test_closed_when_all_pass(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["uart_smoke"])
+        fake_db = _db_with(uart_smoke=(5, 0))
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.CLOSED
+
+    def test_failing_when_all_fail(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["t"])
+        fake_db = _db_with(t=(0, 3))
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.FAILING
+
+    def test_partial_when_mixed(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["t"])
+        fake_db = _db_with(t=(2, 1))
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.PARTIAL
+
+    def test_not_run_when_absent(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["t"])
+        fake_db = _db_with()  # no runs recorded at all
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.NOT_RUN
+
+    def test_na_testpoint(self):
+        tp = Testpoint(name="tp", stage="V1", na=True)
+        fake_db = _db_with()
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.NA
+
+    def test_unimplemented_empty_tests(self):
+        tp = Testpoint(name="tp", stage="V1", tests=[])
+        fake_db = _db_with()
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.UNIMPLEMENTED
+
+    def test_wildcard_pattern_matches(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["uart_*"])
+        fake_db = _db_with(uart_loopback=(3, 0), uart_reset=(2, 0))
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.CLOSED
+        assert len(first.matched_tests) == 2
+
+    def test_seed_strip_matches(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["uart_smoke_42"])
+        fake_db = _db_with(uart_smoke=(4, 0))  # DB holds the seed-stripped name
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.status == TPStatus.CLOSED
+
+    def test_pass_fail_counts_accurate(self):
+        tp = Testpoint(name="tp", stage="V1", tests=["a", "b"])
+        # a contributes 3 pass / 1 fail, b contributes 2 pass / 2 fail;
+        # the result is expected to carry the sums over both tests.
+        fake_db = _db_with(a=(3, 1), b=(2, 2))
+        first = compute_closure(_make_plan(tp), fake_db)[0]
+        assert first.pass_count == 5
+        assert first.fail_count == 3
+
+    def test_multiple_testpoints_independent(self):
+        passing = Testpoint(name="tp1", stage="V1", tests=["a"])
+        failing = Testpoint(name="tp2", stage="V2", tests=["b"])
+        fake_db = _db_with(a=(5, 0), b=(0, 2))
+        plan = _make_plan(passing, failing)
+        closure = compute_closure(plan, fake_db)
+        # each testpoint is judged on its own tests only
+        assert closure[0].status == TPStatus.CLOSED
+        assert closure[1].status == TPStatus.FAILING
+
+    def test_result_order_matches_testplan(self):
+        first_tp = Testpoint(name="first", stage="V1", tests=["x"])
+        second_tp = Testpoint(name="second", stage="V1", tests=["y"])
+        fake_db = _db_with(x=(1, 0), y=(1, 0))
+        plan = _make_plan(first_tp, second_tp)
+        closure = compute_closure(plan, fake_db)
+        # results come back in testplan insertion order
+        assert closure[0].testpoint.name == "first"
+        assert closure[1].testpoint.name == "second"
+
+
+# ── stage_gate_status ─────────────────────────────────────────────────────────
+
+class TestStageGateStatus:
+    def _plan_and_results(self, statuses: dict) -> tuple:
+        plan = Testplan()
+        for tp_name, (stage, status) in statuses.items():
+            tests = ["t"] if status != TPStatus.UNIMPLEMENTED else []
+            plan.add_testpoint(Testpoint(name=tp_name, stage=stage, tests=tests))
+        results = []
+        for tp in plan.testpoints:
+            _, status = statuses[tp.name]
+            results.append(TestpointResult(tp, status, [], 1 if status == TPStatus.CLOSED else 0, 0))
+        return plan, results
+
+    def test_gate_passes_all_closed(self):
+        plan, results = self._plan_and_results({
+            "v1_tp": ("V1", TPStatus.CLOSED),
+            "v2_tp": ("V2", TPStatus.CLOSED),
+        })
+        verdict = stage_gate_status(results, "V2", plan)
+        assert verdict["passed"] is True
+        assert verdict["blocking"] == []
+
+    def test_gate_fails_if_lower_stage_not_closed(self):
+        plan, results = self._plan_and_results({
+            "v1_tp": ("V1", TPStatus.FAILING),
+            "v2_tp": ("V2", TPStatus.CLOSED),
+        })
+        verdict = stage_gate_status(results, "V2", plan)
+        assert verdict["passed"] is False
+        assert "v1_tp" in [r.testpoint.name for r in verdict["blocking"]]
+
+    def test_gate_passes_na_testpoints_ignored(self):
+        plan, results = self._plan_and_results({
+            "v1_tp": ("V1", TPStatus.CLOSED),
+            "v1_na": ("V1", TPStatus.NA),
+        })
+        verdict = stage_gate_status(results, "V1", plan)
+        assert verdict["passed"] is True
+
+    def test_gate_ignores_higher_stage(self):
+        plan, results = self._plan_and_results({
+            "v1_tp": ("V1", TPStatus.CLOSED),
+            "v3_tp": ("V3", TPStatus.FAILING),  # above the V2 gate, so not evaluated
+        })
+        verdict = stage_gate_status(results, "V2", plan)
+        assert verdict["passed"] is True
+
+    def test_message_includes_stage(self):
+        plan, results = self._plan_and_results({
+            "tp": ("V1", TPStatus.CLOSED),
+        })
+        verdict = stage_gate_status(results, "V1", plan)
+        assert "V1" in verdict["message"]
+
+    def test_gate_returns_stage_key(self):
+        plan, results = self._plan_and_results({"tp": ("V1", TPStatus.CLOSED)})
+        verdict = stage_gate_status(results, "V1", plan)
+        assert verdict["stage"] == "V1"
diff --git a/tests/unit/ncdb/test_testplan_export.py b/tests/unit/ncdb/test_testplan_export.py
new file mode 100644
index 0000000..fb18829
--- /dev/null
+++ b/tests/unit/ncdb/test_testplan_export.py
@@ -0,0 +1,229 @@
+"""Unit tests for ucis.ncdb.testplan_export."""
+
+import io
+import json
+import os
+import tempfile
+from xml.etree import ElementTree as ET
+
+import pytest
+
+from ucis.ncdb.testplan import Testplan, Testpoint
+from ucis.ncdb.testplan_closure import TPStatus, TestpointResult
+from ucis.ncdb.testplan_export import (
+    export_junit_xml,
+    export_github_annotations,
+    export_summary_markdown,
+)
+from ucis.ncdb.reports import report_stage_gate
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+def _make_tp(name, stage="V1", desc=""):
+    return Testpoint(tests=[name], name=name, stage=stage, desc=desc)  # one test, named like the testpoint
+
+
+def _make_result(tp, status, pass_count=0, fail_count=0, matched=None):
+    # Keyword-built TestpointResult; a falsy ``matched`` (None or empty)
+    # is replaced by a fresh empty list.
+    matched_tests = matched if matched else []
+    return TestpointResult(
+        testpoint=tp, status=status, matched_tests=matched_tests,
+        pass_count=pass_count, fail_count=fail_count,
+    )
+
+
+@pytest.fixture
+def mixed_results():
+    # (name, stage, status, pass_count, fail_count, matched) per testpoint
+    rows = [
+        ("tp_pass", "V1", TPStatus.CLOSED, 5, 0, ["tp_pass"]),
+        ("tp_fail", "V1", TPStatus.FAILING, 0, 3, ["tp_fail"]),
+        ("tp_skip", "V2", TPStatus.NOT_RUN, 0, 0, None),
+        ("tp_partial", "V2", TPStatus.PARTIAL, 2, 2, ["tp_partial"]),
+        ("tp_na", "V1", TPStatus.NA, 0, 0, None),
+    ]
+    return [_make_result(_make_tp(name, stage), status, passed, failed, matched)
+            for name, stage, status, passed, failed, matched in rows]
+
+
+@pytest.fixture
+def simple_plan():
+    stage_of = {"tp_pass": "V1", "tp_fail": "V1", "tp_skip": "V2",
+                "tp_partial": "V2", "tp_na": "V1"}
+    testplan = Testplan(source_file="test.hjson")
+    for tp_name, tp_stage in stage_of.items():  # dicts iterate in insertion order
+        testplan.add_testpoint(Testpoint(name=tp_name, stage=tp_stage, tests=[tp_name]))
+    return testplan
+
+
+# ---------------------------------------------------------------------------
+# JUnit XML
+# ---------------------------------------------------------------------------
+
+class TestExportJunitXml:
+    def test_creates_file(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        assert os.path.exists(xml_path)
+
+    def test_valid_xml(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        # ET.parse raises on malformed XML, so reaching the assert implies it parsed
+        root = ET.parse(xml_path).getroot()
+        assert root.tag == "testsuite"
+
+    def test_testcase_count(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        # one <testcase> is expected per TestpointResult
+        testcases = ET.parse(xml_path).findall(".//testcase")
+        assert len(testcases) == len(mixed_results)
+
+    def test_failure_element_for_failing(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        testcases = ET.parse(xml_path).findall(".//testcase")
+        # first <testcase> whose name attribute is tp_fail
+        failing_case = next(
+            case for case in testcases if case.attrib["name"] == "tp_fail"
+        )
+        assert failing_case.find("failure") is not None
+
+    def test_skipped_element_for_not_run(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        testcases = ET.parse(xml_path).findall(".//testcase")
+        # first <testcase> whose name attribute is tp_skip
+        skipped_case = next(
+            case for case in testcases if case.attrib["name"] == "tp_skip"
+        )
+        assert skipped_case.find("skipped") is not None
+
+    def test_no_failure_for_closed(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        testcases = ET.parse(xml_path).findall(".//testcase")
+        # a CLOSED testpoint carries neither <failure> nor <skipped>
+        closed_case = next(
+            case for case in testcases if case.attrib["name"] == "tp_pass"
+        )
+        assert closed_case.find("failure") is None
+        assert closed_case.find("skipped") is None
+
+    def test_suite_name_attribute(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path, suite_name="my_suite")
+        root = ET.parse(xml_path).getroot()
+        assert root.attrib["name"] == "my_suite"
+
+    def test_failure_count_in_suite(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        root = ET.parse(xml_path).getroot()
+        # tp_fail (FAILING) + tp_partial (PARTIAL) = 2 failures
+        assert root.attrib["failures"] == "2"
+
+    def test_partial_gets_failure_element(self, mixed_results, tmp_path):
+        xml_path = str(tmp_path / "results.xml")
+        export_junit_xml(mixed_results, xml_path)
+        testcases = ET.parse(xml_path).findall(".//testcase")
+        # first <testcase> whose name attribute is tp_partial
+        partial_case = next(
+            case for case in testcases if case.attrib["name"] == "tp_partial"
+        )
+        assert partial_case.find("failure") is not None
+
+
+# ---------------------------------------------------------------------------
+# GitHub Annotations
+# ---------------------------------------------------------------------------
+
+class TestExportGithubAnnotations:
+    def test_error_for_failing(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, output=sink)
+        emitted = sink.getvalue()
+        assert "::error" in emitted
+        assert "tp_fail" in emitted
+
+    def test_warning_for_not_run(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, output=sink)
+        emitted = sink.getvalue()
+        assert "::warning" in emitted
+        assert "tp_skip" in emitted
+
+    def test_warning_for_partial(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, output=sink)
+        emitted = sink.getvalue()
+        assert "tp_partial" in emitted  # only presence is checked, not the severity
+
+    def test_no_output_for_closed(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, output=sink)
+        emitted = sink.getvalue()
+        # a CLOSED testpoint (tp_pass) must not appear on any annotation line
+        mentioning = [line for line in emitted.splitlines() if "tp_pass" in line]
+        assert mentioning == []
+
+    def test_no_output_for_na(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, output=sink)
+        emitted = sink.getvalue()
+        mentioning = [line for line in emitted.splitlines() if "tp_na" in line]
+        assert mentioning == []
+
+    def test_custom_file_field(self, mixed_results):
+        sink = io.StringIO()
+        export_github_annotations(mixed_results, file="uart.hjson", output=sink)
+        emitted = sink.getvalue()
+        assert "file=uart.hjson" in emitted
+
+
+# ---------------------------------------------------------------------------
+# Markdown summary
+# ---------------------------------------------------------------------------
+
+class TestExportSummaryMarkdown:
+    def test_returns_string(self, mixed_results):
+        md = export_summary_markdown(mixed_results)
+        assert isinstance(md, str)
+
+    def test_contains_headline(self, mixed_results):
+        md = export_summary_markdown(mixed_results)
+        assert "## Testplan Closure Report" in md
+
+    def test_contains_stage_table(self, mixed_results):
+        md = export_summary_markdown(mixed_results)
+        assert "| Stage" in md
+        assert "V1" in md  # cell padding is not pinned; the old '"| V1" in md or' disjunct was redundant
+
+    def test_contains_testpoint_table(self, mixed_results):
+        md = export_summary_markdown(mixed_results)
+        assert "| Testpoint" in md
+        assert "tp_pass" in md
+
+    def test_gate_verdict_included(self, mixed_results, simple_plan):
+        gate = report_stage_gate(mixed_results, "V1", simple_plan)
+        md = export_summary_markdown(mixed_results, stage_gate=gate)
+        assert "Stage gate" in md
+        assert "V1" in md
+
+    def test_blocking_section_when_gate_fails(self, mixed_results, simple_plan):
+        gate = report_stage_gate(mixed_results, "V2", simple_plan)
+        md = export_summary_markdown(mixed_results, stage_gate=gate)
+        assert not gate.passed  # precondition: tp_fail (V1, FAILING) is expected to block V2
+        assert "Blocking testpoints" in md
+
+    def test_na_testpoints_excluded_from_table(self, mixed_results):
+        md = export_summary_markdown(mixed_results)
+        # The fixture's N/A testpoint (tp_na) must not produce a table row;
+        # any line containing both "tp_na" and a pipe counts as a row.
+        rows = [line for line in md.splitlines() if "tp_na" in line and "|" in line]
+        assert rows == []
diff --git a/tests/unit/ncdb/test_testplan_hjson.py b/tests/unit/ncdb/test_testplan_hjson.py
new file mode 100644
index 0000000..d1b54c6
--- /dev/null
+++ b/tests/unit/ncdb/test_testplan_hjson.py
@@ -0,0 +1,180 @@
+"""Unit tests for src/ucis/ncdb/testplan_hjson.py."""
+from __future__ import annotations
+
+import json
+import os
+import pytest
+
+from ucis.ncdb.testplan_hjson import (
+    _expand_template,
+    _expand_tests,
+    import_hjson,
+)
+from ucis.ncdb.testplan import Testplan
+
+
+# ── _expand_template ──────────────────────────────────────────────────────────
+
+class TestExpandTemplate:
+    def test_no_placeholders(self):
+        assert _expand_template("uart_smoke", {}) == ["uart_smoke"]  # plain name yields a one-item list
+
+    def test_scalar_substitution(self):
+        assert _expand_template("test_{baud}", {"baud": "9600"}) == ["test_9600"]  # scalar -> single result
+
+    def test_list_substitution_cartesian(self):
+        expanded = _expand_template("test_{baud}", {"baud": ["9600", "115200"]})
+        assert expanded == ["test_9600", "test_115200"]
+
+    def test_multiple_keys_cartesian_product(self):
+        subs = {"mod": ["a", "b"], "type": ["x", "y"]}
+        expanded = _expand_template("{mod}_{type}_test", subs)
+        assert len(expanded) == 4  # 2 x 2 combinations
+        assert "a_x_test" in expanded
+        assert "b_y_test" in expanded
+
+    def test_unknown_key_left_verbatim(self):
+        expanded = _expand_template("test_{unknown}", {})
+        assert expanded == ["test_{unknown}"]
+
+    def test_mixed_known_unknown(self):
+        expanded = _expand_template("{a}_{b}", {"a": "hello"})
+        assert expanded == ["hello_{b}"]
+
+    def test_duplicate_key_in_template(self):
+        # both occurrences of {a} take the same value in each expansion
+        expanded = _expand_template("{a}_{a}", {"a": ["x", "y"]})
+        assert set(expanded) == {"x_x", "y_y"}
+
+    def test_no_subs_empty_dict(self):
+        expanded = _expand_template("{x}", {})
+        assert expanded == ["{x}"]
+
+
+# ── _expand_tests ─────────────────────────────────────────────────────────────
+
+class TestExpandTests:
+    def test_flat_list_no_expansion(self):
+        expanded = _expand_tests(["a", "b", "c"], {})
+        assert expanded == ["a", "b", "c"]
+
+    def test_with_expansion(self):
+        expanded = _expand_tests(["{m}_test"], {"m": ["u", "v"]})
+        assert expanded == ["u_test", "v_test"]
+
+    def test_mixed_plain_and_template(self):
+        expanded = _expand_tests(["plain", "{x}_test"], {"x": ["a", "b"]})
+        assert expanded == ["plain", "a_test", "b_test"]  # input order is kept
+
+
+# ── import_hjson ──────────────────────────────────────────────────────────────
+
+def _write_hjson(tmp_path, data: dict) -> str:
+    target = tmp_path / "plan.json"
+    # the payload is plain JSON; the tests below feed this file to import_hjson
+    target.write_text(json.dumps(data))
+    return str(target)
+
+
+class TestImportHjson:
+    def test_basic_import(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "uart_reset", "stage": "V1",
+                 "tests": ["uart_smoke", "uart_init"]},
+            ],
+        })
+        imported = import_hjson(plan_file)
+        assert isinstance(imported, Testplan)
+        assert len(imported.testpoints) == 1
+        only_tp = imported.testpoints[0]
+        assert only_tp.name == "uart_reset"
+        assert only_tp.stage == "V1"
+        assert only_tp.tests == ["uart_smoke", "uart_init"]
+        assert only_tp.na is False
+
+    def test_na_testpoint(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "not_impl", "stage": "V2", "tests": ["N/A"]},
+            ],
+        })
+        imported = import_hjson(plan_file)
+        only_tp = imported.testpoints[0]
+        assert only_tp.na is True  # the "N/A" marker sets the flag ...
+        assert only_tp.tests == []  # ... and leaves the test list empty
+
+    def test_wildcard_expansion(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "tp", "stage": "V1",
+                 "tests": ["{baud}_test"]},
+            ],
+        })
+        imported = import_hjson(plan_file, substitutions={"baud": ["9600", "115200"]})
+        assert imported.testpoints[0].tests == ["9600_test", "115200_test"]
+
+    def test_cartesian_expansion(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "tp", "stage": "V1",
+                 "tests": ["{mod}_{intf}_test"]},
+            ],
+        })
+        imported = import_hjson(plan_file, substitutions={
+            "mod": ["uart", "spi"],
+            "intf": ["a", "b"],
+        })
+        assert len(imported.testpoints[0].tests) == 4  # 2 x 2 combinations
+
+    def test_source_file_set(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {"testpoints": []})
+        imported = import_hjson(plan_file)
+        assert os.path.isabs(imported.source_file)
+        assert imported.source_file.endswith(".json")
+
+    def test_covergroups_imported(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [],
+            "covergroups": [
+                {"name": "cg_reset", "desc": "Reset coverage"},
+            ],
+        })
+        imported = import_hjson(plan_file)
+        assert len(imported.covergroups) == 1
+        assert imported.covergroups[0].name == "cg_reset"
+
+    def test_optional_fields_defaults(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [{"name": "tp", "stage": "V1", "tests": ["t"]}],
+        })
+        imported = import_hjson(plan_file)
+        only_tp = imported.testpoints[0]
+        assert only_tp.desc == ""
+        assert only_tp.tags == []
+
+    def test_tags_preserved(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "tp", "stage": "V1", "tests": ["t"],
+                 "tags": ["smoke", "regression"]},
+            ],
+        })
+        imported = import_hjson(plan_file, {})
+        assert imported.testpoints[0].tags == ["smoke", "regression"]
+
+    def test_source_template_recorded(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {
+            "testpoints": [
+                {"name": "tp", "stage": "V1", "tests": ["{x}_test"]},
+            ],
+        })
+        imported = import_hjson(plan_file, {"x": ["a", "b"]})
+        # the unexpanded template must still be discoverable after expansion
+        assert "{x}_test" in imported.testpoints[0].source_template
+
+    def test_empty_testplan(self, tmp_path):
+        plan_file = _write_hjson(tmp_path, {})
+        imported = import_hjson(plan_file)
+        assert imported.testpoints == []
+        assert imported.covergroups == []
diff --git a/tests/unit/ncdb/test_waivers.py b/tests/unit/ncdb/test_waivers.py
new file mode 100644
index 0000000..a702da6
--- /dev/null
+++ b/tests/unit/ncdb/test_waivers.py
@@ -0,0 +1,140 @@
+"""Unit tests for src/ucis/ncdb/waivers.py."""
+from __future__ import annotations
+
+import pytest
+
+from ucis.ncdb.waivers import Waiver, WaiverSet, _glob_match
+
+
+# ── _glob_match ───────────────────────────────────────────────────────────────
+
+class TestGlobMatch:
+    def test_exact_match(self):
+        assert _glob_match("foo/bar", "foo/bar")  # argument order: pattern, then path
+
+    def test_exact_no_match(self):
+        assert not _glob_match("foo/bar", "foo/baz")
+
+    def test_single_star_matches_segment(self):
+        assert _glob_match("foo/*/baz", "foo/bar/baz")  # '*' stands for one segment
+
+    def test_single_star_does_not_cross_slash(self):
+        assert not _glob_match("foo/*/baz", "foo/x/y/baz")  # '*' must not span 'x/y'
+
+    def test_double_star_crosses_segments(self):
+        assert _glob_match("foo/**/baz", "foo/x/y/baz")  # '**' may span several segments
+
+    def test_double_star_matches_zero_segments(self):
+        assert _glob_match("foo/**/baz", "foo/baz")  # '**' may also match nothing
+
+    def test_trailing_single_star(self):
+        assert _glob_match("scope/*", "scope/uart")
+        assert not _glob_match("scope/*", "scope/uart/sub")  # one level only
+
+    def test_leading_double_star(self):
+        assert _glob_match("**/uart", "top/mid/uart")
+
+
+# ── Waiver ────────────────────────────────────────────────────────────────────
+
+class TestWaiver:
+    def test_matches_exact_scope(self):
+        waiver = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="*")
+        assert waiver.matches("top/uart")
+
+    def test_no_match_wrong_scope(self):
+        waiver = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="*")
+        assert not waiver.matches("top/spi")
+
+    def test_matches_with_bin_wildcard(self):
+        waiver = Waiver(id="W1", scope_pattern="top/*", bin_pattern="*")
+        assert waiver.matches("top/uart", "some_bin")
+
+    def test_matches_specific_bin(self):
+        waiver = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="reset_bin")
+        assert waiver.matches("top/uart", "reset_bin")
+        assert not waiver.matches("top/uart", "other_bin")  # scope matches, bin does not
+
+    def test_glob_scope_pattern(self):
+        waiver = Waiver(id="W1", scope_pattern="**/uart", bin_pattern="*")
+        assert waiver.matches("top/mid/uart")
+        assert not waiver.matches("top/spi")
+
+
+# ── WaiverSet ─────────────────────────────────────────────────────────────────
+
+class TestWaiverSet:
+    def test_empty_no_match(self):
+        waiver_set = WaiverSet()
+        assert not waiver_set.matches_scope("any/scope")
+
+    def test_add_and_match(self):
+        waiver_set = WaiverSet()
+        waiver_set.add(Waiver(id="W1", scope_pattern="top/uart"))
+        assert waiver_set.matches_scope("top/uart")
+
+    def test_get_by_id(self):
+        waiver_set = WaiverSet()
+        waiver_set.add(Waiver(id="W1", scope_pattern="a"))
+        waiver_set.add(Waiver(id="W2", scope_pattern="b"))
+        assert waiver_set.get("W1").scope_pattern == "a"
+        assert waiver_set.get("W2").scope_pattern == "b"
+        assert waiver_set.get("W3") is None  # unknown id yields None
+
+    def test_active_at_excludes_expired(self):
+        waiver_set = WaiverSet([
+            Waiver(id="W1", scope_pattern="a", expires_at="2024-01-01T00:00:00"),
+            Waiver(id="W2", scope_pattern="b", expires_at="2030-01-01T00:00:00"),
+        ])
+        live = waiver_set.active_at("2025-06-01T00:00:00")
+        assert len(live.waivers) == 1
+        assert live.waivers[0].id == "W2"
+
+    def test_active_at_includes_never_expires(self):
+        waiver_set = WaiverSet([
+            Waiver(id="W1", scope_pattern="a", expires_at=""),
+        ])
+        live = waiver_set.active_at("9999-12-31T00:00:00")
+        assert len(live.waivers) == 1  # empty expires_at means no expiry
+
+    def test_active_at_excludes_revoked(self):
+        waiver_set = WaiverSet([
+            Waiver(id="W1", scope_pattern="a", status="revoked"),
+        ])
+        live = waiver_set.active_at("2025-01-01T00:00:00")
+        assert len(live.waivers) == 0
+
+    def test_serialize_roundtrip(self):
+        waiver_set = WaiverSet([
+            Waiver(id="W1", scope_pattern="top/uart", bin_pattern="reset_*",
+                   rationale="Known issue", approver="eng",
+                   approved_at="2025-01-01T00:00:00",
+                   expires_at="2026-01-01T00:00:00",
+                   status="active"),
+        ])
+        blob = waiver_set.serialize()
+        restored = WaiverSet.from_bytes(blob)
+        assert len(restored.waivers) == 1
+        waiver = restored.waivers[0]
+        assert waiver.id == "W1"
+        assert waiver.scope_pattern == "top/uart"
+        assert waiver.bin_pattern == "reset_*"
+        assert waiver.rationale == "Known issue"
+        assert waiver.approver == "eng"
+        assert waiver.expires_at == "2026-01-01T00:00:00"
+
+    def test_save_and_load(self, tmp_path):
+        waiver_set = WaiverSet([Waiver(id="W1", scope_pattern="**")])
+        target = str(tmp_path / "waivers.json")
+        waiver_set.save(target)
+        reloaded = WaiverSet.load(target)
+        assert len(reloaded.waivers) == 1
+        assert reloaded.waivers[0].id == "W1"
+
+    def test_from_dict_missing_optional_fields(self):
+        payload = {"waivers": [{"id": "W1", "scope_pattern": "a"}]}
+        waiver_set = WaiverSet.from_dict(payload)
+        waiver = waiver_set.waivers[0]
+        assert waiver.bin_pattern == "*"
+        assert waiver.status == "active"
+        assert waiver.expires_at == ""