diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..f5197ba --- /dev/null +++ b/IMPLEMENTATION_PLAN.md @@ -0,0 +1,643 @@ +# Test History & Testplan Implementation Plan + +Based on `TEST_HISTORY_DESIGN.md`. + +--- + +## Scope + +This plan covers the full implementation, testing, and documentation of: + +1. **Binary history store** (Parts 1–6): efficient per-test-run records inside the NCDB ZIP +2. **Coverage-per-test fixes** (Part 4): stable `run_id`, contrib data loss bug fix +3. **Testplan embedding** (Parts 9–10): `testplan.json` ZIP member, closure computation +4. **Reports** (Parts 9.4, 11): regression summary, stage gate, delta, trend, CI export +5. **Competitive parity additions** (Part 11.3): waivers, contribution ranking, safety traceability + +--- + +## Phase 1 — Binary History Store + +### 1.1 New module: `src/ucis/ncdb/test_registry.py` + +Implements `TestRegistry` class: + +- **Struct layout** (`magic=0x54535452`, `version=1`, `next_run_id`, `num_names`, `num_seeds`, offset tables, two string heaps) +- `assign_run_id() -> int` — atomic increment of `next_run_id` +- `lookup_name_id(name: str) -> int` — binary search on sorted name heap; assign if absent +- `lookup_seed_id(seed: str) -> int` — same for seed heap +- `name_for_id(name_id: int) -> str` — O(1) offset-table access +- `seed_for_id(seed_id: int) -> str` +- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> TestRegistry` + +Invariants: +- Name heap kept sorted; insertion preserves sort order (re-builds heap on insert) +- Seeds stored as decimal string for integers, verbatim for complex strings +- `next_run_id` never decreases; survives ZIP rewrite and merge + +### 1.2 New module: `src/ucis/ncdb/test_stats.py` + +Implements `TestStatsTable` with one 64-byte `TestStatsEntry` per test (indexed by `name_id`): + +Fields per entry (matching design §3.4): +`total_runs`, `pass_count`, `fail_count`, `error_count`, `first_ts`, `last_ts`, 
+`last_green_ts`, `transition_count`, `streak` (i16), `last_status`, `_pad`, +`flake_score` (f32), `fail_rate` (f32), `mean_cpu_time` (f32), `m2_cpu_time` (f32), +`cusum_value` (f32), `cusum_ref_mean` (f32), `grade_score` (f32), `total_seeds_seen` (u16), +`_reserved[6]` + +Methods: +- `update(name_id, status, ts, cpu_time=None)` — Welford online update + CUSUM step (k=0.5, h=4.0) +- `get(name_id) -> TestStatsEntry` +- `top_flaky(n=20) -> list[TestStatsEntry]` — sort by `flake_score DESC` +- `top_failing(n=20, flake_threshold=0.1) -> list[TestStatsEntry]` +- `serialize() -> bytes` / `@classmethod deserialize(data: bytes, num_entries: int) -> TestStatsTable` + +CUSUM update rule (on each run): +``` +x = 1.0 if FAIL else 0.0 +S = max(0, S + x - (cusum_ref_mean + 0.5)) +if S > 4.0: record change-point, reset S = 0 +``` + +### 1.3 New module: `src/ucis/ncdb/history_buckets.py` + +Implements `HistoryBucket` for reading and writing `history/NNNNNN.bin` files. + +Write path: +- `BucketWriter` accumulates records in memory, sorted by `(name_id, ts)` +- `seal() -> bytes` — produce compressed bucket bytes +- Sealed once the calendar day rolls over or record count reaches 10,000 (fixed threshold) + +Read path: +- `BucketReader(data: bytes)` — decompress and parse header, name index, seed dict, columns +- `records_for_name(name_id: int) -> list[BucketRecord]` — binary search name index → O(log N) +- `all_records() -> Iterable[BucketRecord]` + +`BucketRecord` fields: `name_id`, `seed_idx` (mapped to `seed_id` via local dict), `ts`, `status`, `flags` + +`status_flags` byte layout: +- bits `[7:4]`: status (0=OK, 1=FAIL, 2=ERROR, 3=FATAL, 4=COMPILE) +- bits `[3:0]`: flags (bit0=seed_is_hash, bit1=is_rerun, bit2=has_coverage, bit3=was_squashed) + +Compression tiering: +- Current-day (mutable) bucket: `ZIP_DEFLATE, level=1` +- Sealed (past-day, immutable) buckets: `ZIP_LZMA` if `liblzma` is available, else `ZIP_DEFLATE, level=9` (automatic fallback — no error raised) +- At close: 
sealed buckets are copied verbatim (no re-decompression) — critical for write performance + +Varint encoding for `ts_deltas`: use existing `src/ucis/ncdb/varint.py` + +### 1.4 New module: `src/ucis/ncdb/bucket_index.py` + +Implements `BucketIndex` for `history/bucket_index.bin` (magic `0x42494458`). + +One 24-byte entry per bucket: +`bucket_seq (u32)`, `ts_start (u32)`, `ts_end (u32)`, `num_records (u32)`, +`fail_count (u32)`, `min_name_id (u32)`, `max_name_id (u32)` + +Methods: +- `add_bucket(seq, ts_start, ts_end, num_records, fail_count, min_name_id, max_name_id)` +- `buckets_in_range(ts_from, ts_to) -> list[BucketIndexEntry]` +- `buckets_for_name(name_id, ts_from=None, ts_to=None) -> list[BucketIndexEntry]` + — filters by `min_name_id ≤ name_id ≤ max_name_id` +- `pass_rate_series() -> list[(ts_start, pass_rate)]` — from `fail_count`/`num_records` per bucket +- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> BucketIndex` + +### 1.5 New module: `src/ucis/ncdb/contrib_index.py` + +Implements `ContribIndex` for `contrib_index.bin` (magic `0x43494458`). + +Header: `magic`, `version`, `merge_policy (u8)`, `squash_watermark (u32)`, `num_active (u32)` + +One 8-byte entry per active contrib: `run_id (u32)`, `name_id (u16)`, `status (u8)`, `flags (u8)` + +Methods: +- `add_entry(run_id, name_id, status, flags)` +- `passing_run_ids(policy=POLICY_PASS_ONLY) -> list[int]` + — applies merge policy filter (all / pass_only / pass_first_attempt / strict) +- `set_squash_watermark(run_id: int)` +- `remove_entries_up_to(run_id: int)` — after squash +- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> ContribIndex` + +Merge policy constants: +```python +POLICY_ALL = 0 +POLICY_PASS_ONLY = 1 +POLICY_EXCLUDE_ERROR_RERUN = 2 +POLICY_STRICT = 3 +``` + +### 1.6 New module: `src/ucis/ncdb/squash_log.py` + +Implements `SquashLog` for `squash_log.bin` (append-only, 28 bytes/entry). 
+ +Entry fields: `ts (u32)`, `policy (u8)`, `_pad[3]`, `from_run (u32)`, `to_run (u32)`, +`num_runs (u32)`, `pass_runs (u32)` + +Methods: +- `append(ts, policy, from_run, to_run, num_runs, pass_runs)` +- `entries() -> list[SquashLogEntry]` +- `serialize() -> bytes` / `@classmethod deserialize(data: bytes) -> SquashLog` + +### 1.7 Modifications to existing files + +#### `src/ucis/ncdb/constants.py` +- Add `NCDB_VERSION = "2.0"` (bump from `"1.0"`) +- Add new member name constants: + ```python + MEMBER_TEST_REGISTRY = "test_registry.bin" + MEMBER_TEST_STATS = "test_stats.bin" + MEMBER_BUCKET_INDEX = "history/bucket_index.bin" + MEMBER_CONTRIB_INDEX = "contrib_index.bin" + MEMBER_SQUASH_LOG = "squash_log.bin" + MEMBER_TESTPLAN = "testplan.json" + MEMBER_WAIVERS = "waivers.json" + ``` +- Add status constants: + ```python + HIST_STATUS_OK = 0 + HIST_STATUS_FAIL = 1 + HIST_STATUS_ERROR = 2 + HIST_STATUS_FATAL = 3 + HIST_STATUS_COMPILE = 4 + ``` + +#### `src/ucis/ncdb/manifest.py` +- Add `history_format: str` field — `"v1"` (JSON only) or `"v2"` (binary + JSON for MERGE nodes) +- Backward-compat: default to `"v1"` when reading old manifests without this field +- Auto-upgrade to `"v2"` the first time `add_test_run()` is called on any database; no explicit opt-in required + +#### `src/ucis/ncdb/ncdb_ucis.py` (`NcdbUCIS`) +- Add lazy-load fields and public API for binary history: + ```python + _test_registry: Optional[TestRegistry] + _test_stats: Optional[TestStatsTable] + _bucket_index: Optional[BucketIndex] + _contrib_index: Optional[ContribIndex] + _squash_log: Optional[SquashLog] + _history_v2_dirty: bool + ``` +- New public methods: + - `add_test_run(name, seed, status, ts=None, cpu_time=None, has_coverage=False, is_rerun=False) -> int` + — assigns `run_id`, updates registry, stats, current bucket, optionally adds contrib entry + - `query_test_history(name, ts_from=None, ts_to=None) -> list[BucketRecord]` + — uses registry → bucket index → targeted bucket reads + - 
`get_test_stats(name) -> Optional[TestStatsEntry]` + - `top_flaky_tests(n=20) -> list[TestStatsEntry]` + - `top_failing_tests(n=20) -> list[TestStatsEntry]` + - `squash_coverage(policy=POLICY_PASS_ONLY)` — implements §4.5 squash operation +- Backward-compat: if `manifest.history_format == "v1"`, only `history.json` is used + for TEST nodes (existing behavior unchanged) + +#### `src/ucis/ncdb/ncdb_writer.py` +- Write all new binary members when `history_format == "v2"`: + - `test_registry.bin`, `test_stats.bin`, `contrib_index.bin`, `squash_log.bin` + - `history/bucket_index.bin` + - Current-day bucket with `ZIP_DEFLATE, level=1` + - Sealed buckets: copy verbatim compressed bytes (no re-decompression) +- Write `testplan.json` if set +- Write `waivers.json` if set +- Write `manifest.json` with updated `history_format` and `NCDB_VERSION` + +#### `src/ucis/ncdb/ncdb_reader.py` +- Read `history_format` from manifest; if `"v2"`, load binary members +- Fall back to `history.json` for MERGE nodes in all versions +- Attach `_testplan` and `_waivers` attributes to returned db object if present + +#### `src/ucis/ncdb/ncdb_merger.py` — **Critical bug fix** + +Fix `_merge_same_schema()` contrib data loss: + +1. **Assign `run_id` offsets** so each source's run IDs are disjoint: + ```python + offset_B = max(run_id_in_A) + 1 + offset_C = offset_B + max(run_id_in_B) + 1 + ``` +2. **Copy and rename contrib files**: `contrib/{src_run_id}.bin` → `contrib/{src_run_id + offset}.bin` +3. **Merge `contrib_index.bin` entries** from all sources (adjust `run_id`, re-sort) +4. **Merge `test_registry.bin`** — unify name_ids across sources (remap as needed) +5. **Merge `test_stats.bin`** — sum counts, recompute derived fields +6. **Merge bucket files** — copy all sealed buckets, reconcile name_ids if registries differed +7. **Merge `bucket_index.bin`** — concatenate entries, re-sort by `ts_start` +8. **Append `squash_log.bin`** entries from all sources (no run_id adjustment needed) +9. 
**Sum `counts.bin`** arrays for the merged output (existing behavior, kept) + +Also add `_merge_testplans(sources) -> Optional[bytes]` implementing §10.8 strategy. + +--- + +## Phase 2 — Testplan Embedding + +### 2.1 New module: `src/ucis/ncdb/testplan.py` + +Implements the data model (exactly as specified in §10.2): + +- `CovergroupEntry(name, desc)` +- `Testpoint(name, stage, desc, tests, tags, na, source_template)` + - Optional `requirements: list[RequirementLink]` field for ALM traceability (§11.3.6) +- `RequirementLink(system, project, item_id, url)` +- `Testplan(format_version, source_file, import_timestamp, testpoints, covergroups)` + - Lazy indices: `_tp_by_name`, `_tp_by_test` (built once on first query) + - `getTestpoint(name)`, `testpointForTest(test_name)` (3-strategy match: exact / seed-strip / wildcard) + - `testpointsForStage(stage)`, `stages()` + - `to_dict()`, `serialize() -> bytes`, `from_dict()`, `from_bytes()`, `load(path)`, `save(path)` +- Module-level helpers: `get_testplan(db)`, `set_testplan(db, tp)` + +### 2.2 New module: `src/ucis/ncdb/testplan_closure.py` + +Closure computation (§10.9) and covergroup joining (§10.10): + +- `TPStatus` enum: `CLOSED`, `PARTIAL`, `FAILING`, `NOT_RUN`, `NA`, `UNIMPLEMENTED` +- `TestpointResult(testpoint, status, matched_tests, pass_count, fail_count)` +- `compute_closure(testplan, db, waivers=None) -> list[TestpointResult]` + - Optional `waivers` argument filters waived bins from coverage percentage +- `stage_gate_status(results, stage, testplan, require_flake_score_below=None, require_coverage_pct=None) -> dict` + - When `require_flake_score_below` is set, gate fails if covering tests have `flake_score` above threshold +- `find_covergroup_scopes(db, cg_name) -> list` +- `build_covergroup_index(db) -> dict[str, list]` + +Competitive parity additions (§11.3.1, §11.3.2): +- `compute_contribution(db) -> list[TestContribution]` + — iterates `contrib/*.bin`, computes unique bins per test; returns ranked list +- 
`compute_minimum_test_set(db, target_coverage=0.95) -> MinimumTestSet` + — greedy set-cover approximation over contrib vectors; returns included/excluded test lists + CPU savings estimate + +### 2.3 New module: `src/ucis/ncdb/testplan_hjson.py` + +OpenTitan Hjson import (§10.11): + +- `import_hjson(hjson_path, substitutions=None) -> Testplan` + — parses Hjson, expands `{key}` wildcards (cartesian product for list values), handles `tests: ["N/A"]` +- `_expand_tests(test_list, subs) -> list[str]` +- `_expand_template(template, subs) -> list[str]` + +Falls back to `json` if `hjson` package is not installed (handles JSON-subset .hjson files). +`hjson` is added as a regular (non-optional) dependency in `setup.py` and `ivpm.yaml` +(both `default` and `default-dev` dependency groups). + +### 2.4 NcdbUCIS testplan API (§10.4) + +Add to `NcdbUCIS`: +```python +_loaded_testplan: bool +_testplan: Optional[Testplan] +_testplan_dirty: bool + +def getTestplan() -> Optional[Testplan] +def setTestplan(tp: Testplan) -> None +def _ensure_testplan() -> None +``` + +### 2.5 New module: `src/ucis/ncdb/waivers.py` (§11.3.3) + +- `Waiver(id, scope_pattern, bin_pattern, rationale, approver, approved_at, expires_at, status)` +- `WaiverSet.load(path_or_bytes)`, `WaiverSet.save(path)`, `WaiverSet.matches_scope(scope_path, bin_name)` +- `NcdbUCIS.getWaivers()` / `setWaivers(ws)` analogous to testplan +- `WaiverSet.matches_scope()` performs pattern matching only; expiry enforcement is the caller's responsibility + +--- + +## Phase 3 — Reports + +All report functions live in a new module `src/ucis/ncdb/reports.py` (or split into +`testplan_reports.py` for testplan-oriented reports and `history_reports.py` for trend reports) +unless otherwise noted. + +**Output convention**: every report function returns a structured dataclass (e.g. +`ClosureSummary`, `StageGateResult`) AND provides a companion `format_*(result) -> str` +function that renders the dataclass to human-readable text. 
A `to_json()` method on each +result dataclass enables machine-readable output as a first step. CLI commands call the +formatter; tests assert against the structured data. + +### P0 Reports (essential for v1) + +| ID | Function | Inputs | Output | +|----|----------|--------|--------| +| A | `report_testpoint_closure(results)` | `list[TestpointResult]` | formatted table + stage roll-up | +| B | `report_stage_gate(results, stage, testplan)` | as above | go/no-go summary with critical path | +| C | `report_coverage_per_testpoint(results, db, testplan)` | testplan + scopes | testpoint × covergroup × pct table | +| D | `report_regression_delta(results_new, results_old)` | two closure result lists | newly-closed, newly-failing, coverage delta | + +### P1 Reports + +| ID | Function | Inputs | Output | +|----|----------|--------|--------| +| E | `report_stage_progression(db, testplan)` | merged NCDB with history | stage closure % over time (ASCII art or data) | +| F | `report_testpoint_reliability(results, db)` | closure results + test_stats | flake score per testpoint | +| G | `report_unexercised_covergroups(db, testplan)` | UCIS scopes + testplan | zero-hit covergroups list | +| I | `report_coverage_contribution(db)` | contrib/*.bin | per-test unique bin contribution table | + +### P2 Reports (future) + +| ID | Function | Inputs | Output | +|----|----------|--------|--------| +| H | `report_test_budget(testplan, db)` | test_stats CPU mean + testplan | CPU hours by stage | +| J | `report_minimum_test_set(db, target)` | contrib + target | minimum test set with savings estimate | +| K | `report_closure_forecast(db)` | history coverage series | timeline prediction with CI | +| L | `report_safety_matrix(results, waivers, path)` | traceability + waivers | CSV/text safety matrix | +| M | `report_seed_reliability(db, test_name)` | history buckets | seed range heat-map | + +### CI/CD Export (§11.3.5) + +New module: `src/ucis/ncdb/testplan_export.py` + +- 
`export_junit_xml(results, output_path)` — testpoints as JUnit `<testcase>` elements +- `export_github_annotations(results)` — writes `::error::` / `::warning::` lines to stdout +- `export_summary_markdown(results, history_db=None)` — GitHub Actions Job Summary markdown + +--- + +## Phase 4 — CLI Integration + +Add new sub-commands to the existing `pyucis` CLI (wherever it lives): + +- `pyucis history query <db> <test_name> [--days N]` +- `pyucis history stats <db> [--top-flaky N] [--top-failing N]` +- `pyucis testplan import <db> <hjson> [--subs key=val ...]` +- `pyucis testplan closure <db> [--testplan path] [--stage V2]` +- `pyucis testplan export-junit <db> [--testplan path] -o output.xml` +- `pyucis squash [--policy pass_only]` +- `pyucis merge [ ...]` + +--- + +## Testing Strategy + +### Unit Tests — Binary Formats + +**File**: `tests/unit/ncdb/test_test_registry.py` +- `test_assign_run_id_increments` — monotonic, survives roundtrip +- `test_lookup_name_id_new` — new name assigned correctly +- `test_lookup_name_id_existing` — same name returns same ID +- `test_name_heap_sorted` — binary search correctness +- `test_seed_id_roundtrip` — seed stored and retrieved verbatim +- `test_serialize_deserialize_empty` — empty registry roundtrip +- `test_serialize_deserialize_1000_names` — large registry roundtrip + +**File**: `tests/unit/ncdb/test_test_stats.py` +- `test_update_pass` — pass_count increments, last_ts updates +- `test_update_fail` — fail_count, transition_count, streak update +- `test_welford_mean` — cpu_time mean converges on known series +- `test_welford_stddev` — M2 accumulator → stddev correct +- `test_flake_score_alternating` — alternating pass/fail → score ≈ 1.0 +- `test_flake_score_stable` — all-pass → score = 0.0 +- `test_cusum_change_point` — sustained failures → CUSUM exceeds h=4.0 +- `test_grade_score_range` — [0, 1] always +- `test_serialize_deserialize` — full table roundtrip + +**File**: `tests/unit/ncdb/test_history_buckets.py` +- `test_write_read_single_record` — one record, roundtrip +- 
`test_name_index_binary_search` — lookup for specific name_id O(log N) +- `test_seed_dict_compression` — seed_idx maps to correct global seed_id +- `test_ts_delta_encoding` — varint deltas decode to correct timestamps +- `test_status_flags_pack_unpack` — nibble-packed byte round-trips all values +- `test_seal_deflate` — sealed bucket compresses, decompresses correctly +- `test_seal_lzma` — LZMA tier works +- `test_10k_records_size` — 10K records bucket ≤ design projection (~5 KB compressed) +- `test_records_for_name_not_present` — returns empty list + +**File**: `tests/unit/ncdb/test_bucket_index.py` +- `test_add_and_query_range` — date range filter +- `test_buckets_for_name` — name_id range filter +- `test_pass_rate_series` — fail_count/num_records computation +- `test_serialize_deserialize_empty` +- `test_serialize_deserialize_3650_entries` — 10-year index ≤ 90 KB + +**File**: `tests/unit/ncdb/test_contrib_index.py` +- `test_passing_run_ids_pass_only` — POLICY_PASS_ONLY filter +- `test_passing_run_ids_strict` — is_rerun + first_attempt filtering +- `test_squash_watermark_update` +- `test_remove_entries_after_squash` +- `test_serialize_deserialize` + +**File**: `tests/unit/ncdb/test_squash_log.py` +- `test_append_one_entry` +- `test_append_multiple_entries` — all entries preserved +- `test_serialize_deserialize` + +### Unit Tests — Testplan + +**File**: `tests/unit/ncdb/test_testplan.py` +- `test_testpointForTest_exact` +- `test_testpointForTest_seed_strip` — `uart_smoke_12345` → `uart_smoke` +- `test_testpointForTest_wildcard` — `foo_*` matches `foo_bar` +- `test_testpointsForStage` +- `test_stages_ordered` — V1 < V2 < V2S < V3 +- `test_serialize_deserialize_roundtrip` +- `test_load_save_standalone` — Mode B file write/read +- `test_na_testpoint` — `na=True` serializes/deserializes correctly + +**File**: `tests/unit/ncdb/test_testplan_closure.py` +- `test_compute_closure_all_closed` +- `test_compute_closure_partial` +- `test_compute_closure_not_run` — test not in 
DB +- `test_compute_closure_na` — N/A testpoint → TPStatus.NA +- `test_compute_closure_unimplemented` — empty tests list +- `test_stage_gate_pass` — all V1+V2 testpoints closed +- `test_stage_gate_fail` — one gap in V2 +- `test_stage_gate_requires_flake_score` — flake gate integration +- `test_find_covergroup_scopes` — DFS finds matching covergroup scope + +**File**: `tests/unit/ncdb/test_testplan_hjson.py` +- `test_import_simple_hjson` — basic parse +- `test_import_wildcard_expansion` — `{name}{intf}` expands to list +- `test_import_na_testpoint` — `tests: ["N/A"]` → `na=True, tests=[]` +- `test_import_fallback_no_hjson_package` — works with stdlib json for valid JSON subset + +### Unit Tests — Merger Fix + +**File**: `tests/unit/ncdb/test_merger.py` (extend existing) +- `test_merge_preserves_contrib_data` — merge two DBs with contrib/*.bin; output has both +- `test_merge_run_id_renumbering` — no run_id collisions after merge +- `test_merge_testplan_same` — both inputs have same testplan → copied to output +- `test_merge_testplan_different_source_file` — warning emitted, no testplan in output +- `test_merge_testplan_newer_timestamp_wins` + +### Integration Tests + +**File**: `tests/integration/test_history_workflow.py` +- `test_write_and_query_7_days` — write 7 days × 100 tests; query last 7 days; check record count +- `test_cold_start_load_200kb` — measure total I/O on open (registry + stats + contrib_index + bucket_index) +- `test_add_test_run_updates_stats` — `add_test_run()` → `get_test_stats()` reflects update +- `test_squash_operation` — end-to-end: write runs → squash → verify contrib files removed + squash_log entry +- `test_backward_compat_v1_db` — open existing v1 CDB; all v1 reads still work; no v2 members written unless explicitly requested +- `test_pass_only_merge_filter` — failing runs excluded from coverage after squash + +**File**: `tests/integration/test_testplan_workflow.py` +- `test_embed_testplan_and_retrieve` — write CDB with testplan; reopen; 
`getTestplan()` returns correct data +- `test_standalone_testplan_mode_b` — `Testplan.load()` + `compute_closure()` without opening a CDB +- `test_regression_delta` — two CDB snapshots → `report_regression_delta()` returns newly-closed/failing +- `test_closure_report_stage_gate` — end-to-end: import hjson → embed → compute_closure → stage_gate_status + +**File**: `tests/integration/test_ci_export.py` +- `test_export_junit_xml` — valid JUnit XML produced; testpoint names appear as test cases +- `test_export_github_annotations` — `::error::` lines produced for FAILING testpoints + +### Performance Test (manual / benchmark only, not in CI) + +**File**: `tests/integration/test_history_performance.py` (marked `@pytest.mark.slow`) +- `bench_write_1m_records` — 1M test run records written; bucket files ≤ design projections +- `bench_query_single_test_7_days` — query for one test over 7 days in < 100 ms +- `bench_top_flaky_no_bucket_io` — `top_flaky_tests()` involves zero bucket file reads + +--- + +## Documentation + +### Docstrings + +All new public functions and classes must have Google-style docstrings covering: +- One-line summary +- Args (with types) +- Returns +- Raises (if any) +- Example snippet for non-obvious usage + +### `doc/source/working-with-coverage/test-history.rst` (new file) + +Added to the `working-with-coverage/index.rst` toctree. + +Section outline: +1. **Overview** — why binary history, size comparison table +2. **ZIP Members** — table of all new members with purpose +3. **Reading and Writing History** — `add_test_run()`, `query_test_history()`, `get_test_stats()` +4. **Squash Operation** — when to squash, policy options, what changes +5. **Backward Compatibility** — v1/v2 flag, old files remain readable + +### `doc/source/working-with-coverage/testplan.rst` (new file) + +Added to the `working-with-coverage/index.rst` toctree. + +Section outline: +1. **Overview** — testplan concepts, two storage modes (A/B) +2. 
**Data Model** — `Testplan`, `Testpoint`, `CovergroupEntry` with field descriptions +3. **Embedding a Testplan (Mode A)** — `setTestplan()`, `getTestplan()`, write/read cycle +4. **Standalone Testplan (Mode B)** — `Testplan.load()`, `Testplan.save()`, when to use each mode +5. **OpenTitan Hjson Import** — `import_hjson()` with wildcard substitution examples +6. **Closure Computation** — `compute_closure()`, `stage_gate_status()`, `TPStatus` values +7. **Coverage Per Testpoint** — `build_covergroup_index()`, Report C +8. **Waiver Management** — `WaiverSet` API, `waivers.json` schema +9. **CI/CD Export** — JUnit XML, GitHub Annotations, Summary Markdown +10. **Usage Examples** — full worked example from hjson → closure → JUnit XML + +### `doc/source/reference/formats/ncdb-format.rst` (extend existing) + +Add a new section to the existing NCDB format reference covering the v2 binary history +members: `test_registry.bin`, `test_stats.bin`, `history/NNNNNN.bin`, `history/bucket_index.bin`, +`contrib_index.bin`, `squash_log.bin`. Each member gets a field table and encoding notes. +No new file needed — this is an extension of the existing format reference. 
+ +### `README.md` update + +Add a "Test History & Testplan" section (after the existing format descriptions) pointing to +the published docs and listing key capabilities: +- Binary history store for thousands of regressions +- Per-test flake score, CUSUM change-point detection +- Testplan embedding and closure computation +- Stage gate readiness, confidence-weighted closure +- CI/CD export (JUnit, GitHub Actions) + +--- + +## File Inventory + +### New files + +| File | Phase | Notes | +|------|-------|-------| +| `src/ucis/ncdb/test_registry.py` | 1 | | +| `src/ucis/ncdb/test_stats.py` | 1 | | +| `src/ucis/ncdb/history_buckets.py` | 1 | | +| `src/ucis/ncdb/bucket_index.py` | 1 | | +| `src/ucis/ncdb/contrib_index.py` | 1 | | +| `src/ucis/ncdb/squash_log.py` | 1 | | +| `src/ucis/ncdb/testplan.py` | 2 | | +| `src/ucis/ncdb/testplan_closure.py` | 2 | | +| `src/ucis/ncdb/testplan_hjson.py` | 2 | | +| `src/ucis/ncdb/waivers.py` | 2 | | +| `src/ucis/ncdb/testplan_export.py` | 3 | | +| `src/ucis/ncdb/reports.py` | 3 | | +| `tests/unit/ncdb/test_test_registry.py` | 1 | | +| `tests/unit/ncdb/test_test_stats.py` | 1 | | +| `tests/unit/ncdb/test_history_buckets.py` | 1 | | +| `tests/unit/ncdb/test_bucket_index.py` | 1 | | +| `tests/unit/ncdb/test_contrib_index.py` | 1 | | +| `tests/unit/ncdb/test_squash_log.py` | 1 | | +| `tests/unit/ncdb/test_testplan.py` | 2 | | +| `tests/unit/ncdb/test_testplan_closure.py` | 2 | | +| `tests/unit/ncdb/test_testplan_hjson.py` | 2 | | +| `tests/integration/test_history_workflow.py` | 1 | | +| `tests/integration/test_testplan_workflow.py` | 2 | | +| `tests/integration/test_ci_export.py` | 3 | | +| `doc/source/working-with-coverage/test-history.rst` | 1 | Add to `working-with-coverage/index.rst` toctree | +| `doc/source/working-with-coverage/testplan.rst` | 2 | Add to `working-with-coverage/index.rst` toctree | + +### Modified files + +| File | Phase | Change | +|------|-------|--------| +| `src/ucis/ncdb/constants.py` | 1 | New member 
constants, version bump, status constants | +| `src/ucis/ncdb/manifest.py` | 1 | `history_format` field | +| `src/ucis/ncdb/ncdb_ucis.py` | 1+2 | Binary history API, testplan API, waivers API | +| `src/ucis/ncdb/ncdb_writer.py` | 1+2 | Write new members, compression tiering | +| `src/ucis/ncdb/ncdb_reader.py` | 1+2 | Read new members, backward compat | +| `src/ucis/ncdb/ncdb_merger.py` | 1+2 | Fix contrib loss bug, merge testplan, merge stats | +| `tests/unit/ncdb/test_merger.py` | 1 | Extend with contrib + testplan merge tests | +| `doc/source/reference/formats/ncdb-format.rst` | 1 | New section for v2 binary history members (field tables) | +| `doc/source/working-with-coverage/index.rst` | 1+2 | Add `test-history` and `testplan` to toctree | +| `README.md` | 2 | New section on history + testplan features | + +--- + +## Implementation Order + +Within each phase, implement in dependency order: + +**Phase 1 sequence:** +1. `constants.py` additions +2. `varint.py` — verify existing varint sufficient (read/review) +3. `test_registry.py` + unit tests +4. `test_stats.py` + unit tests +5. `history_buckets.py` + unit tests +6. `bucket_index.py` + unit tests +7. `contrib_index.py` + unit tests +8. `squash_log.py` + unit tests +9. `manifest.py` update +10. `ncdb_ucis.py` Phase 1 additions +11. `ncdb_writer.py` Phase 1 additions +12. `ncdb_reader.py` Phase 1 additions +13. `ncdb_merger.py` bug fix + Phase 1 additions +14. Integration tests +15. `doc/source/working-with-coverage/test-history.rst` + update `ncdb-format.rst` + update `working-with-coverage/index.rst` + +**Phase 2 sequence:** +1. `testplan.py` + unit tests +2. `testplan_hjson.py` + unit tests +3. `testplan_closure.py` + unit tests +4. `waivers.py` +5. `ncdb_ucis.py` Phase 2 additions +6. `ncdb_writer.py` / `ncdb_reader.py` / `ncdb_merger.py` Phase 2 additions +7. Integration tests +8. `doc/source/working-with-coverage/testplan.rst` + update `working-with-coverage/index.rst` + +**Phase 3 sequence:** +1. 
P0 reports (A, B, C, D) +2. P1 reports (E, F, G, I) +3. `testplan_export.py` (JUnit, GitHub, markdown) +4. CLI additions +5. `README.md` update +6. P2 reports (H, J, K, L, M) — as time allows + +--- + +## Design Decisions (Resolved) + +| # | Question | Decision | +|---|----------|----------| +| 1 | NCDB_VERSION / v2 opt-in | **Auto-migrate**: calling `add_test_run()` automatically upgrades the manifest to `history_format = "v2"`. No explicit flag needed. Existing v1 databases remain fully readable. | +| 2 | Bucket seal threshold | **Fixed at 10,000 records**. Not configurable for now; revisit if real-world workloads require tuning. | +| 3 | LZMA dependency | **Graceful fallback**: attempt `ZIP_LZMA`; if `liblzma` is unavailable, silently use `ZIP_DEFLATE, level=9`. No error raised, no user action required. | +| 4 | `hjson` package | **Hard dependency**: add `hjson` to `setup.py` install_requires and to `ivpm.yaml` in both `default` and `default-dev` groups. | +| 5 | Report output format | **Both structured and text**: each report function returns a typed dataclass with a `to_json()` method; a companion `format_*()` function renders it to human-readable text. CLI calls the formatter; tests assert on the dataclass. | +| 6 | Waiver expiry enforcement | **Caller's responsibility**: `WaiverSet.matches_scope()` checks scope/bin pattern only. Callers filter on `expires_at` as needed. 
| + diff --git a/README.md b/README.md index dd3dca8..3fec05c 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,77 @@ NcdbMerger().merge(["run1.cdb", "run2.cdb"], "merged.cdb") | `strings.bin` | Deduplicated string table | | `history.json` | Test/merge history nodes | | `sources.json` | Source file references | +| `v2/test_registry.bin` | Per-test name and seed registry (v2 history) | +| `v2/test_stats.bin` | Welford pass/fail/flake statistics per test (v2 history) | +| `v2/bucket_index.bin` | Time-bucketed history index for fast date-range queries | +| `v2/history/*.bin` | Compressed per-bucket test run records | +| `testplan.json` | Embedded testplan with testpoints and stage assignments | +| `waivers.json` | Glob-pattern coverage waivers with expiry and approver | + +### V2 Binary Test History + +NCDB `v2` history stores per-test run records in time-bucketed binary files, +enabling queries over millions of runs without loading full data. Key APIs: + +```python +from ucis.ncdb.ncdb_ucis import NcdbUCIS + +db = NcdbUCIS("coverage.cdb") + +# Record a test run +db.add_test_run("uart_smoke", seed=12345, status=0)  # 0 = pass + +# Query recent history for a test +records = db.query_test_history("uart_smoke", ts_from=..., ts_to=...) 
+ +# Get aggregate stats (flake score, CPU mean, CUSUM) +stats = db.get_test_stats("uart_smoke") +print(f"flake={stats.flake_score:.3f}") + +# Top flaky and failing tests +print(db.top_flaky_tests(n=10)) +print(db.top_failing_tests(n=10)) +``` + +CLI equivalents: + +```bash +# Show recent history for a test +pyucis history query coverage.cdb uart_smoke --from 2025-01-01 + +# Show top 10 flaky tests +pyucis history stats coverage.cdb --top-flaky 10 +``` + +### Testplan Embedding + +```python +from ucis.ncdb.testplan_hjson import import_hjson +from ucis.ncdb.testplan_closure import compute_closure +from ucis.ncdb.reports import report_testpoint_closure, format_testpoint_closure + +# Import an OpenTitan-style Hjson testplan +plan = import_hjson("uart.hjson") +db.setTestplan(plan) + +# Compute closure and format a report +results = compute_closure(plan, db) +summary = report_testpoint_closure(results) +print(format_testpoint_closure(summary)) +``` + +CLI equivalents: + +```bash +# Embed a testplan +pyucis testplan import coverage.cdb uart.hjson + +# Compute closure with V2 stage gate +pyucis testplan closure coverage.cdb --stage V2 + +# Export JUnit XML for CI dashboard +pyucis testplan export-junit coverage.cdb --out closure_results.xml +``` ## Documentation diff --git a/TEST_HISTORY_DESIGN.md b/TEST_HISTORY_DESIGN.md new file mode 100644 index 0000000..c3a017f --- /dev/null +++ b/TEST_HISTORY_DESIGN.md @@ -0,0 +1,2261 @@ +# NCDB Test History Design + +## Background and Motivation + +UCIS coverage databases are typically used as snapshots — one file per regression, periodically +merged and squashed. But the underlying NCDB ZIP format can also serve as a long-term store for +test pass/fail history over thousands of runs and millions of test executions, provided the +representation is efficient enough. + +This document covers: +1. Analysis of why `history.json` is unsuitable at scale +2. 
A complete binary format design for efficient test history storage inside NCDB ZIP files +3. Metrics that can be extracted from history data +4. How coverage-per-test and merge policy interact with the design + +--- + +## Part 1: Why `history.json` Does Not Scale + +### Current Format + +Each test run is stored as a JSON object with ~23 fields in a single monolithic array in +`history.json`. Typical size: ~200–500 bytes per entry uncompressed. + +### Problems at Scale + +| Problem | Root Cause | +|---|---| +| ~200–500 bytes/entry uncompressed | 23 JSON field keys repeated every record | +| Full array parse to read anything | No structure within the ZIP entry | +| No time-based filtering | Single monolithic member | +| Full ZIP rewrite to append any data | ZIP format limitation — mitigable but not eliminated | +| No aggregate statistics | Must scan everything to find noisy tests | +| Merge discards `contrib/*.bin` | Bug in `_merge_same_schema`: only copies strings/scope/counts/history/sources | + +### Size Comparison + +| Scenario | `history.json`-in-ZIP | Proposed binary buckets | +|---|---|---| +| 1K tests × 1K runs | ~75 MB | ~5 MB | +| 1K tests × 1M runs | ~75 GB | ~5 GB | +| Read to query 1 test over 7 days | Decompress all | 7 × ~5–10 KB | + +--- + +## Part 2: New ZIP Members + +Six new kinds of ZIP entry are added alongside the existing members. Existing members (`history.json`, +`contrib/*.bin`, `counts.bin`, etc.) are retained for backward compatibility and coverage data. + +``` +test_registry.bin ← global: test name ↔ stable integer ID + seed registry +test_stats.bin ← global: per-test aggregate metrics (flake score, CUSUM, etc.)
+history/NNNNNN.bin ← one ZIP entry per bounded bucket of test run records +history/bucket_index.bin ← index: maps bucket number → date range, record count, name range +contrib_index.bin ← index: run_id → status, for efficient pass-only merge +squash_log.bin ← audit trail: each squash operation recorded permanently +``` + +`history.json` continues to store MERGE nodes (small, infrequent). TEST nodes move to the +binary bucket files. + +--- + +## Part 3: Binary Format Specifications + +### 3.1 `test_registry.bin` + +Stores each unique test base name and seed exactly once. Assigned stable integer IDs that +persist across ZIP rewrites and merges. Also holds the global `run_id` counter. + +``` +magic: u32 = 0x54535452 # 'TSTR' +version: u8 = 1 +next_run_id: u32 # monotonically increasing, never decreases +num_names: u32 +num_seeds: u32 + +# Fixed-size offset table (O(1) access by name_id): +name_string_offsets: u32[num_names] # byte offset into string heap +seed_string_offsets: u32[num_seeds] # byte offset into seed heap + +# String heaps (null-terminated UTF-8): +name_heap: bytes +seed_heap: bytes +``` + +- Names are stored sorted → binary search gives O(log N) name → name_id lookup +- Seeds with integer values are stored as their decimal string representation +- Seeds that are complex strings (e.g. tool-specific) stored verbatim +- 1000 names × ~30 bytes avg = ~30 KB total (trivially small, load once at open) + +### 3.2 `history/NNNNNN.bin` — Bounded Bucket Files + +Buckets are bounded by record count (~10K records max), not strictly by date. The bucket +sequence number is zero-padded 6 digits. This keeps individual buckets small and decompressible +independently. + +#### Layout: Columnar, Not Row-Oriented + +Records are sorted by `(name_id, ts)` within each bucket. The name_id column is eliminated +from per-record storage by using a name index (which doubles as perfect run-length encoding). 
+ +``` +Header: + magic: u32 = 0x48445942 # 'HDYB' + version: u8 = 1 + num_records: u32 + num_names: u16 # unique name_ids in this bucket + ts_base: u32 # unix timestamp of the first record + +Name index (sorted by name_id — enables O(log N) lookup, eliminates name_id column): + entries[num_names]: + name_id: u32 + start_row: u32 # first record index for this name + count: u16 # number of records for this name + +Seed dictionary (local to this bucket, enables 1-byte seed references): + num_seeds: u16 + seed_ids: u32[num_seeds] # global seed_id from test_registry.bin + +Columns (independent arrays, each compresses optimally under DEFLATE/LZMA): + seeds[]: u8[num_records] # index into seed dictionary (1 byte vs 4) + ts_deltas[]: varint[num_records] # seconds since ts_base, delta per name group + status_flags[]: u8[num_records] # nibble-packed: high nibble=status, low nibble=flags +``` + +The `status_flags` byte packs two fields: +``` + bits [7:4] status: 0=OK 1=FAIL 2=ERROR 3=FATAL 4=COMPILE (3 bits used) + bits [3:0] flags: bit0=seed_is_hash bit1=is_rerun + bit2=has_coverage bit3=was_squashed +``` + +CPU time is intentionally omitted from per-record storage — it is maintained as mean and +variance in `test_stats.bin` via Welford's online algorithm, which is sufficient for all +metrics described in Part 5. + +#### Per-Record Cost (Revised) + +| Field | Old row design | New columnar design | +|---|---|---| +| name_id | 4 bytes | **0** (implicit from name index) | +| seed | 4 bytes | **1** (local dict index, u8) | +| timestamp | 4 bytes | **~1.5** (varint delta) | +| status + flags | 2 bytes | **1** (nibble-packed) | +| cpu_time (f16) | 2 bytes | **0** (moved to test_stats.bin) | +| padding | 1 byte | **0** | +| **Total** | **~16 bytes** | **~3.5 bytes avg** | + +Before DEFLATE. Columnar layout with homogeneous columns achieves 5–8× DEFLATE compression +on typical regression data (compared to ~3× for interleaved row layout). 
Effective storage: +~0.5–0.7 bytes per test run record. + +#### Compression Tiers + +- **Current day's bucket** (may be rewritten): `ZIP_DEFLATE, compresslevel=1` — fast write +- **Sealed buckets** (day has passed, immutable): `ZIP_LZMA` or `ZIP_DEFLATE, compresslevel=9` +- **`test_stats.bin`, `test_registry.bin`** (read on every open): `ZIP_DEFLATE, compresslevel=1` + +### 3.3 `history/bucket_index.bin` + +Maps bucket sequence numbers to date ranges and provides aggregate counts for fast +regression-trend queries without opening individual bucket files. + +``` +magic: u32 = 0x42494458 # 'BIDX' +version: u8 +num_buckets: u32 + +entries[num_buckets]: # sorted by bucket_seq + bucket_seq: u32 # matches NNNNNN in filename + ts_start: u32 # unix timestamp of first record + ts_end: u32 # unix timestamp of last record + num_records: u32 + fail_count: u32 # enables pass-rate-over-time without opening bucket + min_name_id: u32 # range bounds for fast skip + max_name_id: u32 +``` + +28 bytes/entry (seven u32 fields). For 3650 days (10 years) at ~10K records/bucket: +- ~3650 buckets × 28 bytes = **~100 KB** for the complete 10-year index +- The `fail_count` field enables regression pass-rate trend plots from the index alone + +### 3.4 `test_stats.bin` + +One fixed-size 64-byte record per unique test, indexed by `name_id`. Load entire file at open +time (1000 tests × 64 bytes = 64 KB). Enables all aggregate queries without touching buckets. + +All fields maintained incrementally — O(1) update per new test run.
+ +``` +magic: u32 = 0x54535441 # 'TSTA' +version: u8 +num_tests: u32 + +entries[num_tests]: # indexed by name_id (O(1) access) + total_runs: u32 + pass_count: u32 + fail_count: u32 + error_count: u32 + first_ts: u32 # unix timestamp of first ever run + last_ts: u32 # unix timestamp of most recent run + last_green_ts: u32 # unix timestamp of last passing run + transition_count: u32 # consecutive status changes (for flake_score) + streak: i16 # current streak: positive=passes, negative=fails + last_status: u8 # status of most recent run + _pad: u8 + flake_score: f32 # transition_count / max(total_runs-1, 1) ∈ [0,1] + fail_rate: f32 # fail_count / total_runs ∈ [0,1] + mean_cpu_time: f32 # Welford online mean (seconds) + m2_cpu_time: f32 # Welford M2 accumulator → stddev = sqrt(M2/N) + cusum_value: f32 # running CUSUM statistic for change detection + cusum_ref_mean: f32 # μ₀ used for CUSUM (set at baseline period) + grade_score: f32 # composite effectiveness score [0,1] + total_seeds_seen: u16 # unique seeds ever run for this test + _reserved: u8[6] +``` + +Key derived values: +- `stddev_cpu_time = sqrt(m2_cpu_time / total_runs)` — no bucket scan needed +- `days_since_last_pass = (now - last_green_ts) / 86400` +- `streak < -5` → definitively broken (not just flaky) +- `abs(streak) < 3 AND flake_score > 0.3` → likely flaky + +### 3.5 `contrib_index.bin` — Pass-Only Merge Support + +This is the pivotal addition for coverage-per-test efficiency. Every test run that produced +coverage data has an entry here. Status is cached so pass-only merge decisions require no +bucket scanning. 
+ +``` +magic: u32 = 0x43494458 # 'CIDX' +version: u8 +merge_policy: u8 # 0=all_tests 1=pass_only 2=exclude_error_and_rerun +squash_watermark: u32 # highest run_id already baked into counts.bin +num_active: u32 # contrib files present (not yet squashed) + +entries[num_active]: # sorted by run_id + run_id: u32 + name_id: u16 # cached for display without hitting bucket + status: u8 # cached — avoids opening bucket for merge decision + flags: u8 # bit0=is_rerun bit1=first_attempt_passed +``` + +8 bytes/entry. Pass-only merge: +```python +passing = [e.run_id for e in contrib_index.entries if e.status == OK] +counts = sum(load_contrib(f"contrib/{run_id}.bin") for run_id in passing) +``` + +### 3.6 `squash_log.bin` — Coverage Provenance Audit Trail + +Append-only log. Survives squash operations permanently. Answers "was my counts.bin built from +passing tests only?" even years after the fact. + +``` +magic: u32 +version: u8 +num_squashes: u32 + +entries[num_squashes]: + ts: u32 # unix timestamp of squash + policy: u8 # 0=all 1=pass_only 2=exclude_error_and_rerun + _pad: u8[3] + from_run: u32 # first run_id included in squash + to_run: u32 # new squash_watermark after this operation + num_runs: u32 # total runs included + pass_runs: u32 # passing runs included in counts.bin contribution +``` + +24 bytes/squash event. + +--- + +## Part 4: Coverage-Per-Test Interaction + +### 4.1 Stable `run_id` Replaces Positional `history_idx` + +**Current bug**: `contrib/{history_idx}.bin` uses position in `history.json` as key. After a +merge of two sources (each with `contrib/0.bin`, `contrib/1.bin`, ...), filenames collide and +the merger silently drops all contrib data. + +**Fix**: each test run is assigned a globally unique `run_id` (u32) from the counter in +`test_registry.bin` at write time. Contrib files become `contrib/{run_id}.bin`. The run_id is +stable across ZIP rewrites, merges, and squash operations.
+ +### 4.2 Coverage Watermark Model + +At any point in time, total coverage is: + +``` +total_coverage = counts.bin (squashed base) + + Σ contrib/{run_id}.bin (active delta) + for run_id in contrib_index.entries + where merge_policy_filter(entry.status, entry.flags) +``` + +`squash_watermark` in `contrib_index.bin` defines the boundary. Run IDs ≤ watermark are baked +into `counts.bin`; run IDs > watermark have their contrib files present. + +### 4.3 Merge Policy Options + +The `flags` byte in `contrib_index.bin` entries enables four distinct merge policies without +re-scanning bucket files: + +| Policy | Filter | +|---|---| +| All tests | no filter | +| Pass only (any attempt) | `status == OK` | +| Pass on first attempt only | `status == OK AND NOT is_rerun` | +| Strict (exclude flaky contributions) | `status == OK AND NOT (is_rerun AND first_attempt_passed)` | + +The last policy ("strict") excludes coverage from tests that only pass on retry — coverage +that cannot be reliably reproduced and may indicate environmental flakiness rather than real +design behavior. + +### 4.4 Fixed Same-Schema Fast Merge Path + +`NcdbMerger._merge_same_schema()` currently discards all `contrib/*.bin` data. It must be +updated to: + +1. Assign cumulative run_id offsets to each source (source A keeps offset 0): + - `offset_B = max(run_id in source_A) + 1` + - `offset_C = offset_B + max(run_id in source_B) + 1`, etc. + +2. Copy and rename contrib files: `contrib/{source_run_id}.bin` → `contrib/{source_run_id + offset}.bin` + +3. Merge `contrib_index.bin` entries from all sources (adjust run_ids by offset, re-sort) + +4. Append `squash_log.bin` entries from all sources (no run_id adjustment needed) + +5. Sum counts arrays for the merged `counts.bin` + +This changes the fast path from O(bins) to O(bins + total_contrib_data). For large merges, +squash sources first (bake their contribs into counts.bin) before merging — which is the +correct operational model for a coverage closure flow anyway.
+ +### 4.5 Squash Operation + +When squashing coverage: + +1. Read `contrib_index.bin` for active entries (run_ids > squash_watermark) +2. Apply merge policy filter +3. Sum selected `contrib/{run_id}.bin` files into `counts.bin` +4. Delete the contrib files for squashed run_ids +5. Update `squash_watermark` in `contrib_index.bin` +6. Remove squashed entries from `contrib_index.bin` +7. Append a record to `squash_log.bin` +8. Mark `was_squashed=1` in the corresponding bucket record flags + +Test history bucket records are **never modified** during squash (only the `was_squashed` flag +is set). Bucket files themselves are immutable once sealed. + +--- + +## Part 5: Metrics Extractable from History Data + +### 5.1 Instantaneous Metrics (from `test_stats.bin` only — no bucket scan) + +All O(1) or O(N_tests) with a single file read: + +- **Flake score**: `transition_count / max(total_runs-1, 1)` ∈ [0,1] + - 0.0 = completely stable; 1.0 = alternates every single run + - Distinguishes noisy from broken (a broken test has `flake_score ≈ 0` despite `fail_rate ≈ 1`) +- **Fail rate**: `fail_count / total_runs` +- **Current streak**: `streak` field — negative = consecutive failures, positive = consecutive passes +- **Days since last pass**: `(now - last_green_ts) / 86400` +- **CPU time mean and stddev**: from Welford fields (no raw data needed) +- **Silent death**: `last_ts` is stale despite test being in the suite +- **Test re-introduction**: `first_ts` is recent for a known-old test name +- **Top N flakiest tests**: sort by `flake_score DESC` — no bucket scan +- **Top N consistently failing tests**: filter `fail_rate > threshold AND flake_score < 0.1` +- **Composite test grade**: `(1 - fail_rate) × (1 - flake_score) × (1 / mean_cpu_time_normalized)` +- **CPU time regression**: `mean_cpu_time` trending up week-over-week (compare saved baselines) + +### 5.2 Trend Metrics (from `bucket_index.bin` only — no bucket decompression) + +From the 24-byte per-bucket index entries: + +- 
**Regression pass rate over time**: `(num_records - fail_count) / num_records` per bucket +- **Run volume per day**: `num_records` per bucket → detect farm capacity changes +- **Failure spike detection**: buckets where `fail_count / num_records > threshold` + +### 5.3 Historical Detail Metrics (from bucket files — targeted reads) + +For a specific test X in a date range: +1. Get `name_id` from `test_registry.bin` +2. Use `bucket_index.bin` to find buckets where `min_name_id ≤ name_id ≤ max_name_id` +3. For each candidate bucket, binary-search the name index → O(log N_unique_tests) +4. Extract only the records for that test + +Metrics enabled: +- **Pass/fail history timeline**: full status over time for one test +- **Fail streak history**: detect multiple distinct failure episodes +- **Seed-correlated failures**: group by `seed_id`, compute `fail_count / total` per seed + - Seeds with 100% failure rate = deterministic RTL bug masquerading as random failure +- **Seed diversity**: entropy over seed→status distribution; low entropy = poor randomization +- **Rerun effectiveness**: `P(pass | is_rerun AND prior_status == FAIL)` — infrastructure flakiness signal + +### 5.4 Cross-Test Pattern Metrics (from bucket files — multi-test scan) + +Reading a single bucket (one day or one regression): + +- **Killer seeds**: seeds where `count(failing_tests) > threshold` in one bucket + - `GROUP BY seed_id → set of failing name_ids → find recurring clusters` + - Indicates a systemic RTL issue (deadlock, resource contention at a specific init value) +- **Failure co-occurrence**: `P(test_B fails | test_A fails in same bucket)` + - High co-occurrence → tests hit same RTL block → redundancy or common bug +- **Cascade detection**: temporal causality — does failing test A precede failing test B? 
+- **Redundant test candidates**: pairs with `correlation(status_A, status_B) > 0.95` + - Both always pass and fail together; one adds no value + +### 5.5 CUSUM Change-Point Detection + +The `cusum_value` and `cusum_ref_mean` in `test_stats.bin` implement an incremental CUSUM +(Cumulative Sum) control chart for detecting when a test's pass/fail behavior changed. This +is the algorithm used by Atlassian's "Flakinator" and Google's flaky-test detection systems. + +Update rule on each new run (O(1)): +```python +k = 0.5 # allowance parameter +h = 4.0 # decision threshold (tune to desired sensitivity) +x = 1.0 if status == FAIL else 0.0 +S = max(0, S + x - (cusum_ref_mean + k)) +if S > h: + # change point detected — record timestamp, reset S + S = 0.0 +``` + +When a change point is detected, the timestamp is recorded so you can correlate with RTL +commits: "test X started failing consistently on 2026-03-01." + +### 5.6 EDA-Specific: Seed Analytics + +Unique to hardware verification — no software CI tool provides this: + +- **Valuable seed ranking**: seeds that historically expose the most failures first + - Re-run high-value seeds more frequently; Springer "Seed Selector" paper shows 42%+ speedup +- **Seed fatigue**: `fail_count_per_seed` approaching zero with recency weighting + - Seeds that never fail anymore are candidates for replacement +- **Seed coverage diversity**: entropy of the seed→status distribution per test + - Low entropy = seeds are not actually exploring different design states + +--- + +## Part 6: Read/Write Strategy Summary + +### Opening the Database (cold start) + +1. Read `test_registry.bin` (~30 KB) → in-memory name↔id dict +2. Read `test_stats.bin` (~64 KB for 1000 tests) → all aggregate metrics immediately available +3. Read `contrib_index.bin` → pass-only merge table available +4. Read `history/bucket_index.bin` (~90 KB for 10 years) → full time index available + +Total cold-start I/O: ~200 KB. All aggregate queries answerable immediately. 
+ +### Writing a New Test Run + +1. Assign `run_id` (increment counter in `test_registry.bin`) +2. Look up or assign `name_id` and `seed_id` +3. Append record to current day's bucket (in memory; written at close) +4. Update `test_stats.bin` entry: O(1) Welford + CUSUM update +5. If coverage: add entry to `contrib_index.bin` + +### Writing at Close (full ZIP rewrite) + +1. Copy all sealed bucket files verbatim (read compressed bytes, write without re-compression) +2. Write current day's bucket (new or updated) +3. Write updated `bucket_index.bin`, `test_registry.bin`, `test_stats.bin`, `contrib_index.bin` +4. Write `squash_log.bin` (unchanged if no squash happened) +5. Write all existing `contrib/*.bin` files (only active ones — not squashed) + +### Query: "All runs of test X, last 7 days" + +1. Get `name_id` for X (from in-memory registry) +2. Scan `bucket_index.bin` for buckets where `ts_end ≥ 7_days_ago AND min_name_id ≤ name_id ≤ max_name_id` (using `ts_end` keeps buckets that straddle the window start) +3. For each candidate bucket (~7): decompress, binary-search name index, extract records +4. Total I/O: ~7 × 5–10 KB = **35–70 KB** regardless of total history size + +### Query: "Top 20 flakiest tests" + +1. Scan `test_stats.bin` (already loaded) +2. Sort by `flake_score DESC`, take top 20 +3. Map `name_id → name` via registry +4.
**Zero bucket I/O** + +--- + +## Part 7: Files to Create/Modify for Implementation + +### New Files + +| File | Purpose | +|---|---| +| `src/ucis/ncdb/test_registry.py` | Serialize/deserialize `test_registry.bin`; assign run_ids, name_ids, seed_ids | +| `src/ucis/ncdb/test_stats.py` | Serialize/deserialize `test_stats.bin`; Welford + CUSUM incremental update | +| `src/ucis/ncdb/history_buckets.py` | Write/read columnar bucket files; name index; seed dict | +| `src/ucis/ncdb/bucket_index.py` | Write/read `history/bucket_index.bin` | +| `src/ucis/ncdb/contrib_index.py` | Write/read `contrib_index.bin`; pass-only filter enumeration | +| `src/ucis/ncdb/squash_log.py` | Write/read `squash_log.bin`; squash operation implementation | + +### Modified Files + +| File | Change | +|---|---| +| `src/ucis/ncdb/constants.py` | Add new member name constants; bump `NCDB_VERSION` to `"2.0"` | +| `src/ucis/ncdb/ncdb_writer.py` | Write new members; compression tiering; sealed-bucket copy optimization | +| `src/ucis/ncdb/ncdb_reader.py` | Read new members; fall back to `history.json` for MERGE nodes | +| `src/ucis/ncdb/ncdb_merger.py` | Fix contrib data loss bug; run_id renumbering; contrib_index merge | +| `src/ucis/ncdb/manifest.py` | Add `history_format` field to distinguish v1 (JSON) from v2 (binary) | + +### Backward Compatibility + +- `manifest.json` gains a `history_format` field: `"v1"` (JSON only) or `"v2"` (binary + JSON for MERGE nodes) +- Reader checks `history_format` and falls back to `history.json` for old files +- Old files without binary bucket members are fully readable; new features simply unavailable +- `history.json` continues to be written for MERGE nodes in all versions + +--- + +## Part 8: Size Projections + +At 1K tests × 10 runs/test/day = 10K records/day: + +| Component | Size/day (raw) | Compressed (LZMA) | 10-year total | +|---|---|---|---| +| Bucket files | ~35 KB | ~5 KB | ~18 MB | +| bucket_index.bin | 24 bytes/bucket | — | ~90 KB | +| test_stats.bin | 
64 KB (static) | ~20 KB | 20 KB | +| test_registry.bin | ~30 KB (static) | ~10 KB | 10 KB | +| contrib_index.bin | ~8 bytes/run | — | ~3 MB (for 400K active) | +| squash_log.bin | 28 bytes/squash | — | ~100 KB (1K squashes) | +| **Total** | | | **~21 MB** | + +Compare to `history.json`: **~75 GB** for the same data. Approximately **3500× more +space-efficient**. + +--- + +## Part 9: Testplan Integration — Mapping Issues and End-of-Regression Reports + +### 9.1 OpenTitan Testplan Format Summary + +The OpenTitan `testplanner` tool uses Hjson files with two top-level collections: + +- **`testpoints`**: each has `name`, `stage` (V1/V2/V2S/V3), `desc`, `tests` (list of written + test names), and optional `tags`. +- **`covergroups`**: each has `name` and `desc`, declaring the functional coverage groups + expected to be exercised. + +Testplans support `import_testplans` for shared plans with wildcard substitution (e.g. +`{name}_csr_hw_reset` expands per DUT). Setting `tests: ["N/A"]` marks a testpoint as +intentionally not mapped to simulation results. + +A more detailed analysis is in `TESTPLAN_ANALYSIS_REPORT.md`. + +--- + +### 9.2 Mapping Issues: OpenTitan Testplan Format → UCIS + +#### Issue 1: No Native Testplan/Testpoint Hierarchy in UCIS + +UCIS defines exactly two history node types (`UCIS_HISTORYNODE_TEST` and +`UCIS_HISTORYNODE_MERGE`). There is no `UCIS_HISTORYNODE_TESTPLAN` or testpoint scope type. +The UCIS LRM glossary references a "verification plan hierarchy" but this concept is not +realized in the standard API — it amounts to using UCIS tags to link coverage scopes back to +an external plan. The testplan must therefore be stored **outside** UCIS (as an Hjson/JSON +sidecar or in a dedicated ZIP member) and joined to UCIS data at query time. + +#### Issue 2: Verification Stage Has No UCIS Equivalent + +OpenTitan's `stage` field (`V1`/`V2`/`V2S`/`V3`) encodes the verification lifecycle milestone +a testpoint targets. UCIS has no such concept. 
`UCIS_INT_TEST_COMPULSORY` is the closest +analog — a boolean "must run" flag on individual test records — but it does not convey staged +milestone semantics and applies to tests, not testpoints. + +Stage data must be stored in the testplan database (ZIP member or sidecar) and treated as an +external grouping key when producing reports. + +#### Issue 3: Test-Name Matching is Implicit and Fragile + +The binding between a testplan `tests` list entry (e.g. `"uart_smoke"`) and a UCIS history +node is by string match: the testplan test name must equal the logical name of the +`UCIS_HISTORYNODE_TEST` node (`UCIS_STR_TEST_NAME`). This convention is not enforced by UCIS. + +Failure modes: +- The UCIS test name includes a seed suffix (`uart_smoke_12345`) while the testplan uses the + bare name. +- Tool-specific prefixes or path components are added to the UCIS logical name. +- After squash, individual test history nodes may be absent from a merged database; only + aggregate coverage remains. + +**Recommended approach**: normalize test names by stripping known suffixes (seed, run index, +timestamp) at UCIS write time, or store the canonical testplan name as a user-defined +attribute `testplan:name` on the history node. + +#### Issue 4: M:N Testpoint-to-Test Mapping + +A single testpoint can map to multiple written tests; conversely, one written test can satisfy +multiple testpoints (the testplanner tool does not enforce 1:1 mapping). UCIS history nodes +are flat — there is no grouping structure to express "this test runs for this testpoint." + +This mapping must be maintained in the testplan database and resolved at report time, not +inside UCIS. + +#### Issue 5: Tag Semantics Mismatch + +UCIS tags are plain strings with no associated value, intended as a grouping construct (e.g. +linking coverage scopes to plan items via a shared tag name). 
OpenTitan testplan tags carry +richer meaning: platform (`verilator`, `fpga_cw310`), mode (`gls`, `pa`, `rom`), or lifecycle +(`vector`). These are filter dimensions used to select which testpoints appear in a run. + +UCIS tags cannot represent this; they would need to be stored as user-defined attributes on +the history nodes (`testplan:tag:gls = true`) or kept entirely in the testplan sidecar. + +#### Issue 6: `tests: ["N/A"]` Has No UCIS Equivalent + +A testpoint with `tests: ["N/A"]` is defined in the plan but intentionally has no simulation +coverage. UCIS has no concept of a "planned but unverifiable" entry. The testplan layer must +track `N/A` testpoints and exclude them from closure calculations, not from UCIS. + +#### Issue 7: Covergroup Name Correlation Relies on Naming Convention + +Testplan `covergroups` entries list the functional coverage groups expected to be exercised +by testpoints. In UCIS, covergroups are scope nodes in the design hierarchy. Matching a +testplan covergroup name (`timer_cg`) to a UCIS scope requires an agreed naming convention +(the UCIS scope name equals the SV covergroup name suffixed with `_cg`). No cross-file +uniqueness guarantee exists when designs are large or when multiple DUTs share covergroup +names. + +#### Issue 8: Wildcard Expansion is Ephemeral + +Imported testplans use substitution wildcards (e.g. `{name}{intf}_csr_hw_reset`). After +parsing, expanded test names exist only in memory; the unexpanded template is what's stored +in the Hjson file. UCIS has no awareness of this expansion. Any database that stores testplan +data must store the **post-expansion** test name list alongside the source template, so that +a query tool can reconstruct both the human-readable template and the runnable test names. 
+ +#### Issue 9: Merged Database Loses Individual Test Records + +After a UCIS merge + squash, individual `UCIS_HISTORYNODE_TEST` nodes for squashed tests may +be absent from the merged database — only aggregate `counts.bin` coverage survives. This +means testpoint-level pass/fail status cannot be reconstructed from the merged UCIS database +alone. This is precisely why the binary history store (Parts 2–6 of this document) is +needed: it preserves per-test status even after coverage squash. + +--- + +### 9.3 Testplan Storage Modes + +Two storage modes are supported. They use the same JSON schema and the same +`Testplan` data model; the choice affects only where the file lives. + +#### Mode A — Embedded (testplan stored inside the NCDB ZIP) + +Add a `testplan.json` member to the NCDB ZIP file: + +``` +testplan.json ← testplan snapshot stored with the database at import time +``` + +Best when the testplan is stable for the duration of the regression and should +travel with the coverage database (the most common case). + +#### Mode B — Standalone (testplan kept as a separate file) + +The testplan is maintained as a standalone `testplan.json` file on disk (or in a +source-control tree) and is **not** embedded in the NCDB. At analysis time the +user points tooling at both files: + +``` +uart_testplan.json ← standalone testplan snapshot +regression.cdb ← NCDB with no embedded testplan +``` + +This mode is preferred when: +- The testplan file is version-controlled separately from the regression database + (e.g. testplan lives in the RTL repo; CDB is produced by CI and stored in + artifact storage). +- You want to perform ad-hoc cross-analysis between an existing NCDB and a + testplan that was never embedded (e.g. retro-fitting plan coverage onto legacy + databases). +- Different testplan revisions must be compared against the same NCDB without + re-generating the database. 
+ +`compute_closure()`, `stage_gate_status()`, and all report generators accept a +`Testplan` object regardless of whether it was loaded from an embedded ZIP member +or a standalone file — the API is identical in both modes. + +--- + +Contents of `testplan.json` (stored post-expansion, with all imports resolved): + +```json +{ + "format_version": 1, + "source_file": "hw/ip/uart/data/uart_testplan.hjson", + "import_timestamp": "2026-03-05T19:00:00Z", + "testpoints": [ + { + "name": "smoke", + "stage": "V1", + "desc": "Basic smoke test ...", + "tests": ["uart_smoke"], + "tags": [], + "na": false + }, + { + "name": "csr", + "stage": "V1", + "desc": "CSR tests ...", + "tests": ["uart_csr_hw_reset", "uart_jtag_csr_hw_reset"], + "tags": ["csr"], + "na": false, + "source_template": "{name}{intf}_csr_hw_reset" + } + ], + "covergroups": [ + { "name": "timer_cg", "desc": "Cover timer inputs ..." } + ] +} +``` + +This snapshot approach means: +- The testplan in force when data was collected is always available alongside the data. +- Plan evolution (adding/removing testpoints, stage changes) is tracked naturally as part of + the database's commit history. +- The post-expansion `tests` list is the authoritative source for UCIS test-name matching. + +In Mode B (standalone), the same JSON schema is used; the file simply lives outside the ZIP. +Use `Testplan.load(path)` to read it and pass the resulting object directly to +`compute_closure()` alongside any NCDB opened with `NcdbUCIS`. 
+ +--- + +### 9.4 End-of-Regression Reports + +Given the combination of: +- **Testplan** (`testplan.json`): testpoints, stages, test lists, covergroups +- **Latest regression UCIS** (current NCDB): per-test pass/fail, per-covergroup coverage +- **Historical UCIS** (merged NCDB with binary history): trends, flake scores, stage + progression over time + +the following report types offer high value: + +#### Report A: Testpoint Closure Summary (per-regression) + +For each testpoint, derive a status from the union of its mapped tests' UCIS results: + +| Status | Condition | +|---|---| +| `CLOSED` | All mapped tests passed in this regression | +| `PARTIAL` | At least one mapped test passed, at least one failed | +| `FAILING` | All mapped tests ran and failed | +| `NOT RUN` | No mapped test appears in this regression's history nodes | +| `N/A` | Testpoint is marked `na: true` | +| `UNIMPLEMENTED` | `tests` list is empty | + +Roll up by `stage` to show: + +``` +Stage V1: 12/12 closed (100%) +Stage V2: 17/24 closed (71%) ← 5 FAILING, 2 NOT RUN +Stage V2S: 3/6 closed (50%) +Stage V3: 0/4 closed (0%) ← all UNIMPLEMENTED +``` + +#### Report B: Stage Gate Readiness + +Gate condition: all testpoints at stage S and all stages below S must be CLOSED before a +milestone sign-off. Report signals go/no-go per stage: + +``` +V1 GATE: ✅ PASS (12/12 closed) +V2 GATE: ❌ FAIL — 7 gaps remaining +V2S GATE: ❌ FAIL — requires V2 first +V3 GATE: ❌ FAIL — requires V2S first +``` + +Optionally, show the "critical path" — the failing testpoints that block the earliest gate. 
+ +#### Report C: Coverage Closure per Testpoint + +For testpoints that declare associated covergroups, report UCIS coverage percentages: + +| Testpoint | Stage | Coverage Groups | Coverage % | Status | +|---|---|---|---|---| +| `smoke` | V1 | `timer_cg` | 100% | CLOSED | +| `modes` | V2 | `modes_cg`, `key_cg` | 73% / 45% | GAP | +| `errors` | V2 | `err_cg` | 0% | NOT STARTED | + +This links the test-pass view (Report A) with coverage closure: a test can pass without +achieving functional coverage goals if the covergroup is under-constrained. + +#### Report D: Regression Delta (Latest vs Previous) + +Compare testpoint status between the current regression and the immediately prior regression +(or any named baseline stored in the history buckets): + +- **Newly CLOSED**: testpoints that passed this run but failed last run → progress +- **Newly FAILING**: testpoints that passed before but fail now → regressions requiring triage +- **Coverage delta**: covergroups that crossed a goal threshold (e.g. 90% → 100%) or regressed + +This is the primary report for the engineer reviewing a nightly regression: focus on what +changed, not the static state. 
+ +#### Report E: Historical Stage Progression + +Using the merged historical NCDB and the versioned testplan: + +- Plot testpoint closure rate over time for each stage (V1, V2, V2S, V3) +- Mark the date when each stage gate was first fully closed +- Identify periods of regression (closure rate dropped) + +``` +V1 Closure over time: +100% ┤ ╭──────────────── V1 milestone (2026-01-15) + 80% ┤ ╭──────────╯ + 60% ┤ ╭──────────╯ + 40% ┤╭────────╯ + 0% ┼──────────────────────────────────────────────────────► + Jan 1 Jan 8 Jan 15 Jan 22 +``` + +#### Report F: Testpoint Reliability (History-Augmented) + +For testpoints whose tests have poor historical reliability (high `flake_score` or low +`grade_score` from `test_stats.bin`), flag that closure claims are less trustworthy: + +| Testpoint | Stage | Tests | Flake Score | Closure Confidence | +|---|---|---|---|---| +| `smoke` | V1 | `uart_smoke` | 0.02 | HIGH | +| `timeout` | V2 | `uart_timeout` | 0.41 | LOW ⚠️ | + +A testpoint is considered "confidently closed" only when its tests consistently pass across +multiple seeds and runs (low flake, long green streak). A single passing run with +`flake_score > 0.3` should not be counted as closure. + +#### Report G: Unexercised Coverage Groups + +From the testplan `covergroups` list, identify: + +- Covergroups with zero UCIS hits in the latest regression: unreached design states +- Covergroups with hits below the SV `at_least` goal: partially covered +- Covergroups not present in the UCIS database at all: testbench may not yet implement them + +#### Report H: Test Budget by Stage + +From CPU time data in `test_stats.bin` (mean CPU time × runs): + +| Stage | Testpoints | Tests | Est. 
CPU Hours | % of Total Budget | +|---|---|---|---|---| +| V1 | 12 | 8 | 14 h | 12% | +| V2 | 24 | 31 | 87 h | 76% | +| V2S | 6 | 9 | 14 h | 12% | +| V3 | 4 | 0 | — | — | + +This identifies which verification stages dominate simulation cost and allows informed +decisions about regression time allocation or test pruning. + +--- + +### 9.5 Implementation Notes + +- **Testplan data is stored in the NCDB ZIP** as `testplan.json`. It is read-only after + import; a new import (with updated testplan) creates a new snapshot. The previous snapshot + is retained for delta comparison. + +- **The UCIS join key** is `test_name` (testplan `tests` list entry) ↔ UCIS logical history + node name (`UCIS_STR_TEST_NAME`). If exact matching fails, a fallback that strips a trailing + seed suffix (`_\d+$`) is applied, followed by `_*` wildcard prefix matching. A testpoint with + no matching test is reported as `NOT RUN`. + +- **Stage data is not in UCIS**: stage-gated reports are produced by the report layer joining + `testplan.json` data with UCIS query results. No UCIS schema changes are required. + +- **Covergroup matching** uses the testplan covergroup `name` field matched against UCIS + scope `name` within the DUT instance hierarchy. Ambiguous matches (same name in multiple + instances) are resolved by DUT-level scope path if available. + +- **Historical data sourcing**: Report E (Stage Progression) requires the merged NCDB with + bucket history. The report framework should detect whether the binary history store (Part 2) + is present and fall back to `history.json` for databases that have not been upgraded. + +- **Standalone testplan mode**: `compute_closure()` and all report generators accept a + `Testplan` loaded from a standalone file (`Testplan.load(path)`) in exactly the same way + they accept one retrieved from an embedded ZIP member (`db.getTestplan()`). No API + difference exists between the two modes. 
This enables cross-analysis workflows such as + applying a new testplan revision against an already-built NCDB, or retroactively mapping + a testplan against legacy databases that pre-date testplan embedding. + +--- + +## Part 10: PyUCIS-Native Testplan Embedding in NCDB + +### 10.1 Design Principles + +The embedding follows the established NCDB member pattern: a single new ZIP member +(`testplan.json`) serialized by `Testplan.serialize()` / `Testplan.from_bytes()`, lazily loaded +through `NcdbUCIS`, and written through `NcdbWriter`. No changes to the UCIS standard +interface (`ucis.py`) are needed — the entire testplan API is an NCDB extension. + +Two usage modes are explicitly supported: + +**Mode A — Embedded**: the testplan is stored as `testplan.json` inside the NCDB ZIP and +retrieved via `db.getTestplan()`. The plan travels with the database. + +**Mode B — Standalone**: the testplan is kept as a separate `testplan.json` file and loaded +directly with `Testplan.load(path)`. Analysis functions (`compute_closure()`, +`stage_gate_status()`, report generators) accept a `Testplan` object and a UCIS database +object as independent arguments, so both modes use identical downstream code. + +Design constraints: +- The UCIS API (`db.historyNodes()`, `db.scopes()`, etc.) is unchanged. +- Testplan data does not pollute history nodes or the scope tree. +- Opening a database without a testplan has zero overhead. +- Testpoint lookups by name or exact test name are O(1) after cold-start load (~1 ms); the + wildcard fallback and per-stage queries scan linearly over the testplan. +- The ZIP member is omitted if no testplan was ever set (sparse, like `toggle.bin`). 
+ +--- + +### 10.2 Python Data Model + +New file: **`src/ucis/ncdb/testplan.py`** + +```python +from __future__ import annotations +import json, re +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Optional + + +@dataclass +class CovergroupEntry: + name: str # covergroup name; must match SV covergroup scope name + desc: str = "" + + +@dataclass +class Testpoint: + name: str # testpoint identifier (snake_case) + stage: str # "V1" | "V2" | "V2S" | "V3" | custom + desc: str = "" + tests: list[str] = field(default_factory=list) # post-expansion test names + tags: list[str] = field(default_factory=list) + na: bool = False # tests: ["N/A"] — intentionally unmapped + source_template: str = "" # original wildcard template before expansion + + +@dataclass +class Testplan: + format_version: int = 1 + source_file: str = "" # path to source .hjson (informational) + import_timestamp: str = "" # ISO-8601 UTC when embedded in the CDB + + testpoints: list[Testpoint] = field(default_factory=list) + covergroups: list[CovergroupEntry] = field(default_factory=list) + + # ── In-memory indices (built lazily by _build_indices()) ────────────── + + _tp_by_name: dict = field(default_factory=dict, repr=False, compare=False) + _tp_by_test: dict = field(default_factory=dict, repr=False, compare=False) + _tp_by_cg: dict = field(default_factory=dict, repr=False, compare=False) + _indexed: bool = field(default=False, repr=False, compare=False) + + # ── Index building ──────────────────────────────────────────────────── + + def _build_indices(self) -> None: + """Build O(1) lookup tables from the testpoints list. 
Called lazily.""" + self._tp_by_name.clear() + self._tp_by_test.clear() + self._tp_by_cg.clear() + for tp in self.testpoints: + self._tp_by_name[tp.name] = tp + for t in tp.tests: + self._tp_by_test[t] = tp + for cg in self.covergroups: + # Map the covergroup back to every testpoint that owns it + # (Covergroups are listed per-testplan, not per-testpoint in OpenTitan format, + # but the testpoints may reference them by naming convention.) + pass + self._indexed = True + + def _ensure_indexed(self) -> None: + if not self._indexed: + self._build_indices() + + # ── Public query API ────────────────────────────────────────────────── + + def getTestpoint(self, name: str) -> Optional[Testpoint]: + """Return the testpoint with this name, or None.""" + self._ensure_indexed() + return self._tp_by_name.get(name) + + def testpointForTest(self, test_name: str) -> Optional[Testpoint]: + """Return the testpoint that owns *test_name*. + + Match order: + 1. Exact match: test_name in testpoint.tests + 2. Seed-suffix strip: strip trailing ``_\\d+`` and retry + 3. Wildcard: testpoint.tests entry ending ``_*`` prefix-matches test_name + """ + self._ensure_indexed() + tp = self._tp_by_test.get(test_name) + if tp is not None: + return tp + # Strategy 2: strip seed suffix (e.g. "uart_smoke_12345" → "uart_smoke") + stripped = re.sub(r'_\d+$', '', test_name) + if stripped != test_name: + tp = self._tp_by_test.get(stripped) + if tp is not None: + return tp + # Strategy 3: wildcard entries ("foo_*" matches "foo_bar") + for pattern, tp in self._tp_by_test.items(): + if pattern.endswith('_*') and test_name.startswith(pattern[:-1]): + return tp + return None + + def testpointsForStage(self, stage: str) -> list[Testpoint]: + """Return all testpoints targeting *stage* (e.g. 
"V2").""" + return [tp for tp in self.testpoints if tp.stage == stage] + + def stages(self) -> list[str]: + """Return the ordered unique stages present in the testplan.""" + _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3} + seen = dict.fromkeys(tp.stage for tp in self.testpoints) + return sorted(seen, key=lambda s: _ORDER.get(s, 99)) + + # ── Serialization ───────────────────────────────────────────────────── + + def to_dict(self) -> dict: + return { + "format_version": self.format_version, + "source_file": self.source_file, + "import_timestamp": self.import_timestamp, + "testpoints": [ + { + "name": tp.name, + "stage": tp.stage, + "desc": tp.desc, + "tests": tp.tests, + "tags": tp.tags, + "na": tp.na, + "source_template": tp.source_template, + } + for tp in self.testpoints + ], + "covergroups": [ + {"name": cg.name, "desc": cg.desc} + for cg in self.covergroups + ], + } + + def serialize(self) -> bytes: + return json.dumps(self.to_dict(), separators=(',', ':')).encode() + + @classmethod + def from_dict(cls, d: dict) -> "Testplan": + tp = cls( + format_version=d.get("format_version", 1), + source_file=d.get("source_file", ""), + import_timestamp=d.get("import_timestamp", ""), + ) + for rec in d.get("testpoints", []): + tp.testpoints.append(Testpoint( + name=rec["name"], + stage=rec.get("stage", ""), + desc=rec.get("desc", ""), + tests=rec.get("tests", []), + tags=rec.get("tags", []), + na=rec.get("na", False), + source_template=rec.get("source_template", ""), + )) + for rec in d.get("covergroups", []): + tp.covergroups.append(CovergroupEntry( + name=rec["name"], desc=rec.get("desc", "") + )) + return tp + + @classmethod + def from_bytes(cls, data: bytes) -> "Testplan": + return cls.from_dict(json.loads(data.decode())) + + @classmethod + def load(cls, path: str) -> "Testplan": + """Load a testplan from a standalone JSON file (Mode B).""" + with open(path, "rb") as f: + return cls.from_bytes(f.read()) + + def save(self, path: str) -> None: + """Write this testplan to 
a standalone JSON file (Mode B).""" + with open(path, "wb") as f: + f.write(self.serialize()) +``` + +--- + +### 10.3 ZIP Member: `testplan.json` + +A single new optional ZIP member added to the NCDB ZIP archive: + +``` +testplan.json ← Testplan serialized as compact JSON (separators=(',',':')) + Compression: ZIP_DEFLATE (same as other JSON members) + Omitted entirely if no testplan has been set on the database +``` + +**Size estimate**: 500 testpoints × ~250 bytes/testpoint uncompressed ≈ 125 KB raw, ~20 KB +compressed. Negligible relative to `scope_tree.bin` or `counts.bin`. + +The member stores the testplan **snapshot at the time it was imported into the CDB**: all +`import_testplans` references resolved, all wildcards expanded to final test names, +`tests: ["N/A"]` represented as `na: true` with the tests list empty. + +--- + +### 10.4 NcdbUCIS Extension + +The `NcdbUCIS` class gains a testplan lazy-load unit alongside the existing `history` and +`scopes` units: + +```python +# In NcdbUCIS.__init__(): +self._loaded_testplan: bool = False +self._testplan: Optional["Testplan"] = None # None = "not present in file" +self._testplan_dirty: bool = False # True if setTestplan() was called + +# New public methods: + +def getTestplan(self) -> Optional["Testplan"]: + """Return the embedded Testplan, or None if none is stored.""" + self._ensure_testplan() + return self._testplan + +def setTestplan(self, tp: "Testplan") -> None: + """Embed *tp* in this database. 
Written on the next write() call.""" + from .testplan import Testplan + if tp.import_timestamp == "": + from datetime import datetime, timezone + tp.import_timestamp = datetime.now(timezone.utc).isoformat() + self._testplan = tp + self._testplan_dirty = True + self._loaded_testplan = True + +# New internal method: + +def _ensure_testplan(self) -> None: + if self._loaded_testplan: + return + self._loaded_testplan = True + self._read_zip() # populates _zf_cache if empty + raw = self._zf_cache.get(MEMBER_TESTPLAN) + if raw: + from .testplan import Testplan + self._testplan = Testplan.from_bytes(raw) +``` + +The `_read_zip()` call is already cached (`_zf_cache`), so calling `_ensure_testplan()` after +`_ensure_history()` adds no I/O. The testplan is the lightest unit to load. + +--- + +### 10.5 New Constant + +```python +# In src/ucis/ncdb/constants.py: +MEMBER_TESTPLAN = "testplan.json" +``` + +--- + +### 10.6 NcdbWriter Integration + +```python +# In NcdbWriter.write(), after writing other optional members: +from .constants import MEMBER_TESTPLAN +testplan = getattr(db, '_testplan', None) +if testplan is not None: + zf.writestr(MEMBER_TESTPLAN, testplan.serialize()) +``` + +The testplan is written only if one was set on the db object. This preserves the sparse +member pattern — databases without testplans are byte-identical to today's output. + +--- + +### 10.7 NcdbReader Integration + +```python +# In NcdbReader.read(), after loading optional members: +from .constants import MEMBER_TESTPLAN +from .testplan import Testplan +if MEMBER_TESTPLAN in names: + db._testplan = Testplan.from_bytes(zf.read(MEMBER_TESTPLAN)) + db._loaded_testplan = True +``` + +`NcdbReader` returns a `MemUCIS` (not an `NcdbUCIS`), so the testplan is attached directly +as a `_testplan` attribute. Code that uses `db.getTestplan()` should check for this attribute +with `getattr(db, '_testplan', None)` as a fallback for non-`NcdbUCIS` databases. 
+ +A thin mixin or helper function is preferred: + +```python +# src/ucis/ncdb/testplan.py (additional helper) +def get_testplan(db) -> Optional[Testplan]: + """Retrieve testplan from any UCIS db object (NcdbUCIS or MemUCIS).""" + if hasattr(db, 'getTestplan'): + return db.getTestplan() + return getattr(db, '_testplan', None) + +def set_testplan(db, tp: Testplan) -> None: + """Attach testplan to any UCIS db object.""" + if hasattr(db, 'setTestplan'): + db.setTestplan(tp) + else: + tp.import_timestamp = tp.import_timestamp or \ + datetime.now(timezone.utc).isoformat() + db._testplan = tp +``` + +--- + +### 10.8 NcdbMerger Integration + +The merger must propagate testplan data without silently losing it: + +#### Same-schema fast path (`_merge_same_schema`) + +All inputs share a schema hash, meaning they were generated from the same DUT build with +the same testplan. Read `testplan.json` from the first source that has one and copy it +verbatim to the output (no deserialization needed — raw bytes copy): + +```python +# In _merge_same_schema(), after writing MEMBER_SOURCES: +testplan_bytes = None +for src in sources: + with zipfile.ZipFile(src) as zf: + if MEMBER_TESTPLAN in zf.namelist(): + testplan_bytes = zf.read(MEMBER_TESTPLAN) + break +if testplan_bytes is not None: + zf_out.writestr(MEMBER_TESTPLAN, testplan_bytes) +``` + +The `import_timestamp` in the testplan is intentionally left as-is (it records when the plan +was first embedded, not when this merge happened). + +#### Cross-schema path (`_merge_cross_schema`) + +Different schemas may mean different DUTs, different testplan versions, or both. Strategy: + +1. Collect all unique `(source_file, import_timestamp)` pairs from input testplans. +2. If all inputs have **identical JSON bytes** → copy verbatim to output. +3. If inputs differ in `import_timestamp` only → take the most recent (highest timestamp). +4. 
If inputs have **different `source_file`** values → emit a warning and omit the testplan + from the merged output (merging incompatible DUT plans is undefined). + +```python +def _merge_testplans(self, sources: list[str]) -> Optional[bytes]: + """Return merged testplan bytes, or None with a warning if incompatible.""" + candidates = {} # source_file → (import_timestamp, bytes) + for src in sources: + with zipfile.ZipFile(src) as zf: + if MEMBER_TESTPLAN not in zf.namelist(): + continue + raw = zf.read(MEMBER_TESTPLAN) + d = json.loads(raw) + sf = d.get("source_file", "") + ts = d.get("import_timestamp", "") + if sf not in candidates or ts > candidates[sf][0]: + candidates[sf] = (ts, raw) + if len(candidates) == 0: + return None + if len(candidates) == 1: + return next(iter(candidates.values()))[1] + import warnings + warnings.warn( + f"Merging databases with different testplans " + f"({list(candidates)}); testplan omitted from output.", + stacklevel=3, + ) + return None +``` + +--- + +### 10.9 Testpoint Closure Computation + +The closure computation lives in a standalone module (not inside `Testplan`) so that it can +be used without importing the scope tree: + +New file: **`src/ucis/ncdb/testplan_closure.py`** + +```python +from __future__ import annotations +from enum import Enum +from dataclasses import dataclass +from typing import Optional + +from ucis.history_node_kind import HistoryNodeKind +from ucis.test_status_t import TestStatusT +from .testplan import Testplan, Testpoint + + +class TPStatus(Enum): + CLOSED = "CLOSED" # all mapped tests passed + PARTIAL = "PARTIAL" # some passed, some failed + FAILING = "FAILING" # all mapped tests ran and failed + NOT_RUN = "NOT_RUN" # none of the mapped tests appear in the DB + NA = "N/A" # testpoint intentionally unmapped + UNIMPLEMENTED = "UNIMPLEMENTED" # tests list is empty (plan written, test not yet) + + +@dataclass +class TestpointResult: + testpoint: Testpoint + status: TPStatus + matched_tests: list[str] # test 
names that matched from the DB + pass_count: int = 0 + fail_count: int = 0 + + +def compute_closure(testplan: Testplan, db) -> list[TestpointResult]: + """Compute pass/fail closure for every testpoint against *db*. + + Args: + testplan: The Testplan embedded in (or associated with) *db*. + db: Any UCIS database (NcdbUCIS, MemUCIS, …). + + Returns: + One TestpointResult per testpoint, in testplan order. + """ + # Build test-name → status lookup from history nodes (O(N_tests)) + test_status: dict[str, TestStatusT] = {} + for node in db.historyNodes(HistoryNodeKind.TEST): + name = node.getLogicalName() + test_status[name] = node.getTestStatus() + + results = [] + for tp in testplan.testpoints: + if tp.na: + results.append(TestpointResult(tp, TPStatus.NA, [])) + continue + if not tp.tests: + results.append(TestpointResult(tp, TPStatus.UNIMPLEMENTED, [])) + continue + + matched, passes, fails = [], 0, 0 + for pattern in tp.tests: + # Exact match + if pattern in test_status: + matched.append(pattern) + if test_status[pattern] == TestStatusT.OK: + passes += 1 + else: + fails += 1 + continue + # Seed-suffix strip + import re + stripped = re.sub(r'_\d+$', '', pattern) + if stripped in test_status: + matched.append(stripped) + if test_status[stripped] == TestStatusT.OK: + passes += 1 + else: + fails += 1 + continue + # Wildcard prefix + if pattern.endswith('_*'): + prefix = pattern[:-1] + for tname, tstatus in test_status.items(): + if tname.startswith(prefix): + matched.append(tname) + if tstatus == TestStatusT.OK: + passes += 1 + else: + fails += 1 + + if not matched: + status = TPStatus.NOT_RUN + elif fails == 0: + status = TPStatus.CLOSED + elif passes == 0: + status = TPStatus.FAILING + else: + status = TPStatus.PARTIAL + + results.append(TestpointResult(tp, status, matched, passes, fails)) + return results + + +def stage_gate_status(results: list[TestpointResult], + stage: str, testplan: Testplan) -> dict: + """Determine whether the stage gate for *stage* is met. 
+ + A stage gate passes when ALL testpoints at that stage and all stages + below it in the standard ordering are CLOSED (or N/A). + + Returns a dict: + { + "gate": stage, + "pass": bool, + "by_stage": { stage: {"total": N, "closed": N, "gaps": [...]} } + } + """ + _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3} + gate_level = _ORDER.get(stage, 99) + by_stage: dict = {} + for r in results: + s = r.testpoint.stage + if _ORDER.get(s, 99) > gate_level: + continue + entry = by_stage.setdefault(s, {"total": 0, "closed": 0, "gaps": []}) + entry["total"] += 1 + if r.status in (TPStatus.CLOSED, TPStatus.NA): + entry["closed"] += 1 + else: + entry["gaps"].append(r.testpoint.name) + gate_pass = all(e["closed"] == e["total"] for e in by_stage.values()) + return {"gate": stage, "pass": gate_pass, "by_stage": by_stage} +``` + +--- + +### 10.10 Covergroup Join + +For Report C (coverage per testpoint), the join between testplan `covergroups` entries and +UCIS scope nodes uses a DFS helper: + +```python +# src/ucis/ncdb/testplan_closure.py (additional helper) + +from ucis.scope_type_t import ScopeTypeT + +def find_covergroup_scopes(db, cg_name: str) -> list: + """Return all UCIS scope nodes whose name matches *cg_name* and whose + scope type is COVERGROUP (or COVERINSTANCE). Requires scope tree loaded. + """ + results = [] + _CG_TYPES = {int(ScopeTypeT.COVERGROUP), int(ScopeTypeT.COVERINSTANCE)} + + def _dfs(scope): + try: + st = int(scope.getScopeType()) + except Exception: + st = -1 + if st in _CG_TYPES and scope.getScopeName() == cg_name: + results.append(scope) + try: + for child in scope.scopes(ScopeTypeT.ALL): + _dfs(child) + except Exception: + pass + + _dfs(db) + return results +``` + +**Efficiency note**: this is O(total_scopes). 
For repeated calls across many covergroups, +build a name → scope index once: + +```python +def build_covergroup_index(db) -> dict[str, list]: + """Build a dict mapping covergroup name → list of matching scope nodes.""" + index: dict[str, list] = {} + _CG_TYPES = {int(ScopeTypeT.COVERGROUP), int(ScopeTypeT.COVERINSTANCE)} + + def _dfs(scope): + try: + if int(scope.getScopeType()) in _CG_TYPES: + name = scope.getScopeName() + index.setdefault(name, []).append(scope) + except Exception: + pass + try: + for child in scope.scopes(ScopeTypeT.ALL): + _dfs(child) + except Exception: + pass + + _dfs(db) + return index +``` + +--- + +### 10.11 OpenTitan Hjson Import + +A convenience function converts an OpenTitan `.hjson` testplan file into a `Testplan` +object ready for embedding: + +New file: **`src/ucis/ncdb/testplan_hjson.py`** + +```python +"""Import an OpenTitan-format Hjson testplan into a Testplan object.""" + +from __future__ import annotations +import re +from datetime import datetime, timezone +from typing import Optional +from .testplan import Testplan, Testpoint, CovergroupEntry + + +def import_hjson(hjson_path: str, + substitutions: Optional[dict] = None) -> Testplan: + """Parse *hjson_path* and return a fully resolved Testplan. + + Args: + hjson_path: Path to the .hjson testplan file. + substitutions: Dict of wildcard substitutions, e.g. + {"name": "uart", "intf": ["", "_jtag"]}. + Applied to ``tests`` lists that contain ``{key}`` + patterns (OpenTitan wildcard expansion). 
+ """ + try: + import hjson + except ImportError: + import json as hjson # fallback: .hjson without comments + + with open(hjson_path) as f: + raw = hjson.load(f) + + subs = substitutions or {} + dut_name = raw.get("name", "") + if dut_name and "name" not in subs: + subs["name"] = dut_name + + tp = Testplan( + source_file=hjson_path, + import_timestamp=datetime.now(timezone.utc).isoformat(), + ) + + for rec in raw.get("testpoints", []): + raw_tests = rec.get("tests", []) + # Expand wildcards + expanded = _expand_tests(raw_tests, subs) + na = raw_tests == ["N/A"] + source_template = ",".join(raw_tests) if raw_tests != expanded else "" + tp.testpoints.append(Testpoint( + name=rec["name"], + stage=rec.get("stage", ""), + desc=rec.get("desc", ""), + tests=[] if na else expanded, + tags=rec.get("tags", []), + na=na, + source_template=source_template, + )) + + for rec in raw.get("covergroups", []): + tp.covergroups.append(CovergroupEntry( + name=rec["name"], desc=rec.get("desc", "") + )) + + return tp + + +def _expand_tests(test_list: list[str], subs: dict) -> list[str]: + """Expand ``{key}`` wildcards in test names using *subs*. + + If a substitution value is a list, the cartesian product is computed. 
+ """ + if not subs: + return [t for t in test_list if t != "N/A"] + results = [] + for template in test_list: + if template == "N/A": + continue + expanded = _expand_template(template, subs) + results.extend(expanded) + return results + + +def _expand_template(template: str, subs: dict) -> list[str]: + """Recursively expand a single test name template.""" + m = re.search(r'\{(\w+)\}', template) + if not m: + return [template] + key = m.group(1) + values = subs.get(key, [""]) + if isinstance(values, str): + values = [values] + result = [] + for v in values: + expanded = template.replace(f"{{{key}}}", v, 1) + result.extend(_expand_template(expanded, subs)) + return result +``` + +--- + +### 10.12 Files to Create / Modify + +#### New Files + +| File | Purpose | +|---|---| +| `src/ucis/ncdb/testplan.py` | `Testplan`, `Testpoint`, `CovergroupEntry` data model + serialization + query API | +| `src/ucis/ncdb/testplan_closure.py` | `compute_closure()`, `stage_gate_status()`, `find_covergroup_scopes()`, `build_covergroup_index()` | +| `src/ucis/ncdb/testplan_hjson.py` | `import_hjson()` — OpenTitan Hjson → `Testplan` converter | + +#### Modified Files + +| File | Change | +|---|---| +| `src/ucis/ncdb/constants.py` | Add `MEMBER_TESTPLAN = "testplan.json"` | +| `src/ucis/ncdb/ncdb_ucis.py` | Add `_loaded_testplan`, `_testplan`, `_testplan_dirty` fields; add `getTestplan()`, `setTestplan()`, `_ensure_testplan()` methods | +| `src/ucis/ncdb/ncdb_writer.py` | Write `testplan.json` if `getattr(db, '_testplan', None)` is set | +| `src/ucis/ncdb/ncdb_reader.py` | Read `testplan.json` if present, attach to returned `MemUCIS` | +| `src/ucis/ncdb/ncdb_merger.py` | Call `_merge_testplans()` in both fast and cross-schema paths; write result if non-None | + +#### No UCIS interface changes + +`src/ucis/ucis.py` and all non-NCDB backends (`xml/`, `sqlite/`, `mem/`) are unchanged. +The testplan feature is explicitly NCDB-native. 
+ +--- + +### 10.13 Usage Examples + +```python +from ucis.ncdb.ncdb_ucis import NcdbUCIS +from ucis.ncdb.testplan import get_testplan +from ucis.ncdb.testplan_hjson import import_hjson +from ucis.ncdb.testplan_closure import compute_closure, stage_gate_status + +# ── Embedding a testplan into a new CDB ────────────────────────────────── +db = NcdbUCIS("regression.cdb") +tp = import_hjson("hw/ip/uart/data/uart_testplan.hjson", + substitutions={"name": "uart", "intf": ["", "_jtag"]}) +db.setTestplan(tp) +db.write("regression_with_plan.cdb") + +# ── Reading back and computing closure ─────────────────────────────────── +db2 = NcdbUCIS("regression_with_plan.cdb") +tp2 = db2.getTestplan() # lazy load; no scope tree needed + +results = compute_closure(tp2, db2) # triggers history load only +for r in results: + print(f"{r.testpoint.name:30s} {r.testpoint.stage} {r.status.value}") + +# ── Stage gate check ───────────────────────────────────────────────────── +gate = stage_gate_status(results, "V2", tp2) +if gate["pass"]: + print("V2 gate: PASS") +else: + for stage, info in gate["by_stage"].items(): + print(f" {stage}: {info['closed']}/{info['total']} — gaps: {info['gaps']}") + +# ── Works with any db that has a testplan attached ─────────────────────── +tp3 = get_testplan(db) # works for NcdbUCIS, MemUCIS, any db with _testplan + +# ── Mode B: standalone testplan file cross-analyzed against an NCDB ────── +# Use this when the testplan was never embedded, or when you want to apply +# a different testplan revision against an already-built database. 
+from ucis.ncdb.testplan import Testplan + +tp_ext = Testplan.load("hw/ip/uart/data/uart_testplan.json") + +db_legacy = NcdbUCIS("old_regression.cdb") # no embedded testplan +results_ext = compute_closure(tp_ext, db_legacy) +for r in results_ext: + print(f"{r.testpoint.name:30s} {r.testpoint.stage} {r.status.value}") + +# ── Mode B: save a resolved testplan to a standalone file ──────────────── +tp_resolved = import_hjson("hw/ip/uart/data/uart_testplan.hjson", + substitutions={"name": "uart", "intf": ["", "_jtag"]}) +tp_resolved.save("artifacts/uart_testplan_resolved.json") +# Later: Testplan.load("artifacts/uart_testplan_resolved.json") +``` + +--- + +### 10.14 Lazy-Load Dependency Map + +``` +NcdbUCIS._ensure_testplan() ──► reads testplan.json from _zf_cache + ↑ triggers _read_zip() if cache empty + (no dependency on history or scopes) + +compute_closure(tp, db) ──► calls db.historyNodes() + ↑ triggers _ensure_history() + (no dependency on scopes) + +build_covergroup_index(db) ──► calls db.scopes() + ↑ triggers _ensure_scopes() + (heaviest load — only for coverage reports) +``` + +The testplan can be read and queried without loading either the history nodes or the scope +tree. Report A (Testpoint Closure Summary) loads only `testplan.json` + `history.json` — +the two lightest members. + +--- + +### 10.15 Standalone Testplan Mode (Mode B) — Cross-Analysis Workflow + +In Mode B the testplan is **not** stored inside the NCDB. This is the expected flow when +performing analysis between a `testplan.json` file and an NCDB: + +``` +Input A: uart_testplan.json ← standalone testplan (from RTL repo, CI artifact, etc.) 
+Input B: regression.cdb ← NCDB produced by simulation run (may have no embedded plan) + +Step 1: tp = Testplan.load("uart_testplan.json") +Step 2: db = NcdbUCIS("regression.cdb") +Step 3: results = compute_closure(tp, db) +Step 4: gate = stage_gate_status(results, "V2", tp) +``` + +#### When to use Mode B + +| Scenario | Mode | +|---|---| +| Testplan is embedded at import time and travels with the CDB | A (embedded) | +| Testplan lives in RTL repo; CDB produced by CI without plan injection | B (standalone) | +| Retro-fitting plan coverage onto pre-existing legacy databases | B (standalone) | +| Comparing multiple testplan revisions against the same frozen NCDB | B (standalone) | +| Ad-hoc analysis during bring-up before a canonical plan exists | B (standalone) | +| Post-silicon debug: map chip-test results against a verification plan | B (standalone) | + +#### Producing a standalone testplan file + +From a raw OpenTitan Hjson file: + +```python +from ucis.ncdb.testplan_hjson import import_hjson +from ucis.ncdb.testplan import Testplan + +tp = import_hjson("hw/ip/uart/data/uart_testplan.hjson", + substitutions={"name": "uart", "intf": ["", "_jtag"]}) +tp.save("artifacts/uart_testplan.json") # save for later reuse +``` + +Or convert from an already-embedded plan to a standalone copy for sharing: + +```python +db = NcdbUCIS("regression.cdb") +tp = db.getTestplan() +if tp: + tp.save("artifacts/uart_testplan.json") +``` + +#### Selecting which testplan to use at analysis time + +`compute_closure()` accepts a `Testplan` object from either source; the caller decides +the priority: + +```python +def get_analysis_testplan(db, standalone_path=None): + """Return a Testplan for analysis, preferring standalone over embedded.""" + if standalone_path: + return Testplan.load(standalone_path) + tp = get_testplan(db) + if tp: + return tp + raise ValueError("No testplan available: provide --testplan or embed one in the CDB") +``` + +This keeps the policy decision in the caller (CLI, 
script, or notebook) rather than in the +library, so both modes remain fully supported without hidden precedence rules. + +--- + +## Part 11: Competitive Analysis — Matching and Exceeding the State of Practice + +### 11.1 Industry Landscape + +The three dominant commercial EDA verification management platforms and their regression reporting +capabilities are surveyed here, along with CI/CD-era test reporting tools (Allure, Grafana, +TestRail, Zephyr Scale), and 2024–2025 AI/ML trends. + +#### 11.1.1 Cadence Verisium Manager (formerly vManager) + +Cadence positions Verisium Manager as an MDV (Metric-Driven Verification) execution platform. + +| Capability | Details | +|---|---| +| Hierarchical vPlan | Testpoints organized as a tree; each node has pass/fail/coverage status | +| Real-time coverage merge | Incremental merge visible in dashboards *during* regression | +| Failure clustering/buckets | Automated log similarity clustering → N failures → K buckets | +| Failure signature linking | Representative waveform (FSDB) linked to each bucket | +| Pass/fail trend dashboards | Per-testpoint pass rate over time; regression δ view | +| Farm utilization metrics | CPU hours, parallel job efficiency | +| Owner/priority on testpoints | Engineer assignment and priority fields | +| Jenkins/GitLab CI integration | Pipeline plugins; automatic post-regression summaries | +| REST API + Python API | Programmatic report generation | +| Requirements traceability | Jira, Jama, IBM DOORS via OSLC adapters | +| Email summary reports | Configurable post-regression email with key metrics | +| Waiver management | Mark bins "not applicable" with rationale; tracked separately | +| AI-assisted triage | ML clustering; root-cause suggestion from log patterns | + +Key insight: Cadence's differentiator is *live* coverage merging (see coverage grow in real time as +tests complete) and tightly coupled waveform debug from failure buckets. 
+ +#### 11.1.2 Synopsys VC ExecMan + VSO.ai + +VC ExecMan is the regression orchestration layer. VSO.ai is the AI analytics overlay introduced +in 2023–2024. + +| Capability | Details | +|---|---| +| Hierarchical Verification Plan (HVP) | Tree of goals with coverage-linked metrics | +| Per-test coverage contribution | Unique bins hit by each test; identifies redundant tests | +| Minimum test set (MTS) computation | Greedy set cover to minimize regression time at target coverage | +| Unreachable coverage detection | Constraint-conflict or dead-code identification | +| ML-based test prioritization | Rank tests by predicted coverage ROI; 2–10× regression speedup (NVIDIA, AMD claims) | +| Targeted rerun scheduling | Automatically reruns failed/low-coverage tests with adjusted seeds | +| Predictive closure timeline | "At current rate, coverage closure in N days" | +| Phase-aware analytics | Testbench bring-up → bug hunting → signoff: different optimization objectives | +| SQL API for custom reports | ad-hoc queries against the regression DB | +| Coverage root cause analysis (auto) | Pinpoints why bins are uncovered; generates actionable hints | +| Integration with Verdi/waveforms | Failure → waveform handoff | +| Customer-reported speedup | 2–10× regression reduction; 10× faster coverage hole closure | + +Key insight: VSO.ai's *minimum test set* and *per-test coverage contribution* are the most +technically differentiated features. They require per-test coverage bins (contrib data), which +NCDB already stores in `contrib/*.bin`. + +#### 11.1.3 Siemens Questa VRM + Verification IQ + +Siemens uses an RMDB (Regression Management DB) as the central store and Verification IQ as the +analytics UI layer. 
+ +| Capability | Details | +|---|---| +| Hierarchical testplan (UCDB-backed) | Testplan integrated into UCDB as first-class objects | +| Testplan Author | GUI-assisted testplan creation with coverage scope linking | +| OSLC integration | Polarion, Jama, Jira bi-directional traceability | +| Functional safety traceability | ISO 26262, DO-254 reports; audit trails with signoff stamps | +| Live UCDB merge during regression | Coverage visible before regression completes | +| ML-assisted failure bucketing | Log analysis + assertion clustering | +| Web dashboards | Executive and engineer views; drilldown | +| Regression delta report | Compares current vs previous regression; new failures highlighted | +| Closure forecasting | Trend-based prediction | +| Email/Slack notifications | Configurable alert rules | +| Cost/schedule tracking | Test budget vs actuals by stage | + +Key insight: Siemens leads on *functional safety compliance reporting* (ISO 26262) and on +embedding testplan data inside the UCDB itself rather than a sidecar file — closest to our +`testplan.json` ZIP member design. + +#### 11.1.4 CI/CD Era Tools (Allure, Grafana, TestRail, Zephyr Scale) + +These tools are prominent in software verification but are increasingly used in hardware projects +running on CI infrastructure. 
+ +| Tool | Strengths | Gaps vs EDA needs | +|---|---|---| +| Allure Report | Rich HTML output; CI integration; flaky test identification; trend view | No coverage metrics; no seed/testpoint concept | +| Grafana | Fully custom dashboards; alerting; time-series metrics | Requires custom data pipeline; no EDA-native data | +| TestRail | Full test case management; requirements traceability; compliance reports | No coverage metrics; no simulation-native integration | +| Zephyr Scale | Jira-native; agile sprint alignment; regression cycles | No coverage metrics; weak EDA toolchain integration | + +Key patterns across all CI tools: pass/fail rate over time, flaky test detection, trend +dashboards, and CI pipeline integration are table-stakes features. + +--- + +### 11.2 Feature Gap Analysis + +For each feature class, we assess: **Present in our design**, **Absent (gap)**, or **Not +applicable**. + +| Feature | Commercial EDA | Our Design (Parts 1–10) | Gap? | +|---|---|---|---| +| Testplan hierarchy | ✓ (all three) | ✓ Part 9.3 / 10.2 | No | +| Coverage per testpoint | ✓ (all three) | ✓ Report C (9.4) | No | +| Stage gate readiness | ✓ (vPlan stages) | ✓ Report B (9.4) + 10.9 | No | +| Testpoint pass rate trend (history) | ✓ | ✓ Report F (9.4) via history | No | +| Regression delta report | ✓ (all three) | ✓ Report D (9.4) | No | +| Historical stage progression | ✓ | ✓ Report E (9.4) | No | +| Per-test coverage contribution | ✓ VSO.ai, vManager | **Partial** — contrib/*.bin exists, no compute_contribution() API | **Gap** | +| Minimum test set computation | ✓ VSO.ai | **Absent** | **Gap** | +| Failure clustering/buckets | ✓ (all three) | **Absent** | **Gap** | +| Failure-to-waveform linking | ✓ vManager, VRM | Absent (out of pyucis scope) | N/A | +| Predictive closure timeline | ✓ VSO.ai, VRM | **Absent** | **Gap** | +| Unreachable bin detection | ✓ VSO.ai | Absent (simulator-level) | N/A | +| Waiver management | ✓ (all three) | **Absent** | **Gap** | +| Real-time/live 
merge | ✓ vManager, VRM | Absent (batch only) | Low priority | +| Requirements ALM traceability | ✓ (all three) | **Absent** | **Gap** | +| Functional safety (ISO 26262) | ✓ Siemens | **Absent** | **Gap** | +| ML-based test prioritization | ✓ VSO.ai | **Absent** | **Gap** | +| Targeted rerun scheduling | ✓ VSO.ai, ExecMan | Absent (scheduler is external) | N/A | +| CI/CD pipeline integration | ✓ (all three) | **Absent** | **Gap** | +| Flaky test identification | ✓ + CI tools | ✓ flake_score in history design | No | +| CUSUM change-point detection | **Absent** in all three | ✓ Part 6 | **Exceed** | +| Seed-correlated failure analytics | **Absent** | ✓ seed_id in history buckets | **Exceed** | +| Per-test coverage provenance audit | **Absent** | ✓ squash_log.bin | **Exceed** | +| Confidence-weighted closure | **Absent** | ✓ flake_score gates signoff | **Exceed** | +| Open, inspectable format | **Absent** | ✓ ZIP+JSON; no license required | **Exceed** | +| Cross-simulator UCIS interop | Proprietary formats | ✓ standard UCIS API | **Exceed** | +| Stage gate + flake score integration | **Absent** | ✓ combined in compute_closure() | **Exceed** | + +--- + +### 11.3 Features to Add to Match Commercial Tools + +The following features are absent from our design but are expected by professional users who +have used commercial tools. They should be added to the design and eventually implemented. + +#### 11.3.1 Per-Test Coverage Contribution Report (Report I) + +**What**: Rank all tests in a regression by the number of *unique* coverage bins they contribute +— bins that no other test hit. Identify redundant tests (zero unique contribution). + +**How (in NCDB)**: `contrib/*.bin` stores per-test bin hit vectors. A set-cover query over these +vectors yields unique contribution per test. The result is a ranked list suitable for regression +pruning. + +``` +Report I: Per-Test Coverage Contribution + +Test Total Bins Hit Unique Bins Redundant? 
+-------------------- --------------- ------------ ---------- +smoke_basic_0 12840 3210 No +directed_arith_0 8901 401 No +directed_arith_1 8820 0 YES (fully covered by directed_arith_0) +rand_full_0 45000 8100 No +... + +Suggested pruning: remove 14 tests with 0 unique contribution → save ~12% regression time +``` + +**Design addition**: `compute_contribution(db)` function in `testplan_closure.py` that iterates +`contrib/*.bin` and computes unique bins per test. Returns `List[TestContribution]` with +`test_name`, `total_bins`, `unique_bins`, `unique_fraction`. + +#### 11.3.2 Minimum Test Set Report (Report J) + +**What**: Given a target coverage threshold (e.g., 95%), compute the smallest subset of tests +that achieves that threshold. This is the set-cover problem; a greedy approximation is O(n·m) +in tests × bins. + +**How**: Greedy algorithm: repeatedly select the test with the highest unique-contribution count +on remaining uncovered bins until the threshold is met or no further coverage is possible. + +``` +Report J: Minimum Test Set for 95% Closure + + Original regression: 420 tests, 18.2 CPU-hours + Minimum test set: 87 tests, 4.1 CPU-hours (77% reduction) + + Included tests (top 10 by contribution): + rand_full_0 → 12.4% of total bins (unique) + rand_full_1 → 6.1% + directed_fsm_0 → 4.8% + ... + + Coverage achieved: 95.3% (target: 95.0%) + Excluded: 333 tests with <0.01% unique contribution each +``` + +**Design addition**: `compute_minimum_test_set(db, target_coverage=0.95)` in +`testplan_closure.py`. Returns `MinimumTestSet` with `included_tests`, `excluded_tests`, +`achieved_coverage`, `cpu_hours_saved`. + +#### 11.3.3 Waiver Management (ZIP member `waivers.json`) + +**What**: Coverage bins that are intentionally uncovered should be marked with a rationale +and approver, and excluded from closure calculations. This is required for ISO 26262 and +other compliance workflows. 
+ +**Format**: New ZIP member `waivers.json` with the following schema: + +```json +{ + "schema_version": 1, + "waivers": [ + { + "id": "W-001", + "scope_pattern": "top.dut.arith.covergroup_t.*", + "bin_pattern": "overflow_corner", + "rationale": "Hardware prevents this condition by design (see spec §3.4.2)", + "approver": "jane.doe@example.com", + "approved_at": "2024-11-15", + "expires_at": null, + "status": "active" + } + ] +} +``` + +**Impact on closure**: `compute_closure()` accepts an optional `waivers: List[Waiver]` argument. +Waived bins are excluded from the denominator and reported separately in all closure metrics. + +**New file**: `src/ucis/ncdb/waivers.py` — `Waiver` dataclass, `WaiverSet.load()`, +`WaiverSet.save()`, `WaiverSet.matches_scope(scope_path, bin_name)`. + +**NcdbUCIS extension**: Add `getWaivers()` / `setWaivers()` analogous to testplan. + +#### 11.3.4 Predictive Closure Timeline (Report K) + +**What**: Given the historical coverage growth trend over successive regressions, estimate when +coverage will reach the target if the current rate is maintained. + +**How**: Fit a logarithmic or asymptotic curve to the (regression_number, coverage_pct) series +stored in history. Extrapolate to the target coverage. Report a confidence interval. + +``` +Report K: Predictive Closure Timeline + + Current coverage: 78.4% (target: 95.0%) + Regressions so far: 34 + Trend model: logarithmic fit R²=0.94 + + Projection (95% CI): + Optimistic (upper CI): +8 regressions + Median estimate: +14 regressions + Pessimistic (lower CI): +23 regressions + + Warning: coverage growth rate has been declining since regression #28 + (CUSUM change point detected; see Section 6 change-point analysis). +``` + +**Design addition**: `compute_closure_forecast(history_series, target=0.95)` function in +`testplan_closure.py`. Takes a list of `(regression_id, coverage_pct)` tuples, returns a `Forecast` +with `median_regressions_to_target`, `ci_lower`, `ci_upper`, `model_fit_r2`, `warning`. 
+ +#### 11.3.5 CI/CD Integration — JUnit XML and GitHub Annotations Export + +**What**: Export regression results in JUnit XML format (testpoint pass/fail as test cases) +so that CI/CD systems (GitHub Actions, GitLab CI, Jenkins) can display native pass/fail +annotations and trend graphs. + +**How**: Map each testpoint to a JUnit `<testcase>`. Stage → `<testsuite>`. Failures → `<failure>` +with message. Duration → `time=` attribute. + +**Design addition**: `testplan_export.py` with: +- `export_junit_xml(results: List[TestpointResult], output_path: str)` — writes JUnit XML +- `export_github_annotations(results)` — writes GitHub Actions `::error::` / `::warning::` lines +- `export_summary_markdown(results, history)` — writes GitHub Actions Job Summary markdown + +#### 11.3.6 Requirements Traceability Link (ALM Integration) + +**What**: Each testpoint can carry a link to an external ALM item (Jira issue, Polarion +requirement, GitHub issue). The end-of-regression report then includes traceability from +coverage point → testpoint → requirement → sign-off status. + +**How**: Add optional `requirements` field to `Testpoint` dataclass: + +```python +@dataclass +class RequirementLink: + system: str # "jira", "github", "polarion", "jama" + project: str # "PROJ" + item_id: str # "PROJ-1234" + url: str # full URL (optional) + +@dataclass +class Testpoint: + ... + requirements: List[RequirementLink] = field(default_factory=list) +``` + +Report output: traceability matrix (requirement → testpoints that cover it → closure status). +No live sync with ALM tools needed for v1; links are maintained in `testplan.json`. + +#### 11.3.7 Functional Safety Traceability Report (Report L) + +**What**: ISO 26262 and DO-254 require a documented claim that each safety requirement has +been verified. Generate a traceability matrix in CSV or PDF-friendly form. 
+ +``` +Report L: Functional Safety Traceability Matrix + +Requirement ID Description Testpoints Status Evidence +-------------- ---------------------------- ------------------ -------- -------- +ISO-FUNC-001 Overflow detection tp_arith_overflow CLOSED merged.cdb@reg_042 +ISO-FUNC-002 Reset recovery < 3 cycles tp_reset_timing OPEN — +... +``` + +**Design addition**: `export_safety_matrix(results, waivers, output_path, format="csv")` +in `testplan_export.py`. + +--- + +### 11.4 Opportunities to Exceed Commercial Tools + +The following capabilities are *absent from all three commercial platforms* or are only +partially implemented. They represent genuine differentiation opportunities. + +#### 11.4.1 Seed-Correlated Failure Analytics + +**What commercial tools do**: Record pass/fail per run. Some expose the seed value as metadata. +None correlate *which seeds systematically produce failures* vs. which seeds expose rare bugs. + +**What we uniquely offer**: The binary history design (Part 2) stores `seed_id` per run as a +32-bit field in the bucket record. This enables: + +- **Seed reliability score**: For a given test, what fraction of seeds result in pass? A test + passing on seed 0 but failing on seeds > 1e8 indicates a seed-dependent bug. +- **Seed range heat-map**: Bin seeds by value range; identify if certain seed regions reliably + expose a failure. (EDA-unique: commercial tools do not expose this.) +- **Seed reuse recommendation**: For signoff regressions, prefer seeds with historically high + pass rates for "stable validation", plus seeds in the high-failure-rate range for "stress + regression". 
+ +**Report M: Seed Reliability Analysis** +``` +Test: rand_arith_* + +Seed range [0, 1M]: pass rate 99.2% (stable) +Seed range [1M, 10M]: pass rate 94.1% (moderate stress) +Seed range [100M, 200M]: pass rate 71.3% ← HIGH FAILURE ZONE + +Recommended signoff seeds: {42, 1234, 9999} (historically 100% pass) +Stress regression seeds: {100000001, 100500000, 101234567} (expose most bugs) +``` + +#### 11.4.2 CUSUM Change-Point Detection with RTL Commit Correlation + +**What commercial tools do**: Show coverage trend over time. None apply statistical +process control (SPC) to detect *when* coverage growth stalled or *when* test reliability +degraded. + +**What we uniquely offer** (already in Part 6 design): +- CUSUM algorithm applied per-testpoint pass rate series +- Change-point mapped to regression number → can be correlated with RTL commit history +- Alert: "Test X reliability dropped at regression #47; closest RTL commit: [hash]" + +This enables *root-cause attribution at the RTL commit level* without any ML, purely from +the history database. No commercial tool offers this for individual testpoint reliability. + +#### 11.4.3 Confidence-Weighted Coverage Closure + +**What commercial tools do**: Report coverage as a simple percentage. A bin hit by a test +that has a 30% pass rate is counted the same as a bin hit by a 100%-reliable test. + +**What we uniquely offer**: Weight coverage by the reliability of the tests that hit it. + +``` +Standard closure: 92.4% +Confidence-weighted closure: 87.1% ← bins hit only by flaky tests are discounted + +Confidence-weighted closure is recommended for signoff claims. +Bins with weight < 0.5 (hit only by tests with flake_score > 0.5): + top.dut.fsm.state_machine_t → state_c_to_d: weight=0.31 (unreliable) +``` + +This gives a *conservative* closure claim that accounts for test reliability. Commercial +tools have no equivalent concept. 
It requires both coverage data and historical pass-rate +data — exactly what our merged NCDB provides. + +#### 11.4.4 Coverage Provenance Audit Trail + +**What commercial tools do**: Show current coverage. No tool records *which regression +contributed which bins* in the merged database, and with which squash policy. + +**What we uniquely offer** (Part 3/4 design): `squash_log.bin` records the exact merge +parameters (squash policy, threshold, regression IDs) used to build each version of +`counts.bin`. An auditor can answer: + +> "This bin was closed in regression #38 with squash policy `threshold=3,window=10`; if we +> applied a stricter policy (`threshold=5`), it would still be closed (hit 7 times)." + +This is unique to our design and directly supports signoff claims in compliance workflows. + +#### 11.4.5 Open Format — No Vendor Lock-In + +**What commercial tools do**: Store data in proprietary binary databases. Customers cannot +access data outside the vendor's tools. Tool upgrades may break existing data. + +**What we uniquely offer**: +- Everything is in a ZIP file with JSON metadata and documented binary layouts. +- Any Python program can read and process the data with zero licensing cost. +- Users can write custom reports, scripts, and integrations without a vendor API contract. +- The UCIS API layer means data is portable across simulators (VCS, Xcelium, Riviera-PRO, + Verilator) — commercial tools are all simulator-specific. + +#### 11.4.6 Stage Gate + Flake Score Integration + +**What commercial tools do**: V-plan stages are pass/fail based on count of tests run and +static coverage thresholds. Flaky tests are identified separately, never integrated into +gate criteria. + +**What we uniquely offer**: `compute_closure()` (Part 10.9) returns a `TPStatus` that +incorporates both the testpoint's coverage metric and the flake_score of the tests that +exercised it. 
A stage gate can require not just coverage but *reliable* coverage: + +```python +gate_V2 = stage_gate_status( + results, stage="V2", testplan=tp, + require_flake_score_below=0.2, # gate fails if covering tests are unreliable + require_coverage_pct=90.0 +) +``` + +No commercial tool integrates test reliability into stage gate logic. + +--- + +### 11.5 Summary Comparison Table + +| Dimension | Cadence Verisium | Synopsys VSO.ai | Siemens VRM/VIQ | **PyUCIS (our design)** | +|---|---|---|---|---| +| Testplan hierarchy | ✓ | ✓ | ✓ | ✓ | +| Coverage per testpoint | ✓ | ✓ | ✓ | ✓ | +| Stage gate | ✓ | ✓ | ✓ | ✓ | +| Testpoint trend (history) | ✓ | ✓ | ✓ | ✓ | +| Regression delta | ✓ | ✓ | ✓ | ✓ | +| Failure clustering | ✓ | ✓ | ✓ | ✗ (gap) | +| Per-test contribution ranking | ✓ | ✓ VSO.ai | ✓ | ✗→ **add** | +| Minimum test set | ✗ | ✓ VSO.ai | ✗ | ✗→ **add** | +| Predictive closure timeline | ✗ | ✓ | ✓ | ✗→ **add** | +| Waiver management | ✓ | ✓ | ✓ | ✗→ **add** | +| ALM requirements traceability | ✓ | ✓ | ✓ OSLC | ✗→ **add (links only)** | +| ISO 26262 safety reports | ✗ | ✗ | ✓ | ✗→ **add (export only)** | +| CI/CD JUnit export | ✗ | ✗ | ✗ | ✗→ **add** | +| Live/incremental merge | ✓ | ✓ | ✓ | ✗ (low priority) | +| CUSUM change-point detection | ✗ | ✗ | ✗ | ✓ **unique** | +| Seed-correlated failure analytics | ✗ | ✗ | ✗ | ✓ **unique** | +| Confidence-weighted closure | ✗ | ✗ | ✗ | ✓ **unique** | +| Coverage provenance audit trail | ✗ | ✗ | ✗ | ✓ **unique** | +| Stage gate + flake score | ✗ | ✗ | ✗ | ✓ **unique** | +| Open format / no lock-in | ✗ | ✗ | ✗ | ✓ **unique** | +| Cross-simulator UCIS portability | ✗ | ✗ | ✗ | ✓ **unique** | +| Zero license cost | ✗ | ✗ | ✗ | ✓ **unique** | + +--- + +### 11.6 Revised Report Catalog + +Combining the original 8 reports (Part 9.4) with new reports from the competitive analysis: + +| ID | Name | Source Data | Priority | +|---|---|---|---| +| A | Testpoint Closure Summary | testplan + current UCIS | P0 | +| B | Stage Gate 
Readiness | testplan + current UCIS | P0 | +| C | Coverage per Testpoint | testplan + covergroup scopes | P0 | +| D | Regression Delta | current vs previous UCIS | P0 | +| E | Historical Stage Progression | history + testplan | P1 | +| F | Testpoint Reliability (flake rate) | history + testplan | P1 | +| G | Unexercised Covergroup Report | current UCIS + testplan | P1 | +| H | Test Budget by Stage | testplan (counts/weights) | P2 | +| I | Per-Test Coverage Contribution | contrib/*.bin | P1 | +| J | Minimum Test Set | contrib/*.bin + target | P2 | +| K | Predictive Closure Timeline | history (coverage series) | P2 | +| L | Functional Safety Traceability Matrix | testplan + requirements links | P2 | +| M | Seed Reliability Analysis | history (seed_id series) | P2 | + +P0 = essential for v1; P1 = high value, implement in v1 if time allows; P2 = future work. + +--- + +## References + +- Atlassian "Flakinator": https://www.atlassian.com/blog/atlassian-engineering/taming-test-flakiness-how-we-built-a-scalable-tool-to-detect-and-manage-flaky-tests +- Google flaky test mitigation: https://talent500.com/blog/google-flaky-test-mitigation-strategies/ +- Cadence Verisium Manager: https://www.cadence.com/en_US/home/tools/system-design-and-verification/ai-driven-verification/verisium-manager.html +- Synopsys VC ExecMan: https://www.synopsys.com/verification/soc-verification-automation/vc-execution-manager.html +- Synopsys VSO.ai: https://www.synopsys.com/ai/ai-powered-eda/vso-ai.html +- Siemens Questa VRM + Verification IQ: https://eda.sw.siemens.com/en-US/eda/questa/vrm/ +- Seed Selector algorithm: https://link.springer.com/chapter/10.1007/978-3-031-53960-2_22 (42%+ regression speedup via seed value ranking) +- Time series columnar encoding: https://www.vldb.org/pvldb/vol15/p2148-song.pdf +- CUSUM control charts: standard statistical process control literature +- OpenTitan testplanner: https://opentitan.org/book/util/dvsim/doc/testplanner.html +- UCIS 1.0 LRM: Section 4.3 
(History Nodes), Table 4-2 (History Node Types), Table 4-3 (Pre-defined Attributes) +- JUnit XML schema: https://github.com/testmoapp/junitxml diff --git a/doc/source/reference/cli.rst b/doc/source/reference/cli.rst index d22e3b5..f37c9b7 100644 --- a/doc/source/reference/cli.rst +++ b/doc/source/reference/cli.rst @@ -14,3 +14,5 @@ For workflow-oriented usage see the user guides: * :doc:`../working-with-coverage/analyzing` — ``show`` commands workflow * :doc:`../working-with-coverage/merging` — ``merge`` options * :doc:`../reporting/exporting` — ``show code-coverage`` export formats +* :doc:`../working-with-coverage/test-history` — ``history query`` / ``history stats`` +* :doc:`../working-with-coverage/testplan` — ``testplan import`` / ``testplan closure`` / ``testplan export-junit`` diff --git a/doc/source/reference/formats/ncdb-format.rst b/doc/source/reference/formats/ncdb-format.rst index 9406219..577a3f1 100644 --- a/doc/source/reference/formats/ncdb-format.rst +++ b/doc/source/reference/formats/ncdb-format.rst @@ -1090,3 +1090,298 @@ To read an NCDB file without PyUCIS: * :doc:`sqlite-schema` — SQLite backend schema reference * :doc:`xml-interchange` — XML interchange format * :ref:`working-with-coverage-merging` — How to merge databases using the CLI + +----------- + +.. _ncdb-format-v2-history: + +************************* +7. V2 binary test history +************************* + +When ``manifest.json`` contains ``"history_format": "v2"`` the archive holds +six additional binary members. All integers are **little-endian** unless +noted. + +7.1 ``history/test_registry.bin`` +================================== + +Maps stable integer IDs to test names and seed strings. IDs are assigned by +insertion order and never reassigned. + +.. 
code-block:: none + + Header (17 bytes): + magic u32 0x54524547 ('TREG') + version u8 1 + next_run_id u32 monotonically-increasing run counter + num_names u32 + num_seeds u32 + + Offset tables (immediately after header): + name_offsets u32[num_names] byte offset into name heap + seed_offsets u32[num_seeds] byte offset into seed heap + + Heaps (NUL-terminated UTF-8 strings): + name_heap NUL-terminated strings in name_id order + seed_heap NUL-terminated strings in seed_id order + +7.2 ``history/test_stats.bin`` +================================ + +One 72-byte entry per test name (indexed by name_id). + +.. code-block:: none + + Header (9 bytes): + magic u32 0x54535453 ('TSTS') + version u8 1 + num_entries u32 + + Entry (72 bytes, repeated num_entries times): + name_id u32 + total_runs u32 + pass_count u32 + fail_count u32 + error_count u32 + skip_count u32 + timeout_count u32 + _reserved u32 (padding, always 0) + mean_ms f32 Welford running mean of runtime in milliseconds + m2_ms f32 Welford running sum-of-squares (variance = m2/n) + cusum_pos f32 CUSUM positive accumulator for change detection + cusum_neg f32 CUSUM negative accumulator + _pad1 f32 (reserved, 0.0) + _pad2 f32 (reserved, 0.0) + _pad3 f32 (reserved, 0.0) + flakiness_score i16 fixed-point 0–10000 representing 0.00–100.00 % + tag u8[6] short ASCII label (NUL-padded) + last_status u8 most-recent HIST_STATUS_* value + _trailing u8 padding + +7.3 ``history/bucket_index.bin`` +================================== + +Index over the per-bucket run-record files. + +.. 
code-block:: none + + Header (9 bytes): + magic u32 0x42494458 ('BIDX') + version u8 1 + num_buckets u32 + + Entry (28 bytes, sorted by bucket_seq): + bucket_seq u32 + ts_start u32 Unix timestamp of first record in bucket + ts_end u32 Unix timestamp of last record in bucket + num_records u32 + fail_count u32 + min_name_id u32 + max_name_id u32 + +7.4 ``history/NNNNNN.bin`` +============================ + +Each bucket holds up to 10 000 run records, compressed with LZMA (sealed +buckets) or DEFLATE level 1 (current open bucket). After decompression: + +.. code-block:: none + + Header (16 bytes): + magic u32 0x42434B54 ('BCKT') + version u8 1 + num_records u32 + num_names u16 + _pad u8 (padding) + ts_base u32 Unix timestamp of first record + + Name index (12 bytes per unique name in this bucket): + name_id u32 global name_id from test_registry + offset u32 byte offset into name's record data + count u16 number of records for this name + _pad u8[2] + + Columnar record data (one column per name, name_id order): + seeds[] u8[count] local seed index (≤ 255 unique/bucket) + ts_deltas[] varint[count] delta-encoded seconds from ts_base + status_flags[] u8[count] nibble-packed (high=status, low=flags) + + Seed dictionary (appended after all record data): + num_local_seeds u8 + seed_ids[] u32[num_local_seeds] global seed_ids + +Varint encoding: each value uses 1–5 bytes; the high bit of each byte +indicates that more bytes follow (7 bits of value per byte, little-endian). + +7.5 ``history/contrib_index.bin`` +==================================== + +Tracks which test runs contributed coverage so that squash can be replayed. + +.. 
code-block:: none + + Header (12 bytes): + magic u32 0x43494458 ('CIDX') + version u8 1 + policy u8 merge-policy constant + watermark u32 highest squashed run_id + num_active u32 + + Entry (16 bytes, one per unsquashed run): + run_id u32 + name_id u32 + status u8 + flags u8 + _pad u8[2] + ts u32 + +7.6 ``history/squash_log.bin`` +================================ + +Append-only provenance log for squash events. + +.. code-block:: none + + Header (9 bytes): + magic u32 0x53514C47 ('SQLG') + version u8 1 + num_entries u32 + + Entry (24 bytes): + ts u32 Unix timestamp of squash operation + policy u8 merge-policy used + _pad u8[3] + from_run u32 first run_id squashed + to_run u32 last run_id squashed (inclusive) + num_runs u32 total runs processed + pass_runs u32 runs that passed + +---- + +********************************** +8. Testplan and Waivers JSON +********************************** + +``testplan.json`` and ``waivers.json`` are optional UTF-8 JSON members +stored at the ZIP root. They are written by :class:`~ucis.ncdb.ncdb_writer.NcdbWriter` +when the corresponding objects are attached to the database and are read +transparently by :class:`~ucis.ncdb.ncdb_reader.NcdbReader`. + +8.1 ``testplan.json`` +====================== + +.. code-block:: json + + { + "format_version": 1, + "source_file": "uart.hjson", + "import_timestamp": "2025-01-01T00:00:00+00:00", + "testpoints": [ + { + "name": "uart_reset", + "stage": "V1", + "desc": "Verify reset", + "tests": ["uart_smoke", "uart_reset_*"], + "tags": ["smoke"], + "na": false, + "source_template": "", + "requirements": [ + {"id": "REQ-001", "desc": "Reset spec"} + ] + } + ], + "covergroups": [ + {"name": "cg_reset", "desc": "Reset coverage"} + ] + } + +.. 
list-table:: testplan.json — top-level fields + :header-rows: 1 + :widths: 25 15 60 + + * - Field + - Type + - Description + * - ``format_version`` + - int + - Schema version; currently ``1`` + * - ``source_file`` + - string + - Path to the Hjson/JSON source that produced this plan + * - ``import_timestamp`` + - ISO-8601 string + - UTC timestamp when the plan was last imported + * - ``testpoints`` + - array + - Ordered list of :class:`~ucis.ncdb.testplan.Testpoint` objects + * - ``covergroups`` + - array + - Ordered list of :class:`~ucis.ncdb.testplan.CovergroupEntry` objects + +Merger behaviour + When merging two ``.cdb`` files that both contain ``testplan.json``: + + * **Same ``source_file``** — the entry with the later + ``import_timestamp`` is kept. + * **Different ``source_file``** — a warning is emitted and the merged + output contains no testplan. + +8.2 ``waivers.json`` +====================== + +.. code-block:: json + + { + "format_version": 1, + "waivers": [ + { + "id": "W-001", + "scope_pattern": "top/uart/**", + "bin_pattern": "reset_*", + "rationale": "Deferred to V2", + "approver": "jdoe", + "approved_at": "2025-01-01T00:00:00", + "expires_at": "2026-01-01T00:00:00", + "status": "active" + } + ] + } + +.. 
list-table:: waivers.json — Waiver fields + :header-rows: 1 + :widths: 25 15 60 + + * - Field + - Type + - Description + * - ``id`` + - string + - Unique waiver identifier + * - ``scope_pattern`` + - glob string + - Hierarchy path pattern; ``*`` = single segment, ``**`` = any depth + * - ``bin_pattern`` + - glob string + - Coverage bin name pattern; same glob syntax as scope_pattern + * - ``rationale`` + - string + - Human-readable reason for the waiver + * - ``approver`` + - string + - Name or email of the approver + * - ``approved_at`` + - ISO-8601 string + - Approval timestamp + * - ``expires_at`` + - ISO-8601 string + - Expiry timestamp; empty string means no expiry + * - ``status`` + - ``"active"`` | ``"expired"`` + - Current status; :meth:`~ucis.ncdb.waivers.WaiverSet.active_at` filters + on both this field and ``expires_at`` + +Merger behaviour + Waivers are unioned by ``id`` across all source files. When the same + ``id`` appears in multiple sources the entry with the latest + ``approved_at`` is kept. diff --git a/doc/source/working-with-coverage/exploring-tui.rst b/doc/source/working-with-coverage/exploring-tui.rst index a7d5d65..f30cd69 100644 --- a/doc/source/working-with-coverage/exploring-tui.rst +++ b/doc/source/working-with-coverage/exploring-tui.rst @@ -53,6 +53,24 @@ Press the number key at any time to switch views. Statistical analysis: hit-count distribution histogram, mean/median/stddev, coverage tier breakdown (complete / high / medium / low), bin utilization rate. +**6 — Code Coverage** + File-level code coverage table showing statement, branch, and toggle hit + rates per source file. Requires a database with code-coverage data (e.g. + imported from Verilator). + +**7 — Test History** + Per-test contribution analysis. Each row shows total and unique coverage + items hit by that test. When the database includes v2 history (NCDB v2), + the detail panel also shows flake score, pass/fail counts, and mean CPU + time. 
Sort with ``N`` (name), ``D`` (date), ``C`` (coverage), ``U`` + (unique). + +**8 — Testplan** + Testplan closure status. Requires a testplan embedded in the database + (see :doc:`testplan`). Shows every testpoint with its stage, closure + status, and pass/fail counts. The header displays a stage roll-up with + colour-coded progress. Press ``r`` to refresh. + Global Keys =========== @@ -62,7 +80,7 @@ Global Keys * - Key - Action - * - ``1`` – ``5`` + * - ``1`` – ``8`` - Switch view * - ``?`` - Help overlay diff --git a/doc/source/working-with-coverage/index.rst b/doc/source/working-with-coverage/index.rst index 6877174..38a72bf 100644 --- a/doc/source/working-with-coverage/index.rst +++ b/doc/source/working-with-coverage/index.rst @@ -16,3 +16,6 @@ After importing your coverage data, the typical analysis workflow is: exploring-tui analyzing comparing + test-history + testplan + reports diff --git a/doc/source/working-with-coverage/reports.rst b/doc/source/working-with-coverage/reports.rst new file mode 100644 index 0000000..8a9b018 --- /dev/null +++ b/doc/source/working-with-coverage/reports.rst @@ -0,0 +1,332 @@ +.. _reports: + +############################# +Reports and CI/CD Integration +############################# + +The :mod:`ucis.ncdb.reports` and :mod:`ucis.ncdb.testplan_export` modules +provide structured reports for testplan closure, regression delta, and +CI/CD export. Every report function returns a typed dataclass with a +``to_json()`` method; companion ``format_*()`` functions render the +dataclass to human-readable text. + +.. contents:: On this page + :local: + :depth: 2 + +----------- + +********************** +Closure and gate reports +********************** + +.. 
code-block:: python + + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.testplan import get_testplan + from ucis.ncdb.testplan_closure import compute_closure + from ucis.ncdb.reports import ( + report_testpoint_closure, + format_testpoint_closure, + report_stage_gate, + format_stage_gate, + ) + + db = NcdbUCIS("coverage.cdb") + plan = get_testplan(db) + results = compute_closure(plan, db) + + # Print the closure table + summary = report_testpoint_closure(results) + print(format_testpoint_closure(summary)) + + # Evaluate a stage gate + gate = report_stage_gate(results, "V2", plan) + print(format_stage_gate(gate)) + + # Machine-readable JSON + import json + data = json.loads(summary.to_json()) + +Stage-rollup output example:: + + Testpoint Stage Status Pass Fail + ---------------------------------------------------------------- + uart_reset V1 ✓ CLOSED 5 0 + uart_loopback V2 ✗ FAILING 0 3 + ---------------------------------------------------------------- + + Stage roll-up: + V1 [████████████████████] 1/1 (100.0%) + V2 [░░░░░░░░░░░░░░░░░░░░] 0/1 (0.0%) + + Total: 1/2 closed (0 N/A) + +----------- + +********************** +Regression delta +********************** + +Compare two closure result sets to find testpoints that changed +status between runs:: + + from ucis.ncdb.reports import report_regression_delta, format_regression_delta + + # Load two snapshots + results_baseline = compute_closure(plan, db_baseline) + results_current = compute_closure(plan, db_current) + + delta = report_regression_delta(results_current, results_baseline) + print(format_regression_delta(delta)) + # Regression delta: +1 closed, -0 newly failing, 1 still open + + # Machine-readable + print(delta.to_json()) + +----------- + +********************** +Reliability report +********************** + +Compute per-testpoint flake scores from v2 history data:: + + from ucis.ncdb.reports import report_testpoint_reliability, format_testpoint_reliability + + report = 
report_testpoint_reliability(results, db) + print(format_testpoint_reliability(report)) + +Output example:: + + Testpoint Flake Pass Fail + -------------------------------------------------------- + uart_loopback 0.800 2 8 ⚠ + uart_reset 0.000 10 0 + +----------- + +*********************** +Unexercised covergroups +*********************** + +Identify zero-hit or low-coverage covergroups:: + + from ucis.ncdb.reports import ( + report_unexercised_covergroups, + format_unexercised_covergroups, + ) + + report = report_unexercised_covergroups(db, plan, low_threshold=50.0) + print(format_unexercised_covergroups(report)) + +----------- + +********************** +Coverage contribution +********************** + +Show which tests contribute the most unique coverage bins +(requires v2 history with contribution data):: + + from ucis.ncdb.reports import ( + report_coverage_contribution, + format_coverage_contribution, + ) + + report = report_coverage_contribution(db) + print(format_coverage_contribution(report)) + +----------- + +********************** +JUnit XML export +********************** + +Export closure results as a JUnit XML file for CI dashboards:: + + from ucis.ncdb.testplan_export import export_junit_xml + + export_junit_xml(results, "closure_results.xml") + +Or via the CLI:: + + pyucis testplan export-junit coverage.cdb --out closure_results.xml + +The XML maps each testpoint to a ``<testcase>`` element. FAILING and +PARTIAL testpoints become ``<failure>`` elements; NOT_RUN becomes +``<skipped>``.
+ +----------- + +********************** +GitHub Annotations +********************** + +Emit GitHub Actions `workflow commands`_ for inline PR annotations:: + + from ucis.ncdb.testplan_export import export_github_annotations + + export_github_annotations(results) # writes to stdout + + # Or capture to a string + import io + buf = io.StringIO() + export_github_annotations(results, output=buf) + print(buf.getvalue()) + +In a GitHub Actions workflow:: + + - name: Compute closure + run: | + python -c " + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.testplan import get_testplan + from ucis.ncdb.testplan_closure import compute_closure + from ucis.ncdb.testplan_export import export_github_annotations + db = NcdbUCIS('coverage.cdb') + plan = get_testplan(db) + results = compute_closure(plan, db) + export_github_annotations(results) + " + +.. _workflow commands: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions + +----------- + +********************** +Test budget by stage +********************** + +Estimate CPU-hour cost per stage from v2 test history mean CPU times:: + + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.testplan import get_testplan + from ucis.ncdb.reports import report_test_budget, format_test_budget + + db = NcdbUCIS("coverage.cdb") + plan = db.getTestplan() + rpt = report_test_budget(plan, db) + print(format_test_budget(rpt)) + + # JSON export + import json + print(json.loads(rpt.to_json())["stage_totals"]) + +Testpoints whose tests have no CPU time recorded appear in +``rpt.missing_stats``. 
+ +----------- + +********************** +Safety traceability matrix +********************** + +Build a requirement-to-testpoint matrix (suitable for safety audits):: + + from ucis.ncdb.reports import report_safety_matrix, format_safety_matrix + + rpt = report_safety_matrix(results) # results from compute_closure + print(format_safety_matrix(rpt)) + + # CSV for a spreadsheet or audit tool + with open("traceability.csv", "w") as f: + f.write(rpt.to_csv()) + + # Add a WaiverSet to flag waived testpoints + from ucis.ncdb.waivers import WaiverSet + waivers = WaiverSet.from_file("waivers.hjson") + rpt = report_safety_matrix(results, waivers=waivers) + +----------- + +********************** +Seed reliability heat-map +********************** + +Identify seeds that are disproportionately flaky:: + + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.reports import report_seed_reliability, format_seed_reliability + + db = NcdbUCIS("coverage.cdb") + rpt = report_seed_reliability(db, "uart_smoke") + print(format_seed_reliability(rpt)) + # Seeds with flake_score ≥ 0.2 are flagged with ⚠ + + # JSON heat-map for a custom dashboard + import json + data = json.loads(rpt.to_json()) + for row in data["rows"]: + if row["flake"] >= 0.2: + print(f"Seed {row['seed']}: {row['fail']} failures") + +----------- + +********************** +GitHub Step Summary +********************** + +Write a markdown table to ``$GITHUB_STEP_SUMMARY``:: + + import os + from ucis.ncdb.testplan_export import export_summary_markdown + + md = export_summary_markdown(results, stage_gate=gate) + with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: + f.write(md) + +The output includes a stage roll-up table, per-testpoint status rows, +and (when *stage_gate* is supplied) a gate verdict with a list of +blocking testpoints. + +----------- + +********************** +API reference +********************** + +.. autofunction:: ucis.ncdb.reports.report_testpoint_closure +.. 
autofunction:: ucis.ncdb.reports.format_testpoint_closure +.. autoclass:: ucis.ncdb.reports.ClosureSummary + +.. autofunction:: ucis.ncdb.reports.report_stage_gate +.. autofunction:: ucis.ncdb.reports.format_stage_gate +.. autoclass:: ucis.ncdb.reports.StageGateReport + +.. autofunction:: ucis.ncdb.reports.report_regression_delta +.. autofunction:: ucis.ncdb.reports.format_regression_delta +.. autoclass:: ucis.ncdb.reports.RegressionDelta + +.. autofunction:: ucis.ncdb.reports.report_testpoint_reliability +.. autofunction:: ucis.ncdb.reports.format_testpoint_reliability +.. autoclass:: ucis.ncdb.reports.TestpointReliability + +.. autofunction:: ucis.ncdb.reports.report_unexercised_covergroups +.. autofunction:: ucis.ncdb.reports.format_unexercised_covergroups +.. autoclass:: ucis.ncdb.reports.UnexercisedCovergroups + +.. autofunction:: ucis.ncdb.reports.report_coverage_contribution +.. autofunction:: ucis.ncdb.reports.format_coverage_contribution +.. autoclass:: ucis.ncdb.reports.CoverageContribution + +.. autofunction:: ucis.ncdb.reports.report_test_budget +.. autofunction:: ucis.ncdb.reports.format_test_budget +.. autoclass:: ucis.ncdb.reports.TestBudget + +.. autofunction:: ucis.ncdb.reports.report_safety_matrix +.. autofunction:: ucis.ncdb.reports.format_safety_matrix +.. autoclass:: ucis.ncdb.reports.SafetyMatrix + +.. autofunction:: ucis.ncdb.reports.report_seed_reliability +.. autofunction:: ucis.ncdb.reports.format_seed_reliability +.. autoclass:: ucis.ncdb.reports.SeedReliability + +.. autofunction:: ucis.ncdb.testplan_export.export_junit_xml +.. autofunction:: ucis.ncdb.testplan_export.export_github_annotations +.. autofunction:: ucis.ncdb.testplan_export.export_summary_markdown + +.. 
seealso:: + + * :ref:`testplan` — Testplan format and closure computation + * :ref:`test-history` — Binary test history API diff --git a/doc/source/working-with-coverage/test-history.rst b/doc/source/working-with-coverage/test-history.rst new file mode 100644 index 0000000..cb1912a --- /dev/null +++ b/doc/source/working-with-coverage/test-history.rst @@ -0,0 +1,234 @@ +.. _test-history: + +############ +Test History +############ + +PyUCIS stores a rich, time-series history of every test run inside each NCDB +``.cdb`` file. Introduced in NCDB v2, this *binary test history* is separate +from the legacy UCIS history-node model and is designed for: + +* **Trend analysis** — identify flaky or consistently-failing tests over + hundreds or thousands of runs. +* **Regression detection** — spot when a test's pass rate drops using a + CUSUM change-point algorithm. +* **Coverage provenance** — trace exactly which test runs contributed to the + squashed coverage numbers. + +.. contents:: On this page + :local: + :depth: 2 + +----------- + +********************** +Quick-start +********************** + +Record test results with :meth:`~ucis.ncdb.ncdb_ucis.NcdbUCIS.add_test_run`:: + + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.ncdb_writer import NcdbWriter + from ucis.ncdb.constants import HIST_STATUS_OK, HIST_STATUS_FAIL + from ucis.mem.mem_ucis import MemUCIS + + # Create or open a .cdb + NcdbWriter().write(MemUCIS(), "coverage.cdb") # once, to initialise + db = NcdbUCIS("coverage.cdb") + + # Record runs + db.add_test_run("uart_smoke", seed="42", status=HIST_STATUS_OK, + ts=1700000000, has_coverage=True) + db.add_test_run("uart_smoke", seed="43", status=HIST_STATUS_FAIL, + ts=1700003600, has_coverage=False) + + # Save + NcdbWriter().write(db, "coverage.cdb") + +Query the results:: + + db2 = NcdbUCIS("coverage.cdb") + + # All runs for one test + history = db2.query_test_history("uart_smoke") + for rec in history: + print(rec.ts, rec.status) + + # Aggregate 
statistics + entry = db2.get_test_stats("uart_smoke") + print(f"total={entry.total_runs} pass={entry.pass_count} fail={entry.fail_count}") + + # Top-flaky across all tests + for name, score, passes, fails in db2.top_flaky_tests(10): + print(name, score) + +----------- + +********************** +API reference +********************** + +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.add_test_run +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.query_test_history +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.get_test_stats +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.top_flaky_tests +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.top_failing_tests +.. automethod:: ucis.ncdb.ncdb_ucis.NcdbUCIS.squash_coverage + +----------- + +********************** +Status and flag values +********************** + +Status constants (in :mod:`ucis.ncdb.constants`): + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Constant + - Meaning + * - ``HIST_STATUS_OK`` + - Run passed + * - ``HIST_STATUS_FAIL`` + - Run failed + * - ``HIST_STATUS_ERROR`` + - Test infrastructure error (not a test-logic failure) + * - ``HIST_STATUS_TIMEOUT`` + - Run exceeded wall-clock budget + * - ``HIST_STATUS_SKIP`` + - Run was explicitly skipped + +Flag constants (combinable with ``|``): + +.. 
list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Constant + - Meaning + * - ``HIST_FLAG_HAS_COV`` + - Run produced coverage data (counts.bin was updated) + * - ``HIST_FLAG_REGRESS`` + - Run is part of a regression sweep + * - ``HIST_FLAG_RERUN`` + - This is a re-run of a previously recorded test + +----------- + +********************** +Time-range queries +********************** + +:meth:`~ucis.ncdb.ncdb_ucis.NcdbUCIS.query_test_history` accepts optional +``ts_from`` and ``ts_to`` Unix-timestamp bounds:: + + import time + yesterday = int(time.time()) - 86400 + + # Only runs from the last 24 hours + recent = db.query_test_history("my_test", ts_from=yesterday) + +The call uses the bucket index to skip buckets whose time ranges do not +overlap, so queries over large history stores are fast even when only a small +window is requested. + +----------- + +********************** +Merging history +********************** + +History is merged automatically when two or more ``.cdb`` files are combined +with :class:`~ucis.ncdb.ncdb_merger.NcdbMerger`:: + + from ucis.ncdb.ncdb_merger import NcdbMerger + + NcdbMerger().merge(["run_a.cdb", "run_b.cdb"], "merged.cdb") + +The merger performs: + +1. **Registry union** — all test names and seed strings from all sources are + collected into a single merged registry, preserving insertion order. +2. **Stats merge** — per-test aggregate metrics (mean runtime, variance, pass + rate) are combined using Chan's parallel formula for numerically stable + Welford-style mean/variance. +3. **Bucket remap** — name_ids in each source's bucket files are remapped to + the merged registry before being written to the output. +4. **Contrib-index remap** — run_ids in the contribution index are offset by + the source's base run_id so merged run_ids remain globally unique. + +.. note:: + + Merging is not strictly idempotent: merging a file with itself produces the + same aggregate statistics as the original, but run counts will double.
+ +----------- + +********************** +Squash coverage +********************** + +Over time a ``.cdb`` accumulates contribution entries for every test run +that produced coverage. Squashing compresses these entries into the main +coverage counts and frees space:: + + db.squash_coverage(policy=POLICY_PASS_ONLY) + NcdbWriter().write(db, "coverage.cdb") + +The squash event is recorded in the squash log so that provenance is never +lost. The ``policy`` argument controls which runs are squashed: + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Constant + - Behaviour + * - ``POLICY_PASS_ONLY`` + - Squash only runs with ``HIST_STATUS_OK`` + * - ``POLICY_ALL`` + - Squash all runs regardless of status + +----------- + +********************** +Binary format overview +********************** + +The v2 test history is stored as several members inside the NCDB ZIP archive. +A ``history_format`` key in ``manifest.json`` selects the version: + +* ``"v1"`` — legacy UCIS history-node model (no binary history) +* ``"v2"`` — binary test history (this section) + +Binary members added for v2: + +.. list-table:: + :header-rows: 1 + :widths: 35 65 + + * - ZIP member + - Contents + * - ``history/test_registry.bin`` + - Ordered list of test names and seed strings with stable integer IDs + * - ``history/test_stats.bin`` + - Per-test aggregate metrics (72 bytes/test) + * - ``history/bucket_index.bin`` + - Index of time-bucketed run-record files (28 bytes/entry) + * - ``history/NNNNNN.bin`` + - Individual run-record buckets (LZMA or DEFLATE compressed) + * - ``history/contrib_index.bin`` + - Per-run coverage-contribution entries + * - ``history/squash_log.bin`` + - Append-only log of squash events + +For the full binary layout see :ref:`ncdb-format-v2-history` in the format +reference. + +.. 
seealso:: + + * :ref:`ncdb-format` — Full NCDB binary format specification + * :doc:`merging` — How to merge ``.cdb`` files on the command line + * :doc:`analyzing` — Query and report coverage from the CLI diff --git a/doc/source/working-with-coverage/testplan.rst b/doc/source/working-with-coverage/testplan.rst new file mode 100644 index 0000000..ed20e9f --- /dev/null +++ b/doc/source/working-with-coverage/testplan.rst @@ -0,0 +1,259 @@ +.. _testplan: + +#################### +Testplan Integration +#################### + +PyUCIS can embed a structured *testplan* inside each NCDB ``.cdb`` file. +A testplan describes the verification tasks (testpoints) and functional +coverage groups expected for a design. Together with the binary test +history (see :ref:`test-history`) it enables: + +* **Closure reporting** — did every testpoint's tests actually pass? +* **Stage gate evaluation** — are all V1/V2/V3 testpoints closed? +* **Merge propagation** — the testplan travels with the database so + reports always use the correct plan. + +.. 
contents:: On this page + :local: + :depth: 2 + +----------- + +********************** +Quick-start +********************** + +Import an OpenTitan-style Hjson testplan and embed it in a ``.cdb``:: + + from ucis.ncdb.testplan_hjson import import_hjson + from ucis.ncdb.ncdb_ucis import NcdbUCIS + from ucis.ncdb.ncdb_writer import NcdbWriter + + plan = import_hjson("uart_testplan.hjson", + substitutions={"baud": ["9600", "115200"]}) + + db = NcdbUCIS("coverage.cdb") + db.setTestplan(plan) + NcdbWriter().write(db, "coverage.cdb") + +Compute closure against the embedded testplan:: + + from ucis.ncdb.testplan_closure import compute_closure, stage_gate_status + from ucis.ncdb.testplan import get_testplan + + db = NcdbUCIS("coverage.cdb") + plan = db.getTestplan() + results = compute_closure(plan, db) + + for r in results: + print(f"{r.testpoint.name:30s} {r.status.value}") + + gate = stage_gate_status(results, "V2", plan) + print(gate["message"]) + +----------- + +********************** +Testplan format +********************** + +A testplan is stored as ``testplan.json`` inside the NCDB ZIP and is also +exportable as a standalone JSON file. The schema is:: + + { + "format_version": 1, + "source_file": "path/to/uart.hjson", + "import_timestamp": "2025-01-01T00:00:00+00:00", + "testpoints": [ + { + "name": "uart_reset", + "stage": "V1", + "desc": "Verify reset behaviour", + "tests": ["uart_smoke", "uart_init_*"], + "tags": ["smoke"], + "na": false, + "source_template": "" + } + ], + "covergroups": [ + {"name": "cg_uart_reset", "desc": "Reset coverage"} + ] + } + +Stages follow the OpenTitan V1 → V2 → V2S → V3 hierarchy; custom strings +are also accepted and sort after V3 in gate evaluation. 
+ +----------- + +********************** +Importing Hjson +********************** + +Use :func:`~ucis.ncdb.testplan_hjson.import_hjson` to parse an OpenTitan +``.hjson`` testplan (or a standard ``.json`` file):: + + plan = import_hjson( + "uart_testplan.hjson", + substitutions={ + "uart": ["uart0", "uart1"], + "mode": ["loopback", "normal"], + }, + ) + +The ``substitutions`` dict provides values for ``{key}`` placeholders in +test name templates. A list value generates the cartesian product of all +combinations:: + + # Template: "{uart}_{mode}_test" + # Substitutions: uart=["uart0","uart1"], mode=["loopback","normal"] + # Expands to: uart0_loopback_test, uart0_normal_test, + # uart1_loopback_test, uart1_normal_test + +Tests listed as ``["N/A"]`` are treated as intentionally unmapped +(``testpoint.na = True``). + +----------- + +********************** +Closure computation +********************** + +:func:`~ucis.ncdb.testplan_closure.compute_closure` evaluates each +testpoint against the test history stored in the database: + +.. list-table:: + :header-rows: 1 + :widths: 20 80 + + * - Status + - Meaning + * - ``CLOSED`` + - All mapped tests have at least one passing run + * - ``PARTIAL`` + - Some passing, some failing + * - ``FAILING`` + - All mapped tests failed + * - ``NOT_RUN`` + - None of the mapped tests appear in the database + * - ``N/A`` + - Testpoint has ``na = True`` + * - ``UNIMPLEMENTED`` + - Testpoint has an empty ``tests`` list + +Test name matching uses three strategies in order: + +1. **Exact** — the test name appears literally in ``testpoint.tests``. +2. **Seed-suffix strip** — trailing ``_\d+`` is removed and the result + matched exactly (e.g. ``uart_smoke_42`` → ``uart_smoke``). +3. **Wildcard** — any ``testpoint.tests`` entry ending in ``_*`` is used + as a prefix match. 
+ +----------- + +********************** +Stage gate evaluation +********************** + +:func:`~ucis.ncdb.testplan_closure.stage_gate_status` determines whether +a regression is ready to advance to the next stage:: + + gate = stage_gate_status(results, "V2", plan) + if gate["passed"]: + print("Ready to tape-out!") + else: + for r in gate["blocking"]: + print(f" BLOCKING: {r.testpoint.name}") + +The gate passes when all testpoints at the target stage **and all stages +below it** (V1 < V2 < V2S < V3) are CLOSED or N/A. + +----------- + +********************** +Waivers +********************** + +Coverage and test failures can be suppressed with a +:class:`~ucis.ncdb.waivers.WaiverSet`:: + + from ucis.ncdb.waivers import Waiver, WaiverSet + + ws = WaiverSet([ + Waiver( + id="W-001", + scope_pattern="top/uart/**", + bin_pattern="reset_*", + rationale="Reset coverage deferred to V2", + approver="eng", + approved_at="2025-01-01T00:00:00", + expires_at="2026-01-01T00:00:00", + ) + ]) + + db.setWaivers(ws) + NcdbWriter().write(db, "coverage.cdb") + +Scope patterns use glob syntax: ``*`` matches a single path segment, +``**`` matches any number of segments. 
Expiry enforcement is the +caller's responsibility — use :meth:`~ucis.ncdb.waivers.WaiverSet.active_at` +to filter out expired waivers before passing to closure:: + + import time + now = time.strftime("%Y-%m-%dT%H:%M:%S") + active_waivers = ws.active_at(now) + +----------- + +********************** +Modes A and B +********************** + +**Mode A (embedded)** — testplan stored inside the ``.cdb``:: + + db.setTestplan(plan) + NcdbWriter().write(db, "coverage.cdb") + + # Read back — travels with the database + db2 = NcdbUCIS("coverage.cdb") + plan2 = db2.getTestplan() + +**Mode B (standalone)** — testplan kept as a separate file:: + + plan.save("uart_testplan_snapshot.json") + + # Load later and pass to analysis functions + plan = Testplan.load("uart_testplan_snapshot.json") + results = compute_closure(plan, db) + +Both modes produce the same :class:`~ucis.ncdb.testplan.Testplan` object. +The helper :func:`~ucis.ncdb.testplan.get_testplan` works with both:: + + from ucis.ncdb.testplan import get_testplan + plan = get_testplan(db) # works for NcdbUCIS or MemUCIS + +----------- + +********************** +API reference +********************** + +.. autofunction:: ucis.ncdb.testplan_hjson.import_hjson +.. autoclass:: ucis.ncdb.testplan.Testplan + :members: getTestpoint, testpointForTest, testpointsForStage, stages, + add_testpoint, serialize, from_bytes, load, save, stamp_import_time +.. autoclass:: ucis.ncdb.testplan.Testpoint +.. autoclass:: ucis.ncdb.testplan.CovergroupEntry +.. autoclass:: ucis.ncdb.testplan.RequirementLink +.. autofunction:: ucis.ncdb.testplan_closure.compute_closure +.. autofunction:: ucis.ncdb.testplan_closure.stage_gate_status +.. autoclass:: ucis.ncdb.testplan_closure.TPStatus +.. autoclass:: ucis.ncdb.testplan_closure.TestpointResult +.. autoclass:: ucis.ncdb.waivers.WaiverSet + :members: add, matches_scope, active_at, get, serialize, from_bytes, load, save +.. autoclass:: ucis.ncdb.waivers.Waiver + +.. 
seealso:: + + * :ref:`test-history` — Binary test history API + * :ref:`ncdb-format` — NCDB binary format specification diff --git a/src/ucis/__main__.py b/src/ucis/__main__.py index 64a3355..bf06819 100644 --- a/src/ucis/__main__.py +++ b/src/ucis/__main__.py @@ -7,6 +7,7 @@ from ucis.cmd import cmd_list_db_formats from ucis.cmd import cmd_list_report_formats from ucis.cmd import cmd_report, cmd_merge, cmd_convert, cmd_show +from ucis.cmd import cmd_history, cmd_testplan import sys import traceback import os @@ -294,7 +295,121 @@ def get_parser(): help="Specifies the format of the input database. Defaults to 'xml'") view.add_argument("db", help="Path to the coverage database") view.set_defaults(func=lambda args: _launch_tui(args)) - + + # ----------------------------------------------------------------------- + # history subcommand + # ----------------------------------------------------------------------- + history = subparser.add_parser( + "history", + help="Query and display test history from an NCDB .cdb file", + ) + history_sub = history.add_subparsers(dest="history_cmd") + history_sub.required = True + + history_query = history_sub.add_parser( + "query", + help="Display history records for a specific test", + ) + history_query.add_argument("db", help="Path to the NCDB .cdb file") + history_query.add_argument("test_name", help="Test name to query") + history_query.add_argument("--from", dest="from_", + metavar="DATE", default=None, + help="Start date (ISO 8601 or Unix timestamp)") + history_query.add_argument("--to", default=None, + metavar="DATE", + help="End date (ISO 8601 or Unix timestamp)") + history_query.add_argument("--out", "-o", default=None, + help="Output file (default: stdout)") + history_query.add_argument( + "--output-format", "-of", default="text", + choices=["text", "json"], + help="Output format (default: text)", + ) + history_query.set_defaults(func=cmd_history.cmd_history_query) + + history_stats = history_sub.add_parser( + "stats", + 
help="Show test statistics (flaky, failing, or named test)", + ) + history_stats.add_argument("db", help="Path to the NCDB .cdb file") + history_stats.add_argument("test_name", nargs="?", default=None, + help="Show stats for a specific test name") + history_stats.add_argument("--top-flaky", metavar="N", type=int, default=None, + help="Show top N flaky tests") + history_stats.add_argument("--top-failing", metavar="N", type=int, default=None, + help="Show top N failing tests") + history_stats.add_argument("--out", "-o", default=None, + help="Output file (default: stdout)") + history_stats.add_argument( + "--output-format", "-of", default="text", + choices=["text", "json"], + help="Output format (default: text)", + ) + history_stats.set_defaults(func=cmd_history.cmd_history_stats) + + # ----------------------------------------------------------------------- + # testplan subcommand + # ----------------------------------------------------------------------- + testplan = subparser.add_parser( + "testplan", + help="Manage and evaluate testplans embedded in NCDB .cdb files", + ) + testplan_sub = testplan.add_subparsers(dest="testplan_cmd") + testplan_sub.required = True + + testplan_import = testplan_sub.add_parser( + "import", + help="Import an Hjson/JSON testplan and embed it in a .cdb file", + ) + testplan_import.add_argument("db", help="Path to the NCDB .cdb file") + testplan_import.add_argument("hjson_path", + help="Path to the .hjson or .json testplan file") + testplan_import.add_argument( + "--subs", metavar="KEY=VAL", action="append", default=[], + help="Template substitution (repeatable): e.g. 
--subs uart=uart0", + ) + testplan_import.set_defaults(func=cmd_testplan.cmd_testplan_import) + + testplan_closure = testplan_sub.add_parser( + "closure", + help="Compute and display testpoint closure", + ) + testplan_closure.add_argument("db", help="Path to the NCDB .cdb file") + testplan_closure.add_argument("--testplan", default=None, + metavar="PATH", + help="External testplan JSON file (overrides embedded)") + testplan_closure.add_argument("--waivers", default=None, + metavar="PATH", + help="External waivers JSON file (overrides embedded)") + testplan_closure.add_argument("--stage", default=None, + metavar="STAGE", + help="Evaluate a stage gate (e.g. V2)") + testplan_closure.add_argument("--out", "-o", default=None, + help="Output file (default: stdout)") + testplan_closure.add_argument( + "--output-format", "-of", default="text", + choices=["text", "json"], + help="Output format (default: text)", + ) + testplan_closure.set_defaults(func=cmd_testplan.cmd_testplan_closure) + + testplan_export_junit = testplan_sub.add_parser( + "export-junit", + help="Export testpoint closure results as JUnit XML", + ) + testplan_export_junit.add_argument("db", help="Path to the NCDB .cdb file") + testplan_export_junit.add_argument("--testplan", default=None, + metavar="PATH", + help="External testplan JSON file (overrides embedded)") + testplan_export_junit.add_argument("--out", "-o", default=None, + help="Output XML file (default: closure_results.xml)") + testplan_export_junit.add_argument("--suite-name", default=None, + metavar="NAME", + help="JUnit testsuite name attribute") + testplan_export_junit.set_defaults( + func=cmd_testplan.cmd_testplan_export_junit + ) + return parser def _launch_tui(args): diff --git a/src/ucis/cmd/cmd_history.py b/src/ucis/cmd/cmd_history.py new file mode 100644 index 0000000..d4af37e --- /dev/null +++ b/src/ucis/cmd/cmd_history.py @@ -0,0 +1,140 @@ +"""``pyucis history`` CLI subcommands. 
+
+Subcommands
+-----------
+query   Display history records for a specific test name.
+stats   Show aggregate statistics (top-failing, top-flaky, or named test).
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import time
+
+
+def _open_ncdb(path: str):
+    """Return ``NcdbUCIS(path)`` for the database file at *path*."""
+    from ucis.ncdb.ncdb_ucis import NcdbUCIS
+    return NcdbUCIS(path)
+
+
+def _ts(ts_arg: str | None) -> int | None:
+    """Parse an ISO date string or integer unix timestamp.
+
+    Args:
+        ts_arg: ``None``, a decimal unix timestamp, or a string accepted by
+            ``datetime.datetime.fromisoformat``.
+
+    Returns:
+        ``None`` when *ts_arg* is ``None``; otherwise the timestamp in whole
+        seconds.  ISO strings that carry no UTC offset are interpreted as
+        UTC so that range filters line up with the ``time.gmtime`` dates
+        printed by ``cmd_history_query``.
+
+    Raises:
+        ValueError: if *ts_arg* is neither an integer nor a valid ISO string.
+    """
+    if ts_arg is None:
+        return None
+    try:
+        return int(ts_arg)
+    except ValueError:
+        import datetime
+        dt = datetime.datetime.fromisoformat(ts_arg)
+        if dt.tzinfo is None:
+            # ``timestamp()`` reads a naive datetime in the host's local
+            # zone; pin it to UTC to match how records are displayed.
+            dt = dt.replace(tzinfo=datetime.timezone.utc)
+        return int(dt.timestamp())
+
+
+# ---------------------------------------------------------------------------
+# history query
+# ---------------------------------------------------------------------------
+
+def cmd_history_query(args) -> None:
+    """Execute ``pyucis history query``.
+
+    Reads ``args.db`` and ``args.test_name``, plus the optional ``from_``,
+    ``to``, ``output_format`` and ``out`` attributes, and writes the
+    matching history records as JSON or as a text table.  Any status other
+    than ``0`` is rendered as ``fail``.
+    """
+    db = _open_ncdb(args.db)
+    ts_from = _ts(getattr(args, "from_", None))
+    ts_to = _ts(getattr(args, "to", None))
+
+    records = db.query_test_history(args.test_name, ts_from=ts_from, ts_to=ts_to)
+
+    fmt = getattr(args, "output_format", "text")
+    out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout
+
+    try:
+        if fmt == "json":
+            data = [
+                {
+                    "ts": r.ts,
+                    "date": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(r.ts)),
+                    "status": "pass" if r.status == 0 else "fail",
+                    "seed_id": r.seed_id,
+                }
+                for r in records
+            ]
+            out.write(json.dumps(data, indent=2) + "\n")
+        else:
+            out.write(
+                f"{'Date':<20} {'Status':<8} {'Seed':>12}\n"
+            )
+            out.write("-" * 42 + "\n")
+            for r in records:
+                date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(r.ts))
+                status = "pass" if r.status == 0 else "fail"
+                out.write(f"{date:<20} {status:<8} {r.seed_id:>12}\n")
+            out.write(f"\nTotal records: {len(records)}\n")
+    finally:
+        # Only close handles this function opened; never close stdout.
+        if out is not sys.stdout:
+            out.close()
+
+
+# ---------------------------------------------------------------------------
+# history stats
+# 
--------------------------------------------------------------------------- + +def cmd_history_stats(args) -> None: + """Execute ``pyucis history stats``.""" + db = _open_ncdb(args.db) + fmt = getattr(args, "output_format", "text") + out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout + + try: + top_flaky = getattr(args, "top_flaky", None) + top_failing = getattr(args, "top_failing", None) + test_name = getattr(args, "test_name", None) + + if test_name: + stats = db.get_test_stats(test_name) + if stats is None: + out.write(f"No stats found for test '{test_name}'\n") + return + if fmt == "json": + d = { + "name": test_name, + "total_runs": stats.total_runs, + "pass_count": stats.pass_count, + "fail_count": stats.fail_count, + "flake_score": stats.flake_score, + "mean_cpu_time": stats.mean_cpu_time, + "grade_score": stats.grade_score, + "last_status": stats.last_status, + } + out.write(json.dumps(d, indent=2) + "\n") + else: + out.write(f"Test: {test_name}\n") + out.write(f" Total runs: {stats.total_runs}\n") + out.write(f" Pass: {stats.pass_count}\n") + out.write(f" Fail: {stats.fail_count}\n") + out.write(f" Flake score: {stats.flake_score:.3f}\n") + out.write(f" Mean CPU: {stats.mean_cpu_time:.2f}s\n") + out.write(f" Grade score: {stats.grade_score:.3f}\n") + return + + rows = [] + if top_flaky: + rows = db.top_flaky_tests(n=top_flaky) + title = f"Top {top_flaky} flaky tests" + elif top_failing: + rows = db.top_failing_tests(n=top_failing) + title = f"Top {top_failing} failing tests" + else: + rows = db.top_flaky_tests(n=20) + title = "Top 20 flaky tests" + + if fmt == "json": + out.write(json.dumps(rows, indent=2) + "\n") + else: + out.write(f"{title}\n") + out.write("-" * 60 + "\n") + col = max((len(r[0]) for r in rows), default=10) + 2 if rows else 30 + out.write(f"{'Test':<{col}} {'Score':>8} {'Pass':>7} {'Fail':>7}\n") + out.write("-" * (col + 26) + "\n") + for name, score, pc, fc in rows: + out.write(f"{name:<{col}} {score:>8.3f} {pc:>7} 
{fc:>7}\n") + finally: + if out is not sys.stdout: + out.close() diff --git a/src/ucis/cmd/cmd_testplan.py b/src/ucis/cmd/cmd_testplan.py new file mode 100644 index 0000000..890c319 --- /dev/null +++ b/src/ucis/cmd/cmd_testplan.py @@ -0,0 +1,153 @@ +"""``pyucis testplan`` CLI subcommands. + +Subcommands +----------- +import Import an Hjson/JSON testplan and embed it in a .cdb file. +closure Compute testpoint closure and display a report. +export-junit Export closure results as JUnit XML. +""" + +from __future__ import annotations + +import json +import sys + + +def _open_ncdb(path: str): + from ucis.ncdb.ncdb_ucis import NcdbUCIS + return NcdbUCIS(path) + + +# --------------------------------------------------------------------------- +# testplan import +# --------------------------------------------------------------------------- + +def cmd_testplan_import(args) -> None: + """Execute ``pyucis testplan import``.""" + from ucis.ncdb.testplan_hjson import import_hjson + from ucis.ncdb.ncdb_writer import NcdbWriter + import os, tempfile + + # Parse substitutions: "key=val" pairs + subs: dict = {} + for s in getattr(args, "subs", []) or []: + if "=" in s: + k, _, v = s.partition("=") + existing = subs.get(k) + if existing is None: + subs[k] = [v] + else: + existing.append(v) + + plan = import_hjson(args.hjson_path, substitutions=subs if subs else None) + db = _open_ncdb(args.db) + db.setTestplan(plan) + + # Write to a temp file then replace + tmp = args.db + ".tmp" + NcdbWriter().write(db, tmp) + os.replace(tmp, args.db) + + print( + f"Imported testplan from '{args.hjson_path}': " + f"{len(plan.testpoints)} testpoints, " + f"{len(plan.covergroups)} covergroups" + ) + + +# --------------------------------------------------------------------------- +# testplan closure +# --------------------------------------------------------------------------- + +def cmd_testplan_closure(args) -> None: + """Execute ``pyucis testplan closure``.""" + from ucis.ncdb.testplan import 
get_testplan, Testplan + from ucis.ncdb.testplan_closure import compute_closure + from ucis.ncdb.waivers import WaiverSet + from ucis.ncdb.reports import ( + report_testpoint_closure, + format_testpoint_closure, + report_stage_gate, + format_stage_gate, + ) + + db = _open_ncdb(args.db) + + # Load testplan + testplan_path = getattr(args, "testplan", None) + if testplan_path: + plan = Testplan.load(testplan_path) + else: + plan = get_testplan(db) + + if plan is None: + print( + "Error: no testplan found. Embed one with " + "'pyucis testplan import' or supply --testplan.", + file=sys.stderr, + ) + sys.exit(1) + + # Load waivers + waivers = None + waivers_path = getattr(args, "waivers", None) + if waivers_path: + from ucis.ncdb.waivers import WaiverSet + waivers = WaiverSet.load(waivers_path) + elif hasattr(db, "getWaivers"): + waivers = db.getWaivers() + + results = compute_closure(plan, db, waivers=waivers) + + fmt = getattr(args, "output_format", "text") + out = open(args.out, "w") if getattr(args, "out", None) else sys.stdout + + try: + if fmt == "json": + summary = report_testpoint_closure(results) + out.write(summary.to_json() + "\n") + else: + summary = report_testpoint_closure(results) + out.write(format_testpoint_closure(summary) + "\n") + + # Stage gate (if requested) + stage = getattr(args, "stage", None) + if stage: + gate = report_stage_gate(results, stage, plan) + out.write("\n" + format_stage_gate(gate) + "\n") + finally: + if out is not sys.stdout: + out.close() + + +# --------------------------------------------------------------------------- +# testplan export-junit +# --------------------------------------------------------------------------- + +def cmd_testplan_export_junit(args) -> None: + """Execute ``pyucis testplan export-junit``.""" + from ucis.ncdb.testplan import get_testplan, Testplan + from ucis.ncdb.testplan_closure import compute_closure + from ucis.ncdb.testplan_export import export_junit_xml + + db = _open_ncdb(args.db) + + 
testplan_path = getattr(args, "testplan", None) + if testplan_path: + plan = Testplan.load(testplan_path) + else: + plan = get_testplan(db) + + if plan is None: + print( + "Error: no testplan found. Embed one with " + "'pyucis testplan import' or supply --testplan.", + file=sys.stderr, + ) + sys.exit(1) + + results = compute_closure(plan, db) + output_path = getattr(args, "out", None) or "closure_results.xml" + suite_name = getattr(args, "suite_name", None) or "testplan_closure" + export_junit_xml(results, output_path, suite_name=suite_name) + print(f"JUnit XML written to '{output_path}'") diff --git a/src/ucis/ncdb/bucket_index.py b/src/ucis/ncdb/bucket_index.py new file mode 100644 index 0000000..43e05ed --- /dev/null +++ b/src/ucis/ncdb/bucket_index.py @@ -0,0 +1,171 @@ +""" +history/bucket_index.bin — index mapping bucket sequence numbers to date +ranges and aggregate counts. + +This 24-byte-per-entry index allows regression trend queries and targeted +bucket reads without opening individual bucket files. + +Binary layout (little-endian):: + + magic u32 0x42494458 ('BIDX') + version u8 1 + num_buckets u32 + + entries[num_buckets]: sorted by bucket_seq + bucket_seq u32 + ts_start u32 unix timestamp of first record in bucket + ts_end u32 unix timestamp of last record in bucket + num_records u32 + fail_count u32 enables pass-rate trend without opening bucket + min_name_id u32 + max_name_id u32 + +24 bytes per entry. 3650 entries (10 years) ≈ 87 KB. +""" + +from __future__ import annotations + +import struct +from dataclasses import dataclass +from typing import List, Optional, Tuple + +MAGIC = 0x42494458 # 'BIDX' +VERSION = 1 + +_HDR = struct.Struct(" float: + if self.num_records == 0: + return 1.0 + return (self.num_records - self.fail_count) / self.num_records + + +class BucketIndex: + """In-memory representation of ``history/bucket_index.bin``. 
+ + Example:: + + idx = BucketIndex() + idx.add_bucket(seq=0, ts_start=1700000000, ts_end=1700086399, + num_records=5000, fail_count=12, + min_name_id=0, max_name_id=99) + data = idx.serialize() + idx2 = BucketIndex.deserialize(data) + """ + + def __init__(self) -> None: + self._entries: List[BucketIndexEntry] = [] + + def add_bucket(self, seq: int, ts_start: int, ts_end: int, + num_records: int, fail_count: int, + min_name_id: int, max_name_id: int) -> None: + """Add or update the index entry for bucket *seq*. + + Entries are kept sorted by *seq*. + """ + entry = BucketIndexEntry( + bucket_seq=seq, ts_start=ts_start, ts_end=ts_end, + num_records=num_records, fail_count=fail_count, + min_name_id=min_name_id, max_name_id=max_name_id, + ) + # Replace existing or insert in sorted order + for i, e in enumerate(self._entries): + if e.bucket_seq == seq: + self._entries[i] = entry + return + if e.bucket_seq > seq: + self._entries.insert(i, entry) + return + self._entries.append(entry) + + def buckets_in_range(self, ts_from: int, ts_to: int) -> List[BucketIndexEntry]: + """Return entries whose time range overlaps [ts_from, ts_to].""" + return [e for e in self._entries + if e.ts_end >= ts_from and e.ts_start <= ts_to] + + def buckets_for_name(self, name_id: int, + ts_from: Optional[int] = None, + ts_to: Optional[int] = None) -> List[BucketIndexEntry]: + """Return entries that may contain records for *name_id*. + + Filters by ``min_name_id ≤ name_id ≤ max_name_id`` and optionally + by time range. 
+        """
+        results = []
+        for e in self._entries:
+            # Skip buckets whose name_id span cannot include *name_id*.
+            if e.min_name_id > name_id or e.max_name_id < name_id:
+                continue
+            if ts_from is not None and e.ts_end < ts_from:
+                continue
+            if ts_to is not None and e.ts_start > ts_to:
+                continue
+            results.append(e)
+        return results
+
+    def pass_rate_series(self) -> List[Tuple[int, float]]:
+        """Return ``(ts_start, pass_rate)`` pairs for all buckets in order."""
+        return [(e.ts_start, e.pass_rate) for e in self._entries]
+
+    @property
+    def num_buckets(self) -> int:
+        """Number of index entries currently held."""
+        return len(self._entries)
+
+    def next_seq(self) -> int:
+        """Return the sequence number for the next new bucket."""
+        if not self._entries:
+            return 0
+        return self._entries[-1].bucket_seq + 1
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the index to bytes for storage in the ZIP archive.
+
+        Returns:
+            The packed header followed by one packed row per entry, in
+            ``self._entries`` order.
+        """
+        header = _HDR.pack(MAGIC, VERSION, len(self._entries))
+        # Join once: appending to a bytes object inside the loop re-copies
+        # the accumulated buffer for every entry.
+        rows = b"".join(
+            _ENTRY.pack(e.bucket_seq, e.ts_start, e.ts_end,
+                        e.num_records, e.fail_count,
+                        e.min_name_id, e.max_name_id)
+            for e in self._entries
+        )
+        return header + rows
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "BucketIndex":
+        """Reconstruct a BucketIndex from raw bytes.
+
+        Raises:
+            ValueError: if magic or version is wrong.
+ """ + magic, version, num_buckets = _HDR.unpack_from(data, 0) + if magic != MAGIC: + raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}") + if version != VERSION: + raise ValueError(f"Unsupported bucket_index version {version}") + + idx = cls() + offset = _HDR.size + for _ in range(num_buckets): + fields = _ENTRY.unpack_from(data, offset) + offset += _ENTRY.size + idx._entries.append(BucketIndexEntry( + bucket_seq=fields[0], ts_start=fields[1], ts_end=fields[2], + num_records=fields[3], fail_count=fields[4], + min_name_id=fields[5], max_name_id=fields[6], + )) + return idx diff --git a/src/ucis/ncdb/constants.py b/src/ucis/ncdb/constants.py index 16289ab..795dd30 100644 --- a/src/ucis/ncdb/constants.py +++ b/src/ucis/ncdb/constants.py @@ -11,9 +11,13 @@ # ── Format identity ──────────────────────────────────────────────────────── NCDB_FORMAT = "NCDB" -NCDB_VERSION = "1.0" +NCDB_VERSION = "2.0" NCDB_GENERATOR = "pyucis-ncdb" +# History format versions stored in manifest.json +HISTORY_FORMAT_V1 = "v1" # legacy: history.json only +HISTORY_FORMAT_V2 = "v2" # binary bucket store + history.json for MERGE nodes + # ── CDB file header magic ────────────────────────────────────────────────── SQLITE_MAGIC = b"SQLite format 3\x00" # 16 bytes @@ -37,6 +41,36 @@ MEMBER_PROPERTIES = "properties.json" MEMBER_CONTRIB_DIR = "contrib/" +# ── v2 history store ZIP member names ───────────────────────────────────── + +MEMBER_TEST_REGISTRY = "test_registry.bin" +MEMBER_TEST_STATS = "test_stats.bin" +MEMBER_BUCKET_INDEX = "history/bucket_index.bin" +MEMBER_CONTRIB_INDEX = "contrib_index.bin" +MEMBER_SQUASH_LOG = "squash_log.bin" +MEMBER_TESTPLAN = "testplan.json" +MEMBER_WAIVERS = "waivers.json" + +# ── v2 history bucket directory prefix ──────────────────────────────────── + +HISTORY_BUCKET_DIR = "history/" +HISTORY_BUCKET_MAX_RECORDS = 10_000 + +# ── v2 test-run status codes (stored in status_flags nibble) ────────────── + +HIST_STATUS_OK = 0 +HIST_STATUS_FAIL = 1 
+HIST_STATUS_ERROR = 2 +HIST_STATUS_FATAL = 3 +HIST_STATUS_COMPILE = 4 + +# ── v2 test-run flag bits (low nibble of status_flags byte) ─────────────── + +HIST_FLAG_SEED_IS_HASH = 0x01 +HIST_FLAG_IS_RERUN = 0x02 +HIST_FLAG_HAS_COVERAGE = 0x04 +HIST_FLAG_WAS_SQUASHED = 0x08 + # ── V2 scope_tree.bin encoding markers ──────────────────────────────────── SCOPE_MARKER_REGULAR = 0x00 diff --git a/src/ucis/ncdb/contrib_index.py b/src/ucis/ncdb/contrib_index.py new file mode 100644 index 0000000..45caa18 --- /dev/null +++ b/src/ucis/ncdb/contrib_index.py @@ -0,0 +1,178 @@ +""" +contrib_index.bin — pass-only merge support index. + +Every test run that produced coverage data has an 8-byte entry here. Status +is cached so merge decisions require no bucket scanning. + +Binary layout (little-endian):: + + magic u32 0x43494458 ('CIDX') + version u8 1 + merge_policy u8 0=all 1=pass_only 2=exclude_error_and_rerun 3=strict + squash_watermark u32 highest run_id already baked into counts.bin + num_active u32 number of entries (not yet squashed) + + entries[num_active]: sorted by run_id + run_id u32 + name_id u16 + status u8 + flags u8 bit0=is_rerun bit1=first_attempt_passed + +8 bytes per entry. +""" + +from __future__ import annotations + +import struct +from dataclasses import dataclass +from typing import List + +from ucis.ncdb.constants import ( + HIST_STATUS_OK, + HIST_FLAG_IS_RERUN, +) + +MAGIC = 0x43494458 # 'CIDX' +VERSION = 1 + +# Merge policies +POLICY_ALL = 0 +POLICY_PASS_ONLY = 1 +POLICY_EXCLUDE_ERROR_RERUN = 2 +POLICY_STRICT = 3 # exclude coverage from tests that only pass on retry + +# contrib_index entry flags +FLAG_IS_RERUN = 0x01 +FLAG_FIRST_ATTEMPT_PASSED = 0x02 + +_HDR = struct.Struct(" bool: + return bool(self.flags & FLAG_IS_RERUN) + + @property + def first_attempt_passed(self) -> bool: + return bool(self.flags & FLAG_FIRST_ATTEMPT_PASSED) + + +class ContribIndex: + """In-memory representation of ``contrib_index.bin``. 
+ + Example:: + + ci = ContribIndex() + ci.add_entry(run_id=0, name_id=0, status=HIST_STATUS_OK, flags=0) + ci.add_entry(run_id=1, name_id=1, status=HIST_STATUS_FAIL, flags=0) + passing = ci.passing_run_ids(policy=POLICY_PASS_ONLY) # [0] + """ + + def __init__(self, merge_policy: int = POLICY_PASS_ONLY, + squash_watermark: int = 0) -> None: + self.merge_policy = merge_policy + self.squash_watermark = squash_watermark + self._entries: List[ContribIndexEntry] = [] + + def add_entry(self, run_id: int, name_id: int, + status: int, flags: int) -> None: + """Append a new contrib entry. Entries are kept sorted by run_id.""" + entry = ContribIndexEntry(run_id=run_id, name_id=name_id, + status=status, flags=flags) + # Append in order (run_ids are monotonically increasing in normal use) + if self._entries and self._entries[-1].run_id >= run_id: + # Insert in sorted position if out of order (e.g. after merge) + for i, e in enumerate(self._entries): + if e.run_id > run_id: + self._entries.insert(i, entry) + return + self._entries.append(entry) + + def passing_run_ids(self, policy: int = POLICY_PASS_ONLY) -> List[int]: + """Return run_ids that pass the given merge policy filter. 
+
+        Policies:
+            POLICY_ALL                 — all entries
+            POLICY_PASS_ONLY           — status == OK
+            POLICY_EXCLUDE_ERROR_RERUN — status == OK
+            POLICY_STRICT              — status == OK and not (is_rerun and not first_attempt_passed)
+        """
+        result = []
+        for e in self._entries:
+            if policy == POLICY_ALL:
+                result.append(e.run_id)
+            elif policy == POLICY_PASS_ONLY:
+                if e.status == HIST_STATUS_OK:
+                    result.append(e.run_id)
+            elif policy == POLICY_EXCLUDE_ERROR_RERUN:
+                if e.status == HIST_STATUS_OK:
+                    result.append(e.run_id)
+            elif policy == POLICY_STRICT:
+                if e.status == HIST_STATUS_OK:
+                    # Exclude coverage from tests that only ever pass on retry
+                    if e.is_rerun and not e.first_attempt_passed:
+                        continue
+                    result.append(e.run_id)
+        return result
+
+    def set_squash_watermark(self, run_id: int) -> None:
+        """Advance the squash watermark to *run_id*."""
+        self.squash_watermark = run_id
+
+    def remove_entries_up_to(self, run_id: int) -> None:
+        """Remove all entries with run_id ≤ *run_id* (called after squash)."""
+        self._entries = [e for e in self._entries if e.run_id > run_id]
+
+    def max_run_id(self) -> int:
+        """Return the highest run_id in active entries, or squash_watermark."""
+        if self._entries:
+            return max(e.run_id for e in self._entries)
+        return self.squash_watermark
+
+    @property
+    def num_active(self) -> int:
+        """Number of entries currently held in the index."""
+        return len(self._entries)
+
+    # ── serialization ─────────────────────────────────────────────────────
+
+    def serialize(self) -> bytes:
+        """Encode the index to bytes for storage in the ZIP archive.
+
+        Returns:
+            The packed header (magic, version, merge policy, squash
+            watermark, entry count) followed by one packed row per entry.
+        """
+        header = _HDR.pack(MAGIC, VERSION, self.merge_policy,
+                           self.squash_watermark, len(self._entries))
+        # Join once: appending to a bytes object inside the loop re-copies
+        # the accumulated buffer for every entry.
+        rows = b"".join(
+            _ENTRY.pack(e.run_id, e.name_id, e.status, e.flags)
+            for e in self._entries
+        )
+        return header + rows
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "ContribIndex":
+        """Reconstruct a ContribIndex from raw bytes.
+
+        Raises:
+            ValueError: if magic or version is wrong.
+ """ + magic, version, merge_policy, squash_watermark, num_active = \ + _HDR.unpack_from(data, 0) + if magic != MAGIC: + raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}") + if version != VERSION: + raise ValueError(f"Unsupported contrib_index version {version}") + + ci = cls(merge_policy=merge_policy, squash_watermark=squash_watermark) + offset = _HDR.size + for _ in range(num_active): + run_id, name_id, status, flags = _ENTRY.unpack_from(data, offset) + offset += _ENTRY.size + ci._entries.append(ContribIndexEntry( + run_id=run_id, name_id=name_id, status=status, flags=flags + )) + return ci diff --git a/src/ucis/ncdb/history_buckets.py b/src/ucis/ncdb/history_buckets.py new file mode 100644 index 0000000..207be13 --- /dev/null +++ b/src/ucis/ncdb/history_buckets.py @@ -0,0 +1,328 @@ +""" +history/NNNNNN.bin — columnar bounded bucket files for test-run records. + +Each bucket stores up to HISTORY_BUCKET_MAX_RECORDS test-run records in a +columnar layout optimised for DEFLATE/LZMA compression. Records within a +bucket are sorted by (name_id, ts). 
+ +Binary layout (little-endian, stored compressed inside the ZIP):: + + Header: + magic u32 0x48445942 ('HDYB') + version u8 1 + num_records u32 + num_names u16 unique name_ids in this bucket + ts_base u32 unix timestamp of first record + + Name index (num_names entries, sorted by name_id): + name_id u32 + start_row u32 first record index for this name + count u16 number of records for this name + + Seed dictionary (local — enables 1-byte seed references): + num_seeds u16 + seed_ids u32[num_seeds] global seed_ids from test_registry + + Columns (independent arrays — each compresses optimally): + seeds[] u8[num_records] index into local seed dictionary + ts_deltas[] varint[num_records] seconds since ts_base, delta per name group + status_flags[] u8[num_records] nibble-packed (high=status, low=flags) + +Status nibble values: 0=OK 1=FAIL 2=ERROR 3=FATAL 4=COMPILE +Flag bits: bit0=seed_is_hash bit1=is_rerun bit2=has_coverage bit3=was_squashed +""" + +from __future__ import annotations + +import struct +import zipfile +from dataclasses import dataclass, field +from typing import Dict, Iterable, List, Optional, Tuple + +from ucis.ncdb.constants import ( + HISTORY_BUCKET_MAX_RECORDS, + HIST_STATUS_OK, +) +from ucis.ncdb.varint import decode_varints, encode_varints + +MAGIC = 0x48445942 # 'HDYB' +VERSION = 1 + +# Bucket header: magic(4) version(1) num_records(4) num_names(2) pad(1) ts_base(4) = 16 bytes +_BUCKET_HDR = struct.Struct(" None: + self._records: List[BucketRecord] = [] + # local seed dict: global seed_id → local index (u8, max 255 seeds/bucket) + self._seed_local: Dict[int, int] = {} + self._seed_ids: List[int] = [] # local_idx → global seed_id + + def add(self, name_id: int, seed_id: int, ts: int, + status: int, flags: int) -> None: + """Append one test-run record. + + Args: + name_id: Integer name_id from TestRegistry. + seed_id: Integer seed_id from TestRegistry. + ts: Unix timestamp. + status: HIST_STATUS_* constant. + flags: Combination of HIST_FLAG_* bits. 
+ """ + if seed_id not in self._seed_local: + idx = len(self._seed_ids) + if idx >= 255: + raise OverflowError("Bucket seed dictionary full (255 entries max)") + self._seed_local[seed_id] = idx + self._seed_ids.append(seed_id) + self._records.append(BucketRecord(name_id=name_id, seed_id=seed_id, + ts=ts, status=status, flags=flags)) + + @property + def num_records(self) -> int: + return len(self._records) + + def is_full(self) -> bool: + return len(self._records) >= HISTORY_BUCKET_MAX_RECORDS + + def seal(self, use_lzma: bool = True) -> bytes: + """Serialise and compress the bucket. + + Args: + use_lzma: If True, attempt LZMA compression; fall back to + DEFLATE level 9 if liblzma is unavailable. + + Returns: + Compressed bytes ready to store as a ZIP member. + """ + raw = self._encode() + return _compress(raw, high_quality=True, use_lzma=use_lzma) + + def seal_fast(self) -> bytes: + """Serialise with fast (DEFLATE level 1) compression for the current-day bucket.""" + raw = self._encode() + return _compress(raw, high_quality=False, use_lzma=False) + + def _encode(self) -> bytes: + # Sort records by (name_id, ts) + records = sorted(self._records, key=lambda r: (r.name_id, r.ts)) + if not records: + ts_base = 0 + else: + ts_base = records[0].ts + + # Build name index + name_groups: Dict[int, List[int]] = {} # name_id → list of row indices + for i, r in enumerate(records): + name_groups.setdefault(r.name_id, []).append(i) + + sorted_names = sorted(name_groups.keys()) + name_index_entries: List[Tuple[int, int, int]] = [] + start_row = 0 + for nid in sorted_names: + cnt = len(name_groups[nid]) + name_index_entries.append((nid, start_row, cnt)) + start_row += cnt + + num_names = len(sorted_names) + num_records = len(records) + + # Columns + seed_col = bytearray() + ts_delta_values = [] + status_flags_col = bytearray() + + prev_ts_per_name: Dict[int, int] = {} + for r in records: + seed_col.append(self._seed_local[r.seed_id]) + prev_ts = prev_ts_per_name.get(r.name_id, 
ts_base) + delta = r.ts - prev_ts + ts_delta_values.append(delta) + prev_ts_per_name[r.name_id] = r.ts + sf = ((r.status & 0x0F) << 4) | (r.flags & 0x0F) + status_flags_col.append(sf) + + ts_delta_col = encode_varints(ts_delta_values) + + # Header: 16 bytes + header = _BUCKET_HDR.pack(MAGIC, VERSION, num_records, num_names, ts_base) + + # Name index: 12 bytes each + name_idx_bytes = b"" + for nid, sr, cnt in name_index_entries: + name_idx_bytes += _BUCKET_NAME.pack(nid, sr, cnt) + + # Seed dict + num_seeds = len(self._seed_ids) + seed_dict = struct.pack(" None: + raw = _decompress(data) + self._parse(raw) + + def _parse(self, raw: bytes) -> None: + magic, version, num_records, num_names, ts_base = _BUCKET_HDR.unpack_from(raw, 0) + if magic != MAGIC: + raise ValueError(f"Bad bucket magic 0x{magic:08X}") + if version != VERSION: + raise ValueError(f"Unsupported bucket version {version}") + + self._num_records = num_records + self._ts_base = ts_base + + offset = _BUCKET_HDR.size + + # Name index + self._name_index: List[Tuple[int, int, int]] = [] + for _ in range(num_names): + nid, sr, cnt = _BUCKET_NAME.unpack_from(raw, offset) + self._name_index.append((nid, sr, cnt)) + offset += _BUCKET_NAME.size + + # Seed dict + num_seeds, = struct.unpack_from("> 4) & 0x0F + flags = sf & 0x0F + + self._records.append(BucketRecord( + name_id=nid, seed_id=seed_id, ts=ts, status=status, flags=flags + )) + + def records_for_name(self, name_id: int) -> List[BucketRecord]: + """Return all records for *name_id* via binary search on the name index. + + Returns: + List of BucketRecord (may be empty if name_id not in this bucket). 
+        """
+        # Binary search on sorted name index
+        lo, hi = 0, len(self._name_index)
+        while lo < hi:
+            mid = (lo + hi) // 2
+            if self._name_index[mid][0] < name_id:
+                lo = mid + 1
+            else:
+                hi = mid
+        if lo >= len(self._name_index) or self._name_index[lo][0] != name_id:
+            return []
+        _, start_row, count = self._name_index[lo]
+        return self._records[start_row: start_row + count]
+
+    def all_records(self) -> Iterable[BucketRecord]:
+        """Iterate over all records in row order."""
+        return iter(self._records)
+
+    @property
+    def num_records(self) -> int:
+        """Record count as stored in the bucket header."""
+        return self._num_records
+
+
+# ── compression helpers ───────────────────────────────────────────────────
+
+def _compress(data: bytes, high_quality: bool, use_lzma: bool) -> bytes:
+    """Compress *data* using the best available method.
+
+    For the current-day (mutable) bucket: DEFLATE level 1 (fast).
+    For sealed buckets: LZMA if available, else DEFLATE level 9.
+
+    Args:
+        data: Raw bucket bytes.
+        high_quality: False selects zlib level 1 and ignores *use_lzma*.
+        use_lzma: When *high_quality* is true, try XZ first.
+
+    Returns:
+        An XZ stream or a zlib stream; ``_decompress`` tells them apart by
+        the XZ magic prefix.
+    """
+    import zlib
+
+    if not high_quality:
+        return zlib.compress(data, level=1)
+
+    if use_lzma:
+        # Import separately from the compress call: naming ``lzma.LZMAError``
+        # in the same ``except`` clause that guards the import fails with
+        # UnboundLocalError when the module is missing, which would defeat
+        # the documented DEFLATE fallback.
+        try:
+            import lzma
+        except ImportError:
+            lzma = None
+        if lzma is not None:
+            try:
+                return lzma.compress(data, format=lzma.FORMAT_XZ)
+            except lzma.LZMAError:
+                pass
+
+    return zlib.compress(data, level=9)
+
+
+def _decompress(data: bytes) -> bytes:
+    """Decompress *data*, auto-detecting LZMA vs DEFLATE.
+
+    Raises:
+        RuntimeError: if *data* is an XZ stream but ``lzma`` cannot be imported.
+    """
+    import zlib
+
+    # LZMA/XZ magic: 0xFD 0x37 0x7A 0x58 0x5A 0x00
+    if data[:6] == b"\xfd7zXZ\x00":
+        try:
+            import lzma
+            return lzma.decompress(data, format=lzma.FORMAT_XZ)
+        except ImportError:
+            raise RuntimeError("lzma module not available; cannot decompress sealed bucket")
+
+    return zlib.decompress(data)
diff --git a/src/ucis/ncdb/manifest.py b/src/ucis/ncdb/manifest.py index 67065c9..eee3722 100644 --- a/src/ucis/ncdb/manifest.py +++ b/src/ucis/ncdb/manifest.py @@ -11,7 +11,7 @@ from datetime import datetime, timezone from typing import Optional -from .constants import NCDB_FORMAT, NCDB_VERSION, NCDB_GENERATOR +from .constants import NCDB_FORMAT, NCDB_VERSION, NCDB_GENERATOR,
HISTORY_FORMAT_V1 @dataclass @@ -28,6 +28,7 @@ class Manifest: covered_bins: int = 0 schema_hash: str = "" generator: str = NCDB_GENERATOR + history_format: str = HISTORY_FORMAT_V1 # "v1" (JSON) or "v2" (binary + JSON) def serialize(self) -> bytes: d = asdict(self) diff --git a/src/ucis/ncdb/ncdb_merger.py b/src/ucis/ncdb/ncdb_merger.py index a322a17..4dc8a2e 100644 --- a/src/ucis/ncdb/ncdb_merger.py +++ b/src/ucis/ncdb/ncdb_merger.py @@ -15,13 +15,22 @@ History nodes from all sources are accumulated in the output. A new MERGE HistoryNode is appended to record the operation. + +v2 binary history (if present in any source) is merged correctly: + - TestRegistry names/seeds are unioned; stable name_id remaps are computed + - TestStatsTable counters are summed and derived scores recomputed + - Bucket files are decoded, name_ids remapped, re-encoded and sealed + - BucketIndex is rebuilt; run_ids are offset to keep them disjoint + - ContribIndex entries are remapped and concatenated + - SquashLog entries are concatenated (no run_id adjustment needed) """ import zipfile import json import struct +import math from datetime import datetime, timezone -from typing import List +from typing import Dict, List, Optional, Tuple from .ncdb_reader import NcdbReader from .ncdb_writer import NcdbWriter @@ -31,6 +40,11 @@ from .constants import ( MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE, MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, + MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS, + MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG, + HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2, + HIST_STATUS_OK, HIST_STATUS_FAIL, + MEMBER_TESTPLAN, MEMBER_WAIVERS, ) from ucis.ncdb._accel import add_uint32_arrays as _add_arrays, HAS_ACCEL as _HAS_ACCEL @@ -89,6 +103,11 @@ def _merge_same_schema(self, sources, manifests, target): # Build new manifest using first source's schema data first_manifest = manifests[0] + + # Determine if any source has v2 binary history + any_v2 = 
any(m.history_format == HISTORY_FORMAT_V2 for m in manifests) + history_format = HISTORY_FORMAT_V2 if any_v2 else first_manifest.history_format + new_manifest = Manifest( format=first_manifest.format, version=first_manifest.version, @@ -103,17 +122,36 @@ def _merge_same_schema(self, sources, manifests, target): covered_bins=sum(1 for c in merged_counts if c > 0), schema_hash=first_manifest.schema_hash, generator=first_manifest.generator, + history_format=history_format, ) # Read schema members verbatim from first source with zipfile.ZipFile(sources[0], "r") as zf: + zf_names = zf.namelist() strings_bytes = zf.read(MEMBER_STRINGS) scope_tree_bytes = zf.read(MEMBER_SCOPE_TREE) sources_bytes = zf.read(MEMBER_SOURCES) + # Gather existing contrib/* members from all sources (copy verbatim) + contrib_members_all: Dict[str, bytes] = {} + + for src in sources: + with zipfile.ZipFile(src, "r") as zf: + for n_member in zf.namelist(): + if n_member.startswith("contrib/"): + contrib_members_all[n_member] = zf.read(n_member) counts_bytes = CountsWriter().serialize(merged_counts) history_bytes = HistoryWriter().serialize(all_history) + # Merge v2 binary history if present in any source + v2_members: Dict[str, bytes] = {} + if any_v2: + v2_members = self._merge_v2_history(sources, manifests) + + # Merge testplan and waivers + testplan_bytes = self._merge_testplans(sources) + waivers_bytes = self._merge_waivers(sources) + with zipfile.ZipFile(target, "w", compression=zipfile.ZIP_DEFLATED) as zf: zf.writestr(MEMBER_MANIFEST, new_manifest.serialize()) zf.writestr(MEMBER_STRINGS, strings_bytes) @@ -121,6 +159,15 @@ def _merge_same_schema(self, sources, manifests, target): zf.writestr(MEMBER_COUNTS, counts_bytes) zf.writestr(MEMBER_HISTORY, history_bytes) zf.writestr(MEMBER_SOURCES, sources_bytes) + for member_name, member_bytes in contrib_members_all.items(): + zf.writestr(member_name, member_bytes) + for member_name, member_bytes in v2_members.items(): + zf.writestr(member_name, 
member_bytes, + compress_type=zipfile.ZIP_STORED) + if testplan_bytes: + zf.writestr(MEMBER_TESTPLAN, testplan_bytes) + if waivers_bytes: + zf.writestr(MEMBER_WAIVERS, waivers_bytes) # ── Cross-schema fallback ───────────────────────────────────────────── @@ -165,8 +212,266 @@ def _merge_cross_schema(self, sources, target): for db in dbs: db.close() + # ── v2 binary history merge ─────────────────────────────────────────── + + def _merge_v2_history(self, sources: List[str], + manifests: List[Manifest]) -> Dict[str, bytes]: + """Merge v2 binary history from all sources; return member-name → bytes.""" + from .test_registry import TestRegistry + from .test_stats import TestStatsTable, TestStatsEntry + from .bucket_index import BucketIndex + from .contrib_index import ContribIndex, POLICY_PASS_ONLY + from .squash_log import SquashLog + from .history_buckets import BucketWriter, BucketReader + + # --- Step 1: load per-source v2 state --- + src_states = [] + for src, mf in zip(sources, manifests): + if mf.history_format == HISTORY_FORMAT_V2: + src_states.append(self._read_v2_state(src)) + else: + # Source has no v2 history — use empty state + src_states.append({ + 'registry': TestRegistry(), + 'stats': TestStatsTable(), + 'bucket_index': BucketIndex(), + 'buckets': {}, # seq → compressed_bytes + 'contrib_index': ContribIndex(merge_policy=POLICY_PASS_ONLY), + 'squash_log': SquashLog(), + }) + + # --- Step 2: build merged registry (union of all names/seeds) --- + # Two-pass approach: first insert ALL names/seeds so the sorted order + # is final, then recompute remaps against the stable merged registry. 
+ merged_reg = TestRegistry() + + # Pass 1: insert all names and seeds to finalise the merged registry + for state in src_states: + reg = state['registry'] + for name in reg._names: + merged_reg.lookup_name_id(name) + for seed in reg._seeds: + merged_reg.lookup_seed_id(seed) + + # Pass 2: build per-source remaps against the now-stable merged registry + name_remaps: List[Dict[int, int]] = [] + seed_remaps: List[Dict[int, int]] = [] + for state in src_states: + reg = state['registry'] + n_remap: Dict[int, int] = { + old_id: merged_reg._name_to_id[name] + for old_id, name in enumerate(reg._names) + } + s_remap: Dict[int, int] = { + old_id: merged_reg._seed_to_id[seed] + for old_id, seed in enumerate(reg._seeds) + } + name_remaps.append(n_remap) + seed_remaps.append(s_remap) + + # --- Step 3: compute run_id offsets (disjoint run_id ranges) --- + run_id_offsets: List[int] = [] + offset = 0 + for state in src_states: + run_id_offsets.append(offset) + offset += state['registry'].next_run_id + # Advance the merged registry's counter + for _ in range(offset): + merged_reg.assign_run_id() + + # --- Step 4: merge TestStatsTable --- + from .test_stats import TestStatsEntry as _TSEntry + merged_stats = TestStatsTable() + # Ensure enough slots + for _ in range(merged_reg.num_names): + merged_stats._entries.append( + _TSEntry(name_id=len(merged_stats._entries))) + + for src_idx, state in enumerate(src_states): + n_remap = name_remaps[src_idx] + src_stats = state['stats'] + for old_id, src_entry in enumerate(src_stats._entries): + if src_entry.total_runs == 0: + continue + new_id = n_remap.get(old_id, old_id) + _merge_stats_entry(merged_stats._entries[new_id], src_entry, new_id) + + # --- Step 5: merge bucket files --- + merged_buckets: Dict[int, bytes] = {} + merged_bidx = BucketIndex() + new_seq = 0 + + for src_idx, state in enumerate(src_states): + n_remap = name_remaps[src_idx] + s_remap = seed_remaps[src_idx] + rid_offset = run_id_offsets[src_idx] + src_bidx = 
state['bucket_index'] + + for bidx_entry in src_bidx._entries: + old_seq = bidx_entry.bucket_seq + compressed = state['buckets'].get(old_seq) + if compressed is None: + continue + # Remap name_ids in bucket if registry changed + if n_remap or s_remap: + compressed = _remap_bucket(compressed, n_remap, s_remap) + merged_buckets[new_seq] = compressed + # Remap name_ids in the index entry + min_nid = n_remap.get(bidx_entry.min_name_id, bidx_entry.min_name_id) + max_nid = n_remap.get(bidx_entry.max_name_id, bidx_entry.max_name_id) + merged_bidx.add_bucket( + new_seq, bidx_entry.ts_start, bidx_entry.ts_end, + bidx_entry.num_records, bidx_entry.fail_count, + min(min_nid, max_nid), max(min_nid, max_nid), + ) + new_seq += 1 + + # --- Step 6: merge ContribIndex --- + merged_cidx = ContribIndex(merge_policy=POLICY_PASS_ONLY) + for src_idx, state in enumerate(src_states): + n_remap = name_remaps[src_idx] + rid_offset = run_id_offsets[src_idx] + ci = state['contrib_index'] + for entry in ci._entries: + merged_cidx.add_entry( + run_id=entry.run_id + rid_offset, + name_id=n_remap.get(entry.name_id, entry.name_id), + status=entry.status, + flags=entry.flags, + ) + # Advance watermark + if ci.squash_watermark > 0: + merged_cidx.set_squash_watermark( + max(merged_cidx.squash_watermark, + ci.squash_watermark + rid_offset)) + + # --- Step 7: merge SquashLog (append-only, no run_id adjustment) --- + merged_slog = SquashLog() + for state in src_states: + for entry in state['squash_log'].entries(): + merged_slog.append( + ts=entry.ts, policy=entry.policy, + from_run=entry.from_run, to_run=entry.to_run, + num_runs=entry.num_runs, pass_runs=entry.pass_runs, + ) + + # --- Assemble output members --- + result: Dict[str, bytes] = {} + result[MEMBER_TEST_REGISTRY] = merged_reg.serialize() + result[MEMBER_TEST_STATS] = merged_stats.serialize() + result[MEMBER_BUCKET_INDEX] = merged_bidx.serialize() + result[MEMBER_CONTRIB_INDEX] = merged_cidx.serialize() + result[MEMBER_SQUASH_LOG] = 
merged_slog.serialize() + for seq, data in merged_buckets.items(): + result[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = data + + return result + + def _read_v2_state(self, path: str) -> dict: + """Read all v2 binary history members from a .cdb ZIP.""" + from .test_registry import TestRegistry + from .test_stats import TestStatsTable + from .bucket_index import BucketIndex + from .contrib_index import ContribIndex, POLICY_PASS_ONLY + from .squash_log import SquashLog + + with zipfile.ZipFile(path, "r") as zf: + names = zf.namelist() + + def _read(member): + return zf.read(member) if member in names else b'' + + reg_data = _read(MEMBER_TEST_REGISTRY) + stats_data = _read(MEMBER_TEST_STATS) + bidx_data = _read(MEMBER_BUCKET_INDEX) + cidx_data = _read(MEMBER_CONTRIB_INDEX) + slog_data = _read(MEMBER_SQUASH_LOG) + + # Read all bucket files: history/NNNNNN.bin (not the index) + buckets: Dict[int, bytes] = {} + for n in names: + if (n.startswith(HISTORY_BUCKET_DIR) and n.endswith(".bin") + and n != MEMBER_BUCKET_INDEX): + basename = n[len(HISTORY_BUCKET_DIR):] + try: + seq = int(basename.split(".")[0]) + buckets[seq] = zf.read(n) + except ValueError: + pass + + return { + 'registry': TestRegistry.deserialize(reg_data) if reg_data else TestRegistry(), + 'stats': TestStatsTable.deserialize(stats_data) if stats_data else TestStatsTable(), + 'bucket_index': BucketIndex.deserialize(bidx_data) if bidx_data else BucketIndex(), + 'buckets': buckets, + 'contrib_index': (ContribIndex.deserialize(cidx_data) if cidx_data + else ContribIndex(merge_policy=POLICY_PASS_ONLY)), + 'squash_log': SquashLog.deserialize(slog_data) if slog_data else SquashLog(), + } + # ── Helpers ─────────────────────────────────────────────────────────── + def _merge_testplans(self, sources: list): + """Return merged testplan bytes, or None if sources disagree. + + Strategy: + 1. If no source has a testplan → return None. + 2. 
If all sources with a testplan share the same ``source_file`` → + return the bytes from whichever has the most recent ``import_timestamp``. + 3. If sources have different ``source_file`` values → emit a warning + and return None (incompatible plans). + """ + import warnings + candidates = {} # source_file → (import_timestamp, raw_bytes) + for src in sources: + with zipfile.ZipFile(src, "r") as zf: + if MEMBER_TESTPLAN not in zf.namelist(): + continue + raw = zf.read(MEMBER_TESTPLAN) + import json as _json + d = _json.loads(raw) + sf = d.get("source_file", "") + ts = d.get("import_timestamp", "") + if sf not in candidates or ts > candidates[sf][0]: + candidates[sf] = (ts, raw) + if not candidates: + return None + if len(candidates) == 1: + return next(iter(candidates.values()))[1] + warnings.warn( + f"Merging databases with different testplans " + f"({list(candidates.keys())}); testplan omitted from output.", + stacklevel=4, + ) + return None + + def _merge_waivers(self, sources: list): + """Return merged waivers bytes (union of all unique waiver ids). + + Waivers from all sources are combined; if two sources have a waiver + with the same id, the one with the most recent ``approved_at`` wins. + Returns None if no source has waivers. 
+ """ + import json as _json + merged: dict = {} # id → waiver dict + any_found = False + for src in sources: + with zipfile.ZipFile(src, "r") as zf: + if MEMBER_WAIVERS not in zf.namelist(): + continue + any_found = True + raw = zf.read(MEMBER_WAIVERS) + d = _json.loads(raw) + for w in d.get("waivers", []): + wid = w.get("id", "") + existing = merged.get(wid) + if existing is None or w.get("approved_at", "") > existing.get("approved_at", ""): + merged[wid] = w + if not any_found: + return None + out = {"format_version": 1, "waivers": list(merged.values())} + return _json.dumps(out, separators=(',', ':')).encode() + def _read_manifest(self, path: str) -> Manifest: with zipfile.ZipFile(path, "r") as zf: return Manifest.from_bytes(zf.read(MEMBER_MANIFEST)) @@ -190,3 +495,93 @@ def _make_merge_node(self, target: str, sources: List[str]) -> MemHistoryNode: node.setToolCategory("ncdb-merger") node.setComment(f"Merged from: {', '.join(sources)}") return node + + +# ── Module-level helpers ────────────────────────────────────────────────── + + +def _merge_stats_entry(dst, src, new_name_id: int) -> None: + """Accumulate *src* TestStatsEntry into *dst* in place.""" + dst.name_id = new_name_id + + if src.first_ts > 0 and (dst.first_ts == 0 or src.first_ts < dst.first_ts): + dst.first_ts = src.first_ts + dst.last_ts = max(dst.last_ts, src.last_ts) + dst.last_green_ts = max(dst.last_green_ts, src.last_green_ts) + + prev_total = dst.total_runs + dst.total_runs += src.total_runs + dst.pass_count += src.pass_count + dst.fail_count += src.fail_count + dst.error_count += src.error_count + dst.transition_count += src.transition_count + + # Welford merge: combine two running means and M2 accumulators (Chan's formula) + if src.total_runs > 0 and dst.total_runs > 0: + n_a, n_b = prev_total, src.total_runs + n_ab = n_a + n_b + if n_ab > 0: + delta = src.mean_cpu_time - dst.mean_cpu_time + dst.mean_cpu_time = (n_a * dst.mean_cpu_time + n_b * src.mean_cpu_time) / n_ab + dst.m2_cpu_time = 
(dst.m2_cpu_time + src.m2_cpu_time + + delta * delta * n_a * n_b / n_ab) + + # Recompute derived scores from accumulated counters + if dst.total_runs > 0: + dst.flake_score = dst.transition_count / max(dst.total_runs - 1, 1) + dst.fail_rate = dst.fail_count / dst.total_runs + else: + dst.flake_score = 0.0 + dst.fail_rate = 0.0 + + pass_rate = dst.pass_count / dst.total_runs if dst.total_runs else 1.0 + stability = 1.0 - dst.flake_score + speed = max(0.0, 1.0 - dst.mean_cpu_time / 3600.0) if dst.mean_cpu_time > 0 else 1.0 + dst.grade_score = pass_rate * stability * speed + + # Take worst-case streak (most negative or most positive) + if abs(src.streak) > abs(dst.streak): + dst.streak = src.streak + + # Take max CUSUM + dst.cusum_value = max(dst.cusum_value, src.cusum_value) + dst.total_seeds_seen = max(dst.total_seeds_seen, src.total_seeds_seen) + + +def _remap_bucket(compressed: bytes, n_remap: Dict[int, int], + s_remap: Dict[int, int]) -> bytes: + """Decode a compressed bucket, remap name_ids and seed_ids, re-encode. + + If neither remap changes any ID, the original compressed bytes are + returned unchanged to avoid redundant work. 
+ """ + from .history_buckets import BucketReader, BucketWriter + + reader = BucketReader(compressed) + all_recs = list(reader.all_records()) + + # Check if any remapping is actually needed + needs_remap = any( + n_remap.get(r.name_id, r.name_id) != r.name_id + or s_remap.get(r.seed_id, r.seed_id) != r.seed_id + for r in all_recs + ) + if not needs_remap: + return compressed + + writer = BucketWriter() + # Sort by (new_name_id, ts) for correct columnar layout + remapped = sorted( + all_recs, + key=lambda r: (n_remap.get(r.name_id, r.name_id), r.ts), + ) + for rec in remapped: + writer.add( + name_id=n_remap.get(rec.name_id, rec.name_id), + seed_id=s_remap.get(rec.seed_id, rec.seed_id), + ts=rec.ts, + status=rec.status, + flags=rec.flags, + ) + return writer.seal(use_lzma=True) + diff --git a/src/ucis/ncdb/ncdb_reader.py b/src/ucis/ncdb/ncdb_reader.py index c335350..41a1097 100644 --- a/src/ucis/ncdb/ncdb_reader.py +++ b/src/ucis/ncdb/ncdb_reader.py @@ -28,6 +28,10 @@ MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_CONTRIB_DIR, MEMBER_FORMAL, NCDB_FORMAT, MEMBER_COVERITEM_FLAGS, + MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS, + MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG, + HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2, + MEMBER_TESTPLAN, MEMBER_WAIVERS, ) from ucis.mem.mem_ucis import MemUCIS @@ -74,25 +78,28 @@ class NcdbReader: def read(self, path: str) -> MemUCIS: with zipfile.ZipFile(path, "r") as zf: names = zf.namelist() - manifest_bytes = zf.read(MEMBER_MANIFEST) - strings_bytes = zf.read(MEMBER_STRINGS) - scope_tree_bytes = zf.read(MEMBER_SCOPE_TREE) - counts_bytes = zf.read(MEMBER_COUNTS) - history_bytes = zf.read(MEMBER_HISTORY) - sources_bytes = zf.read(MEMBER_SOURCES) - attrs_bytes = zf.read(MEMBER_ATTRS) if MEMBER_ATTRS in names else b'' - tags_bytes = zf.read(MEMBER_TAGS) if MEMBER_TAGS in names else b'' - props_bytes = zf.read(MEMBER_PROPERTIES) if MEMBER_PROPERTIES in names else b'' - toggle_bytes = zf.read(MEMBER_TOGGLE) if MEMBER_TOGGLE in names 
else b'' - fsm_bytes = zf.read(MEMBER_FSM) if MEMBER_FSM in names else b'' - cross_bytes = zf.read(MEMBER_CROSS) if MEMBER_CROSS in names else b'' - du_bytes = zf.read(MEMBER_DESIGN_UNITS) if MEMBER_DESIGN_UNITS in names else b'' - formal_bytes = zf.read(MEMBER_FORMAL) if MEMBER_FORMAL in names else b'' - ci_flags_bytes = zf.read(MEMBER_COVERITEM_FLAGS) if MEMBER_COVERITEM_FLAGS in names else b'' - # Collect all contrib/* members - contrib_members = { - n: zf.read(n) for n in names if n.startswith(MEMBER_CONTRIB_DIR) - } + # Read all members into a dict for uniform access + zf_data = {n: zf.read(n) for n in names} + + manifest_bytes = zf_data[MEMBER_MANIFEST] + strings_bytes = zf_data[MEMBER_STRINGS] + scope_tree_bytes = zf_data[MEMBER_SCOPE_TREE] + counts_bytes = zf_data[MEMBER_COUNTS] + history_bytes = zf_data[MEMBER_HISTORY] + sources_bytes = zf_data[MEMBER_SOURCES] + attrs_bytes = zf_data.get(MEMBER_ATTRS, b'') + tags_bytes = zf_data.get(MEMBER_TAGS, b'') + props_bytes = zf_data.get(MEMBER_PROPERTIES, b'') + toggle_bytes = zf_data.get(MEMBER_TOGGLE, b'') + fsm_bytes = zf_data.get(MEMBER_FSM, b'') + cross_bytes = zf_data.get(MEMBER_CROSS, b'') + du_bytes = zf_data.get(MEMBER_DESIGN_UNITS, b'') + formal_bytes = zf_data.get(MEMBER_FORMAL, b'') + ci_flags_bytes = zf_data.get(MEMBER_COVERITEM_FLAGS, b'') + # Collect all contrib/* members + contrib_members = { + n: zf_data[n] for n in names if n.startswith(MEMBER_CONTRIB_DIR) + } manifest = Manifest.from_bytes(manifest_bytes) if manifest.format != NCDB_FORMAT: @@ -199,4 +206,71 @@ def read(self, path: str) -> MemUCIS: if attrs_bytes: AttrsReader().deserialize(attrs_bytes, db) + # v2 binary history members (optional — present only in v2 archives) + if manifest.history_format == HISTORY_FORMAT_V2: + _load_v2_history(db, {name: zf_data.get(name, b'') + for name in (MEMBER_TEST_REGISTRY, + MEMBER_TEST_STATS, + MEMBER_BUCKET_INDEX, + MEMBER_CONTRIB_INDEX, + MEMBER_SQUASH_LOG)}, + {n: d for n, d in zf_data.items() + if 
n.startswith(HISTORY_BUCKET_DIR) + and n.endswith(".bin") + and n != MEMBER_BUCKET_INDEX}) + + # Testplan (optional) + testplan_raw = zf_data.get(MEMBER_TESTPLAN, b'') + if testplan_raw: + from .testplan import Testplan + db._testplan = Testplan.from_bytes(testplan_raw) + db._loaded_testplan = True + + # Waivers (optional) + waivers_raw = zf_data.get(MEMBER_WAIVERS, b'') + if waivers_raw: + from .waivers import WaiverSet + db._waivers = WaiverSet.from_bytes(waivers_raw) + db._loaded_waivers = True + return db + + +def _load_v2_history(db: MemUCIS, v2_members: dict, bucket_data: dict) -> None: + """Attach v2 binary history state to *db* (a MemUCIS). + + Uses the same deserialization logic as NcdbUCIS._load_v2_history, but + attaches the resulting objects as attributes on a plain MemUCIS so that + callers using NcdbReader (not NcdbUCIS) can access v2 data via the same + attribute names. + """ + from .test_registry import TestRegistry + from .test_stats import TestStatsTable + from .bucket_index import BucketIndex + from .contrib_index import ContribIndex, POLICY_PASS_ONLY + from .squash_log import SquashLog + + reg_data = v2_members.get(MEMBER_TEST_REGISTRY, b'') + db._test_registry = TestRegistry.deserialize(reg_data) if reg_data else TestRegistry() + + stats_data = v2_members.get(MEMBER_TEST_STATS, b'') + db._test_stats = TestStatsTable.deserialize(stats_data) if stats_data else TestStatsTable() + + bidx_data = v2_members.get(MEMBER_BUCKET_INDEX, b'') + db._bucket_index = BucketIndex.deserialize(bidx_data) if bidx_data else BucketIndex() + + cidx_data = v2_members.get(MEMBER_CONTRIB_INDEX, b'') + db._contrib_index = (ContribIndex.deserialize(cidx_data) if cidx_data + else ContribIndex(merge_policy=POLICY_PASS_ONLY)) + + slog_data = v2_members.get(MEMBER_SQUASH_LOG, b'') + db._squash_log = SquashLog.deserialize(slog_data) if slog_data else SquashLog() + + db._sealed_buckets = {} + for member, data in bucket_data.items(): + basename = member[len(HISTORY_BUCKET_DIR):] 
+ try: + seq = int(basename.split(".")[0]) + db._sealed_buckets[seq] = data + except ValueError: + pass diff --git a/src/ucis/ncdb/ncdb_ucis.py b/src/ucis/ncdb/ncdb_ucis.py index 18c8016..be1d1da 100644 --- a/src/ucis/ncdb/ncdb_ucis.py +++ b/src/ucis/ncdb/ncdb_ucis.py @@ -13,10 +13,20 @@ ... # only history.json is parsed here for scope in db.scopes(...): ... # scope_tree + counts parsed on first call + +Binary history v2 usage:: + + db = NcdbUCIS("coverage.cdb") + run_id = db.add_test_run("uart_smoke", seed="12345", + status=HIST_STATUS_OK, has_coverage=True) + entry = db.get_test_stats("uart_smoke") + print(entry.flake_score) """ +import time import zipfile import json +from typing import Dict, List, Optional from ucis.mem.mem_ucis import MemUCIS from ucis.history_node_kind import HistoryNodeKind @@ -27,6 +37,10 @@ MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, MEMBER_TOGGLE, MEMBER_FSM, MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_CONTRIB_DIR, MEMBER_FORMAL, + MEMBER_TEST_REGISTRY, MEMBER_TEST_STATS, + MEMBER_BUCKET_INDEX, MEMBER_CONTRIB_INDEX, MEMBER_SQUASH_LOG, + HISTORY_BUCKET_DIR, HISTORY_FORMAT_V2, + HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE, NCDB_FORMAT, ) from .manifest import Manifest @@ -43,6 +57,7 @@ class NcdbUCIS(MemUCIS): - **history**: loaded when ``historyNodes()`` is first called. - **scopes**: loaded when ``scopes()`` or any scope-creation method is called for the first time. + - **v2_history**: loaded on demand when any v2 API method is called. Once loaded, a unit is never re-read. 
""" @@ -53,9 +68,30 @@ def __init__(self, path: str): self._loaded_history = False self._loaded_scopes = False self._loaded_attrs = False + self._loaded_v2_history = False self._du_index: dict = {} # name → DU scope (populated after _ensure_scopes) self._zf_cache: dict = {} # member name → bytes (populated on first open) + # Binary history v2 state (None until _ensure_v2_history() is called) + self._test_registry = None + self._test_stats = None + self._bucket_index = None + self._contrib_index = None + self._squash_log = None + self._current_bucket_writer = None + self._sealed_buckets: Dict[int, bytes] = {} # seq → compressed bytes + self._history_v2_dirty: bool = False + + # Testplan lazy state + self._loaded_testplan: bool = False + self._testplan = None # Optional[Testplan] + self._testplan_dirty: bool = False + + # Waivers lazy state + self._loaded_waivers: bool = False + self._waivers = None # Optional[WaiverSet] + self._waivers_dirty: bool = False + # ── Public extra API ────────────────────────────────────────────────── @property @@ -73,6 +109,295 @@ def getDesignUnit(self, name: str): self._ensure_scopes() return self._du_index.get(name) + # ── Binary history v2 API ───────────────────────────────────────────── + + def add_test_run(self, name: str, seed="0", status: int = 0, + ts: Optional[int] = None, + cpu_time: Optional[float] = None, + has_coverage: bool = False, + is_rerun: bool = False) -> int: + """Record one test run in the binary history store. + + Automatically upgrades the manifest to ``history_format = "v2"`` on + first call (no explicit opt-in required). + + Args: + name: Test base-name (e.g. ``"uart_smoke"``). + seed: Test seed string or integer (converted to str). + status: One of the ``HIST_STATUS_*`` constants. + ts: Unix timestamp; defaults to ``int(time.time())``. + cpu_time: CPU/wall time in seconds (optional). + has_coverage: True if this run produced coverage data. + is_rerun: True if this is a retry of a previously-failed run. 
def query_test_history(self, name: str,
                       ts_from: Optional[int] = None,
                       ts_to: Optional[int] = None) -> list:
    """Return all BucketRecord objects for *name* across all buckets.

    Candidate buckets are taken from the bucket index, then the records of
    the current (not yet sealed) bucket are appended.

    Args:
        name: Test name to query.
        ts_from: Optional lower bound timestamp (inclusive).
        ts_to: Optional upper bound timestamp (inclusive).

    Returns:
        List of :class:`~ucis.ncdb.history_buckets.BucketRecord`; empty when
        *name* is not in the test registry.
    """
    self._ensure_v2_history()
    name_to_id = self._test_registry._name_to_id
    if name not in name_to_id:
        return []
    name_id = name_to_id[name]

    import warnings
    from .history_buckets import BucketReader

    def _matching(data: bytes) -> list:
        # Decode one bucket and keep this test's records inside the window.
        return [
            rec for rec in BucketReader(data).records_for_name(name_id)
            if (ts_from is None or rec.ts >= ts_from)
            and (ts_to is None or rec.ts <= ts_to)
        ]

    results = []
    zip_loaded = False
    for entry in self._bucket_index.buckets_for_name(
            name_id, ts_from=ts_from, ts_to=ts_to):
        seq = entry.bucket_seq
        if seq in self._sealed_buckets:
            data = self._sealed_buckets[seq]
        else:
            # Load from ZIP on demand (once per query, not once per bucket).
            if not zip_loaded:
                self._read_zip()
                zip_loaded = True
            member = f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"
            if member not in self._zf_cache:
                continue
            data = self._zf_cache[member]
        results.extend(_matching(data))

    # Also check the current (unsaved) bucket.  This stays best-effort so a
    # problem with the open bucket cannot hide the sealed history, but the
    # failure is reported instead of being silently discarded.
    writer = self._current_bucket_writer
    if writer is not None and writer.num_records > 0:
        try:
            results.extend(_matching(writer.seal_fast()))
        except Exception as exc:
            warnings.warn(
                f"query_test_history: could not read the unsaved history "
                f"bucket for {name!r}: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )

    return results
def get_v2_members(self) -> Dict[str, bytes]:
    """Return a dict of member-name -> bytes for all v2 binary members.

    Called by NcdbWriter to include v2 data in the ZIP output.  Returns an
    empty dict when the v2 state has never been loaded or initialised on
    this instance (``_test_registry`` is still None); the dirty flag does
    not change that outcome.
    """
    if self._test_registry is None:
        return {}

    members: Dict[str, bytes] = {
        MEMBER_TEST_REGISTRY: self._test_registry.serialize(),
        MEMBER_TEST_STATS: self._test_stats.serialize(),
        MEMBER_CONTRIB_INDEX: self._contrib_index.serialize(),
        MEMBER_SQUASH_LOG: self._squash_log.serialize(),
    }

    # Sealed buckets (copy verbatim — already compressed)
    for seq, data in self._sealed_buckets.items():
        members[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = data

    # Current (open) bucket — emitted via seal_fast() together with a
    # synthetic index entry so the merger (and reader) can discover it via
    # bucket_index.  The live index is left untouched; the extra entry goes
    # into a serialise/deserialise copy.
    out_bidx = self._bucket_index
    writer = self._current_bucket_writer
    if writer is not None and writer.num_records > 0:
        from .bucket_index import BucketIndex
        from .constants import HIST_STATUS_FAIL

        seq = self._bucket_index.next_seq()
        members[f"{HISTORY_BUCKET_DIR}{seq:06d}.bin"] = writer.seal_fast()

        recs = writer._records
        timestamps = [r.ts for r in recs]
        name_ids = [r.name_id for r in recs]
        fail_count = sum(1 for r in recs if r.status == HIST_STATUS_FAIL)

        out_bidx = BucketIndex.deserialize(self._bucket_index.serialize())
        out_bidx.add_bucket(seq, min(timestamps), max(timestamps),
                            len(recs), fail_count,
                            min(name_ids), max(name_ids))

    members[MEMBER_BUCKET_INDEX] = out_bidx.serialize()
    return members
+ """ + self._ensure_testplan() + return self._testplan + + def setTestplan(self, tp) -> None: + """Embed *tp* in this database. + + The testplan is written to ``testplan.json`` on the next + :meth:`~ucis.ncdb.ncdb_writer.NcdbWriter.write` call. + + Args: + tp: :class:`~ucis.ncdb.testplan.Testplan` instance. + """ + if not tp.import_timestamp: + tp.stamp_import_time() + self._testplan = tp + self._testplan_dirty = True + self._loaded_testplan = True + + def _ensure_testplan(self) -> None: + if self._loaded_testplan: + return + self._loaded_testplan = True + self._read_zip() + from .constants import MEMBER_TESTPLAN + raw = self._zf_cache.get(MEMBER_TESTPLAN) + if raw: + from .testplan import Testplan + self._testplan = Testplan.from_bytes(raw) + + # ── Waivers API ─────────────────────────────────────────────────────── + + def getWaivers(self): + """Return the embedded waiver set, or ``None`` if none is stored. + + Returns: + :class:`~ucis.ncdb.waivers.WaiverSet` or ``None``. + """ + self._ensure_waivers() + return self._waivers + + def setWaivers(self, ws) -> None: + """Embed *ws* in this database. + + Args: + ws: :class:`~ucis.ncdb.waivers.WaiverSet` instance. 
+ """ + self._waivers = ws + self._waivers_dirty = True + self._loaded_waivers = True + + def _ensure_waivers(self) -> None: + if self._loaded_waivers: + return + self._loaded_waivers = True + self._read_zip() + from .constants import MEMBER_WAIVERS + raw = self._zf_cache.get(MEMBER_WAIVERS) + if raw: + from .waivers import WaiverSet + self._waivers = WaiverSet.from_bytes(raw) + # ── MemUCIS overrides — trigger lazy loads ───────────────────────── def historyNodes(self, kind: HistoryNodeKind): @@ -187,6 +512,94 @@ def _ensure_scopes(self) -> None: from .formal import FormalReader FormalReader().apply(self, formal_data) + def _ensure_v2_history(self) -> None: + """Load v2 binary history from ZIP, or initialize empty state.""" + if self._loaded_v2_history: + return + self._loaded_v2_history = True + self._read_zip() + self._load_v2_history(self._zf_cache) + + def _load_v2_history(self, zf_cache: dict) -> None: + """Deserialize v2 binary members from the ZIP cache dict.""" + from .test_registry import TestRegistry + from .test_stats import TestStatsTable + from .bucket_index import BucketIndex + from .contrib_index import ContribIndex, POLICY_PASS_ONLY + from .squash_log import SquashLog + from .history_buckets import BucketWriter, BucketReader + + if MEMBER_TEST_REGISTRY in zf_cache: + self._test_registry = TestRegistry.deserialize( + zf_cache[MEMBER_TEST_REGISTRY]) + else: + self._test_registry = TestRegistry() + + if MEMBER_TEST_STATS in zf_cache: + self._test_stats = TestStatsTable.deserialize( + zf_cache[MEMBER_TEST_STATS]) + else: + self._test_stats = TestStatsTable() + + if MEMBER_BUCKET_INDEX in zf_cache: + self._bucket_index = BucketIndex.deserialize( + zf_cache[MEMBER_BUCKET_INDEX]) + else: + self._bucket_index = BucketIndex() + + if MEMBER_CONTRIB_INDEX in zf_cache: + self._contrib_index = ContribIndex.deserialize( + zf_cache[MEMBER_CONTRIB_INDEX]) + else: + self._contrib_index = ContribIndex(merge_policy=POLICY_PASS_ONLY) + + if MEMBER_SQUASH_LOG in 
zf_cache: + self._squash_log = SquashLog.deserialize(zf_cache[MEMBER_SQUASH_LOG]) + else: + self._squash_log = SquashLog() + + # Load sealed buckets into memory (verbatim compressed bytes) + self._sealed_buckets = {} + for member, data in zf_cache.items(): + if member.startswith(HISTORY_BUCKET_DIR) and member.endswith(".bin") \ + and member != MEMBER_BUCKET_INDEX: + # Parse seq from filename: "history/000001.bin" → 1 + basename = member[len(HISTORY_BUCKET_DIR):] + try: + seq = int(basename.split(".")[0]) + self._sealed_buckets[seq] = data + except ValueError: + pass + + # Start a fresh current bucket (for new records written this session) + self._current_bucket_writer = BucketWriter() + + def _seal_current_bucket(self) -> None: + """Seal the current bucket and start a new one.""" + from .history_buckets import BucketWriter, BucketReader + w = self._current_bucket_writer + if w.num_records == 0: + return + seq = self._bucket_index.next_seq() + data = w.seal(use_lzma=True) + self._sealed_buckets[seq] = data + + # Build bucket index entry from reader + reader = BucketReader(data) + all_recs = list(reader.all_records()) + ts_start = min(r.ts for r in all_recs) + ts_end = max(r.ts for r in all_recs) + fail_count = sum(1 for r in all_recs if r.status != 0) + name_ids = [r.name_id for r in all_recs] + self._bucket_index.add_bucket( + seq, ts_start, ts_end, + num_records=w.num_records, + fail_count=fail_count, + min_name_id=min(name_ids), + max_name_id=max(name_ids), + ) + self._current_bucket_writer = BucketWriter() + def _load_history(db: MemUCIS, history_bytes: bytes) -> None: """Deserialize history.json and populate *db* with history nodes.""" diff --git a/src/ucis/ncdb/ncdb_writer.py b/src/ucis/ncdb/ncdb_writer.py index 6db9733..83beac7 100644 --- a/src/ucis/ncdb/ncdb_writer.py +++ b/src/ucis/ncdb/ncdb_writer.py @@ -26,7 +26,8 @@ MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, MEMBER_TOGGLE, MEMBER_FSM, MEMBER_CROSS, 
MEMBER_DESIGN_UNITS, MEMBER_FORMAL, - MEMBER_COVERITEM_FLAGS, + MEMBER_COVERITEM_FLAGS, MEMBER_TESTPLAN, MEMBER_WAIVERS, + HISTORY_FORMAT_V2, ) from ucis.history_node_kind import HistoryNodeKind @@ -88,6 +89,14 @@ def write(self, db, path: str) -> None: # 7. Manifest manifest = Manifest.build(db, scope_tree_bytes, counts, all_nodes) + + # Check for v2 binary history members (from NcdbUCIS.get_v2_members) + v2_members = {} + if hasattr(db, 'get_v2_members'): + v2_members = db.get_v2_members() + if v2_members: + manifest.history_format = HISTORY_FORMAT_V2 + manifest_bytes = manifest.serialize() # 8. Write ZIP @@ -119,3 +128,15 @@ def write(self, db, path: str) -> None: zf.writestr(MEMBER_FORMAL, formal_bytes) if ci_flags_bytes: zf.writestr(MEMBER_COVERITEM_FLAGS, ci_flags_bytes) + # v2 binary history members (stored uncompressed — pre-compressed) + for member_name, member_bytes in v2_members.items(): + zf.writestr(member_name, member_bytes, + compress_type=zipfile.ZIP_STORED) + # Testplan (optional) + testplan = getattr(db, '_testplan', None) + if testplan is not None: + zf.writestr(MEMBER_TESTPLAN, testplan.serialize()) + # Waivers (optional) + waivers = getattr(db, '_waivers', None) + if waivers is not None: + zf.writestr(MEMBER_WAIVERS, waivers.serialize()) diff --git a/src/ucis/ncdb/reports.py b/src/ucis/ncdb/reports.py new file mode 100644 index 0000000..fe4ce95 --- /dev/null +++ b/src/ucis/ncdb/reports.py @@ -0,0 +1,1192 @@ +"""Structured reports for testplan closure, stage gates, and test history. + +Every report function returns a typed dataclass with a ``to_json()`` method. +A companion ``format_*()`` function renders the dataclass to a human-readable +string suitable for terminal output. The CLI calls the formatter; automated +consumers use the dataclass or ``to_json()``. 
+""" + +from __future__ import annotations + +import json +import math +import time +from dataclasses import dataclass, field, asdict +from typing import Dict, List, Optional, Tuple + +from ucis.ncdb.testplan import Testplan, Testpoint, get_testplan +from ucis.ncdb.testplan_closure import ( + TPStatus, + TestpointResult, + compute_closure, + stage_gate_status, + _STAGE_ORDER, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_STATUS_ICON = { + TPStatus.CLOSED: "✓", + TPStatus.PARTIAL: "~", + TPStatus.FAILING: "✗", + TPStatus.NOT_RUN: "?", + TPStatus.NA: "N/A", + TPStatus.UNIMPLEMENTED: "-", +} + +_STATUS_LABEL = { + TPStatus.CLOSED: "CLOSED", + TPStatus.PARTIAL: "PARTIAL", + TPStatus.FAILING: "FAILING", + TPStatus.NOT_RUN: "NOT_RUN", + TPStatus.NA: "N/A", + TPStatus.UNIMPLEMENTED: "UNIMP", +} + + +def _pct(num: int, den: int) -> float: + return round(100.0 * num / den, 1) if den else 0.0 + + +# --------------------------------------------------------------------------- +# Report A — testpoint closure table +# --------------------------------------------------------------------------- + +@dataclass +class ClosureSummary: + """Result of ``report_testpoint_closure()``. + + Args: + results: Per-testpoint closure results. + by_stage: Stage-level roll-up: stage → {closed, total, pct}. + total_closed: Number of testpoints with status CLOSED. + total_na: Number of testpoints with status N/A. + total: Total testpoint count. 
def report_testpoint_closure(results: List[TestpointResult]) -> ClosureSummary:
    """Summarise testpoint closure results overall and per stage.

    Args:
        results: List of :class:`~ucis.ncdb.testplan_closure.TestpointResult`
            objects (output of :func:`~ucis.ncdb.testplan_closure.compute_closure`).

    Returns:
        :class:`ClosureSummary`.  Every stage seen gets a roll-up entry, but
        N/A and UNIMPLEMENTED testpoints are left out of that stage's
        ``total``.  A missing stage is reported under ``"unknown"``.
    """
    not_counted = (TPStatus.NA, TPStatus.UNIMPLEMENTED)
    rollup: Dict[str, Dict] = {}

    for res in results:
        key = res.testpoint.stage or "unknown"
        if key not in rollup:
            rollup[key] = {"closed": 0, "total": 0, "pct": 0.0}
        if res.status in not_counted:
            continue
        bucket = rollup[key]
        bucket["total"] += 1
        if res.status == TPStatus.CLOSED:
            bucket["closed"] += 1

    for bucket in rollup.values():
        bucket["pct"] = _pct(bucket["closed"], bucket["total"])

    closed_n = sum(1 for res in results if res.status == TPStatus.CLOSED)
    na_n = sum(1 for res in results if res.status == TPStatus.NA)

    return ClosureSummary(
        results=results,
        by_stage=rollup,
        total_closed=closed_n,
        total_na=na_n,
        total=len(results),
    )
@dataclass
class StageGateReport:
    """Outcome of ``report_stage_gate()``.

    Args:
        stage: Target stage (e.g. ``"V2"``).
        passed: Whether the gate passes.
        blocking: Testpoints that are not yet CLOSED (and not N/A).
        message: Human-readable verdict line.
        gate_detail: Raw detail dict from
            :func:`~ucis.ncdb.testplan_closure.stage_gate_status`.
    """
    stage: str
    passed: bool
    blocking: List[TestpointResult]
    message: str
    gate_detail: dict

    def to_json(self) -> str:
        """Serialize the verdict and blocking testpoints as indented JSON.

        ``gate_detail`` is not part of the JSON output.
        """
        blockers = []
        for res in self.blocking:
            blockers.append({
                "name": res.testpoint.name,
                "stage": res.testpoint.stage,
                "status": res.status.value,
            })
        payload = {
            "stage": self.stage,
            "passed": self.passed,
            "message": self.message,
            "blocking": blockers,
        }
        return json.dumps(payload, indent=2)
@dataclass
class CoveragePerTestpoint:
    """Testpoint-to-covergroup coverage table.

    Args:
        rows: List of (testpoint_name, covergroup_name, hit_pct) tuples.
        unmatched_covergroups: Covergroups that could not be linked to any
            testpoint via the testplan.
    """
    rows: List[Tuple[str, str, float]]
    unmatched_covergroups: List[str]

    def to_json(self) -> str:
        """Serialize the table as indented JSON, one object per row."""
        row_objects = []
        for testpoint_name, covergroup_name, hit_pct in self.rows:
            row_objects.append({
                "testpoint": testpoint_name,
                "covergroup": covergroup_name,
                "hit_pct": hit_pct,
            })
        payload = {
            "rows": row_objects,
            "unmatched_covergroups": self.unmatched_covergroups,
        }
        return json.dumps(payload, indent=2)
def format_coverage_per_testpoint(report: CoveragePerTestpoint) -> str:
    """Render a :class:`CoveragePerTestpoint` as a terminal table.

    Consecutive rows that share a testpoint name show the name only on the
    first of them.

    Args:
        report: Output of :func:`report_coverage_per_testpoint`.

    Returns:
        Human-readable multiline string.
    """
    rows = report.rows
    if not rows:
        return "(no testpoint-covergroup links found)"

    # Column widths follow the longest entry, plus two characters of padding.
    col_tp = max(len(row[0]) for row in rows) + 2
    col_cg = max(len(row[1]) for row in rows) + 2

    header = f"{'Testpoint':<{col_tp}} {'Covergroup':<{col_cg}} {'Hit%':>6}"
    out: List[str] = [header, "-" * len(header)]

    last_name = None
    for name, group, hit in rows:
        if name != last_name:
            shown = name
        else:
            shown = ""
        last_name = name
        out.append(f"{shown:<{col_tp}} {group:<{col_cg}} {hit:>6.1f}%")

    leftovers = report.unmatched_covergroups
    if leftovers:
        out.append("\nUnmatched covergroups: " + ", ".join(leftovers))
    return "\n".join(out)
+ """ + old_map = {r.testpoint.name: r.status for r in results_old} + newly_closed: List[TestpointResult] = [] + newly_failing: List[TestpointResult] = [] + unchanged_open: List[TestpointResult] = [] + + for r in results_new: + old_status = old_map.get(r.testpoint.name) + if r.status == TPStatus.CLOSED and old_status != TPStatus.CLOSED: + newly_closed.append(r) + elif r.status == TPStatus.FAILING and old_status not in ( + TPStatus.FAILING, + None, + ): + newly_failing.append(r) + elif r.status not in (TPStatus.CLOSED, TPStatus.NA): + unchanged_open.append(r) + + summary = ( + f"+{len(newly_closed)} closed, " + f"-{len(newly_failing)} newly failing, " + f"{len(unchanged_open)} still open" + ) + return RegressionDelta( + newly_closed=newly_closed, + newly_failing=newly_failing, + unchanged_open=unchanged_open, + summary=summary, + ) + + +def format_regression_delta(report: RegressionDelta) -> str: + """Render a :class:`RegressionDelta` as a terminal summary. + + Args: + report: Output of :func:`report_regression_delta`. + + Returns: + Human-readable multiline string. 
def report_stage_progression(db, testplan: Testplan, stage: str) -> StageProgression:
    """Compute stage closure over time from v2 history buckets.

    Buckets listed in the bucket index are walked in index order.  After each
    bucket, the number of stage testpoints having at least one passing test
    seen so far is recorded together with that bucket's ``ts_end``.  A
    testpoint is counted at most once, however many of its test patterns
    match, so ``closed`` never exceeds ``total``.

    The computation is best-effort: any exception raised while reading the
    history is swallowed, leaving a shorter or empty series.

    Args:
        db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance.
        testplan: The active testplan.
        stage: Stage to evaluate (e.g. ``"V2"``).

    Returns:
        :class:`StageProgression`; ``series`` is empty and ``current_pct`` is
        ``0.0`` when the stage has no testpoints or no history is readable.
    """
    series: List[Tuple[int, int, int]] = []
    # The stage's testpoint set does not change while walking buckets, so
    # resolve it once instead of once per bucket.
    stage_tps = list(testplan.testpointsForStage(stage, include_lower=True))
    total_tps = len(stage_tps)
    if total_tps == 0:
        return StageProgression(stage=stage, series=[], current_pct=0.0)

    try:
        db._ensure_v2_history()
        bidx = db._bucket_index
        if bidx is not None:
            # One tuple of name prefixes per testpoint; a trailing "*" on a
            # test pattern is treated as a prefix wildcard.
            tp_prefixes = [
                tuple(t.rstrip("*") for t in tp.tests) for tp in stage_tps
            ]
            passed_names: set = set()
            for entry in list(bidx._entries):
                try:
                    reader = db._get_bucket_reader(entry)
                    # NOTE(review): elsewhere BucketReader is iterated via
                    # ``all_records()`` and the registry is described with
                    # ``name_for_id()``; confirm ``records()`` and
                    # ``lookup_name()`` exist on these objects, otherwise the
                    # handler below silently yields an all-zero series.
                    for rec in reader.records():
                        if rec.status == 0:  # HIST_STATUS_OK (pass)
                            passed_names.add(
                                db._test_registry.lookup_name(rec.name_id)
                            )
                except Exception:
                    # Unreadable bucket: keep the names accumulated so far.
                    pass
                # Testpoints (not tests) with >= 1 passing matching test.
                closed = sum(
                    1
                    for prefixes in tp_prefixes
                    if prefixes
                    and any(n.startswith(prefixes) for n in passed_names)
                )
                series.append((entry.ts_end, closed, total_tps))
    except Exception:
        # No usable v2 history: fall back to whatever was collected.
        pass

    current_pct = _pct(series[-1][1], series[-1][2]) if series else 0.0
    return StageProgression(stage=stage, series=series, current_pct=current_pct)
+ """ + if not report.series: + return f"Stage [{report.stage}]: no history data available" + + lines = [f"Stage [{report.stage}] closure over time:"] + bars = "▁▂▃▄▅▆▇█" + spark = "" + for _, closed, total in report.series: + pct = closed / total if total else 0 + idx = min(int(pct * len(bars)), len(bars) - 1) + spark += bars[idx] + lines.append(f" {spark}") + first_ts = report.series[0][0] + last_ts = report.series[-1][0] + lines.append( + f" {time.strftime('%Y-%m-%d', time.gmtime(first_ts))} → " + f"{time.strftime('%Y-%m-%d', time.gmtime(last_ts))}" + ) + lines.append(f" Current: {report.current_pct:.1f}%") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Report F — testpoint reliability (P1) +# --------------------------------------------------------------------------- + +@dataclass +class TestpointReliability: + """Per-testpoint flake scores. + + Args: + rows: List of (testpoint_name, flake_score, pass_count, fail_count). + Sorted by flake_score descending. + flaky_threshold: Score above which a testpoint is considered flaky. + """ + rows: List[Tuple[str, float, int, int]] + flaky_threshold: float = 0.2 + + def to_json(self) -> str: + d = { + "flaky_threshold": self.flaky_threshold, + "rows": [ + { + "testpoint": tp, + "flake_score": score, + "pass": pc, + "fail": fc, + } + for tp, score, pc, fc in self.rows + ], + } + return json.dumps(d, indent=2) + + +def report_testpoint_reliability( + results: List[TestpointResult], + db, + flaky_threshold: float = 0.2, +) -> TestpointReliability: + """Compute per-testpoint flake scores from v2 test_stats. + + Args: + results: Output of :func:`~ucis.ncdb.testplan_closure.compute_closure`. + db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance. + flaky_threshold: Flake score above which a testpoint is flagged. + + Returns: + :class:`TestpointReliability`. 
def format_testpoint_reliability(report: TestpointReliability) -> str:
    """Render a :class:`TestpointReliability` as a terminal table.

    Rows whose flake score is at or above ``report.flaky_threshold`` get a
    trailing warning marker.

    Args:
        report: Output of :func:`report_testpoint_reliability`.

    Returns:
        Human-readable multiline string.
    """
    name_width = max((len(row[0]) for row in report.rows), default=10) + 2
    header = f"{'Testpoint':<{name_width}} {'Flake':>7} {'Pass':>7} {'Fail':>7}"
    out: List[str] = [header, "-" * len(header)]

    for name, flake, passes, fails in report.rows:
        if flake >= report.flaky_threshold:
            marker = " ⚠"
        else:
            marker = ""
        out.append(
            f"{name:<{name_width}} {flake:>7.3f} {passes:>7} {fails:>7}{marker}"
        )
    return "\n".join(out)
def format_unexercised_covergroups(report: UnexercisedCovergroups) -> str:
    """Render an :class:`UnexercisedCovergroups` report as terminal text.

    The zero-hit section comes first, then the low-hit section.  The two are
    separated by one blank line when both are present; when only the low-hit
    section exists the output starts directly with its heading (no leading
    blank line).

    Args:
        report: Output of :func:`report_unexercised_covergroups`.

    Returns:
        Human-readable multiline string.  A single fixed line is returned
        when there are neither zero-hit nor low-hit covergroups.
    """
    lines: List[str] = []
    if report.zero_hit:
        lines.append(f"Zero-hit covergroups ({len(report.zero_hit)}):")
        lines.extend(f" ✗ {name}" for name in report.zero_hit)
    if report.low_hit:
        if lines:
            # Blank separator only when a zero-hit section precedes this one.
            lines.append("")
        lines.append(
            f"Low-hit covergroups (< {report.threshold:.0f}%) "
            f"({len(report.low_hit)}):"
        )
        lines.extend(f" ~ {name} ({pct:.1f}%)" for name, pct in report.low_hit)
    if not lines:
        lines.append("All tracked covergroups are fully hit.")
    return "\n".join(lines)
def format_coverage_contribution(report: CoverageContribution) -> str:
    """Render a :class:`CoverageContribution` as a terminal table.

    Args:
        report: Output of :func:`report_coverage_contribution`.

    Returns:
        Human-readable multiline string: one row per test followed by the
        database-wide bin total, or a placeholder line when there are no rows.
    """
    rows = report.rows
    if not rows:
        return "(no contribution data available — v2 history required)"

    name_width = max(len(row[0]) for row in rows) + 2
    header = f"{'Test':<{name_width}} {'Unique':>8} {'Total hits':>12}"
    out: List[str] = [header, "-" * len(header)]
    out.extend(
        f"{test:<{name_width}} {unique_bins:>8} {total_hits:>12}"
        for test, unique_bins, total_hits in rows
    )
    out.append(f"\nTotal bins in database: {report.total_bins}")
    return "\n".join(out)
+ """ + rows: List[Tuple[str, str, float, int]] + stage_totals: Dict[str, float] + missing_stats: List[str] + + def to_json(self) -> str: + d = { + "stage_totals": self.stage_totals, + "missing_stats": self.missing_stats, + "rows": [ + {"stage": s, "testpoint": tp, "mean_cpu_sec": cpu, "total_runs": n} + for s, tp, cpu, n in self.rows + ], + } + return json.dumps(d, indent=2) + + +def report_test_budget(testplan: Testplan, db) -> TestBudget: + """Estimate CPU-hour budget per stage from v2 test_stats mean CPU times. + + For each testpoint the mean CPU time of all its mapped tests is summed. + Testpoints with no CPU stats are listed in ``missing_stats``. + + Args: + testplan: The active :class:`~ucis.ncdb.testplan.Testplan`. + db: An open :class:`~ucis.ncdb.ncdb_ucis.NcdbUCIS` instance. + + Returns: + :class:`TestBudget`. + """ + rows: List[Tuple[str, str, float, int]] = [] + stage_totals: Dict[str, float] = {} + missing_stats: List[str] = [] + + for tp in testplan.testpoints: + if tp.na or not tp.tests: + continue + total_cpu = 0.0 + total_runs = 0 + found = False + for test_name in tp.tests: + try: + stats = db.get_test_stats(test_name) + if stats and stats.total_runs > 0: + total_cpu += stats.mean_cpu_time * stats.total_runs + total_runs += stats.total_runs + found = True + except Exception: + pass + mean_cpu = total_cpu / total_runs if total_runs else 0.0 + if not found: + missing_stats.append(tp.name) + stage = tp.stage or "unknown" + rows.append((stage, tp.name, mean_cpu, total_runs)) + stage_totals[stage] = stage_totals.get(stage, 0.0) + mean_cpu + + rows.sort(key=lambda r: (_STAGE_ORDER.get(r[0], 999), -r[2])) + return TestBudget(rows=rows, stage_totals=stage_totals, missing_stats=missing_stats) + + +def format_test_budget(report: TestBudget) -> str: + """Render a :class:`TestBudget` as a terminal table. + + Args: + report: Output of :func:`report_test_budget`. + + Returns: + Human-readable multiline string. 
@dataclass
class SafetyMatrix:
    """Requirement x testpoint traceability matrix with waiver flags.

    Args:
        rows: List of (req_id, req_desc, testpoint_name, status, waived).
        untested_requirements: Requirement IDs with no linked testpoints.
    """
    rows: List[Tuple[str, str, str, str, bool]]
    untested_requirements: List[str]

    def to_csv(self) -> str:
        """Render as CSV suitable for safety audits.

        Fields are written with the standard :mod:`csv` writer, so commas,
        double quotes and newlines inside any field are quoted properly and
        the result can be parsed back with ``csv.reader``.  The description
        is emitted as plain text, not as a Python ``repr``.

        Returns:
            Header line plus one line per row, ``"\\n"``-separated, without a
            trailing newline.
        """
        import csv
        import io

        buf = io.StringIO()
        writer = csv.writer(buf, lineterminator="\n")
        writer.writerow(["req_id", "req_desc", "testpoint", "status", "waived"])
        for req_id, req_desc, tp_name, status, waived in self.rows:
            writer.writerow([req_id, req_desc, tp_name, status, waived])
        text = buf.getvalue()
        # The writer terminates every record; drop only the final terminator.
        return text[:-1] if text.endswith("\n") else text

    def to_json(self) -> str:
        """Serialize the matrix as indented JSON, one object per row."""
        d = {
            "untested_requirements": self.untested_requirements,
            "rows": [
                {"req_id": rid, "req_desc": rdesc, "testpoint": tp,
                 "status": st, "waived": w}
                for rid, rdesc, tp, st, w in self.rows
            ],
        }
        return json.dumps(d, indent=2)
@dataclass
class SeedReliability:
    """Pass/fail tallies per seed for a single test name.

    Args:
        test_name: The queried test name.
        rows: List of (seed_id, pass_count, fail_count, flake_score) sorted
            by fail_count descending.
        total_seeds: Total unique seeds seen.
    """
    test_name: str
    rows: List[Tuple[int, int, int, float]]
    total_seeds: int

    def to_json(self) -> str:
        """Return the report as an indented JSON document."""
        row_objects = []
        for seed, passes, fails, flake in self.rows:
            row_objects.append(
                {"seed": seed, "pass": passes, "fail": fails, "flake": flake}
            )
        payload = {
            "test_name": self.test_name,
            "total_seeds": self.total_seeds,
            "rows": row_objects,
        }
        return json.dumps(payload, indent=2)
def format_seed_reliability(report: SeedReliability) -> str:
    """Render a :class:`SeedReliability` as a terminal heat-map table.

    Args:
        report: Output of :func:`report_seed_reliability`.

    Returns:
        Human-readable multiline string; a single "no history" line when
        the report has no rows.
    """
    if not report.rows:
        return f"No history found for test '{report.test_name}'"

    column_titles = f"{'Seed':>12} {'Pass':>7} {'Fail':>7} {'Flake':>7}"
    out: List[str] = [
        f"Seed reliability for '{report.test_name}':",
        column_titles,
        "-" * len(column_titles),
    ]
    for seed, n_pass, n_fail, score in report.rows:
        # Seeds with a flake score of 0.2 or more get a warning marker.
        marker = " ⚠" if score >= 0.2 else ""
        out.append(f"{seed:>12} {n_pass:>7} {n_fail:>7} {score:>7.3f}{marker}")
    out.append(f"\nTotal unique seeds: {report.total_seeds}")
    return "\n".join(out)
+ +Binary layout (little-endian):: + + magic u32 0x53514C47 ('SQLG') + version u8 1 + num_squashes u32 + + entries[num_squashes]: + ts u32 unix timestamp of squash + policy u8 0=all 1=pass_only 2=exclude_error_and_rerun 3=strict + _pad u8[3] + from_run u32 first run_id included in squash + to_run u32 new squash_watermark after this operation + num_runs u32 total runs considered + pass_runs u32 passing runs included in counts.bin contribution + +24 bytes per entry. +""" + +from __future__ import annotations + +import struct +from dataclasses import dataclass +from typing import List + +MAGIC = 0x53514C47 # 'SQLG' +VERSION = 1 + +_HDR = struct.Struct(" None: + self._entries: List[SquashLogEntry] = [] + + def append(self, ts: int, policy: int, from_run: int, to_run: int, + num_runs: int, pass_runs: int) -> None: + """Record a squash event. + + Args: + ts: Unix timestamp of the squash. + policy: Merge policy applied (POLICY_* from contrib_index). + from_run: First run_id included in this squash. + to_run: New squash_watermark after this operation. + num_runs: Total run_ids considered during squash. + pass_runs: Number of passing runs whose contrib was included. 
def serialize(self) -> bytes:
    """Encode the log to bytes for storage in the ZIP archive.

    Returns:
        The packed header (magic, version, entry count) followed by one
        packed record per entry, in the order the entries were appended.
    """
    parts = [_HDR.pack(MAGIC, VERSION, len(self._entries))]
    # Collect the packed records and join once; repeated ``bytes +=`` would
    # copy the whole buffer for every entry.
    parts.extend(
        _ENTRY.pack(e.ts, e.policy, e.from_run, e.to_run,
                    e.num_runs, e.pass_runs)
        for e in self._entries
    )
    return b"".join(parts)
+ +Binary layout (little-endian):: + + magic u32 0x54535452 ('TSTR') + version u8 1 + next_run_id u32 + num_names u32 + num_seeds u32 + name_offsets u32[num_names] byte offsets into name_heap + seed_offsets u32[num_seeds] byte offsets into seed_heap + name_heap bytes null-terminated UTF-8, in name_id order + seed_heap bytes null-terminated UTF-8, in seed_id order + +name_ids are assigned by **insertion order** (first seen = id 0) and are +stable: inserting a new name never shifts existing name_ids. This ensures +that all bucket files and stats entries remain valid across incremental updates. +Seeds behave identically. +""" + +from __future__ import annotations + +import struct +from typing import List, Optional + +MAGIC = 0x54535452 # 'TSTR' +VERSION = 1 + +_HDR = struct.Struct(" None: + self._next_run_id: int = next_run_id + # insertion-order list of name strings — index == name_id (STABLE) + self._names: List[str] = [] + # insertion-order list of seed strings — index == seed_id + self._seeds: List[str] = [] + # fast reverse-lookup dicts + self._name_to_id: dict = {} + self._seed_to_id: dict = {} + + # ── run-id ────────────────────────────────────────────────────────────── + + def assign_run_id(self) -> int: + """Return the next run_id and advance the counter.""" + rid = self._next_run_id + self._next_run_id += 1 + return rid + + @property + def next_run_id(self) -> int: + return self._next_run_id + + # ── name_id ───────────────────────────────────────────────────────────── + + def lookup_name_id(self, name: str) -> int: + """Return the name_id for *name*, assigning one if this is a new name. + + name_ids are assigned by insertion order and are stable — inserting a + new name never changes the id of any existing name. 
def name_for_id(self, name_id: int) -> str:
    """Return the name string for *name_id*.

    Args:
        name_id: Identifier previously returned by ``lookup_name_id``.

    Raises:
        IndexError: if *name_id* is negative or past the last assigned id.
    """
    # Python list indexing would silently map -1 to the most recently
    # registered name; ids are non-negative, so reject that explicitly.
    if name_id < 0:
        raise IndexError(f"name_id {name_id} out of range")
    return self._names[name_id]
def serialize(self) -> bytes:
    """Encode the registry to bytes for storage in the ZIP archive.

    Layout: header, name offset table, seed offset table, name heap,
    seed heap. Each heap holds NUL-terminated UTF-8 strings in id order
    and each offset is relative to the start of its own heap.

    Raises:
        ValueError: if a name or seed contains a NUL character, which
            cannot be represented in a NUL-terminated heap.
    """
    def _build_heap(strings, what):
        # Encode each string once, recording its byte offset in the heap.
        offsets: List[int] = []
        chunks: List[bytes] = []
        pos = 0
        for s in strings:
            raw = s.encode("utf-8")
            if b"\x00" in raw:
                raise ValueError(f"{what} {s!r} contains a NUL character")
            offsets.append(pos)
            chunks.append(raw + b"\x00")
            pos += len(raw) + 1
        return offsets, b"".join(chunks)

    name_offsets, name_heap = _build_heap(self._names, "test name")
    seed_offsets, seed_heap = _build_heap(self._seeds, "seed")

    header = _HDR.pack(MAGIC, VERSION, self._next_run_id,
                       len(self._names), len(self._seeds))
    # A zero-count format ("<0I") packs to b"", so empty tables need no
    # special case.
    offsets = (struct.pack(f"<{len(name_offsets)}I", *name_offsets)
               + struct.pack(f"<{len(seed_offsets)}I", *seed_offsets))
    return header + offsets + name_heap + seed_heap
+ """ + magic, version, next_run_id, num_names, num_seeds = _HDR.unpack_from(data, 0) + if magic != MAGIC: + raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}") + if version != VERSION: + raise ValueError(f"Unsupported test_registry version {version}") + + offset = _HDR.size + + # Offset tables + name_offsets = list(struct.unpack_from(f"<{num_names}I", data, offset)) + offset += 4 * num_names + seed_offsets = list(struct.unpack_from(f"<{num_seeds}I", data, offset)) + offset += 4 * num_seeds + + heap_start = offset + + def _read_string(heap_base: int, str_offset: int) -> str: + start = heap_base + str_offset + end = data.index(b"\x00", start) + return data[start:end].decode() + + # Build name and seed heaps — sizes needed to find seed heap base + name_heap_size = 0 + names = [] + for i in range(num_names): + s = _read_string(heap_start, name_offsets[i]) + names.append(s) + name_heap_size += len(s.encode()) + 1 + + seed_heap_base = heap_start + name_heap_size + seeds = [] + for i in range(num_seeds): + seeds.append(_read_string(seed_heap_base, seed_offsets[i])) + + reg = cls(next_run_id=next_run_id) + # Restore directly — names are in name_id (insertion) order + reg._names = names + reg._name_to_id = {n: i for i, n in enumerate(names)} + reg._seeds = seeds + reg._seed_to_id = {s: i for i, s in enumerate(seeds)} + return reg diff --git a/src/ucis/ncdb/test_stats.py b/src/ucis/ncdb/test_stats.py new file mode 100644 index 0000000..a0d9dd8 --- /dev/null +++ b/src/ucis/ncdb/test_stats.py @@ -0,0 +1,316 @@ +""" +test_stats.bin — per-test aggregate metrics table. + +One fixed-size 64-byte record per unique test, indexed by ``name_id`` from +``test_registry.bin``. All fields are maintained incrementally — O(1) update +per new test run — so aggregate queries (top flaky tests, fail rate, etc.) +need only this file, never the per-bucket records. 
+ +Binary layout of the file header (little-endian):: + + magic u32 0x54535441 ('TSTA') + version u8 1 + num_tests u32 + +Followed by ``num_tests`` 72-byte entries (indexed by name_id): + + total_runs u32 + pass_count u32 + fail_count u32 + error_count u32 + first_ts u32 unix timestamp of first ever run + last_ts u32 unix timestamp of most recent run + last_green_ts u32 unix timestamp of last passing run + transition_count u32 number of status changes between consecutive runs (flake signal) + streak i16 positive = consecutive passes, negative = fails + last_status u8 most recent run status (HIST_STATUS_*) + _pad u8 + flake_score f32 transition_count / max(total_runs-1, 1) ∈ [0,1] + fail_rate f32 fail_count / total_runs ∈ [0,1] + mean_cpu_time f32 Welford online mean (seconds) + m2_cpu_time f32 Welford M2 accumulator + cusum_value f32 running CUSUM statistic + cusum_ref_mean f32 μ₀ used for CUSUM + grade_score f32 composite effectiveness score [0,1] + total_seeds_seen u16 unique seeds ever seen for this test + _reserved u8[6] + +Total: 72 bytes per entry. 
+""" + +from __future__ import annotations + +import math +import struct +import time +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Sequence + +from ucis.ncdb.constants import ( + HIST_STATUS_FAIL, HIST_STATUS_OK, +) + +MAGIC = 0x54535441 # 'TSTA' +VERSION = 1 + +# CUSUM parameters +_CUSUM_K = 0.5 # allowance (half the detectable shift in σ units) +_CUSUM_H = 4.0 # decision threshold (triggers change-point detection) + +_HDR = struct.Struct(" float: + """Standard deviation of CPU time from Welford M2 accumulator.""" + if self.total_runs < 2: + return 0.0 + return math.sqrt(self.m2_cpu_time / self.total_runs) + + def days_since_last_pass(self, now: Optional[int] = None) -> float: + if self.last_green_ts == 0: + return float("inf") + t = now if now is not None else int(time.time()) + return (t - self.last_green_ts) / 86400.0 + + def is_broken(self) -> bool: + """Definitively broken: streak ≤ -5 and flake_score < 0.1.""" + return self.streak <= -5 and self.flake_score < 0.1 + + def is_flaky(self) -> bool: + """Likely flaky: abs(streak) < 3 and flake_score > 0.3.""" + return abs(self.streak) < 3 and self.flake_score > 0.3 + + +class TestStatsTable: + """In-memory representation of ``test_stats.bin``. + + Entries are indexed by ``name_id``; new entries are appended automatically + when :meth:`update` is called with a previously unseen *name_id*. 
def update(self, name_id: int, status: int, ts: int,
           cpu_time: Optional[float] = None,
           seed_id: Optional[int] = None) -> None:
    """Incorporate one new test run into the aggregate statistics.

    Args:
        name_id: Integer name_id from TestRegistry.
        status: HIST_STATUS_* constant.
        ts: Unix timestamp of this run.
        cpu_time: Wall/CPU time in seconds (optional).
        seed_id: seed_id from TestRegistry (optional, for seed diversity).
    """
    self._ensure(name_id)
    e = self._entries[name_id]
    passed = status == HIST_STATUS_OK
    failed = status == HIST_STATUS_FAIL

    e.total_runs += 1
    if e.total_runs == 1:
        e.first_ts = ts
        # The very first observation seeds the CUSUM reference mean.
        e.cusum_ref_mean = 1.0 if failed else 0.0
    else:
        # Runs may be ingested out of order (last_ts below already allows
        # for that), so keep the earliest timestamp seen.
        e.first_ts = min(e.first_ts, ts)
    e.last_ts = max(e.last_ts, ts)

    if passed:
        e.pass_count += 1
        e.last_green_ts = max(e.last_green_ts, ts)
    elif failed:
        e.fail_count += 1
    else:
        e.error_count += 1

    # Streak: positive while passing, negative while not passing; the sign
    # flips to +/-1 on a change. Clamp to the i16 range documented for the
    # on-disk record so a very long streak cannot overflow when packed.
    if passed:
        streak = e.streak + 1 if e.streak >= 0 else 1
    else:
        streak = e.streak - 1 if e.streak <= 0 else -1
    e.streak = max(-0x8000, min(0x7FFF, streak))

    # Transition count (flake signal): status differs from the previous run.
    if e.total_runs > 1 and status != e.last_status:
        e.transition_count += 1
    e.last_status = status

    # Derived rates
    e.flake_score = e.transition_count / max(e.total_runs - 1, 1)
    e.fail_rate = e.fail_count / e.total_runs

    # Welford online mean/variance for cpu_time.
    # NOTE(review): the divisor is total_runs, so runs recorded without a
    # cpu_time still count as samples — confirm whether a separate sample
    # counter is wanted.
    if cpu_time is not None:
        delta = cpu_time - e.mean_cpu_time
        e.mean_cpu_time += delta / e.total_runs
        e.m2_cpu_time += delta * (cpu_time - e.mean_cpu_time)

    # CUSUM update; on crossing the threshold the statistic is reset
    # (caller may log the timestamp of the change-point).
    x = 1.0 if failed else 0.0
    e.cusum_value = max(0.0, e.cusum_value + x - (e.cusum_ref_mean + _CUSUM_K))
    if e.cusum_value > _CUSUM_H:
        e.cusum_value = 0.0

    # Seed diversity. The in-memory seed set is not part of the serialized
    # record, so after a reload it starts empty; never let the persisted
    # count go backwards, and keep it within the u16 on-disk width.
    if seed_id is not None:
        e._known_seeds.add(seed_id)
        e.total_seeds_seen = min(
            0xFFFF, max(e.total_seeds_seen, len(e._known_seeds)))

    # Composite grade: (pass_rate) x (stability) x (speed_factor), where
    # the speed factor falls linearly to 0 at a mean CPU time of 3600 s.
    pass_rate = e.pass_count / e.total_runs
    stability = 1.0 - e.flake_score
    if e.mean_cpu_time > 0:
        speed = max(0.0, 1.0 - e.mean_cpu_time / 3600.0)
    else:
        speed = 1.0
    e.grade_score = pass_rate * stability * speed
def serialize(self) -> bytes:
    """Encode the table to bytes for storage in the ZIP archive.

    Returns:
        The packed header (magic, version, entry count) followed by one
        fixed-size packed record per entry, in ``name_id`` order.
    """
    parts = [_HDR.pack(MAGIC, VERSION, len(self._entries))]
    # Join once at the end; repeated ``bytes +=`` would copy the whole
    # buffer for every entry.
    parts.extend(
        _ENTRY.pack(
            e.total_runs, e.pass_count, e.fail_count, e.error_count,
            e.first_ts, e.last_ts, e.last_green_ts, e.transition_count,
            e.streak, e.last_status, 0,  # _pad
            e.flake_score, e.fail_rate,
            e.mean_cpu_time, e.m2_cpu_time,
            e.cusum_value, e.cusum_ref_mean,
            e.grade_score,
            e.total_seeds_seen,
            b"\x00" * 6,  # _reserved
        )
        for e in self._entries
    )
    return b"".join(parts)
+ """ + magic, version, num_tests = _HDR.unpack_from(data, 0) + if magic != MAGIC: + raise ValueError(f"Bad magic 0x{magic:08X}, expected 0x{MAGIC:08X}") + if version != VERSION: + raise ValueError(f"Unsupported test_stats version {version}") + + tbl = cls() + offset = _HDR.size + for name_id in range(num_tests): + fields = _ENTRY.unpack_from(data, offset) + offset += _ENTRY.size + (total_runs, pass_count, fail_count, error_count, + first_ts, last_ts, last_green_ts, transition_count, + streak, last_status, _pad, + flake_score, fail_rate, + mean_cpu_time, m2_cpu_time, + cusum_value, cusum_ref_mean, + grade_score, + total_seeds_seen, + _reserved) = fields + + e = TestStatsEntry( + name_id=name_id, + total_runs=total_runs, + pass_count=pass_count, + fail_count=fail_count, + error_count=error_count, + first_ts=first_ts, + last_ts=last_ts, + last_green_ts=last_green_ts, + transition_count=transition_count, + streak=streak, + last_status=last_status, + flake_score=flake_score, + fail_rate=fail_rate, + mean_cpu_time=mean_cpu_time, + m2_cpu_time=m2_cpu_time, + cusum_value=cusum_value, + cusum_ref_mean=cusum_ref_mean, + grade_score=grade_score, + total_seeds_seen=total_seeds_seen, + ) + tbl._entries.append(e) + return tbl + + # ── internal ───────────────────────────────────────────────────────────── + + def _ensure(self, name_id: int) -> None: + while len(self._entries) <= name_id: + nid = len(self._entries) + self._entries.append(TestStatsEntry(name_id=nid)) diff --git a/src/ucis/ncdb/testplan.py b/src/ucis/ncdb/testplan.py new file mode 100644 index 0000000..553f306 --- /dev/null +++ b/src/ucis/ncdb/testplan.py @@ -0,0 +1,252 @@ +""" +src/ucis/ncdb/testplan.py — Testplan data model for NCDB. + +A ``Testplan`` describes the structured set of verification tasks (testpoints) +and functional-coverage groups expected for a design. It may be embedded +inside a ``.cdb`` file as ``testplan.json`` (Mode A) or kept as a standalone +file (Mode B). 
# ── leaf types ────────────────────────────────────────────────────────────────

@dataclass
class RequirementLink:
    """Reference to an external requirement item (e.g. ALM/JIRA)."""
    system: str = ""      # e.g. "ALM", "JIRA"
    project: str = ""     # e.g. "PROJ-X"
    item_id: str = ""     # e.g. "REQ-42"
    url: str = ""         # optional direct URL


@dataclass
class CovergroupEntry:
    """One functional-coverage group expected to be exercised by the design."""
    name: str
    desc: str = ""


@dataclass
class Testpoint:
    """One verification task (maps to one or more test names)."""
    name: str
    stage: str                       # "V1" | "V2" | "V2S" | "V3" | custom
    desc: str = ""
    tests: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    na: bool = False                 # tests: ["N/A"] — intentionally unmapped
    source_template: str = ""        # original wildcard template before expansion
    requirements: List[RequirementLink] = field(default_factory=list)


# ── main class ────────────────────────────────────────────────────────────────

@dataclass
class Testplan:
    """Structured verification testplan.

    Attributes:
        format_version: Schema version (currently 1).
        source_file: Path to the source .hjson (informational only).
        import_timestamp: ISO-8601 UTC timestamp set when embedded in a .cdb.
        testpoints: Ordered list of :class:`Testpoint` objects.
        covergroups: Ordered list of :class:`CovergroupEntry` objects.
    """
    format_version: int = 1
    source_file: str = ""
    import_timestamp: str = ""

    testpoints: List[Testpoint] = field(default_factory=list)
    covergroups: List[CovergroupEntry] = field(default_factory=list)

    # ── in-memory indices (built lazily) ──────────────────────────────────
    _tp_by_name: dict = field(default_factory=dict, repr=False, compare=False)
    _tp_by_test: dict = field(default_factory=dict, repr=False, compare=False)
    _indexed: bool = field(default=False, repr=False, compare=False)

    # ── index building ────────────────────────────────────────────────────

    def _build_indices(self) -> None:
        """Rebuild the name and test lookup dicts from ``testpoints``."""
        self._tp_by_name.clear()
        self._tp_by_test.clear()
        for tp in self.testpoints:
            self._tp_by_name[tp.name] = tp
            # When several testpoints list the same test, the last one wins.
            for t in tp.tests:
                self._tp_by_test[t] = tp
        self._indexed = True

    def _ensure_indexed(self) -> None:
        """Build the indices on first use."""
        if not self._indexed:
            self._build_indices()

    def _invalidate_index(self) -> None:
        """Mark the indices stale so the next query rebuilds them."""
        self._indexed = False

    # ── public query API ──────────────────────────────────────────────────

    def getTestpoint(self, name: str) -> Optional[Testpoint]:
        """Return the testpoint with *name*, or ``None``."""
        self._ensure_indexed()
        return self._tp_by_name.get(name)

    def testpointForTest(self, test_name: str) -> Optional[Testpoint]:
        """Return the testpoint that owns *test_name*.

        Match order:

        1. **Exact** — ``test_name`` appears literally in ``testpoint.tests``.
        2. **Seed-suffix strip** — strip a trailing ``_\\d+`` (e.g.
           ``uart_smoke_42`` → ``uart_smoke``) and retry exact match.
        3. **Wildcard** — any ``testpoint.tests`` entry ending in ``_*``
           whose prefix (including the underscore) starts ``test_name``.

        Returns ``None`` if no testpoint matches.
        """
        self._ensure_indexed()
        tp = self._tp_by_test.get(test_name)
        if tp is not None:
            return tp
        stripped = re.sub(r'_\d+$', '', test_name)
        if stripped != test_name:
            tp = self._tp_by_test.get(stripped)
            if tp is not None:
                return tp
        for pattern, candidate in self._tp_by_test.items():
            # pattern[:-1] drops only the "*", keeping the trailing "_".
            if pattern.endswith('_*') and test_name.startswith(pattern[:-1]):
                return candidate
        return None

    def testpointsForStage(self, stage: str) -> List[Testpoint]:
        """Return all testpoints targeting *stage* (e.g. ``"V2"``)."""
        return [tp for tp in self.testpoints if tp.stage == stage]

    def stages(self) -> List[str]:
        """Return the ordered unique stages present in the testplan.

        Standard stages sort V1, V2, V2S, V3; any other stage sorts after
        them, keeping first-seen order among themselves.
        """
        _ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}
        seen = dict.fromkeys(tp.stage for tp in self.testpoints)
        return sorted(seen, key=lambda s: _ORDER.get(s, 99))

    def add_testpoint(self, tp: Testpoint) -> None:
        """Append *tp* and invalidate the lookup indices."""
        self.testpoints.append(tp)
        self._invalidate_index()

    # ── serialization ─────────────────────────────────────────────────────

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict representation.

        The ``tests`` and ``tags`` lists are copies, so mutating the
        returned dict does not alter this testplan.
        """
        return {
            "format_version": self.format_version,
            "source_file": self.source_file,
            "import_timestamp": self.import_timestamp,
            "testpoints": [
                {
                    "name": tp.name,
                    "stage": tp.stage,
                    "desc": tp.desc,
                    "tests": list(tp.tests),
                    "tags": list(tp.tags),
                    "na": tp.na,
                    "source_template": tp.source_template,
                    "requirements": [
                        {"system": r.system, "project": r.project,
                         "item_id": r.item_id, "url": r.url}
                        for r in tp.requirements
                    ],
                }
                for tp in self.testpoints
            ],
            "covergroups": [
                {"name": cg.name, "desc": cg.desc}
                for cg in self.covergroups
            ],
        }

    def serialize(self) -> bytes:
        """Serialise to compact UTF-8 JSON bytes (for ZIP embedding)."""
        return json.dumps(self.to_dict(), separators=(',', ':')).encode("utf-8")

    @classmethod
    def from_dict(cls, d: dict) -> "Testplan":
        """Reconstruct a :class:`Testplan` from a plain dict.

        Missing optional keys fall back to their defaults; each testpoint
        and covergroup record must carry a ``"name"``.

        Raises:
            KeyError: if a testpoint or covergroup record has no ``"name"``.
        """
        obj = cls(
            format_version=d.get("format_version", 1),
            source_file=d.get("source_file", ""),
            import_timestamp=d.get("import_timestamp", ""),
        )
        for rec in d.get("testpoints", []):
            reqs = [
                RequirementLink(
                    system=r.get("system", ""), project=r.get("project", ""),
                    item_id=r.get("item_id", ""), url=r.get("url", ""),
                )
                for r in rec.get("requirements", [])
            ]
            obj.testpoints.append(Testpoint(
                name=rec["name"],
                stage=rec.get("stage", ""),
                desc=rec.get("desc", ""),
                # Copy so the testplan never shares list objects with *d*.
                tests=list(rec.get("tests", [])),
                tags=list(rec.get("tags", [])),
                na=rec.get("na", False),
                source_template=rec.get("source_template", ""),
                requirements=reqs,
            ))
        for rec in d.get("covergroups", []):
            obj.covergroups.append(CovergroupEntry(
                name=rec["name"], desc=rec.get("desc", ""),
            ))
        return obj

    @classmethod
    def from_bytes(cls, data: bytes) -> "Testplan":
        """Reconstruct from JSON bytes (inverse of :meth:`serialize`)."""
        return cls.from_dict(json.loads(data.decode("utf-8")))

    @classmethod
    def load(cls, path: str) -> "Testplan":
        """Load a testplan from a standalone JSON file (Mode B).

        The file is parsed with :func:`json.loads`, so it must be strict
        JSON; hjson-only syntax is not accepted here.
        """
        with open(path, "rb") as f:
            return cls.from_bytes(f.read())

    def save(self, path: str) -> None:
        """Write this testplan to a standalone JSON file (Mode B)."""
        with open(path, "wb") as f:
            f.write(self.serialize())

    def stamp_import_time(self) -> None:
        """Set :attr:`import_timestamp` to the current UTC time."""
        self.import_timestamp = datetime.now(timezone.utc).isoformat()
class TPStatus(Enum):
    """Closure status of one testpoint."""
    CLOSED = "CLOSED"                # no failures among the matched tests
    PARTIAL = "PARTIAL"              # some passed, some failed
    FAILING = "FAILING"              # matched tests ran and none passed
    NOT_RUN = "NOT_RUN"              # none of the mapped tests appear in the DB
    NA = "N/A"                       # testpoint intentionally unmapped (na=True)
    UNIMPLEMENTED = "UNIMPLEMENTED"  # tests list is empty


# Standard stage ordering for gate evaluation
_STAGE_ORDER = {"V1": 0, "V2": 1, "V2S": 2, "V3": 3}

# Trailing "_<digits>" seed suffix on a test name.
_SEED_SUFFIX_RE = re.compile(r'_\d+$')


@dataclass
class TestpointResult:
    """Closure result for one testpoint."""
    testpoint: Testpoint
    status: TPStatus
    matched_tests: List[str]
    pass_count: int = 0
    fail_count: int = 0


def _collect_test_counts(db):
    """Return ``(test_pass, test_fail)`` dicts mapping test name to run counts."""
    test_pass: Dict[str, int] = {}
    test_fail: Dict[str, int] = {}

    # Trigger lazy v2 history load for NcdbUCIS
    if hasattr(db, '_ensure_v2_history'):
        db._ensure_v2_history()

    # Try v2 binary history first (object with _test_registry attached)
    if getattr(db, '_test_registry', None) is not None:
        reg = db._test_registry
        stats = db._test_stats
        for nid, name in enumerate(reg._names):
            entry = stats.get(nid)
            # A registered name whose stats record has no runs has not been
            # executed; leaving it out keeps its testpoint NOT_RUN.
            if entry is None or entry.total_runs == 0:
                continue
            test_pass[name] = entry.pass_count
            # ERROR-class runs are non-passing, matching the history-node
            # fallback below where every non-OK status counts as a failure.
            test_fail[name] = entry.fail_count + entry.error_count
        return test_pass, test_fail

    # Fall back to UCIS history nodes. Best effort: a db without usable
    # history simply yields no counts.
    try:
        from ucis.history_node_kind import HistoryNodeKind
        for node in db.historyNodes(HistoryNodeKind.TEST):
            name = node.getLogicalName()
            try:
                from ucis.test_status_t import TestStatusT
                if node.getTestStatus() == TestStatusT.OK:
                    test_pass[name] = test_pass.get(name, 0) + 1
                else:
                    test_fail[name] = test_fail.get(name, 0) + 1
            except Exception:
                # Status unavailable: the run is counted as a pass.
                test_pass[name] = test_pass.get(name, 0) + 1
    except Exception:
        pass
    return test_pass, test_fail


def compute_closure(testplan: Testplan, db,
                    waivers=None) -> List[TestpointResult]:
    """Compute pass/fail closure for every testpoint against *db*.

    Args:
        testplan: The testplan to evaluate.
        db: Any UCIS database object (must expose ``historyNodes()``).
        waivers: Optional :class:`~ucis.ncdb.waivers.WaiverSet`; reserved
            for future use (currently ignored).

    Returns:
        One :class:`TestpointResult` per testpoint, in testplan order.
    """
    test_pass, test_fail = _collect_test_counts(db)
    # Every known test name once: passing names first, then fail-only names.
    known_names = list(test_pass) + [t for t in test_fail if t not in test_pass]

    results: List[TestpointResult] = []
    for tp in testplan.testpoints:
        if tp.na:
            results.append(TestpointResult(tp, TPStatus.NA, []))
            continue
        if not tp.tests:
            results.append(TestpointResult(tp, TPStatus.UNIMPLEMENTED, []))
            continue

        matched: List[str] = []
        matched_set = set()   # mirrors `matched` for O(1) membership tests
        passes = fails = 0

        for pattern in tp.tests:
            # Exact match
            if pattern in test_pass or pattern in test_fail:
                matched.append(pattern)
                matched_set.add(pattern)
                passes += test_pass.get(pattern, 0)
                fails += test_fail.get(pattern, 0)
                continue
            # Seed-suffix strip
            stripped = _SEED_SUFFIX_RE.sub('', pattern)
            if stripped != pattern and (stripped in test_pass or stripped in test_fail):
                matched.append(stripped)
                matched_set.add(stripped)
                passes += test_pass.get(stripped, 0)
                fails += test_fail.get(stripped, 0)
                continue
            # Wildcard prefix ("foo_*" matches names starting with "foo_")
            if pattern.endswith('_*'):
                prefix = pattern[:-1]
                for tname in known_names:
                    if tname.startswith(prefix) and tname not in matched_set:
                        matched.append(tname)
                        matched_set.add(tname)
                        passes += test_pass.get(tname, 0)
                        fails += test_fail.get(tname, 0)

        if not matched:
            status = TPStatus.NOT_RUN
        elif fails == 0:
            status = TPStatus.CLOSED
        elif passes == 0:
            status = TPStatus.FAILING
        else:
            status = TPStatus.PARTIAL

        results.append(TestpointResult(tp, status, matched, passes, fails))

    return results


def stage_gate_status(results: List[TestpointResult],
                      stage: str,
                      testplan: Testplan,
                      require_flake_score_below: Optional[float] = None,
                      require_coverage_pct: Optional[float] = None) -> dict:
    """Determine whether the gate for *stage* is met.

    A stage gate passes when every testpoint at *stage*, and at every stage
    with a lower standard index, is CLOSED, N/A or UNIMPLEMENTED. A stage
    outside the standard V1/V2/V2S/V3 set ranks last, so gating on it
    requires every stage in the testplan.

    Args:
        results: Output of :func:`compute_closure`.
        stage: Stage to evaluate (e.g. ``"V2"``).
        testplan: The testplan (used for stage ordering).
        require_flake_score_below: Reserved — flakiness threshold (future).
        require_coverage_pct: Reserved — coverage threshold (future).

    Returns:
        Dict with keys ``passed`` (bool), ``stage``, ``blocking``
        (list of :class:`TestpointResult` that prevent the gate from passing),
        and ``message`` (human-readable summary string).
    """
    target_rank = _STAGE_ORDER.get(stage, 99)

    # Collect all stages that must pass (≤ target rank)
    stages_required = {s for s in testplan.stages()
                       if _STAGE_ORDER.get(s, 99) <= target_rank}

    non_blocking = (TPStatus.CLOSED, TPStatus.NA, TPStatus.UNIMPLEMENTED)
    blocking: List[TestpointResult] = [
        r for r in results
        if r.testpoint.stage in stages_required and r.status not in non_blocking
    ]

    passed = len(blocking) == 0
    if passed:
        message = f"Stage {stage} gate PASSED"
    else:
        # Name at most five blockers; summarise the rest as a count.
        names = ", ".join(r.testpoint.name for r in blocking[:5])
        extra = f" (+{len(blocking)-5} more)" if len(blocking) > 5 else ""
        message = f"Stage {stage} gate FAILED — blocking: {names}{extra}"

    return {
        "passed": passed,
        "stage": stage,
        "blocking": blocking,
        "message": message,
    }
def export_junit_xml(
    results: List[TestpointResult],
    output_path: str,
    suite_name: str = "testplan_closure",
) -> None:
    """Write closure results to *output_path* as a JUnit XML document.

    One ``<testcase>`` is emitted per testpoint, with the testpoint stage
    (or ``"unknown"``) as its ``classname`` and the description, when
    present, in a ``<system-out>`` child.  FAILING and PARTIAL testpoints
    carry a ``<failure>`` child, NOT_RUN testpoints a ``<skipped>`` child;
    every other status is written as a plain passing testcase.

    Args:
        results:     Output of
                     :func:`~ucis.ncdb.testplan_closure.compute_closure`.
        output_path: Destination ``.xml`` file path.
        suite_name:  Value of the ``name`` attribute on ``<testsuite>``.

    Example::

        from ucis.ncdb.testplan_export import export_junit_xml
        export_junit_xml(results, "closure_results.xml")
    """
    # status -> (word used in the failure message, JUnit "type" attribute)
    failure_kinds = {
        TPStatus.FAILING: ("FAILING", "TestpointFailure"),
        TPStatus.PARTIAL: ("PARTIAL", "TestpointPartial"),
    }

    n_failed = len([res for res in results if res.status in failure_kinds])
    n_skipped = len([res for res in results if res.status == TPStatus.NOT_RUN])
    stamp = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

    root = ET.Element(
        "testsuite",
        name=suite_name,
        tests=str(len(results)),
        failures=str(n_failed),
        skipped=str(n_skipped),
        timestamp=stamp,
    )

    for res in results:
        point = res.testpoint
        case = ET.SubElement(
            root,
            "testcase",
            name=point.name,
            classname=point.stage or "unknown",
        )
        if point.desc:
            stdout_node = ET.SubElement(case, "system-out")
            stdout_node.text = point.desc

        kind = failure_kinds.get(res.status)
        if kind is not None:
            word, junit_type = kind
            failure_node = ET.SubElement(
                case,
                "failure",
                message=f"Testpoint {word}: "
                        f"pass={res.pass_count} fail={res.fail_count}",
                type=junit_type,
            )
            failure_node.text = (
                f"Matched tests: {', '.join(res.matched_tests) or 'none'}\n"
                f"Pass: {res.pass_count} Fail: {res.fail_count}"
            )
        elif res.status == TPStatus.NOT_RUN:
            ET.SubElement(case, "skipped", message="Testpoint not run")

    document = ET.ElementTree(root)
    ET.indent(document, space=" ")
    document.write(output_path, encoding="utf-8", xml_declaration=True)
def export_summary_markdown(
    results: List[TestpointResult],
    stage_gate: Optional[StageGateReport] = None,
    history_db=None,
) -> str:
    """Render closure results as GitHub Actions Job Summary markdown.

    The document has up to four parts: a headline with closed / N/A / open
    counts, a per-stage roll-up table, a per-testpoint table (N/A and
    UNIMPLEMENTED testpoints are left out of it), and, when *stage_gate*
    lists blocking testpoints, a bullet list of them.

    Args:
        results:    Output of
                    :func:`~ucis.ncdb.testplan_closure.compute_closure`.
        stage_gate: Optional :class:`~ucis.ncdb.reports.StageGateReport`;
                    when given, a gate verdict line is included.
        history_db: Unused; reserved for future trend lines.

    Returns:
        A markdown string suitable for appending to
        ``$GITHUB_STEP_SUMMARY``.

    Example::

        from ucis.ncdb.testplan_export import export_summary_markdown
        md = export_summary_markdown(results, stage_gate=gate)
        with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
            f.write(md)
    """
    rollup = report_testpoint_closure(results)
    n_total = rollup.total
    n_closed = rollup.total_closed
    n_na = rollup.total_na

    # Headline
    md: List[str] = [
        "## Testplan Closure Report\n",
        f"**{n_closed}/{n_total}** testpoints closed "
        f"({n_na} N/A, {n_total - n_closed - n_na} open)\n",
    ]

    # Stage gate verdict
    gate_given = stage_gate is not None
    if gate_given:
        if stage_gate.passed:
            verdict = "✅ PASS"
        else:
            verdict = "❌ FAIL"
        md.append(f"**Stage gate [{stage_gate.stage}]:** {verdict}\n")

    # Stage roll-up table; stages missing from _STAGE_ORDER sort last
    def _stage_rank(item):
        return _STAGE_ORDER.get(item[0], 999)

    stage_rows = sorted(rollup.by_stage.items(), key=_stage_rank)
    if stage_rows:
        md += [
            "### By stage\n",
            "| Stage | Closed | Total | % |",
            "|-------|-------:|------:|--:|",
        ]
        md += [
            f"| {stage_name} | {row['closed']} | {row['total']} "
            f"| {row['pct']:.1f}% |"
            for stage_name, row in stage_rows
        ]
        md.append("")

    # Testpoint detail table: N/A and UNIMPLEMENTED rows are omitted
    hidden = (TPStatus.NA, TPStatus.UNIMPLEMENTED)
    shown = [res for res in results if res.status not in hidden]
    if shown:
        md += [
            "### Testpoints\n",
            "| Testpoint | Stage | Status | Pass | Fail |",
            "|-----------|-------|--------|-----:|-----:|",
        ]
        icons = {
            TPStatus.CLOSED: "✅",
            TPStatus.PARTIAL: "⚠️",
            TPStatus.FAILING: "❌",
            TPStatus.NOT_RUN: "⬜",
        }
        for res in shown:
            icon = icons.get(res.status, "")
            md.append(
                f"| {res.testpoint.name} | {res.testpoint.stage or '?'} "
                f"| {icon} {_STATUS_LABEL[res.status]} "
                f"| {res.pass_count} | {res.fail_count} |"
            )
        md.append("")

    # Blocking testpoints
    if gate_given and stage_gate.blocking:
        md.append("### Blocking testpoints\n")
        md.extend(
            f"- ❌ **[{blk.testpoint.stage}] {blk.testpoint.name}** "
            f"— {_STATUS_LABEL[blk.status]}"
            for blk in stage_gate.blocking
        )
        md.append("")

    return "\n".join(md)
+ """ + subs = substitutions or {} + with open(hjson_path, "r", encoding="utf-8") as fh: + raw = fh.read() + + if _HJSON_AVAILABLE: + data = _hjson.loads(raw) + else: + import json + data = json.loads(raw) + + plan = Testplan(source_file=os.path.abspath(hjson_path)) + + for rec in data.get("testpoints", []): + raw_tests = rec.get("tests", []) + if raw_tests == ["N/A"]: + plan.add_testpoint(Testpoint( + name=rec.get("name", ""), + stage=rec.get("stage", ""), + desc=rec.get("desc", ""), + tags=rec.get("tags", []), + na=True, + tests=[], + source_template="", + )) + continue + + expanded: List[str] = [] + templates: List[str] = [] + for tmpl in raw_tests: + results = _expand_template(tmpl, subs) + expanded.extend(results) + if len(results) > 1 or tmpl != results[0]: + templates.append(tmpl) + + plan.add_testpoint(Testpoint( + name=rec.get("name", ""), + stage=rec.get("stage", ""), + desc=rec.get("desc", ""), + tags=rec.get("tags", []), + na=False, + tests=expanded, + source_template=", ".join(templates), + )) + + for rec in data.get("covergroups", []): + plan.covergroups.append(CovergroupEntry( + name=rec.get("name", ""), + desc=rec.get("desc", ""), + )) + + return plan + + +# ── internal helpers ────────────────────────────────────────────────────────── + +def _expand_template(template: str, + subs: Dict[str, object]) -> List[str]: + """Expand ``{key}`` placeholders in *template* using *subs*. + + Each ``{key}`` whose value in *subs* is a list produces multiple + output strings (cartesian product). Scalar values are substituted + directly. Keys absent from *subs* are left as-is. 
+ + Examples:: + + _expand_template("uart_{baud}_test", {"baud": ["9600", "115200"]}) + # → ["uart_9600_test", "uart_115200_test"] + + _expand_template("{mod}_{type}", {"mod": ["a", "b"], "type": "x"}) + # → ["a_x", "b_x"] + """ + keys_found = re.findall(r'\{(\w+)\}', template) + if not keys_found: + return [template] + + # Build lists for each placeholder + lists: List[List[str]] = [] + ordered_keys: List[str] = [] + for key in dict.fromkeys(keys_found): # preserve order, deduplicate + val = subs.get(key) + if val is None: + lists.append([f"{{{key}}}"]) # unknown key left verbatim + elif isinstance(val, list): + lists.append([str(v) for v in val]) + else: + lists.append([str(val)]) + ordered_keys.append(key) + + results: List[str] = [] + for combo in itertools.product(*lists): + s = template + for key, replacement in zip(ordered_keys, combo): + s = s.replace(f"{{{key}}}", replacement) + results.append(s) + return results + + +def _expand_tests(test_list: List[str], + subs: Dict[str, object]) -> List[str]: + """Expand an entire ``tests`` list, returning the flat list of names.""" + result: List[str] = [] + for tmpl in test_list: + result.extend(_expand_template(tmpl, subs)) + return result diff --git a/src/ucis/ncdb/waivers.py b/src/ucis/ncdb/waivers.py new file mode 100644 index 0000000..349dd79 --- /dev/null +++ b/src/ucis/ncdb/waivers.py @@ -0,0 +1,195 @@ +""" +src/ucis/ncdb/waivers.py — Coverage and test-failure waivers. + +A :class:`WaiverSet` contains zero or more :class:`Waiver` objects. Each +waiver suppresses a known failure or uncovered bin so that reports distinguish +*known* issues from new regressions. + +Waivers are stored as ``waivers.json`` inside the NCDB ZIP (optional member) +or as a standalone JSON file. + +Expiry enforcement is the **caller's responsibility** — :meth:`WaiverSet.matches` +performs only pattern matching. To filter out expired waivers call +:meth:`WaiverSet.active_at` first. 
@dataclass
class Waiver:
    """One waiver record: a scope/bin pattern plus approval metadata.

    Attributes:
        id:            Unique identifier (e.g. ``"W-001"``).
        scope_pattern: Glob-style pattern matched against UCIS scope paths.
                       ``*`` matches within a single path segment; ``**``
                       matches across segments.
        bin_pattern:   Glob-style pattern matched against bin names within
                       the matched scope.  ``"*"`` waives the entire scope.
        rationale:     Human-readable explanation.
        approver:      Name/username of approver.
        approved_at:   ISO-8601 UTC timestamp of approval.
        expires_at:    ISO-8601 UTC timestamp after which this waiver
                       expires.  Empty string means "never expires".
        status:        ``"active"`` | ``"expired"`` | ``"revoked"``.
    """
    id: str
    scope_pattern: str
    bin_pattern: str = "*"
    rationale: str = ""
    approver: str = ""
    approved_at: str = ""
    expires_at: str = ""
    status: str = "active"

    def matches(self, scope_path: str, bin_name: str = "") -> bool:
        """Tell whether this waiver covers *scope_path* / *bin_name*.

        The scope pattern must match *scope_path*.  The bin pattern is only
        consulted when a non-empty *bin_name* is given and the bin pattern
        is something other than ``"*"``; in every other case a scope match
        alone is sufficient.

        Neither ``status`` nor ``expires_at`` is examined here — use
        :meth:`WaiverSet.active_at` first to drop waivers that are not
        in force.
        """
        scope_ok = _glob_match(self.scope_pattern, scope_path)
        if not scope_ok:
            return False
        # De Morgan form of "bin_name and bin_pattern != '*'"
        bin_unconstrained = (not bin_name) or self.bin_pattern == "*"
        if bin_unconstrained:
            return True
        return _glob_match(self.bin_pattern, bin_name)
def to_dict(self) -> dict:
    """Return a JSON-serialisable dict describing every waiver.

    The result has two keys: ``"format_version"`` (always ``1``) and
    ``"waivers"``, a list with one dict per waiver in the order held by
    ``self.waivers``.
    """
    field_names = (
        "id",
        "scope_pattern",
        "bin_pattern",
        "rationale",
        "approver",
        "approved_at",
        "expires_at",
        "status",
    )
    records = []
    for waiver in self.waivers:
        # Key order follows field_names, i.e. the attribute order above
        records.append({name: getattr(waiver, name) for name in field_names})
    return {"format_version": 1, "waivers": records}
+ expires_at=rec.get("expires_at", ""), + status=rec.get("status", "active"), + )) + return ws + + @classmethod + def from_bytes(cls, data: bytes) -> "WaiverSet": + return cls.from_dict(json.loads(data.decode())) + + @classmethod + def load(cls, path: str) -> "WaiverSet": + """Load from a standalone JSON file.""" + with open(path, "rb") as f: + return cls.from_bytes(f.read()) + + def save(self, path: str) -> None: + """Write to a standalone JSON file.""" + with open(path, "wb") as f: + f.write(self.serialize()) + + +# ── glob matching helper ────────────────────────────────────────────────────── + +def _glob_match(pattern: str, text: str) -> bool: + """Simple glob match: ``*`` = single-segment wildcard, ``**`` = multi-segment.""" + import fnmatch + # Replace '**' with a temporary token, expand, then match + # Use fnmatch with '**' expanded to match any path + # Strategy: convert glob to regex + import re + regex = _glob_to_regex(pattern) + return bool(re.fullmatch(regex, text)) + + +def _glob_to_regex(pattern: str) -> str: + """Convert a glob pattern to a regex string.""" + import re + # Parse pattern left-to-right, emitting regex pieces + result = [] + i = 0 + while i < len(pattern): + if pattern[i:i+3] == '**/': + result.append('(?:.+/)?') + i += 3 + elif pattern[i:i+2] == '**': + result.append('.*') + i += 2 + elif pattern[i] == '*': + result.append('[^/]*') + i += 1 + else: + result.append(re.escape(pattern[i])) + i += 1 + return ''.join(result) diff --git a/src/ucis/report/coverage_metrics.py b/src/ucis/report/coverage_metrics.py new file mode 100644 index 0000000..3148fe7 --- /dev/null +++ b/src/ucis/report/coverage_metrics.py @@ -0,0 +1,762 @@ +""" +Common coverage metrics layer. + +``CoverageMetrics`` is the single source of truth for all coverage number +computation in pyucis. 
@dataclass
class BinStats:
    """Total and covered bin counts for a coverage scope or type.

    Instances can be added together; the sum carries the element-wise
    totals of both operands.
    """
    total: int = 0
    covered: int = 0

    @property
    def uncovered(self) -> int:
        """Number of bins that are not covered (``total - covered``)."""
        return self.total - self.covered

    @property
    def coverage_pct(self) -> float:
        """Covered bins as a percentage of all bins; ``0.0`` when empty."""
        # An empty scope reports 0 % instead of dividing by zero
        return self.covered / self.total * 100.0 if self.total != 0 else 0.0

    def __add__(self, other: 'BinStats') -> 'BinStats':
        return BinStats(
            total=self.total + other.total,
            covered=self.covered + other.covered,
        )
@dataclass
class FileCoverageStats:
    """Code-coverage bin counts for one source file, split by coverage kind.

    Each kind (line, branch, toggle, expression, condition, FSM, block)
    holds its own :class:`BinStats`, starting out empty.
    """
    file_id: int
    file_path: str
    line: BinStats = field(default_factory=BinStats)
    branch: BinStats = field(default_factory=BinStats)
    toggle: BinStats = field(default_factory=BinStats)
    expr: BinStats = field(default_factory=BinStats)
    cond: BinStats = field(default_factory=BinStats)
    fsm: BinStats = field(default_factory=BinStats)
    block: BinStats = field(default_factory=BinStats)

    @property
    def overall(self) -> BinStats:
        """Sum of the per-kind :class:`BinStats` for this file."""
        per_kind = (
            self.line,
            self.branch,
            self.toggle,
            self.expr,
            self.cond,
            self.fsm,
            self.block,
        )
        # sum() folds left to right with BinStats.__add__, starting from
        # an empty BinStats
        return sum(per_kind, BinStats())
--------------------------------------------------------------------------- +# CoverageMetrics +# --------------------------------------------------------------------------- + +class CoverageMetrics: + """ + Single source of truth for all coverage metric computation. + + Instantiate once per database and pass the instance to every consumer + (TUI CoverageModel, show commands, report formatters). + + Parameters + ---------- + db: + Any object implementing the ``UCIS`` interface (MemUCIS, SqliteUCIS, + XmlUCIS, …). + """ + + def __init__(self, db: 'UCIS'): + self._db = db + self._cache: Dict[str, object] = {} + + # ------------------------------------------------------------------ cache + + def invalidate(self): + """Discard all cached results (e.g. after changing a test filter).""" + self._cache.clear() + + def _cached(self, key: str, compute): + if key not in self._cache: + self._cache[key] = compute() + return self._cache[key] + + # --------------------------------------------------------------- hierarchy + + @property + def report(self) -> 'CoverageReport': + """ + ``CoverageReport`` built via ``CoverageReportBuilder``. + + This is the canonical hierarchical representation of functional + coverage. All functional-coverage numbers in this class are derived + from this object to guarantee consistency with the text / JSON / HTML + report formatters. + """ + return self._cached('report', self._build_report) + + def _build_report(self) -> 'CoverageReport': + from ucis.report.coverage_report_builder import CoverageReportBuilder + return CoverageReportBuilder.build(self._db) + + # ------------------------------------------ functional coverage (CVGBIN) + + def functional_bins(self) -> BinStats: + """ + Aggregate ``BinStats`` for all functional-coverage bins (CVGBIN). + + A bin is counted as *covered* when ``count >= at_least`` — matching + the UCIS LRM and the text-report formatter. 
+ """ + return self._cached('functional_bins', self._compute_functional_bins) + + def _compute_functional_bins(self) -> BinStats: + # Derive directly from the canonical CoverageReport so that the + # traversal (INSTANCE → COVERGROUP → COVERPOINT) is identical to what + # the text / JSON reports use. This also means the SQLite fast path + # is not needed here — CoverageReportBuilder already handles both + # backends efficiently. + total = 0 + covered = 0 + for cg in self.report.covergroups: + t, c = self._count_bins_in_cg(cg) + total += t + covered += c + return BinStats(total=total, covered=covered) + + def _count_bins_in_cg(self, cg) -> tuple: + """ + Recursively count (total, covered) bins in a CoverageReport.Covergroup. + + Mirrors CoverageReportBuilder.build_covergroup() semantics: when + type-level coverpoints/crosses exist on the CG, only those are + counted (the sub-instance COVERINSTANCE groups hold the same bins + and would double-count if also visited). + """ + total = 0 + covered = 0 + if cg.coverpoints or cg.crosses: + # Type-level coverpoints are present — use them only + for cp in cg.coverpoints: + for b in cp.bins: + total += 1 + if b.hit: + covered += 1 + for cr in cg.crosses: + for b in cr.bins: + total += 1 + if b.hit: + covered += 1 + else: + # No type-level coverpoints — aggregate over sub-instances + for sub in cg.covergroups: + t, c = self._count_bins_in_cg(sub) + total += t + covered += c + return total, covered + + def covergroup_stats(self) -> List[CovergroupStats]: + """One ``CovergroupStats`` per top-level covergroup.""" + return self._cached('covergroup_stats', self._compute_covergroup_stats) + + def _compute_covergroup_stats(self) -> List[CovergroupStats]: + result = [] + for cg in self.report.covergroups: + t, c = self._count_bins_in_cg(cg) + result.append(CovergroupStats( + name=cg.name, + path=cg.instname, + coverage_pct=cg.coverage, + bins=BinStats(total=t, covered=c), + weight=cg.weight, + )) + return result + + def 
def _compute_cross_stats(self) -> List[CrossStats]:
    """Build the flat cross list returned by :meth:`cross_stats`.

    Covergroups of ``self.report`` are walked depth-first.  A covergroup
    that carries coverpoints or crosses of its own contributes its crosses
    and is not descended into; only covergroups without such items are
    searched through their nested covergroups.  This is the same rule
    ``_count_bins_in_cg`` and ``_compute_coverpoint_stats`` apply, so a
    cross that is repeated in sub-instances is reported once rather than
    once per instance.

    Returns:
        One :class:`CrossStats` per cross; ``path`` is the slash-joined
        chain of covergroup names followed by the cross name.
    """
    result: List[CrossStats] = []

    def _walk_cg(cg, path_prefix: str):
        cg_path = f'{path_prefix}/{cg.name}' if path_prefix else cg.name
        if cg.coverpoints or cg.crosses:
            # Type-level items present — report these crosses only
            for cr in cg.crosses:
                covered = sum(1 for b in cr.bins if b.hit)
                result.append(CrossStats(
                    name=cr.name,
                    path=f'{cg_path}/{cr.name}',
                    bins=BinStats(total=len(cr.bins), covered=covered),
                    weight=cr.weight,
                ))
        else:
            # No type-level items — recurse into sub-instances only
            for sub in cg.covergroups:
                _walk_cg(sub, cg_path)

    for cg in self.report.covergroups:
        _walk_cg(cg, '')
    return result
This method uses direct DB + queries and is primarily intended for code-coverage types. + + Parameters + ---------- + test_filter: + Logical name of a test; when given, only items contributed by + that test are counted. SQLite only; ignored on other backends. + """ + from ucis.cover_type_t import CoverTypeT + + # For CVGBIN with no filter, use the canonical computation. + if cov_type == CoverTypeT.CVGBIN and test_filter is None: + return self.functional_bins() + + cache_key = f'bins_by_type_{int(cov_type)}_{test_filter}' + return self._cached(cache_key, + lambda: self._query_bins_by_type(cov_type, test_filter)) + + def _query_bins_by_type(self, cov_type, test_filter: Optional[str]) -> BinStats: + from ucis.scope_type_t import ScopeTypeT + + # SQLite fast path + if hasattr(self._db, 'conn'): + try: + conn = self._db.conn + if test_filter: + row = conn.execute( + """SELECT COUNT(*), + SUM(CASE WHEN ci.cover_data >= ci.at_least THEN 1 ELSE 0 END) + FROM coveritems ci + JOIN coveritem_tests ct ON ct.cover_id = ci.cover_id + JOIN history_nodes hn ON hn.history_id = ct.history_id + WHERE (ci.cover_type & ?) != 0 + AND hn.logical_name = ?""", + (int(cov_type), test_filter) + ).fetchone() + else: + row = conn.execute( + """SELECT COUNT(*), + SUM(CASE WHEN cover_data >= at_least THEN 1 ELSE 0 END) + FROM coveritems + WHERE (cover_type & ?) 
!= 0""", + (int(cov_type),) + ).fetchone() + return BinStats(total=row[0] or 0, covered=row[1] or 0) + except Exception: + pass + + # API fallback + total = 0 + covered = 0 + + def _visit(scope): + nonlocal total, covered + try: + for item in scope.coverItems(cov_type): + total += 1 + cd = item.getCoverData() + if cd and cd.data >= cd.at_least: + covered += 1 + except Exception: + pass + try: + for child in scope.scopes(ScopeTypeT.ALL): + _visit(child) + except Exception: + pass + + for scope in self._db.scopes(ScopeTypeT.ALL): + _visit(scope) + + return BinStats(total=total, covered=covered) + + def code_coverage_by_type(self) -> Dict: + """ + Per-type ``BinStats`` for all non-functional coverage item types + (STMTBIN, BRANCHBIN, TOGGLEBIN, EXPRBIN, CONDBIN, FSMBIN, BLOCKBIN). + """ + return self._cached('code_coverage_by_type', self._compute_code_coverage) + + def _compute_code_coverage(self) -> Dict: + from ucis.cover_type_t import CoverTypeT + code_types = [ + CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN, + CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN, CoverTypeT.FSMBIN, + CoverTypeT.BLOCKBIN, + ] + + # SQLite fast path — single query for all types + if hasattr(self._db, 'conn'): + try: + rows = self._db.conn.execute( + """SELECT cover_type, + COUNT(*) AS total, + SUM(CASE WHEN cover_data >= at_least THEN 1 ELSE 0 END) AS covered + FROM coveritems + GROUP BY cover_type""" + ).fetchall() + int_to_type = {int(ct): ct for ct in code_types} + result = {ct: BinStats() for ct in code_types} + for row in rows: + ct = int_to_type.get(row[0]) + if ct is not None: + result[ct] = BinStats(total=row[1] or 0, covered=row[2] or 0) + return result + except Exception: + pass + + # API fallback + result = {} + for ct in code_types: + result[ct] = self._query_bins_by_type(ct, test_filter=None) + return result + + def file_coverage(self, test_filter: Optional[str] = None) -> List[FileCoverageStats]: + """ + Per-source-file code-coverage statistics. 
+ + Requires a SQLite backend; returns an empty list for other backends. + + Parameters + ---------- + test_filter: + Restrict counts to items contributed by the named test. + """ + cache_key = f'file_coverage_{test_filter}' + return self._cached(cache_key, lambda: self._compute_file_coverage(test_filter)) + + def _compute_file_coverage(self, test_filter: Optional[str]) -> List[FileCoverageStats]: + if not hasattr(self._db, 'conn'): + return [] + + from ucis.cover_type_t import CoverTypeT + STMT = int(CoverTypeT.STMTBIN) + BRANCH = int(CoverTypeT.BRANCHBIN) + TOGGLE = int(CoverTypeT.TOGGLEBIN) + EXPR = int(CoverTypeT.EXPRBIN) + COND = int(CoverTypeT.CONDBIN) + FSM = int(CoverTypeT.FSMBIN) + BLOCK = int(CoverTypeT.BLOCKBIN) + + conn = self._db.conn + + try: + if test_filter: + # Get covered item IDs for this test first + rows = conn.execute( + """SELECT ci.cover_id FROM coveritems ci + JOIN coveritem_tests ct ON ct.cover_id = ci.cover_id + JOIN history_nodes hn ON hn.history_id = ct.history_id + WHERE hn.logical_name = ?""", + (test_filter,) + ).fetchall() + if not rows: + return [] + id_csv = ','.join(str(r[0]) for r in rows) + filter_clause = f'AND ci.cover_id IN ({id_csv})' + else: + filter_clause = '' + + def _stat(type_int: int, rows_by_file: dict, file_id: int) -> BinStats: + row = rows_by_file.get((file_id, type_int)) + if row is None: + return BinStats() + return BinStats(total=row[0], covered=row[1]) + + # One query: group by (file_id, cover_type) + sql = f""" + SELECT + f.file_id, + f.file_path, + ci.cover_type, + COUNT(*) AS total, + SUM(CASE WHEN ci.cover_data >= ci.at_least THEN 1 ELSE 0 END) AS covered + FROM files f + JOIN coveritems ci ON f.file_id = ci.source_file_id + WHERE ci.cover_type IN (?,?,?,?,?,?,?) 
+ {filter_clause} + GROUP BY f.file_id, f.file_path, ci.cover_type + ORDER BY f.file_path + """ + rows = conn.execute(sql, (STMT, BRANCH, TOGGLE, EXPR, COND, FSM, BLOCK)).fetchall() + + # Build {file_id → FileCoverageStats} + files: Dict[int, FileCoverageStats] = {} + file_paths: Dict[int, str] = {} + by_file_type: Dict[tuple, tuple] = {} + for row in rows: + fid = row[0] + file_paths[fid] = row[1] + by_file_type[(fid, row[2])] = (row[3], row[4]) + + for fid, fpath in file_paths.items(): + fcs = FileCoverageStats( + file_id=fid, + file_path=fpath, + line=BinStats(*by_file_type.get((fid, STMT), (0, 0))), + branch=BinStats(*by_file_type.get((fid, BRANCH), (0, 0))), + toggle=BinStats(*by_file_type.get((fid, TOGGLE), (0, 0))), + expr=BinStats(*by_file_type.get((fid, EXPR), (0, 0))), + cond=BinStats(*by_file_type.get((fid, COND), (0, 0))), + fsm=BinStats(*by_file_type.get((fid, FSM), (0, 0))), + block=BinStats(*by_file_type.get((fid, BLOCK), (0, 0))), + ) + files[fid] = fcs + + return sorted(files.values(), key=lambda f: f.file_path) + except Exception: + return [] + + # ------------------------------------------------------------------ tests + + def tests(self) -> List[TestInfo]: + """ + All tests with identity and (where available) contribution metadata. + + ``total_items`` and ``unique_items`` are populated only for SQLite + backends; they are 0 for XML / memory backends. 
+ """ + return self._cached('tests', self._compute_tests) + + def _compute_tests(self) -> List[TestInfo]: + from ucis.history_node_kind import HistoryNodeKind + try: + from ucis import UCIS_TESTSTATUS_OK + except ImportError: + UCIS_TESTSTATUS_OK = 1 + + result: List[TestInfo] = [] + + # SQLite path: use test-coverage API for contribution data + if hasattr(self._db, 'conn'): + try: + from ucis.sqlite.sqlite_test_coverage import SqliteTestCoverage + api = SqliteTestCoverage(self._db) + contribs = api.get_all_test_contributions() + contrib_map = {c.test_name: c for c in contribs} + + for node in self._db.historyNodes(HistoryNodeKind.TEST): + name = node.getLogicalName() or 'Unknown' + contrib = contrib_map.get(name) + + status = 'UNKNOWN' + try: + raw = node.getTestStatus() + status = 'PASSED' if raw == UCIS_TESTSTATUS_OK else 'FAILED' + except Exception: + pass + + date = 'Unknown' + try: + d = node.getDate() + if d: + date = str(d) + except Exception: + pass + + result.append(TestInfo( + history_id=getattr(node, 'history_id', -1), + name=name, + status=status, + date=date, + total_items=contrib.total_items if contrib else 0, + unique_items=contrib.unique_items if contrib else 0, + )) + return result + except Exception: + pass + + # API fallback (no contribution data) + try: + for node in self._db.historyNodes(HistoryNodeKind.TEST): + name = node.getLogicalName() or 'Unknown' + + status = 'UNKNOWN' + try: + raw = node.getTestStatus() + status = 'PASSED' if raw == UCIS_TESTSTATUS_OK else 'FAILED' + except Exception: + pass + + date = 'Unknown' + try: + d = node.getDate() + if d: + date = str(d) + except Exception: + pass + + result.append(TestInfo( + history_id=-1, + name=name, + status=status, + date=date, + )) + except Exception: + pass + + return result + + # -------------------------------------------------------- summary helpers + + def summary(self) -> Dict: + """ + High-level summary dict — backward-compatible replacement for + ``CoverageModel.get_summary()``. 
+ + ``total_bins`` / ``covered_bins`` reflect ALL coverage items in the DB + (functional + code coverage). When the database contains functional + coverage (CVGBIN), ``overall_coverage`` is derived from functional + bins only (to preserve UCIS semantics); otherwise it is derived from + all available items. + """ + return self._cached('summary', self._compute_summary) + + def _compute_summary(self) -> Dict: + fb = self.functional_bins() + cg_stats = self.covergroup_stats() + cp_stats = self.coverpoint_stats() + + if fb.total > 0: + # Use functional coverage as the primary metric + return { + 'overall_coverage': fb.coverage_pct, + 'total_bins': fb.total, + 'covered_bins': fb.covered, + 'covergroups': len(cg_stats), + 'coverpoints': len(cp_stats), + } + else: + # No functional coverage — aggregate code coverage items + total = 0 + covered = 0 + from ucis.cover_type_t import CoverTypeT + code_types = [ + CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN, + CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN, CoverTypeT.FSMBIN, + CoverTypeT.BLOCKBIN, + ] + for ct in code_types: + bs = self.bins_by_type(ct) + total += bs.total + covered += bs.covered + pct = (covered / total * 100.0) if total > 0 else 0.0 + return { + 'overall_coverage': pct, + 'total_bins': total, + 'covered_bins': covered, + 'covergroups': 0, + 'coverpoints': 0, + } + + def database_info(self) -> Dict: + """ + Database metadata — backward-compatible replacement for + ``CoverageModel.get_database_info()``. 
+ """ + return { + 'path': getattr(self._db, 'db_path', + getattr(self._db, '_db_path', '')), + 'format': 'UCIS', + 'test_count': len(self.tests()), + } diff --git a/src/ucis/tui/app.py b/src/ucis/tui/app.py index b0becaa..9a866b8 100644 --- a/src/ucis/tui/app.py +++ b/src/ucis/tui/app.py @@ -92,6 +92,7 @@ def _initialize_views(self): from ucis.tui.views.metrics_view import MetricsView from ucis.tui.views.code_coverage_view import CodeCoverageView from ucis.tui.views.test_history_view import TestHistoryView + from ucis.tui.views.testplan_view import TestplanView # Create views and register with controller views = { @@ -102,6 +103,7 @@ def _initialize_views(self): "metrics": MetricsView(self), "code_coverage": CodeCoverageView(self), "test_history": TestHistoryView(self), + "testplan": TestplanView(self), } for name, view in views.items(): diff --git a/src/ucis/tui/components/help_overlay.py b/src/ucis/tui/components/help_overlay.py index c55f66f..234912d 100644 --- a/src/ucis/tui/components/help_overlay.py +++ b/src/ucis/tui/components/help_overlay.py @@ -53,6 +53,7 @@ def _create_help_content(self): view_table.add_row("5", "Metrics - Statistics (coming soon)") view_table.add_row("6", "Code Coverage - File-level code coverage") view_table.add_row("7", "Test History - Test contribution analysis") + view_table.add_row("8", "Testplan - Closure status and stage gates") # Navigation nav_table = Table(show_header=True, header_style="bold yellow", box=None, padding=(0, 2)) @@ -78,6 +79,7 @@ def _create_help_content(self): view_specific_table.add_row("Hierarchy", "Navigate design structure (E=expand all, C=collapse all, /=search)") view_specific_table.add_row("Gaps", "Find uncovered bins, sort by coverage %") view_specific_table.add_row("Test History", "View tests (N/D/C/U=sort, F=filter)") + view_specific_table.add_row("Testplan", "Testpoint closure (↑↓=navigate, r=refresh)") # Color coding color_table = Table(show_header=True, header_style="bold yellow", box=None, 
padding=(0, 2)) diff --git a/src/ucis/tui/controller.py b/src/ucis/tui/controller.py index 03f189e..401671b 100644 --- a/src/ucis/tui/controller.py +++ b/src/ucis/tui/controller.py @@ -47,6 +47,7 @@ def __init__(self, coverage_model, on_quit: Optional[Callable] = None): "metrics", "code_coverage", "test_history", + "testplan", ] def register_view(self, name: str, view): @@ -187,6 +188,7 @@ def _handle_global_key(self, key: str) -> bool: '5': 'metrics', '6': 'code_coverage', '7': 'test_history', + '8': 'testplan', } if key in view_map: diff --git a/src/ucis/tui/keybindings.py b/src/ucis/tui/keybindings.py index 4e50b73..b8cb020 100644 --- a/src/ucis/tui/keybindings.py +++ b/src/ucis/tui/keybindings.py @@ -50,6 +50,7 @@ def handle_global_key(self, key: str) -> bool: '5': 'metrics', '6': 'code_coverage', '7': 'test_history', + '8': 'testplan', } if key in view_map: diff --git a/src/ucis/tui/models/coverage_model.py b/src/ucis/tui/models/coverage_model.py index 9da7aee..4f9e4fb 100644 --- a/src/ucis/tui/models/coverage_model.py +++ b/src/ucis/tui/models/coverage_model.py @@ -2,6 +2,8 @@ Coverage data model wrapper. Provides a convenient interface to PyUCIS API with caching. +All metric computation is delegated to ``CoverageMetrics`` — the single +source of truth — to ensure consistency across TUI, CLI and report output. """ from typing import Dict, Any, Optional, List, Set from ucis.rgy.format_rgy import FormatRgy @@ -12,6 +14,8 @@ class CoverageModel: """ Wraps PyUCIS API with caching and convenience methods. + + Delegates all metric computation to :class:`~ucis.report.coverage_metrics.CoverageMetrics`. 
""" def __init__(self, db_path: str, input_format: Optional[str] = None): @@ -26,6 +30,7 @@ def __init__(self, db_path: str, input_format: Optional[str] = None): self.db = None self._cache: Dict[str, Any] = {} self.test_filter: Optional[str] = None # Current test filter + self._metrics = None self._load_database(input_format) def _load_database(self, input_format: Optional[str] = None): @@ -46,6 +51,18 @@ def _load_database(self, input_format: Optional[str] = None): input_desc = rgy.getDatabaseDesc(input_format) input_if = input_desc.fmt_if() self.db = input_if.read(self.db_path) + self._metrics = None # reset on reload + + @property + def metrics(self): + """ + Lazily-constructed :class:`~ucis.report.coverage_metrics.CoverageMetrics` + instance. This is the canonical source of all coverage numbers. + """ + if self._metrics is None and self.db is not None: + from ucis.report.coverage_metrics import CoverageMetrics + self._metrics = CoverageMetrics(self.db) + return self._metrics def get_summary(self) -> Dict[str, Any]: """ @@ -56,86 +73,13 @@ def get_summary(self) -> Dict[str, Any]: """ if 'summary' in self._cache: return self._cache['summary'] - - # Compute summary - summary = { - 'overall_coverage': 0.0, - 'total_bins': 0, - 'covered_bins': 0, - 'covergroups': 0, - 'coverpoints': 0, - 'by_type': {} - } - - # Fast path for SQLite backends - if self.db and hasattr(self.db, 'conn'): - from ucis.cover_type_t import CoverTypeT - from ucis.scope_type_t import ScopeTypeT - try: - conn = self.db.conn - summary['covergroups'] = conn.execute( - "SELECT COUNT(*) FROM scopes WHERE (scope_type & ?) != 0", - (int(ScopeTypeT.COVERGROUP),) - ).fetchone()[0] - summary['coverpoints'] = conn.execute( - "SELECT COUNT(*) FROM scopes WHERE (scope_type & ?) != 0", - (int(ScopeTypeT.COVERPOINT),) - ).fetchone()[0] - row = conn.execute( - """SELECT COUNT(*), - SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END) - FROM coveritems - WHERE (cover_type & ?) 
!= 0""", - (int(CoverTypeT.CVGBIN),) - ).fetchone() - summary['total_bins'] = row[0] or 0 - summary['covered_bins'] = row[1] or 0 - if summary['total_bins'] > 0: - summary['overall_coverage'] = (summary['covered_bins'] / summary['total_bins']) * 100 - self._cache['summary'] = summary - return summary - except Exception: - pass - # Walk through database to compute statistics (fallback) - def visit_scope(scope, depth=0): - from ucis.scope_type_t import ScopeTypeT - from ucis.cover_type_t import CoverTypeT - - scope_type = scope.getScopeType() - if scope_type in (ScopeTypeT.COVERGROUP,): - summary['covergroups'] += 1 - # Covergroups have coverpoints - for cp in scope.scopes(ScopeTypeT.COVERPOINT): - summary['coverpoints'] += 1 - # Coverpoints have bins - try: - for bin_idx in cp.coverItems(CoverTypeT.CVGBIN): - summary['total_bins'] += 1 - cover_data = bin_idx.getCoverData() - if cover_data: - # Check if bin has been hit (data > 0 or data >= goal) - if cover_data.data > 0: - summary['covered_bins'] += 1 - except Exception as e: - pass - - # Visit children recursively - try: - for child in scope.scopes(ScopeTypeT.ALL): - visit_scope(child, depth + 1) - except: - pass - - if self.db: - from ucis.scope_type_t import ScopeTypeT - for scope in self.db.scopes(ScopeTypeT.ALL): - visit_scope(scope) - - # Calculate percentage - if summary['total_bins'] > 0: - summary['overall_coverage'] = (summary['covered_bins'] / summary['total_bins']) * 100 - + summary = {'overall_coverage': 0.0, 'total_bins': 0, 'covered_bins': 0, + 'covergroups': 0, 'coverpoints': 0, 'by_type': {}} + + if self.metrics is not None: + summary.update(self.metrics.summary()) + self._cache['summary'] = summary return summary @@ -146,27 +90,10 @@ def get_database_info(self) -> Dict[str, Any]: Returns: Dictionary with database information """ - info = { - 'path': self.db_path, - 'format': 'UCIS', - 'test_count': 0, - } - - # Get test data if available - if self.db: - try: - if hasattr(self.db, 'conn'): - from 
ucis.history_node_kind import HistoryNodeKind - info['test_count'] = self.db.conn.execute( - "SELECT COUNT(*) FROM history_nodes WHERE history_kind = ?", - (int(HistoryNodeKind.TEST),) - ).fetchone()[0] - else: - tests = self.get_all_tests() - info['test_count'] = len(tests) - except: - pass - + info = {'path': self.db_path, 'format': 'UCIS', 'test_count': 0} + if self.metrics is not None: + m_info = self.metrics.database_info() + info['test_count'] = m_info.get('test_count', 0) return info def close(self): @@ -182,56 +109,9 @@ def get_coverage_types(self) -> List[CoverTypeT]: Returns: List of CoverTypeT values found in the database """ - if 'coverage_types' in self._cache: - return self._cache['coverage_types'] - - # Fast path for SQLite backend - if self.db and hasattr(self.db, 'conn'): - try: - rows = self.db.conn.execute( - "SELECT DISTINCT cover_type FROM coveritems ORDER BY cover_type" - ).fetchall() - types_list = [] - for r in rows: - if r[0] is None: - continue - try: - types_list.append(CoverTypeT(r[0])) - except Exception: - pass - self._cache['coverage_types'] = types_list - return types_list - except Exception: - pass - - types_found: Set[CoverTypeT] = set() - - def visit_scope(scope): - # Check all coverage item types in this scope - for cov_type in [CoverTypeT.CVGBIN, CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, - CoverTypeT.TOGGLEBIN, CoverTypeT.EXPRBIN, CoverTypeT.CONDBIN, - CoverTypeT.FSMBIN, CoverTypeT.BLOCKBIN]: - try: - items = list(scope.coverItems(cov_type)) - if items: - types_found.add(cov_type) - except: - pass - - # Visit children recursively - try: - for child in scope.scopes(ScopeTypeT.ALL): - visit_scope(child) - except: - pass - - if self.db: - for scope in self.db.scopes(ScopeTypeT.ALL): - visit_scope(scope) - - types_list = sorted(list(types_found), key=lambda t: int(t)) - self._cache['coverage_types'] = types_list - return types_list + if self.metrics is not None: + return self.metrics.coverage_types_present() + return [] def 
get_code_coverage_summary(self) -> Dict[str, Any]: """ @@ -242,78 +122,35 @@ def get_code_coverage_summary(self) -> Dict[str, Any]: """ if 'code_coverage_summary' in self._cache: return self._cache['code_coverage_summary'] - + summary = { - 'line': {'total': 0, 'covered': 0, 'coverage': 0.0}, - 'branch': {'total': 0, 'covered': 0, 'coverage': 0.0}, - 'toggle': {'total': 0, 'covered': 0, 'coverage': 0.0}, + 'line': {'total': 0, 'covered': 0, 'coverage': 0.0}, + 'branch': {'total': 0, 'covered': 0, 'coverage': 0.0}, + 'toggle': {'total': 0, 'covered': 0, 'coverage': 0.0}, 'expression': {'total': 0, 'covered': 0, 'coverage': 0.0}, - 'condition': {'total': 0, 'covered': 0, 'coverage': 0.0}, - 'fsm': {'total': 0, 'covered': 0, 'coverage': 0.0}, - 'block': {'total': 0, 'covered': 0, 'coverage': 0.0}, - } - - type_map = { - CoverTypeT.STMTBIN: 'line', - CoverTypeT.BRANCHBIN: 'branch', - CoverTypeT.TOGGLEBIN: 'toggle', - CoverTypeT.EXPRBIN: 'expression', - CoverTypeT.CONDBIN: 'condition', - CoverTypeT.FSMBIN: 'fsm', - CoverTypeT.BLOCKBIN: 'block', + 'condition': {'total': 0, 'covered': 0, 'coverage': 0.0}, + 'fsm': {'total': 0, 'covered': 0, 'coverage': 0.0}, + 'block': {'total': 0, 'covered': 0, 'coverage': 0.0}, } - # Fast path for SQLite backend - if self.db and hasattr(self.db, 'conn'): - try: - rows = self.db.conn.execute( - """SELECT cover_type, - COUNT(*) AS total, - SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END) AS covered - FROM coveritems - GROUP BY cover_type""" - ).fetchall() - int_type_map = {int(k): v for k, v in type_map.items()} - for row in rows: - key = int_type_map.get(row[0]) - if key is not None: - summary[key]['total'] = row[1] or 0 - summary[key]['covered'] = row[2] or 0 - for key in summary: - if summary[key]['total'] > 0: - summary[key]['coverage'] = (summary[key]['covered'] / summary[key]['total']) * 100 - self._cache['code_coverage_summary'] = summary - return summary - except Exception: - pass - - def visit_scope(scope): - for cov_type, key in 
type_map.items(): - try: - for item in scope.coverItems(cov_type): - summary[key]['total'] += 1 - cover_data = item.getCoverData() - if cover_data and cover_data.data > 0: - summary[key]['covered'] += 1 - except: - pass - - # Visit children - try: - for child in scope.scopes(ScopeTypeT.ALL): - visit_scope(child) - except: - pass - - if self.db: - for scope in self.db.scopes(ScopeTypeT.ALL): - visit_scope(scope) - - # Calculate percentages - for key in summary: - if summary[key]['total'] > 0: - summary[key]['coverage'] = (summary[key]['covered'] / summary[key]['total']) * 100 - + if self.metrics is not None: + type_map = { + CoverTypeT.STMTBIN: 'line', + CoverTypeT.BRANCHBIN: 'branch', + CoverTypeT.TOGGLEBIN: 'toggle', + CoverTypeT.EXPRBIN: 'expression', + CoverTypeT.CONDBIN: 'condition', + CoverTypeT.FSMBIN: 'fsm', + CoverTypeT.BLOCKBIN: 'block', + } + by_type = self.metrics.code_coverage_by_type() + for ct, key in type_map.items(): + bs = by_type.get(ct) + if bs: + summary[key]['total'] = bs.total + summary[key]['covered'] = bs.covered + summary[key]['coverage'] = bs.coverage_pct + self._cache['code_coverage_summary'] = summary return summary @@ -328,90 +165,17 @@ def get_coverage_by_type(self, cov_type: CoverTypeT, filtered: bool = True) -> D Returns: Dictionary with total, covered, and percentage """ - # Check if filtering is needed - filter_active = filtered and self.test_filter is not None - cache_key = f'coverage_type_{int(cov_type)}' - if filter_active: - cache_key += f'_filter_{self.test_filter}' - + test_filter = self.test_filter if filtered else None + cache_key = f'coverage_type_{int(cov_type)}_{test_filter}' if cache_key in self._cache: return self._cache[cache_key] - - # Get filtered coveritem IDs if needed - filtered_ids = None - if filter_active: - filtered_ids = self.get_coveritems_for_test(self.test_filter) - - result = { - 'type': cov_type, - 'total': 0, - 'covered': 0, - 'coverage': 0.0 - } - # Fast path for SQLite backend - if self.db and 
hasattr(self.db, 'conn'): - try: - if filter_active: - row = self.db.conn.execute( - """SELECT COUNT(*), - SUM(CASE WHEN ci.cover_data > 0 THEN 1 ELSE 0 END) - FROM coveritems ci - JOIN coveritem_tests ct ON ct.cover_id = ci.cover_id - JOIN history_nodes hn ON hn.history_id = ct.history_id - WHERE (ci.cover_type & ?) != 0 - AND hn.logical_name = ?""", - (int(cov_type), self.test_filter) - ).fetchone() - else: - row = self.db.conn.execute( - """SELECT COUNT(*), - SUM(CASE WHEN cover_data > 0 THEN 1 ELSE 0 END) - FROM coveritems - WHERE (cover_type & ?) != 0""", - (int(cov_type),) - ).fetchone() - result['total'] = row[0] or 0 - result['covered'] = row[1] or 0 - if result['total'] > 0: - result['coverage'] = (result['covered'] / result['total']) * 100 - self._cache[cache_key] = result - return result - except Exception: - pass - - def visit_scope(scope): - try: - for item in scope.coverItems(cov_type): - # If filtering, check if this item is in the filtered set - if filter_active: - # Get the coveritem ID - item_id = item.cover_id if hasattr(item, 'cover_id') else item.getKey() - if item_id not in filtered_ids: - continue - - result['total'] += 1 - cover_data = item.getCoverData() - if cover_data and cover_data.data > 0: - result['covered'] += 1 - except: - pass - - # Visit children - try: - for child in scope.scopes(ScopeTypeT.ALL): - visit_scope(child) - except: - pass - - if self.db: - for scope in self.db.scopes(ScopeTypeT.ALL): - visit_scope(scope) - - # Calculate percentage - if result['total'] > 0: - result['coverage'] = (result['covered'] / result['total']) * 100 - + result = {'type': cov_type, 'total': 0, 'covered': 0, 'coverage': 0.0} + if self.metrics is not None: + bs = self.metrics.bins_by_type(cov_type, test_filter=test_filter) + result['total'] = bs.total + result['covered'] = bs.covered + result['coverage'] = bs.coverage_pct self._cache[cache_key] = result return result @@ -424,97 +188,17 @@ def get_all_tests(self) -> List[Dict[str, Any]]: """ if 
'all_tests' in self._cache: return self._cache['all_tests'] - + tests = [] - - if not self.db: - return tests - - # Try to get test coverage API if available - try: - from ucis.sqlite.sqlite_test_coverage import SqliteTestCoverage - - # Check if this is a SQLite database with test coverage support - if hasattr(self.db, 'conn'): - api = SqliteTestCoverage(self.db) # Pass the SqliteUCIS object, not just conn - - # Get all tests and their contributions - all_contribs = api.get_all_test_contributions() - - # Create test dictionary for each test - # all_contribs is a list of TestCoverageInfo objects - for contrib in all_contribs: - test_info = { - 'name': contrib.test_name, - 'status': 'PASSED', # Default, will try to get from history - 'date': 'Unknown', - 'total_items': contrib.total_items, - 'unique_items': contrib.unique_items, - } - - # Try to get additional info from history node - try: - for history_node in self.db.historyNodes(): - if history_node.getLogicalName() == contrib.test_name: - # Get status (UCIS_TESTSTATUS_OK = 1, anything else is failure) - try: - from ucis import UCIS_TESTSTATUS_OK - status = history_node.getTestStatus() - if status == UCIS_TESTSTATUS_OK: - test_info['status'] = 'PASSED' - else: - test_info['status'] = 'FAILED' - except: - pass - - # Get date - try: - date = history_node.getDate() - if date: - test_info['date'] = date - except: - pass - - break - except: - pass - - tests.append(test_info) - except: - # Fallback: just enumerate history nodes without contribution data - try: - from ucis.history_node_kind import HistoryNodeKind - from ucis import UCIS_TESTSTATUS_OK - - for history_node in self.db.historyNodes(HistoryNodeKind.TEST): - test_info = { - 'name': history_node.getLogicalName() or 'Unknown', - 'status': 'UNKNOWN', - 'date': 'Unknown', - 'total_items': 0, - 'unique_items': 0, - } - - try: - status = history_node.getTestStatus() - if status == UCIS_TESTSTATUS_OK: - test_info['status'] = 'PASSED' - else: - test_info['status'] = 
'FAILED' - except: - pass - - try: - date = history_node.getDate() - if date: - test_info['date'] = date - except: - pass - - tests.append(test_info) - except: - pass - + if self.metrics is not None: + for ti in self.metrics.tests(): + tests.append({ + 'name': ti.name, + 'status': ti.status, + 'date': ti.date, + 'total_items': ti.total_items, + 'unique_items': ti.unique_items, + }) self._cache['all_tests'] = tests return tests @@ -526,10 +210,10 @@ def set_test_filter(self, test_name: Optional[str]): test_name: Name of test to filter by, or None to clear filter """ self.test_filter = test_name - - # Clear relevant caches when filter changes - if 'code_coverage_summary' in self._cache: - del self._cache['code_coverage_summary'] + # Invalidate all caches — metrics and coverage-type caches all depend on filter + self._cache.clear() + if self._metrics is not None: + self._metrics.invalidate() def get_test_filter(self) -> Optional[str]: """ @@ -582,3 +266,66 @@ def get_coveritems_for_test(self, test_name: str) -> Set[int]: import traceback traceback.print_exc() return set() + + def get_testplan_closure(self) -> dict: + """Compute testplan closure using the embedded testplan (if any). + + Returns a dict with keys ``results`` (list of row dicts) and + ``summary`` (ClosureSummary-derived dict), or empty when no testplan + is available. + + Each row dict contains: testpoint, stage, status, pass_count, + fail_count, matched_tests, desc. 
+ """ + if 'testplan_closure' in self._cache: + return self._cache['testplan_closure'] + + result = {"results": [], "summary": None} + try: + from ucis.ncdb.testplan import get_testplan + from ucis.ncdb.testplan_closure import TPStatus, compute_closure + from ucis.ncdb.reports import report_testpoint_closure, _STATUS_LABEL + + plan = get_testplan(self.db) + if plan is None: + self._cache['testplan_closure'] = result + return result + + tp_results = compute_closure(plan, self.db) + summary = report_testplan_closure = report_testpoint_closure(tp_results) + + rows = [] + for r in tp_results: + rows.append({ + "testpoint": r.testpoint.name, + "stage": r.testpoint.stage or "?", + "status": _STATUS_LABEL[r.status], + "pass_count": r.pass_count, + "fail_count": r.fail_count, + "matched_tests": r.matched_tests, + "desc": r.testpoint.desc or "", + }) + + # Serialisable summary for the header + summary_dict = { + "total": summary.total, + "total_closed": summary.total_closed, + "total_na": summary.total_na, + "by_stage": summary.by_stage, + } + result = {"results": rows, "summary": summary_dict} + except Exception: + pass + + self._cache['testplan_closure'] = result + return result + + def get_v2_test_stats(self, test_name: str): + """Return v2 TestStatsEntry for *test_name* when db is NcdbUCIS. + + Returns None when v2 history is unavailable or the test is unknown. 
+ """ + try: + return self.db.get_test_stats(test_name) + except Exception: + return None diff --git a/src/ucis/tui/views/gaps_view.py b/src/ucis/tui/views/gaps_view.py index b443753..e75374b 100644 --- a/src/ucis/tui/views/gaps_view.py +++ b/src/ucis/tui/views/gaps_view.py @@ -48,56 +48,24 @@ def on_enter(self): self._collect_gaps() def _collect_gaps(self): - """Collect all gaps from the database.""" - from ucis.scope_type_t import ScopeTypeT - + """Collect all gaps from the database via the common metrics layer.""" self.gaps = [] - - def visit_scope(scope, path=""): - scope_type = scope.getScopeType() - scope_name = scope.getScopeName() - current_path = f"{path}/{scope_name}" if path else scope_name - - # Check coverpoints for gaps - if scope_type == ScopeTypeT.COVERPOINT: - total_bins = 0 - covered_bins = 0 - - try: - for bin_idx in scope.coverItems(CoverTypeT.CVGBIN): - total_bins += 1 - cover_data = bin_idx.getCoverData() - if cover_data and cover_data.data > 0: - covered_bins += 1 - except: - pass - - if total_bins > 0: - coverage = (covered_bins / total_bins) * 100 - if coverage < self.threshold: - gap = GapItem( - name=scope_name, - scope_type="Coverpoint", - coverage=coverage, - hits=covered_bins, - goal=total_bins, - path=current_path - ) - self.gaps.append(gap) - - # Recurse into children - try: - for child in scope.scopes(ScopeTypeT.ALL): - visit_scope(child, current_path) - except: - pass - + try: - for scope in self.model.db.scopes(ScopeTypeT.ALL): - visit_scope(scope) - except: + cp_stats = self.model.metrics.coverpoint_stats() + for cp in cp_stats: + if cp.coverage_pct < self.threshold: + self.gaps.append(GapItem( + name=cp.name, + scope_type="Coverpoint", + coverage=cp.coverage_pct, + hits=cp.bins.covered, + goal=cp.bins.total, + path=cp.path, + )) + except Exception: pass - + # Sort by coverage (lowest first) self.gaps.sort(key=lambda g: g.coverage) diff --git a/src/ucis/tui/views/test_history_view.py b/src/ucis/tui/views/test_history_view.py 
index 408d1fb..c88851a 100644 --- a/src/ucis/tui/views/test_history_view.py +++ b/src/ucis/tui/views/test_history_view.py @@ -194,6 +194,22 @@ def _render_test_details(self) -> Text: if total > 0: unique_pct = (unique / total) * 100 details.append(f" Unique %: {unique_pct:.1f}%\n") + + # v2 history stats (NcdbUCIS only — fails gracefully) + try: + v2stats = self.model.get_v2_test_stats(test.get('name', '')) + if v2stats is not None and v2stats.total_runs > 0: + details.append("\nV2 History:\n", style="bold") + details.append(f" Total runs: {v2stats.total_runs}\n") + details.append(f" Pass/Fail: {v2stats.pass_count}/{v2stats.fail_count}\n") + flake = v2stats.flake_score + flake_style = "red" if flake >= 0.3 else ("yellow" if flake > 0.1 else "green") + details.append(f" Flake score: ", style="bold") + details.append(f"{flake:.3f}\n", style=flake_style) + if v2stats.mean_cpu_time > 0: + details.append(f" Mean CPU: {v2stats.mean_cpu_time:.1f}s\n") + except Exception: + pass details.append("\n") diff --git a/src/ucis/tui/views/testplan_view.py b/src/ucis/tui/views/testplan_view.py new file mode 100644 index 0000000..56ed47f --- /dev/null +++ b/src/ucis/tui/views/testplan_view.py @@ -0,0 +1,235 @@ +"""Testplan Closure View — TUI view showing testpoint closure status. + +Shows all testpoints with their stage, closure status, and pass/fail +counts. Includes a stage gate summary header and supports scrolling, +sorting, and a detail panel for the selected testpoint. 
class TestplanView(BaseView):
    """TUI view for testplan closure status (key '8').

    Shows a summary header, a scrollable table with one row per testpoint
    and a detail panel for the highlighted row.  Closure data is obtained
    from ``self.model.get_testplan_closure()`` the first time the view is
    entered and again whenever the user presses ``r``.
    """

    def __init__(self, app):
        super().__init__(app)
        self.results = []        # row dicts, one per testpoint
        self.summary = None      # summary dict, or None when unavailable
        self.selected_index = 0  # index into self.results of the cursor row
        self.scroll_offset = 0   # index of the first row currently shown
        self.visible_rows = 20   # maximum number of table rows drawn
        self._loaded = False

    @staticmethod
    def _esc(value) -> str:
        """Return *value* as text that Rich prints literally.

        Testpoint names, stages, descriptions and test names come from the
        embedded testplan; without escaping, a bracketed fragment in any of
        them would be interpreted as console markup.
        """
        from rich.markup import escape

        return escape(str(value))

    def on_enter(self):
        """Load closure data lazily the first time the view is shown."""
        super().on_enter()
        if not self._loaded:
            self._load_closure()

    def _load_closure(self):
        """Load testplan closure results from the model.

        Failures are tolerated: the view then simply shows the
        "No testplan found" placeholder.  The cursor and scroll position are
        reset on every load.
        """
        self.results = []
        self.summary = None
        try:
            data = self.model.get_testplan_closure()
            self.results = data.get("results", [])
            self.summary = data.get("summary", None)
        except Exception:
            # Best-effort: leave results empty so render() shows the hint.
            pass
        self._loaded = True
        self.selected_index = 0
        self.scroll_offset = 0

    def _adjust_scroll(self):
        """Move the scroll window just enough to keep the cursor row visible."""
        if self.selected_index < self.scroll_offset:
            self.scroll_offset = self.selected_index
        elif self.selected_index >= self.scroll_offset + self.visible_rows:
            self.scroll_offset = self.selected_index - self.visible_rows + 1

    # ------------------------------------------------------------------
    # Rendering
    # ------------------------------------------------------------------

    def render(self):
        """Return the renderable for the whole view.

        Returns a placeholder panel when there are no results, otherwise a
        three-part layout (header, table body, detail panel).
        """
        if not self.results:
            return Panel(
                Align.center(
                    Text(
                        "No testplan found.\n\n"
                        "Embed one with:\n"
                        " pyucis testplan import coverage.cdb uart.hjson",
                        style="dim",
                    ),
                    vertical="middle",
                ),
                title="[bold]Testplan Closure[/bold]",
            )

        layout = Layout()
        # The header needs extra height when the per-stage summary is shown.
        header_size = 7 if self.summary is not None else 3
        layout.split_column(
            Layout(name="header", size=header_size),
            Layout(name="body", ratio=1),
            Layout(name="detail", size=8),
        )

        layout["header"].update(self._render_header())
        layout["body"].update(self._render_table())
        layout["detail"].update(self._render_detail())
        return layout

    def _render_header(self):
        """Build the header panel: overall closure, per-stage counts, key help."""
        lines = []
        if self.summary is not None:
            total = self.summary.get("total", 0)
            closed = self.summary.get("total_closed", 0)
            na = self.summary.get("total_na", 0)
            # N/A testpoints are excluded from the closure denominator.
            applicable = total - na
            pct = round(100.0 * closed / applicable, 1) if applicable > 0 else 0.0
            lines.append(
                f"[bold]Testplan Closure[/bold] "
                f"{closed}/{applicable} testpoints closed ({pct:.1f}%) "
                f"[dim]{na} N/A[/dim]"
            )
            by_stage = self.summary.get("by_stage", {})
            stage_parts = []
            for stage, entry in sorted(by_stage.items()):
                c, t = entry.get("closed", 0), entry.get("total", 0)
                p = entry.get("pct", 0.0)
                colour = "green" if p >= 100 else ("yellow" if p > 0 else "red")
                stage_parts.append(
                    f"[{colour}]{self._esc(stage)}: {c}/{t}[/{colour}]"
                )
            lines.append(" " + " | ".join(stage_parts))
        else:
            lines.append("[bold]Testplan Closure[/bold]")
        lines.append(
            "[dim]↑↓ navigate r refresh q back[/dim]"
        )
        return Panel("\n".join(lines), style="bold")

    def _render_table(self):
        """Build the table panel for the currently visible window of rows."""
        table = Table(
            show_header=True,
            header_style="bold cyan",
            expand=True,
            show_lines=False,
        )
        table.add_column("", width=2, no_wrap=True)
        table.add_column("Testpoint", ratio=3, no_wrap=True)
        table.add_column("Stage", width=6, no_wrap=True)
        table.add_column("Status", width=12, no_wrap=True)
        table.add_column("Pass", width=6, justify="right", no_wrap=True)
        table.add_column("Fail", width=6, justify="right", no_wrap=True)

        visible_end = min(
            self.scroll_offset + self.visible_rows, len(self.results)
        )
        for i in range(self.scroll_offset, visible_end):
            r = self.results[i]
            is_sel = i == self.selected_index
            status = r.get("status", "NOT_RUN")

            icon = _STATUS_ICON.get(status, "?")
            style = _STATUS_STYLE.get(status, "")
            if is_sel:
                # Highlight the cursor row.  The original code built this
                # style and then passed None instead, so it never showed.
                style = f"reverse {style}".strip()

            table.add_row(
                "▶" if is_sel else " ",
                self._esc(r.get("testpoint", "?")),
                self._esc(r.get("stage", "?")),
                f"{icon} {status}",
                str(r.get("pass_count", 0)),
                str(r.get("fail_count", 0)),
                style=style or None,
            )

        if len(self.results) > self.visible_rows:
            scroll_info = (
                f"[dim] {visible_end}/{len(self.results)} shown[/dim]"
            )
            return Panel(table, subtitle=scroll_info)
        return Panel(table)

    def _render_detail(self):
        """Build the detail panel for the selected testpoint."""
        if not self.results or self.selected_index >= len(self.results):
            return Panel(Text("No testpoint selected", style="dim"), title="Detail")

        r = self.results[self.selected_index]
        tp_name = r.get("testpoint", "?")
        stage = r.get("stage", "?")
        status = r.get("status", "NOT_RUN")
        tests = r.get("matched_tests", [])
        desc = r.get("desc", "")

        # Escape the bracketed stage as a whole so that a lowercase stage
        # name is not mistaken for a markup tag.
        stage_tag = self._esc(f"[{stage}]")
        lines = [
            f"[bold]{self._esc(tp_name)}[/bold] {stage_tag} "
            f"{_STATUS_ICON.get(status, '?')} {status}",
        ]
        if desc:
            lines.append(f"[dim]{self._esc(desc)}[/dim]")
        if tests:
            # Only the first six matched tests are listed.
            lines.append("Tests: " + ", ".join(self._esc(t) for t in tests[:6])
                         + ("…" if len(tests) > 6 else ""))
        else:
            lines.append("[dim]No tests matched[/dim]")

        return Panel("\n".join(lines), title="[dim]Detail[/dim]")

    # ------------------------------------------------------------------
    # Key handling
    # ------------------------------------------------------------------

    def handle_key(self, key: str) -> bool:
        """Process a key press.

        Args:
            key: Key name; this view handles up/k, down/j, home, end, r/R.

        Returns:
            True when the key was consumed by this view, False otherwise.
        """
        if key in ("up", "k") and self.results:
            if self.selected_index > 0:
                self.selected_index -= 1
                self._adjust_scroll()
            return True
        if key in ("down", "j") and self.results:
            if self.selected_index < len(self.results) - 1:
                self.selected_index += 1
                self._adjust_scroll()
            return True
        if key == "home":
            self.selected_index = 0
            self.scroll_offset = 0
            return True
        if key == "end":
            self.selected_index = max(0, len(self.results) - 1)
            self._adjust_scroll()
            return True
        if key in ("r", "R"):
            # NOTE(review): the model appears to cache closure results, so a
            # refresh may return the same data — confirm against the model.
            self._loaded = False
            self._load_closure()
            return True
        return False
def _make_plan() -> Testplan:
    """Build the small UART testplan shared by the tests in this module.

    The plan has three testpoints (``uart_reset`` in V1, ``uart_loopback``
    in V2 and the not-applicable ``uart_na`` in V2) plus one covergroup
    entry named ``cg_reset``.
    """
    testplan = Testplan(source_file="uart.hjson")
    testpoint_specs = (
        {"name": "uart_reset", "stage": "V1", "tests": ["uart_smoke"]},
        {"name": "uart_loopback", "stage": "V2", "tests": ["uart_loopback"]},
        {"name": "uart_na", "stage": "V2", "na": True},
    )
    for spec in testpoint_specs:
        testplan.add_testpoint(Testpoint(**spec))
    testplan.covergroups.append(CovergroupEntry(name="cg_reset"))
    return testplan
class TestExportGithubAnnotations:
    """Tests for export_github_annotations() workflow-command output."""

    def test_error_lines_for_failing(self, tmpdir_path):
        """A failing testpoint yields an ``::error`` line that names it."""
        path = os.path.join(tmpdir_path, "cov.cdb")
        db = _make_db(path, [("uart_loopback", HIST_STATUS_FAIL)])
        plan = _make_plan()
        results = compute_closure(plan, db)

        buf = io.StringIO()
        export_github_annotations(results, output=buf)
        lines = buf.getvalue().splitlines()

        error_lines = [line for line in lines if line.startswith("::error")]
        assert len(error_lines) >= 1
        assert any("uart_loopback" in line for line in error_lines)

    def test_warning_for_not_run_testpoint(self, tmpdir_path):
        """A testpoint whose tests never ran yields a ``::warning`` line."""
        path = os.path.join(tmpdir_path, "cov.cdb")
        # No runs at all → tp_not_run is NOT_RUN → warning
        db = _make_db(path, [])
        plan = Testplan(source_file="test.hjson")
        plan.add_testpoint(Testpoint(name="tp_not_run", stage="V1",
                                     tests=["tp_not_run"]))
        results = compute_closure(plan, db)

        buf = io.StringIO()
        export_github_annotations(results, output=buf)
        lines = buf.getvalue().splitlines()

        warning_lines = [line for line in lines if line.startswith("::warning")]
        assert len(warning_lines) >= 1

    def test_no_output_for_closed(self, tmpdir_path):
        """A plan whose only testpoint is closed produces no annotations."""
        path = os.path.join(tmpdir_path, "cov.cdb")
        db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)])
        plan = Testplan(source_file="test.hjson")
        plan.add_testpoint(
            Testpoint(name="uart_reset", stage="V1", tests=["uart_smoke"])
        )
        results = compute_closure(plan, db)

        buf = io.StringIO()
        export_github_annotations(results, output=buf)
        text = buf.getvalue()
        assert text.strip() == ""
report_testpoint_closure(results) + + assert summary.total == 3 + assert summary.total_na == 1 + # uart_reset (closed) + uart_loopback (closed) + uart_na (N/A) + assert summary.total_closed == 2 + + def test_stage_gate_v1_passes_when_closed(self, tmpdir_path): + path = os.path.join(tmpdir_path, "cov.cdb") + db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)]) + plan = _make_plan() + results = compute_closure(plan, db) + + gate = report_stage_gate(results, "V1", plan) + assert gate.passed is True + text = format_stage_gate(gate) + assert "PASS" in text + + def test_stage_gate_v2_fails_when_loopback_not_run(self, tmpdir_path): + path = os.path.join(tmpdir_path, "cov.cdb") + # Only uart_smoke runs, no uart_loopback + db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)]) + plan = _make_plan() + results = compute_closure(plan, db) + + gate = report_stage_gate(results, "V2", plan) + assert gate.passed is False + blocking_names = [r.testpoint.name for r in gate.blocking] + assert "uart_loopback" in blocking_names + + def test_regression_delta_detects_newly_closed(self, tmpdir_path): + path_old = os.path.join(tmpdir_path, "old.cdb") + path_new = os.path.join(tmpdir_path, "new.cdb") + plan = _make_plan() + + db_old = _make_db(path_old, []) # nothing passes + db_new = _make_db(path_new, [ + ("uart_smoke", HIST_STATUS_OK), + ]) + + results_old = compute_closure(plan, db_old) + results_new = compute_closure(plan, db_new) + delta = report_regression_delta(results_new, results_old) + + newly_closed_names = [r.testpoint.name for r in delta.newly_closed] + assert "uart_reset" in newly_closed_names + + def test_closure_to_json_roundtrip(self, tmpdir_path): + path = os.path.join(tmpdir_path, "cov.cdb") + db = _make_db(path, [("uart_smoke", HIST_STATUS_OK)]) + plan = _make_plan() + results = compute_closure(plan, db) + summary = report_testpoint_closure(results) + + d = json.loads(summary.to_json()) + assert d["total"] == 3 + assert any(r["name"] == "uart_reset" for r in 
def _write_v2_cdb(path: str, test_runs: list) -> None:
    """Create a v2 .cdb at *path* populated with the supplied test runs.

    Args:
        path: Destination file; first written as an empty database, then
            replaced by the populated one.
        test_runs: Iterable of ``(name, seed, status, ts)`` tuples.  A run
            is flagged as having coverage exactly when its status is
            ``HIST_STATUS_OK``.
    """
    NcdbWriter().write(MemUCIS(), path)
    database = NcdbUCIS(path)
    for run_name, run_seed, run_status, run_ts in test_runs:
        passed = run_status == HIST_STATUS_OK
        database.add_test_run(
            run_name,
            seed=run_seed,
            status=run_status,
            ts=run_ts,
            has_coverage=passed,
        )
    # Write to a sibling file first, then move it over the original.
    scratch = path + ".tmp"
    NcdbWriter().write(database, scratch)
    os.replace(scratch, path)
class TestQueryTestHistoryRange:
    """Verify that query_test_history() honours name and time-range filters."""

    def _build_db(self, path):
        """Create a database at *path* with six "alpha" and three "beta" runs."""
        NcdbWriter().write(MemUCIS(), path)
        db = NcdbUCIS(path)
        base_ts = 1700000000
        # "alpha": one run per hour; runs 0 and 3 fail, the rest pass.
        for idx in range(6):
            outcome = HIST_STATUS_FAIL if idx % 3 == 0 else HIST_STATUS_OK
            db.add_test_run(
                "alpha",
                seed=str(idx),
                status=outcome,
                ts=base_ts + idx * 3600,
                has_coverage=True,
            )
        # "beta": one passing run every two hours.
        for idx in range(3):
            db.add_test_run(
                "beta",
                seed=str(idx),
                status=HIST_STATUS_OK,
                ts=base_ts + idx * 7200,
                has_coverage=True,
            )
        return db

    def test_all_records_returned_without_filter(self, tmpdir_path):
        db = self._build_db(os.path.join(tmpdir_path, "a.cdb"))
        history = db.query_test_history("alpha")
        assert len(history) == 6

    def test_time_range_filter_lower_bound(self, tmpdir_path):
        db = self._build_db(os.path.join(tmpdir_path, "a.cdb"))
        # Only the last three hourly runs are at or after this timestamp.
        lower = 1700000000 + 3 * 3600
        history = db.query_test_history("alpha", ts_from=lower)
        assert all(rec.ts >= lower for rec in history)
        assert len(history) == 3

    def test_time_range_filter_upper_bound(self, tmpdir_path):
        db = self._build_db(os.path.join(tmpdir_path, "a.cdb"))
        # One second past the third run, so the first three runs qualify.
        upper = 1700000000 + 2 * 3600 + 1
        history = db.query_test_history("alpha", ts_to=upper)
        assert all(rec.ts <= upper for rec in history)
        assert len(history) == 3

    def test_nonexistent_name_returns_empty(self, tmpdir_path):
        db = self._build_db(os.path.join(tmpdir_path, "a.cdb"))
        assert db.query_test_history("no_such_test") == []

    def test_separate_test_independent(self, tmpdir_path):
        db = self._build_db(os.path.join(tmpdir_path, "a.cdb"))
        history = db.query_test_history("beta")
        assert len(history) == 3
class TestMergeTwoV2Sources:
    """Merge two v2 .cdb files and check that the result is consistent."""

    def _make_src_a(self, d):
        """Write source A (five runs: uart_smoke x3, gpio_test x2); return its path."""
        path = os.path.join(d, "a.cdb")
        runs = [
            ("uart_smoke", "1", HIST_STATUS_OK, 1700000000),
            ("uart_smoke", "2", HIST_STATUS_FAIL, 1700086400),
            ("uart_smoke", "3", HIST_STATUS_OK, 1700172800),
            ("gpio_test", "1", HIST_STATUS_OK, 1700000100),
            ("gpio_test", "2", HIST_STATUS_FAIL, 1700086500),
        ]
        _write_v2_cdb(path, runs)
        return path

    def _make_src_b(self, d):
        """Write source B (three runs: uart_smoke x2, spi_test x1); return its path."""
        path = os.path.join(d, "b.cdb")
        runs = [
            ("uart_smoke", "4", HIST_STATUS_OK, 1700259200),
            ("uart_smoke", "5", HIST_STATUS_OK, 1700345600),
            ("spi_test", "1", HIST_STATUS_OK, 1700259300),
        ]
        _write_v2_cdb(path, runs)
        return path

    def _merge_sources(self, d):
        """Create both sources in *d*, merge them, and return the merged path."""
        target = os.path.join(d, "merged.cdb")
        sources = [self._make_src_a(d), self._make_src_b(d)]
        NcdbMerger().merge(sources, target)
        return target

    def test_merged_registry_contains_all_names(self, tmpdir_path):
        db = NcdbReader().read(self._merge_sources(tmpdir_path))
        registry = db._test_registry
        assert registry.num_names == 3
        assert "uart_smoke" in registry._name_to_id
        assert "gpio_test" in registry._name_to_id
        assert "spi_test" in registry._name_to_id

    def test_merged_stats_are_combined(self, tmpdir_path):
        db = NcdbReader().read(self._merge_sources(tmpdir_path))
        uart_id = db._test_registry._name_to_id["uart_smoke"]
        uart_stats = db._test_stats.get(uart_id)
        assert uart_stats.total_runs == 5
        assert uart_stats.fail_count == 1

    def test_merged_run_id_is_sum(self, tmpdir_path):
        db = NcdbReader().read(self._merge_sources(tmpdir_path))
        # src_a has 5 runs, src_b has 3 → next_run_id = 8
        assert db._test_registry.next_run_id == 8

    def test_merged_history_queryable(self, tmpdir_path):
        merged_db = NcdbUCIS(self._merge_sources(tmpdir_path))
        history = merged_db.query_test_history("uart_smoke")
        assert len(history) == 5

    def test_merged_buckets_present(self, tmpdir_path):
        db = NcdbReader().read(self._merge_sources(tmpdir_path))
        assert len(db._sealed_buckets) >= 2

    def test_top_flaky_after_merge(self, tmpdir_path):
        merged_db = NcdbUCIS(self._merge_sources(tmpdir_path))
        flaky = merged_db.top_flaky_tests(5)
        # At least one test (gpio_test) has failures making it flaky
        assert len(flaky) > 0
def _make_cdb(path, test_runs=None):
    """Create a .cdb at *path*, optionally populated with test runs.

    Args:
        path: Destination file.
        test_runs: Optional iterable of ``(name, seed, status, ts)`` tuples;
            ``None`` or an empty value records no runs.  A run is flagged as
            having coverage exactly when its status is ``HIST_STATUS_OK``.

    Returns:
        *path*, unchanged, for call-chaining convenience.
    """
    NcdbWriter().write(MemUCIS(), path)
    database = NcdbUCIS(path)
    runs = test_runs if test_runs else []
    for run_name, run_seed, run_status, run_ts in runs:
        database.add_test_run(
            run_name,
            seed=run_seed,
            status=run_status,
            ts=run_ts,
            has_coverage=(run_status == HIST_STATUS_OK),
        )
    # Write to a sibling file first, then move it over the original.
    scratch = path + ".tmp"
    NcdbWriter().write(database, scratch)
    os.replace(scratch, path)
    return path
class TestHjsonImport:
    """Tests for importing a testplan file and embedding it in a database."""

    def _write_hjson(self, d, data):
        """Serialise *data* as JSON into ``plan.json`` under *d*; return the path."""
        target = os.path.join(d, "plan.json")
        with open(target, "w") as handle:
            handle.write(json.dumps(data))
        return target

    def test_import_and_embed(self, tmpdir_path):
        """An imported plan survives being embedded, written and re-read."""
        spec = {
            "testpoints": [
                {"name": "uart_reset", "stage": "V1", "tests": ["uart_smoke"]},
            ],
        }
        hjson_path = self._write_hjson(tmpdir_path, spec)

        cdb = os.path.join(tmpdir_path, "a.cdb")
        NcdbWriter().write(MemUCIS(), cdb)
        db = NcdbUCIS(cdb)
        db.setTestplan(import_hjson(hjson_path))

        # Persist through a sibling file, then move it into place.
        scratch = cdb + ".tmp"
        NcdbWriter().write(db, scratch)
        os.replace(scratch, cdb)

        reloaded = get_testplan(NcdbReader().read(cdb))
        assert reloaded is not None
        assert reloaded.testpoints[0].name == "uart_reset"

    def test_wildcard_expansion_preserved(self, tmpdir_path):
        """A ``{baud}`` placeholder expands to one test name per supplied value."""
        spec = {
            "testpoints": [
                {"name": "tp", "stage": "V1", "tests": ["{baud}_test"]},
            ],
        }
        hjson_path = self._write_hjson(tmpdir_path, spec)
        plan = import_hjson(hjson_path, {"baud": ["9600", "115200"]})
        expanded = plan.testpoints[0].tests
        assert "9600_test" in expanded
        assert "115200_test" in expanded
1700000000), + ]) + db = NcdbUCIS(path) + plan = _make_plan() + results = compute_closure(plan, db) + gate = stage_gate_status(results, "V1", plan) + assert gate["passed"] is False + + def test_not_run_testpoint(self, tmpdir_path): + path = _make_cdb(os.path.join(tmpdir_path, "a.cdb"), []) + db = NcdbUCIS(path) + plan = _make_plan() + results = compute_closure(plan, db) + for r in results: + if not r.testpoint.na: + assert r.status == TPStatus.NOT_RUN + + +# ── TestWaiversRoundTrip ────────────────────────────────────────────────────── + +class TestWaiversRoundTrip: + def test_set_get_waivers(self, tmpdir_path): + path = os.path.join(tmpdir_path, "a.cdb") + NcdbWriter().write(MemUCIS(), path) + db = NcdbUCIS(path) + ws = WaiverSet([Waiver(id="W1", scope_pattern="top/uart")]) + db.setWaivers(ws) + ws2 = db.getWaivers() + assert ws2 is not None + assert len(ws2.waivers) == 1 + + def test_waivers_survive_write_read(self, tmpdir_path): + path = os.path.join(tmpdir_path, "a.cdb") + NcdbWriter().write(MemUCIS(), path) + db = NcdbUCIS(path) + ws = WaiverSet([ + Waiver(id="W1", scope_pattern="top/uart", rationale="Known issue"), + Waiver(id="W2", scope_pattern="top/spi"), + ]) + db.setWaivers(ws) + tmp = path + ".tmp" + NcdbWriter().write(db, tmp) + os.replace(tmp, path) + + db2 = NcdbReader().read(path) + ws2 = getattr(db2, "_waivers", None) + assert ws2 is not None + assert len(ws2.waivers) == 2 + assert ws2.get("W1").rationale == "Known issue" + + def test_no_waivers_no_member(self, tmpdir_path): + import zipfile + path = os.path.join(tmpdir_path, "a.cdb") + NcdbWriter().write(MemUCIS(), path) + with zipfile.ZipFile(path, "r") as zf: + assert "waivers.json" not in zf.namelist() + + +# ── TestMergeTestplan ───────────────────────────────────────────────────────── + +class TestMergeTestplan: + def test_same_testplan_propagated_to_merged(self, tmpdir_path): + plan = _make_plan() + for name in ("a.cdb", "b.cdb"): + path = os.path.join(tmpdir_path, name) + 
NcdbWriter().write(MemUCIS(), path) + db = NcdbUCIS(path) + db.setTestplan(plan) + tmp = path + ".tmp" + NcdbWriter().write(db, tmp) + os.replace(tmp, path) + + merged = os.path.join(tmpdir_path, "merged.cdb") + NcdbMerger().merge( + [os.path.join(tmpdir_path, "a.cdb"), + os.path.join(tmpdir_path, "b.cdb")], + merged, + ) + db_m = NcdbReader().read(merged) + plan_m = get_testplan(db_m) + assert plan_m is not None + assert plan_m.source_file == "uart.hjson" + + def test_different_testplans_warning(self, tmpdir_path): + for i, name in enumerate(("a.cdb", "b.cdb")): + path = os.path.join(tmpdir_path, name) + NcdbWriter().write(MemUCIS(), path) + db = NcdbUCIS(path) + plan = _make_plan() + plan.source_file = f"plan_{i}.hjson" + db.setTestplan(plan) + tmp = path + ".tmp" + NcdbWriter().write(db, tmp) + os.replace(tmp, path) + + merged = os.path.join(tmpdir_path, "merged.cdb") + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + NcdbMerger().merge( + [os.path.join(tmpdir_path, "a.cdb"), + os.path.join(tmpdir_path, "b.cdb")], + merged, + ) + assert any("testplan" in str(warning.message).lower() for warning in w) + db_m = NcdbReader().read(merged) + assert get_testplan(db_m) is None + + def test_waivers_merged_union(self, tmpdir_path): + for i, cdb_name in enumerate(("a.cdb", "b.cdb")): + path = os.path.join(tmpdir_path, cdb_name) + NcdbWriter().write(MemUCIS(), path) + db = NcdbUCIS(path) + db.setWaivers(WaiverSet([ + Waiver(id=f"W{i}", scope_pattern=f"scope_{i}"), + ])) + tmp = path + ".tmp" + NcdbWriter().write(db, tmp) + os.replace(tmp, path) + + merged = os.path.join(tmpdir_path, "merged.cdb") + NcdbMerger().merge( + [os.path.join(tmpdir_path, "a.cdb"), + os.path.join(tmpdir_path, "b.cdb")], + merged, + ) + db_m = NcdbReader().read(merged) + ws = getattr(db_m, "_waivers", None) + assert ws is not None + ids = {w.id for w in ws.waivers} + assert ids == {"W0", "W1"} diff --git a/tests/test_coverage_metrics.py 
b/tests/test_coverage_metrics.py new file mode 100644 index 0000000..bff6bbf --- /dev/null +++ b/tests/test_coverage_metrics.py @@ -0,0 +1,770 @@ +""" +Unit tests for :class:`ucis.report.coverage_metrics.CoverageMetrics`. + +Each public method of the API is tested: + * BinStats / BinDetail dataclasses + * functional_bins() + * covergroup_stats() + * coverpoint_stats() / coverpoint_stats(include_bins=True) + * cross_stats() + * coverage_types_present() + * bins_by_type() + * code_coverage_by_type() + * file_coverage() + * tests() + * summary() + * database_info() + * invalidate() (cache invalidation) + * Parity: functional_bins() must agree with CoverageReportBuilder + +Tests run against both the XML/API path and the SQLite path where applicable. +""" +import os +import pytest +import tempfile + +from ucis.mem.mem_factory import MemFactory +from ucis.cover_type_t import CoverTypeT +from ucis.source_info import SourceInfo +from ucis import ( + UCIS_HISTORYNODE_TEST, UCIS_TESTSTATUS_OK, UCIS_OTHER, + UCIS_DU_MODULE, UCIS_ENABLED_STMT, UCIS_ENABLED_BRANCH, + UCIS_INST_ONCE, UCIS_SCOPE_UNDER_DU, UCIS_INSTANCE, UCIS_VLOG, +) +from ucis.test_data import TestData +from ucis.report.coverage_metrics import ( + BinStats, BinDetail, CoverpointStats, CovergroupStats, TestInfo, + CoverageMetrics, +) + + +# --------------------------------------------------------------------------- +# Internal DB builder helpers (same conventions as tui_fixtures) +# --------------------------------------------------------------------------- + +def _add_test(db, logical_name="test1"): + node = db.createHistoryNode(None, logical_name, logical_name, UCIS_HISTORYNODE_TEST) + node.setTestData(TestData( + teststatus=UCIS_TESTSTATUS_OK, + toolcategory="test", + date="20240101000000", + )) + return node + + +def _add_instance(db): + file_h = db.createFileHandle("tb.sv", "/rtl") + src = SourceInfo(file_h, 1, 0) + du = db.createScope("work.tb", src, 1, UCIS_OTHER, UCIS_DU_MODULE, + UCIS_ENABLED_STMT | 
UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU) + inst = db.createInstance("tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE) + return inst, file_h + + +# --------------------------------------------------------------------------- +# Backend fixture factory +# --------------------------------------------------------------------------- + +def _db_for_backend(backend: str, builder_fn, tmp_path): + """ + Return a live UCIS db object (MemUCIS-via-XML or SqliteUCIS) populated + by *builder_fn*. Caller is responsible for closing the db. + """ + if backend == "sqlite": + from ucis.sqlite.sqlite_ucis import SqliteUCIS + db_path = str(tmp_path / "test.db") + db = SqliteUCIS(db_path) + builder_fn(db) + db.close() + db = SqliteUCIS(db_path) + return db + else: + from ucis.xml.xml_factory import XmlFactory + from ucis.rgy.format_rgy import FormatRgy + db = MemFactory.create() + builder_fn(db) + xml_path = str(tmp_path / "test.xml") + XmlFactory.write(db, xml_path) + db2 = FormatRgy.inst().getDatabaseDesc("xml").fmt_if().read(xml_path) + return db2 + + +def _metrics(backend: str, builder_fn, tmp_path) -> CoverageMetrics: + db = _db_for_backend(backend, builder_fn, tmp_path) + return CoverageMetrics(db) + + +# --------------------------------------------------------------------------- +# DB builder functions +# --------------------------------------------------------------------------- + +def _build_partial(db): + """2 covergroups, 2 coverpoints, 6 bins, 3 covered (50%).""" + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + cg1 = inst.createCovergroup("cg1", src, 1, UCIS_OTHER) + cp1 = cg1.createCoverpoint("cp1", src, 1, UCIS_VLOG) + cp1.createBin("a", src, 1, 5, "a") # hit + cp1.createBin("b", src, 1, 3, "b") # hit + cp1.createBin("c", src, 1, 0, "c") # miss + cp1.createBin("d", src, 1, 0, "d") # miss + + cg2 = inst.createCovergroup("cg2", src, 1, UCIS_OTHER) + cp2 = cg2.createCoverpoint("cp2", src, 1, UCIS_VLOG) + cp2.createBin("x", src, 1, 
10, "x") # hit + cp2.createBin("y", src, 1, 0, "y") # miss + + +def _build_zero(db): + """1 covergroup, 1 coverpoint, 3 bins, all uncovered.""" + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + cg = inst.createCovergroup("cg_zero", src, 1, UCIS_OTHER) + cp = cg.createCoverpoint("cp_zero", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 0, "b0") + cp.createBin("b1", src, 1, 0, "b1") + cp.createBin("b2", src, 1, 0, "b2") + + +def _build_full(db): + """1 covergroup, 1 coverpoint, 3 bins, all hit.""" + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + cg = inst.createCovergroup("cg_full", src, 1, UCIS_OTHER) + cp = cg.createCoverpoint("cp_full", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 1, "b0") + cp.createBin("b1", src, 1, 2, "b1") + cp.createBin("b2", src, 1, 7, "b2") + + +def _build_multi_test(db): + """3 tests; 6 bins; 4 covered (≈66.7%).""" + for name in ("test_a", "test_b", "test_c"): + _add_test(db, name) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + cg = inst.createCovergroup("cg_mt", src, 1, UCIS_OTHER) + cp = cg.createCoverpoint("cp_mt", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 1, "b0") + cp.createBin("b1", src, 1, 1, "b1") + cp.createBin("b2", src, 1, 1, "b2") + cp.createBin("b3", src, 1, 1, "b3") + cp.createBin("b4", src, 1, 0, "b4") + cp.createBin("b5", src, 1, 0, "b5") + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(params=["xml", "sqlite"]) +def partial_m(request, tmp_path): + return _metrics(request.param, _build_partial, tmp_path) + + +@pytest.fixture(params=["xml", "sqlite"]) +def zero_m(request, tmp_path): + return _metrics(request.param, _build_zero, tmp_path) + + +@pytest.fixture(params=["xml", "sqlite"]) +def full_m(request, tmp_path): + return _metrics(request.param, _build_full, tmp_path) + + 
@pytest.fixture(params=["xml", "sqlite"])
def multi_m(request, tmp_path):
    """Metrics over the database built by ``_build_multi_test``, once per backend."""
    backend = request.param
    return _metrics(backend, _build_multi_test, tmp_path)


@pytest.fixture
def vlt_metrics():
    """CoverageMetrics from the real vlt.cdb SQLite fixture (code-coverage only).

    The requesting test is skipped when ``test_vlt.cdb`` does not exist one
    directory above this file.  On teardown the database is closed and any
    exception raised by ``close()`` is ignored.
    """
    this_dir = os.path.dirname(__file__)
    cdb_file = os.path.join(this_dir, "..", "test_vlt.cdb")
    if not os.path.exists(cdb_file):
        pytest.skip("test_vlt.cdb not found")
    from ucis.sqlite.sqlite_ucis import SqliteUCIS
    handle = SqliteUCIS(cdb_file)
    yield CoverageMetrics(handle)
    # Teardown is best-effort: a failing close() must not fail the test.
    try:
        handle.close()
    except Exception:
        pass


# ===========================================================================
# 1. Dataclass unit tests
# ===========================================================================

class TestBinStats:
    """Derived values and addition on the ``BinStats`` dataclass."""

    def test_uncovered_property(self):
        stats = BinStats(total=10, covered=3)
        assert stats.uncovered == 7

    def test_coverage_pct_normal(self):
        pct = BinStats(total=10, covered=5).coverage_pct
        assert abs(pct - 50.0) < 0.01

    def test_coverage_pct_zero_total(self):
        empty = BinStats(total=0, covered=0)
        assert empty.coverage_pct == 0.0

    def test_coverage_pct_full(self):
        pct = BinStats(total=4, covered=4).coverage_pct
        assert abs(pct - 100.0) < 0.01

    def test_add(self):
        lhs = BinStats(total=4, covered=2)
        rhs = BinStats(total=6, covered=4)
        combined = lhs + rhs
        assert combined.total == 10
        assert combined.covered == 6
        assert abs(combined.coverage_pct - 60.0) < 0.01


class TestBinDetail:
    """``BinDetail.covered`` and the ignore/illegal flags."""

    def test_covered_when_count_gte_at_least(self):
        detail = BinDetail(name="b", count=5, at_least=1)
        assert detail.covered is True

    def test_not_covered_when_count_lt_at_least(self):
        detail = BinDetail(name="b", count=2, at_least=5)
        assert detail.covered is False

    def test_covered_exactly_at_least(self):
        detail = BinDetail(name="b", count=3, at_least=3)
        assert detail.covered is True

    def test_not_covered_zero_count(self):
        detail = BinDetail(name="b", count=0, at_least=1)
        assert detail.covered is False

    def test_is_ignore_flag(self):
        detail = BinDetail(name="ign", count=0, at_least=1, is_ignore=True)
        assert detail.is_ignore is True
        assert detail.is_illegal is False

    def test_is_illegal_flag(self):
        detail = BinDetail(name="ill", count=0, at_least=1, is_illegal=True)
        assert detail.is_illegal is True
        assert detail.is_ignore is False


# ===========================================================================
# 2. functional_bins()
# ===========================================================================

class TestFunctionalBins:
    """Totals reported by ``functional_bins()`` for the synthetic databases."""

    def test_partial_total(self, partial_m):
        stats = partial_m.functional_bins()
        assert stats.total == 6

    def test_partial_covered(self, partial_m):
        stats = partial_m.functional_bins()
        assert stats.covered == 3

    def test_partial_pct(self, partial_m):
        pct = partial_m.functional_bins().coverage_pct
        assert abs(pct - 50.0) < 0.01

    def test_zero_coverage(self, zero_m):
        stats = zero_m.functional_bins()
        assert stats.total == 3
        assert stats.covered == 0
        assert stats.coverage_pct == 0.0

    def test_full_coverage(self, full_m):
        stats = full_m.functional_bins()
        assert stats.total == 3
        assert stats.covered == 3
        assert abs(stats.coverage_pct - 100.0) < 0.01

    def test_no_double_counting_xml(self, tmp_path):
        """XML backend must not double-count type-level and instance-level CG bins."""
        metrics = _metrics("xml", _build_partial, tmp_path)
        assert metrics.functional_bins().total == 6, "double-counting detected"

    def test_parity_with_report_builder(self, partial_m):
        """functional_bins() must agree with CoverageReportBuilder's bin totals."""
        from ucis.report.coverage_report_builder import CoverageReportBuilder
        report = CoverageReportBuilder.build(partial_m._db)
        # Walk the report once, tallying all bins and the ones flagged as hit.
        expected_total = 0
        expected_hit = 0
        for group in report.covergroups:
            for point in group.coverpoints:
                expected_total += len(point.bins)
                for one_bin in point.bins:
                    if one_bin.hit:
                        expected_hit += 1
        stats = partial_m.functional_bins()
        assert stats.total == expected_total
        assert stats.covered == expected_hit


#
# ===========================================================================
# 3. covergroup_stats()
# ===========================================================================

class TestCovergroupStats:
    """Per-covergroup entries returned by ``covergroup_stats()``."""

    def test_count_partial(self, partial_m):
        groups = partial_m.covergroup_stats()
        assert len(groups) == 2

    def test_names_present(self, partial_m):
        found = set()
        for group in partial_m.covergroup_stats():
            found.add(group.name)
        assert "cg1" in found
        assert "cg2" in found

    def test_coverage_pct_approx(self, partial_m):
        # Every reported percentage must lie in the closed range [0, 100].
        assert all(
            0.0 <= group.coverage_pct <= 100.0
            for group in partial_m.covergroup_stats()
        )

    def test_bins_non_zero(self, partial_m):
        assert all(group.bins.total > 0 for group in partial_m.covergroup_stats())

    def test_zero_db_zero_covered(self, zero_m):
        assert all(group.bins.covered == 0 for group in zero_m.covergroup_stats())

    def test_full_db_all_covered(self, full_m):
        assert all(
            group.bins.covered == group.bins.total
            for group in full_m.covergroup_stats()
        )


# ===========================================================================
# 4.
# coverpoint_stats()
# ===========================================================================

class TestCoverpointStats:
    """``coverpoint_stats()`` with and without per-bin details."""

    def test_count_partial(self, partial_m):
        points = partial_m.coverpoint_stats()
        assert len(points) == 2

    def test_names_correct(self, partial_m):
        seen = set()
        for point in partial_m.coverpoint_stats():
            seen.add(point.name)
        assert seen == {"cp1", "cp2"}

    def test_bins_partial(self, partial_m):
        lookup = {}
        for point in partial_m.coverpoint_stats():
            lookup[point.name] = point
        assert lookup["cp1"].bins.total == 4
        assert lookup["cp1"].bins.covered == 2
        assert lookup["cp2"].bins.total == 2
        assert lookup["cp2"].bins.covered == 1

    def test_path_contains_name(self, partial_m):
        assert all(point.name in point.path for point in partial_m.coverpoint_stats())

    def test_include_bins_false_no_details(self, partial_m):
        without_details = partial_m.coverpoint_stats(include_bins=False)
        assert all(point.bin_details == [] for point in without_details)

    def test_include_bins_true_has_details(self, partial_m):
        with_details = partial_m.coverpoint_stats(include_bins=True)
        for cp in with_details:
            detail_count = len(cp.bin_details)
            assert detail_count == cp.bins.total, (
                f"bin_details length should match total bins for {cp.name}"
            )

    def test_bin_detail_semantics(self, partial_m):
        """BinDetail.covered matches count >= at_least."""
        lookup = {}
        for point in partial_m.coverpoint_stats(include_bins=True):
            lookup[point.name] = point
        hit_count = 0
        for detail in lookup["cp1"].bin_details:
            if detail.covered:
                hit_count += 1
        assert hit_count == 2  # bins a, b

    def test_coverage_pct_matches_bins(self, partial_m):
        for point in partial_m.coverpoint_stats():
            from_bins = point.bins.coverage_pct
            assert abs(point.coverage_pct - from_bins) < 0.001

    def test_zero_db(self, zero_m):
        points = zero_m.coverpoint_stats()
        assert len(points) == 1
        only_point = points[0]
        assert only_point.bins.covered == 0

    def test_full_db(self, full_m):
        points = full_m.coverpoint_stats()
        assert points[0].bins.covered == points[0].bins.total


# ===========================================================================
# 5.
# coverage_types_present()
# ===========================================================================

class TestCoverageTypesPresent:
    """Which ``CoverTypeT`` values ``coverage_types_present()`` reports."""

    def test_functional_db_has_cvgbin(self, partial_m):
        present = partial_m.coverage_types_present()
        assert CoverTypeT.CVGBIN in present

    def test_functional_db_no_code_types(self, partial_m):
        present = partial_m.coverage_types_present()
        for code_type in (CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN):
            assert code_type not in present

    def test_vlt_has_code_types(self, vlt_metrics):
        present = vlt_metrics.coverage_types_present()
        # vlt.cdb has statement, branch, toggle coverage
        expected_any = {CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN}
        overlap = expected_any & set(present)
        assert overlap, "vlt.cdb should have code coverage types"

    def test_vlt_no_cvgbin(self, vlt_metrics):
        present = vlt_metrics.coverage_types_present()
        assert CoverTypeT.CVGBIN not in present

    def test_returns_list(self, partial_m):
        present = partial_m.coverage_types_present()
        assert isinstance(present, list)


# ===========================================================================
# 6.
# bins_by_type()
# ===========================================================================

class TestBinsByType:
    """``bins_by_type()`` for functional and code coverage types."""

    def test_cvgbin_delegates_to_functional_bins(self, partial_m):
        """bins_by_type(CVGBIN) must return the same result as functional_bins()."""
        functional = partial_m.functional_bins()
        typed = partial_m.bins_by_type(CoverTypeT.CVGBIN)
        assert typed.total == functional.total
        assert typed.covered == functional.covered

    def test_non_cvgbin_type_with_no_items_returns_zero(self, partial_m):
        typed = partial_m.bins_by_type(CoverTypeT.STMTBIN)
        assert typed.total == 0
        assert typed.covered == 0

    def test_vlt_stmtbin_non_zero(self, vlt_metrics):
        stmt_total = vlt_metrics.bins_by_type(CoverTypeT.STMTBIN).total
        assert stmt_total > 0

    def test_vlt_branchbin_non_zero(self, vlt_metrics):
        branch_total = vlt_metrics.bins_by_type(CoverTypeT.BRANCHBIN).total
        assert branch_total > 0

    def test_returns_bin_stats(self, partial_m):
        typed = partial_m.bins_by_type(CoverTypeT.CVGBIN)
        assert isinstance(typed, BinStats)

    def test_covered_lte_total(self, partial_m):
        typed = partial_m.bins_by_type(CoverTypeT.CVGBIN)
        assert typed.covered <= typed.total


# ===========================================================================
# 7.
# code_coverage_by_type()
# ===========================================================================

class TestCodeCoverageByType:
    """The per-type mapping returned by ``code_coverage_by_type()``."""

    def test_returns_dict(self, vlt_metrics):
        mapping = vlt_metrics.code_coverage_by_type()
        assert isinstance(mapping, dict)

    def test_stmtbin_in_result(self, vlt_metrics):
        mapping = vlt_metrics.code_coverage_by_type()
        assert CoverTypeT.STMTBIN in mapping

    def test_bin_stats_type(self, vlt_metrics):
        mapping = vlt_metrics.code_coverage_by_type()
        for ct in mapping:
            assert isinstance(mapping[ct], BinStats), f"{ct} should map to BinStats"

    def test_agrees_with_bins_by_type(self, vlt_metrics):
        """code_coverage_by_type() must agree with bins_by_type() per type."""
        mapping = vlt_metrics.code_coverage_by_type()
        for ct, combined in mapping.items():
            single = vlt_metrics.bins_by_type(ct)
            assert combined.total == single.total, f"total mismatch for {ct}"
            assert combined.covered == single.covered, f"covered mismatch for {ct}"

    def test_functional_db_code_types_zero(self, partial_m):
        mapping = partial_m.code_coverage_by_type()
        code_types = (CoverTypeT.STMTBIN, CoverTypeT.BRANCHBIN, CoverTypeT.TOGGLEBIN)
        for code_type in code_types:
            # A type missing from the mapping is treated as an empty BinStats.
            stats = mapping.get(code_type, BinStats())
            assert stats.total == 0


# ===========================================================================
# 8.
# file_coverage()
# ===========================================================================

class TestFileCoverage:
    """``file_coverage()`` on the XML backend and on the vlt.cdb fixture."""

    def test_empty_for_xml_backend(self, tmp_path):
        """file_coverage() requires SQLite; returns [] for XML backends."""
        metrics = _metrics("xml", _build_partial, tmp_path)
        assert metrics.file_coverage() == []

    def test_returns_list_for_sqlite(self, vlt_metrics):
        per_file = vlt_metrics.file_coverage()
        assert isinstance(per_file, list)

    def test_non_empty_for_vlt(self, vlt_metrics):
        per_file = vlt_metrics.file_coverage()
        assert len(per_file) > 0, "vlt.cdb should have file-level coverage data"

    def test_file_paths_non_empty(self, vlt_metrics):
        for entry in vlt_metrics.file_coverage():
            assert entry.file_path, "file_path should not be empty"

    def test_overall_bins_non_zero(self, vlt_metrics):
        # NOTE(review): despite the test name, the check is ``>= 0``, so it only
        # rejects negative totals.
        assert all(entry.overall.total >= 0 for entry in vlt_metrics.file_coverage())

    def test_sorted_by_path(self, vlt_metrics):
        ordered = []
        for entry in vlt_metrics.file_coverage():
            ordered.append(entry.file_path)
        assert ordered == sorted(ordered), "file_coverage() should be sorted by path"

    def test_covered_lte_total_per_file(self, vlt_metrics):
        for entry in vlt_metrics.file_coverage():
            overall = entry.overall
            assert overall.covered <= overall.total


# ===========================================================================
# 9.
# tests()
# ===========================================================================

class TestTests:
    """The ``TestInfo`` records returned by ``tests()``."""

    def test_returns_list(self, partial_m):
        records = partial_m.tests()
        assert isinstance(records, list)

    def test_single_test_db(self, partial_m):
        records = partial_m.tests()
        assert len(records) >= 1

    def test_test_has_name(self, partial_m):
        for record in partial_m.tests():
            assert isinstance(record, TestInfo)
            assert record.name

    def test_test_has_status(self, partial_m):
        allowed = ("PASSED", "FAILED", "UNKNOWN")
        assert all(record.status in allowed for record in partial_m.tests())

    def test_test_has_date(self, partial_m):
        for record in partial_m.tests():
            assert record.date  # non-empty

    def test_multi_test_names(self, multi_m):
        found = set()
        for record in multi_m.tests():
            found.add(record.name)
        for wanted in ("test_a", "test_b", "test_c"):
            assert wanted in found

    def test_multi_test_count(self, multi_m):
        records = multi_m.tests()
        assert len(records) == 3

    def test_all_passed(self, partial_m):
        """Fixture only adds passing tests."""
        assert all(record.status == "PASSED" for record in partial_m.tests())


# ===========================================================================
# 10.
# summary()
# ===========================================================================

class TestSummary:
    """Keys and values of the dict returned by ``summary()``."""

    def test_returns_dict(self, partial_m):
        report = partial_m.summary()
        assert isinstance(report, dict)

    def test_required_keys(self, partial_m):
        report = partial_m.summary()
        required = (
            "overall_coverage",
            "total_bins",
            "covered_bins",
            "covergroups",
            "coverpoints",
        )
        for k in required:
            assert k in report, f"key '{k}' missing from summary"

    def test_partial_values(self, partial_m):
        report = partial_m.summary()
        assert report["total_bins"] == 6
        assert report["covered_bins"] == 3
        assert abs(report["overall_coverage"] - 50.0) < 0.01
        assert report["covergroups"] == 2
        assert report["coverpoints"] == 2

    def test_zero_coverage(self, zero_m):
        report = zero_m.summary()
        assert report["covered_bins"] == 0
        assert report["overall_coverage"] == 0.0

    def test_full_coverage(self, full_m):
        report = full_m.summary()
        assert report["covered_bins"] == report["total_bins"]
        assert abs(report["overall_coverage"] - 100.0) < 0.01

    def test_vlt_total_bins_nonzero(self, vlt_metrics):
        """Code-coverage-only DB should report total_bins > 0."""
        total = vlt_metrics.summary()["total_bins"]
        assert total > 0

    def test_vlt_no_functional_covergroups(self, vlt_metrics):
        """vlt.cdb has no functional coverage so covergroups = 0."""
        group_count = vlt_metrics.summary()["covergroups"]
        assert group_count == 0

    def test_consistent_with_functional_bins(self, partial_m):
        """summary() total_bins and covered_bins must agree with functional_bins()."""
        report = partial_m.summary()
        functional = partial_m.functional_bins()
        assert report["total_bins"] == functional.total
        assert report["covered_bins"] == functional.covered


# ===========================================================================
# 11.
# database_info()
# ===========================================================================

class TestDatabaseInfo:
    """Keys and test count of the dict returned by ``database_info()``."""

    def test_returns_dict(self, partial_m):
        details = partial_m.database_info()
        assert isinstance(details, dict)

    def test_required_keys(self, partial_m):
        details = partial_m.database_info()
        assert all(key in details for key in ("path", "format", "test_count"))

    def test_test_count_matches_tests(self, partial_m):
        details = partial_m.database_info()
        records = partial_m.tests()
        assert details["test_count"] == len(records)

    def test_multi_test_count(self, multi_m):
        details = multi_m.database_info()
        assert details["test_count"] == 3


# ===========================================================================
# 12. invalidate() — cache invalidation
# ===========================================================================

class TestInvalidate:
    """Object identity of cached results before and after ``invalidate()``."""

    def test_invalidate_clears_cache(self, partial_m):
        """Calling invalidate() should force recomputation."""
        before = partial_m.functional_bins()
        partial_m.invalidate()
        after = partial_m.functional_bins()
        assert before.total == after.total
        assert before.covered == after.covered

    def test_cached_result_is_same_object(self, partial_m):
        """Without invalidate(), successive calls return the same cached object."""
        first_call = partial_m.functional_bins()
        second_call = partial_m.functional_bins()
        assert first_call is second_call

    def test_after_invalidate_new_object(self, partial_m):
        before = partial_m.functional_bins()
        partial_m.invalidate()
        after = partial_m.functional_bins()
        assert before is not after

    def test_summary_cached(self, partial_m):
        first_call = partial_m.summary()
        second_call = partial_m.summary()
        # summary() is cached — returns the exact same dict object
        assert first_call is second_call

    def test_summary_refreshed_after_invalidate(self, partial_m):
        before = partial_m.summary()
        partial_m.invalidate()
        after = partial_m.summary()
        # After invalidation a fresh dict is built — different object, same values
        assert before is not after
        assert before == after


#
=========================================================================== +# 13. Parity: functional_bins agrees with CoverageReportBuilder +# =========================================================================== + +class TestParityWithReportBuilder: + """ + CoverageMetrics.functional_bins() MUST produce the same numbers as the + CoverageReportBuilder, which is the canonical oracle for functional coverage. + """ + + @pytest.mark.parametrize("backend", ["xml", "sqlite"]) + def test_partial_parity(self, tmp_path, backend): + m = _metrics(backend, _build_partial, tmp_path) + self._assert_parity(m) + + @pytest.mark.parametrize("backend", ["xml", "sqlite"]) + def test_zero_parity(self, tmp_path, backend): + m = _metrics(backend, _build_zero, tmp_path) + self._assert_parity(m) + + @pytest.mark.parametrize("backend", ["xml", "sqlite"]) + def test_full_parity(self, tmp_path, backend): + m = _metrics(backend, _build_full, tmp_path) + self._assert_parity(m) + + @pytest.mark.parametrize("backend", ["xml", "sqlite"]) + def test_multi_test_parity(self, tmp_path, backend): + m = _metrics(backend, _build_multi_test, tmp_path) + self._assert_parity(m) + + def _assert_parity(self, m: CoverageMetrics): + from ucis.report.coverage_report_builder import CoverageReportBuilder + report = CoverageReportBuilder.build(m._db) + + def _report_bins(report): + total = 0 + covered = 0 + for cg in report.covergroups: + total += sum(len(cp.bins) for cp in cg.coverpoints) + covered += sum(1 for cp in cg.coverpoints for b in cp.bins if b.hit) + return total, covered + + r_total, r_covered = _report_bins(report) + fb = m.functional_bins() + assert fb.total == r_total, ( + f"total mismatch: metrics={fb.total}, report={r_total}" + ) + assert fb.covered == r_covered, ( + f"covered mismatch: metrics={fb.covered}, report={r_covered}" + ) + + def test_coverpoint_stats_parity(self, tmp_path): + """coverpoint_stats() should agree with direct CoverageReportBuilder traversal.""" + m = _metrics("xml", 
_build_partial, tmp_path) + from ucis.report.coverage_report_builder import CoverageReportBuilder + report = CoverageReportBuilder.build(m._db) + report_cps = {cp.name: cp for cg in report.covergroups for cp in cg.coverpoints} + metrics_cps = {cp.name: cp for cp in m.coverpoint_stats()} + assert set(report_cps.keys()) == set(metrics_cps.keys()), "coverpoint names mismatch" + for name, rcp in report_cps.items(): + mcp = metrics_cps[name] + assert mcp.bins.total == len(rcp.bins), f"total mismatch for {name}" + assert mcp.bins.covered == sum(1 for b in rcp.bins if b.hit), \ + f"covered mismatch for {name}" + + +# =========================================================================== +# 14. VLT regression — real SQLite file +# =========================================================================== + +class TestVltRegression: + """Smoke tests against the real vlt.cdb fixture.""" + + def test_summary_total_nonzero(self, vlt_metrics): + assert vlt_metrics.summary()["total_bins"] > 0 + + def test_coverage_types_include_branch(self, vlt_metrics): + assert CoverTypeT.BRANCHBIN in vlt_metrics.coverage_types_present() + + def test_stmtbin_covered_lte_total(self, vlt_metrics): + bt = vlt_metrics.bins_by_type(CoverTypeT.STMTBIN) + assert bt.covered <= bt.total + + def test_file_coverage_non_empty(self, vlt_metrics): + assert len(vlt_metrics.file_coverage()) > 0 + + def test_database_info_test_count_non_negative(self, vlt_metrics): + assert vlt_metrics.database_info()["test_count"] >= 0 diff --git a/tests/test_tui_model_fidelity.py b/tests/test_tui_model_fidelity.py new file mode 100644 index 0000000..ce685b8 --- /dev/null +++ b/tests/test_tui_model_fidelity.py @@ -0,0 +1,240 @@ +""" +Layer 1: CoverageModel unit tests. + +These tests verify that every public method of CoverageModel returns values +that agree with the raw UCIS API (the ground truth). Both the API/XML path +and the SQLite fast path are exercised through the parametrized fixtures in +tui_fixtures.py. 
+""" +import pytest +from ucis.cover_type_t import CoverTypeT +from ucis.scope_type_t import ScopeTypeT + +from tests.tui_fixtures import ( + partial_coverage, zero_coverage, full_coverage, multi_test, vlt_model, +) + + +# --------------------------------------------------------------------------- +# get_summary() +# --------------------------------------------------------------------------- + +class TestGetSummary: + + def test_total_bins(self, partial_coverage): + model, expected = partial_coverage + summary = model.get_summary() + assert summary["total_bins"] == expected["total_bins"], ( + f"total_bins: got {summary['total_bins']}, want {expected['total_bins']}" + ) + + def test_covered_bins(self, partial_coverage): + model, expected = partial_coverage + summary = model.get_summary() + assert summary["covered_bins"] == expected["covered_bins"] + + def test_overall_coverage_percentage(self, partial_coverage): + model, expected = partial_coverage + summary = model.get_summary() + assert abs(summary["overall_coverage"] - expected["overall_coverage"]) < 0.01 + + def test_covergroup_count(self, partial_coverage): + model, expected = partial_coverage + summary = model.get_summary() + assert summary["covergroups"] == expected["covergroups"] + + def test_summary_zero_coverage(self, zero_coverage): + model, expected = zero_coverage + summary = model.get_summary() + assert summary["covered_bins"] == 0 + assert summary["overall_coverage"] == 0.0 + assert summary["total_bins"] == expected["total_bins"] + + def test_summary_full_coverage(self, full_coverage): + model, expected = full_coverage + summary = model.get_summary() + assert summary["covered_bins"] == expected["covered_bins"] + assert abs(summary["overall_coverage"] - 100.0) < 0.01 + + def test_summary_is_cached(self, partial_coverage): + model, _ = partial_coverage + s1 = model.get_summary() + s2 = model.get_summary() + assert s1 is s2, "get_summary() should return the cached object on repeated calls" + + +# 
# ---------------------------------------------------------------------------
# get_coverage_types()
# ---------------------------------------------------------------------------

class TestGetCoverageTypes:
    """Checks on the collection returned by ``model.get_coverage_types()``."""

    def test_cvgbin_present_in_partial(self, partial_coverage):
        """The partial fixture must report the CVGBIN cover type."""
        model, _ = partial_coverage
        reported = model.get_coverage_types()
        assert CoverTypeT.CVGBIN in reported

    def test_types_non_empty(self, partial_coverage):
        """At least one cover type is reported."""
        model, _ = partial_coverage
        reported = model.get_coverage_types()
        assert len(reported) >= 1

    def test_types_cached(self, partial_coverage):
        """Repeated calls return the identical object."""
        model, _ = partial_coverage
        first = model.get_coverage_types()
        second = model.get_coverage_types()
        assert first is second


# ---------------------------------------------------------------------------
# get_coverage_by_type()
# ---------------------------------------------------------------------------

class TestGetCoverageByType:
    """Checks on ``model.get_coverage_by_type(CoverTypeT.CVGBIN)``."""

    def test_cvgbin_totals_match_summary(self, partial_coverage):
        """CVGBIN total/covered equal the fixture's overall bin counts."""
        model, want = partial_coverage
        cvg = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        assert cvg["total"] == want["total_bins"]
        assert cvg["covered"] == want["covered_bins"]

    def test_coverage_percentage_derived_correctly(self, partial_coverage):
        """``coverage`` must equal covered / total * 100 whenever total is positive."""
        model, _ = partial_coverage
        cvg = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        if not cvg["total"] > 0:
            return  # no bins: there is no percentage to cross-check
        derived = cvg["covered"] / cvg["total"] * 100
        assert abs(cvg["coverage"] - derived) < 0.01

    def test_zero_coverage(self, zero_coverage):
        """Nothing hit: zero covered bins and exactly 0.0 % coverage."""
        model, _ = zero_coverage
        cvg = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        assert cvg["covered"] == 0
        assert cvg["coverage"] == 0.0

    def test_full_coverage(self, full_coverage):
        """Everything hit: covered equals total and coverage is ~100 %."""
        model, _ = full_coverage
        cvg = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        assert cvg["covered"] == cvg["total"]
        assert abs(cvg["coverage"] - 100.0) < 0.01

    def test_result_cached(self, partial_coverage):
        """Repeated calls for the same type return the identical object."""
        model, _ = partial_coverage
        first = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        second = model.get_coverage_by_type(CoverTypeT.CVGBIN)
        assert first is second
# ---------------------------------------------------------------------------
# get_all_tests()
# ---------------------------------------------------------------------------

class TestGetAllTests:
    """Checks on the list of test records returned by ``model.get_all_tests()``."""

    def test_test_count(self, partial_coverage):
        """Partial-coverage fixture has exactly one test."""
        model, _ = partial_coverage
        records = model.get_all_tests()
        assert len(records) == 1

    def test_test_name(self, partial_coverage):
        """The single test in the partial fixture is named ``test1``."""
        model, _ = partial_coverage
        first_record = model.get_all_tests()[0]
        assert first_record["name"] == "test1"

    def test_multi_test_count(self, multi_test):
        """One record is returned per expected test name."""
        model, want = multi_test
        records = model.get_all_tests()
        assert len(records) == len(want["test_names"])

    def test_multi_test_names_present(self, multi_test):
        """Every expected test name appears among the returned records."""
        model, expected = multi_test
        found_names = set()
        for record in model.get_all_tests():
            found_names.add(record["name"])
        for name in expected["test_names"]:
            assert name in found_names, f"Expected test '{name}' not in {found_names}"

    def test_tests_cached(self, partial_coverage):
        """Repeated calls return the identical list object."""
        model, _ = partial_coverage
        first = model.get_all_tests()
        second = model.get_all_tests()
        assert first is second


# ---------------------------------------------------------------------------
# get_database_info()
# ---------------------------------------------------------------------------

class TestGetDatabaseInfo:
    """Checks on the dict returned by ``model.get_database_info()``."""

    def test_path_preserved(self, partial_coverage):
        """``path`` echoes the model's own ``db_path``."""
        model, _ = partial_coverage
        reported_path = model.get_database_info()["path"]
        assert reported_path == model.db_path

    def test_test_count_matches(self, partial_coverage):
        """The partial fixture reports exactly one test."""
        model, _ = partial_coverage
        assert model.get_database_info()["test_count"] == 1

    def test_multi_test_count(self, multi_test):
        """``test_count`` equals the number of expected test names."""
        model, want = multi_test
        reported = model.get_database_info()["test_count"]
        assert reported == len(want["test_names"])
# ---------------------------------------------------------------------------
# Test filter / cache invalidation
# ---------------------------------------------------------------------------

class TestTestFilter:
    """Exercise the test-filter accessors and the cache invalidation tied to them."""

    def test_set_and_get_filter(self, partial_coverage):
        """A fresh model has no filter; after setting one it is returned verbatim."""
        model, _ = partial_coverage
        wanted = "test1"
        assert model.get_test_filter() is None
        model.set_test_filter(wanted)
        assert model.get_test_filter() == wanted

    def test_clear_filter(self, partial_coverage):
        """Clearing a previously set filter brings the model back to ``None``."""
        model, _ = partial_coverage
        model.set_test_filter("test1")
        model.clear_test_filter()
        assert model.get_test_filter() is None

    def test_filter_invalidates_code_coverage_cache(self, partial_coverage):
        """Setting a filter must bust the code_coverage_summary cache."""
        model, _ = partial_coverage
        # Warm the cache first.
        # NOTE(review): this relies on get_summary() being what fills
        # "code_coverage_summary" — confirm, otherwise the check below is vacuous.
        model.get_summary()
        model.set_test_filter("test1")
        cache = model._cache
        assert "code_coverage_summary" not in cache

    def test_unfiltered_and_filtered_differ_when_partial(self, multi_test):
        """With a test filter the per-type count should be <= the unfiltered total.
        (Exact values are SQLite-only; for XML we just check the invariant.)
        """
        model, expected = multi_test
        unfiltered = model.get_coverage_by_type(CoverTypeT.CVGBIN, filtered=False)
        # A ``conn`` attribute on the database is used as the SQLite marker;
        # anything else (the XML path) is skipped.
        backend_is_sqlite = hasattr(model.db, "conn")
        if not backend_is_sqlite:
            pytest.skip("filter-by-test is SQLite-only")
        first_test = expected["test_names"][0]
        model.set_test_filter(first_test)
        filtered = model.get_coverage_by_type(CoverTypeT.CVGBIN, filtered=True)
        assert filtered["covered"] <= unfiltered["covered"]
# ---------------------------------------------------------------------------
# Regression: real VLT SQLite database
# ---------------------------------------------------------------------------

class TestVltModel:
    """Smoke checks on the model built by the ``vlt_model`` fixture."""

    def test_summary_non_empty(self, vlt_model):
        """The summary reports at least one bin."""
        total_bins = vlt_model.get_summary()["total_bins"]
        assert total_bins > 0

    def test_coverage_types_non_empty(self, vlt_model):
        """At least one cover type is reported."""
        reported = vlt_model.get_coverage_types()
        assert len(reported) > 0

    def test_database_info_has_path(self, vlt_model):
        """The database info carries a non-empty path."""
        reported_path = vlt_model.get_database_info()["path"]
        assert reported_path != ""
def _build_report(model):
    """Return the CoverageReport built from ``model.db`` (the oracle the tests compare against)."""
    # Imported lazily so that merely importing this module does not pull in the builder.
    from ucis.report.coverage_report_builder import CoverageReportBuilder
    database = model.db
    return CoverageReportBuilder.build(database)


def _collect_coverpoints(cg, result=None):
    """Map coverpoint name -> coverpoint for *cg* and every nested covergroup.

    Args:
        cg: object with a ``coverpoints`` iterable and, optionally, a
            ``covergroups`` iterable of nested groups.
        result: dict to fill in place; a new dict is created when ``None``.

    Returns:
        The filled dict (the same object as *result* when one was passed).
        A later coverpoint with an already-seen name replaces the earlier one.
    """
    found = {} if result is None else result
    found.update((cp.name, cp) for cp in cg.coverpoints)
    for child in getattr(cg, "covergroups", []):
        _collect_coverpoints(child, found)
    return found


def _report_total_bins(report):
    """Return the number of bins over all coverpoints reachable from ``report.covergroups``."""
    return sum(
        len(cp.bins)
        for top in report.covergroups
        for cp in _collect_coverpoints(top).values()
    )


def _report_covered_bins(report):
    """Return the number of bins whose ``hit`` flag is truthy across the whole report."""
    return sum(
        1
        for top in report.covergroups
        for cp in _collect_coverpoints(top).values()
        for b in cp.bins
        if b.hit
    )
# ---------------------------------------------------------------------------
# Overall totals
# ---------------------------------------------------------------------------

class TestBinCountParity:
    """Total and covered bin counts must be identical in the model and in the report."""

    def test_total_bins_match(self, partial_coverage):
        """Model ``total_bins`` equals the number of bins found in the report."""
        model, _ = partial_coverage
        report_total = _report_total_bins(_build_report(model))
        model_summary = model.get_summary()
        assert model_summary["total_bins"] == report_total, (
            f"total_bins: model={model_summary['total_bins']}, report={report_total}"
        )

    def test_covered_bins_match_when_at_least_1(self, partial_coverage):
        """When all bins have at_least=1, both sources must agree on covered count."""
        model, _ = partial_coverage
        report_covered = _report_covered_bins(_build_report(model))
        model_summary = model.get_summary()
        assert model_summary["covered_bins"] == report_covered, (
            f"covered_bins: model={model_summary['covered_bins']}, report={report_covered}"
        )

    def test_total_bins_zero_db(self, zero_coverage):
        """Totals still match when nothing has been hit."""
        model, _ = zero_coverage
        counted = _report_total_bins(_build_report(model))
        assert counted == model.get_summary()["total_bins"]

    def test_covered_bins_zero_db(self, zero_coverage):
        """Both sources report zero covered bins for the zero-coverage fixture."""
        model, _ = zero_coverage
        report = _build_report(model)
        assert _report_covered_bins(report) == 0
        assert model.get_summary()["covered_bins"] == 0

    def test_covered_bins_full_db(self, full_coverage):
        """Covered and total counts both match for the fully covered fixture."""
        model, _ = full_coverage
        report = _build_report(model)
        report_total = _report_total_bins(report)
        report_covered = _report_covered_bins(report)
        model_summary = model.get_summary()
        assert model_summary["covered_bins"] == report_covered
        assert model_summary["total_bins"] == report_total


# ---------------------------------------------------------------------------
# Per-coverpoint coverage % parity
# ---------------------------------------------------------------------------

class TestPerCoverpointParity:
    """Each entry of ``GapsView.gaps`` must match the report coverpoint of the same name."""

    def test_coverpoint_coverage_pct(self, partial_coverage):
        """Each coverpoint's coverage % in the TUI gaps view matches the report."""
        from ucis.tui.views.gaps_view import GapsView
        model, _ = partial_coverage
        report = _build_report(model)

        # Oracle: coverpoint name -> report coverpoint, accumulated over all groups.
        oracle = {}
        for cg in report.covergroups:
            _collect_coverpoints(cg, oracle)

        view = GapsView(StubApp(model))
        for gap in view.gaps:
            if gap.name not in oracle:
                continue  # gaps the report does not know about are not compared
            report_cp = oracle[gap.name]
            assert abs(gap.coverage - report_cp.coverage) < 0.01, (
                f"Coverpoint '{gap.name}': TUI={gap.coverage:.2f}%, "
                f"report={report_cp.coverage:.2f}%"
            )

    def test_per_bin_hit_count_matches_report(self, partial_coverage):
        """The hits/goal shown in the gaps view must match the report bin data."""
        from ucis.tui.views.gaps_view import GapsView
        model, _ = partial_coverage
        report = _build_report(model)

        oracle_cp = {}
        for cg in report.covergroups:
            _collect_coverpoints(cg, oracle_cp)

        view = GapsView(StubApp(model))
        for gap in view.gaps:
            if gap.name in oracle_cp:
                bins = oracle_cp[gap.name].bins
                report_covered = sum(1 for b in bins if b.hit)
                report_total = len(bins)
                assert gap.hits == report_covered, (
                    f"'{gap.name}' hits: TUI={gap.hits}, report={report_covered}"
                )
                assert gap.goal == report_total, (
                    f"'{gap.name}' goal: TUI={gap.goal}, report={report_total}"
                )
# ---------------------------------------------------------------------------
# Covergroup count parity
# ---------------------------------------------------------------------------

class TestCovergroupCountParity:
    """Loose bounds relating the report's top-level covergroups to the model's count."""

    def test_covergroup_count(self, partial_coverage):
        """The report never lists more top-level covergroups than the model counts."""
        model, _ = partial_coverage
        report = _build_report(model)
        # CoverageReportBuilder only counts type-level (non-instance) groups
        # at the top level; the model counts all groups including COVERINSTANCE.
        # Hence only an upper bound is asserted, not equality.
        report_groups = len(report.covergroups)
        assert report_groups <= model.get_summary()["covergroups"]

    def test_covergroup_count_multi_test(self, multi_test):
        """The multi-test fixture yields at least one covergroup in the report."""
        model, _ = multi_test
        report_groups = _build_report(model).covergroups
        assert len(report_groups) >= 1


# ---------------------------------------------------------------------------
# Overall coverage % parity
# ---------------------------------------------------------------------------

class TestOverallCoverageParity:
    """The headline coverage percentage must agree between model and report."""

    def test_overall_coverage_pct(self, partial_coverage):
        """
        The overall coverage shown by the TUI (CoverageModel.get_summary())
        must agree with what the text report would show, within 0.1 %.
        """
        model, _ = partial_coverage
        report = _build_report(model)

        report_total = _report_total_bins(report)
        report_covered = _report_covered_bins(report)
        if report_total:
            report_pct = report_covered / report_total * 100
        else:
            report_pct = 0.0  # empty report: define coverage as 0 %

        model_pct = model.get_summary()["overall_coverage"]
        assert abs(model_pct - report_pct) < 0.1, (
            f"Overall coverage: model={model_pct:.2f}%, report={report_pct:.2f}%"
        )

    def test_zero_db_both_zero(self, zero_coverage):
        """Zero-coverage fixture: report has no hit bins and the model reports 0.0 %."""
        model, _ = zero_coverage
        hit_bins = _report_covered_bins(_build_report(model))
        assert hit_bins == 0
        assert model.get_summary()["overall_coverage"] == 0.0

    def test_full_db_both_100(self, full_coverage):
        """Full-coverage fixture: every report bin is hit and the model reports ~100 %."""
        model, _ = full_coverage
        report = _build_report(model)
        all_bins = _report_total_bins(report)
        hit_bins = _report_covered_bins(report)
        assert hit_bins == all_bins
        assert abs(model.get_summary()["overall_coverage"] - 100.0) < 0.01
# ---------------------------------------------------------------------------
# at_least > 1: model and report must agree
# ---------------------------------------------------------------------------

class TestKnownDivergenceAtLeastGt1:
    """
    Guard against the model and the report disagreeing when ``at_least > 1``.

    Two "covered" rules are in play in this code base:
      CoverageModel:          bin is "covered" if data > 0
      CoverageReportBuilder:  bin is "covered" if data >= at_least
    The class name is historical.  The test in this class no longer documents
    a divergence: it asserts that both layers return the same covered-bin
    count for a bin created with ``at_least=5`` and ``data=2``.
    """

    def _make_at_least_db(self, tmp_path, backend):
        """
        Create a DB with one coverpoint whose bin is intended to have
        at_least=5 and data=2, and wrap it in a ``CoverageModel``.

        NOTE: Both the XML and SQLite backends currently store ``at_least`` as
        the ``goal`` field of ``CoverData`` (which defaults to 1), so the
        requested ``at_least=5`` is silently stored as 1.  As a result the
        bin is seen as covered by *both* the model and the report (2 >= 1).
        This is a known backend limitation; until it is lifted, the agreement
        asserted by the test does not exercise a real ``at_least > 1`` bin.

        Args:
            tmp_path: directory (pytest ``tmp_path``) receiving the database file.
            backend: ``"sqlite"`` writes ``al5.db`` through ``SqliteUCIS``; any
                other value builds an in-memory database and writes ``al5.xml``.

        Returns:
            ``(model, expected)`` where *expected* is
            ``{"at_least": 5, "data": 2}`` (the values requested for the bin).
        """
        from ucis.mem.mem_factory import MemFactory
        from ucis import (
            UCIS_HISTORYNODE_TEST, UCIS_TESTSTATUS_OK, UCIS_OTHER,
            UCIS_DU_MODULE, UCIS_ENABLED_STMT,
            UCIS_INST_ONCE, UCIS_SCOPE_UNDER_DU, UCIS_INSTANCE, UCIS_VLOG,
        )
        from ucis.source_info import SourceInfo
        from ucis.test_data import TestData
        from ucis.tui.models.coverage_model import CoverageModel

        def builder(db):
            """Populate *db* with one test, one instance and the single bin."""
            node = db.createHistoryNode(None, "t1", "t1", UCIS_HISTORYNODE_TEST)
            node.setTestData(TestData(
                teststatus=UCIS_TESTSTATUS_OK,
                toolcategory="test",
                date="20240101000000",
            ))
            file_h = db.createFileHandle("d.sv", "/rtl")
            src = SourceInfo(file_h, 1, 0)
            du = db.createScope("work.m", src, 1, UCIS_OTHER, UCIS_DU_MODULE,
                                UCIS_ENABLED_STMT | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU)
            inst = db.createInstance("tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE)
            cg = inst.createCovergroup("cg", src, 1, UCIS_OTHER)
            cp = cg.createCoverpoint("cp_al5", src, 1, UCIS_VLOG)
            # Intended: at_least=5, data=2.  Both backends store at_least as 1.
            cp.createBin("bin_partial", src, 5, 2, "bin_partial")
            return {"at_least": 5, "data": 2}

        if backend == "sqlite":
            from ucis.sqlite.sqlite_ucis import SqliteUCIS
            db_path = str(tmp_path / "al5.db")
            db = SqliteUCIS(db_path)
            try:
                expected = builder(db)
            finally:
                # Release the SQLite handle even when the builder raises, so a
                # failing build does not leave the file open.
                db.close()
            return CoverageModel(db_path), expected

        from ucis.xml.xml_factory import XmlFactory
        db = MemFactory.create()
        expected = builder(db)
        xml_path = str(tmp_path / "al5.xml")
        XmlFactory.write(db, xml_path)
        return CoverageModel(xml_path), expected

    @pytest.mark.parametrize("backend", ["xml", "sqlite"])
    def test_model_and_report_agree(self, tmp_path, backend):
        """Model and report must agree — the divergence (data>0 vs data>=at_least) is fixed."""
        from ucis.report.coverage_report_builder import CoverageReportBuilder
        model, _ = self._make_at_least_db(tmp_path, backend)
        model_covered = model.get_summary()["covered_bins"]
        report_covered = _report_covered_bins(CoverageReportBuilder.build(model.db))
        assert model_covered == report_covered, (
            f"Model ({model_covered}) and report ({report_covered}) disagree"
        )
# ---------------------------------------------------------------------------
# Regression: real VLT SQLite database – basic parity smoke test
# ---------------------------------------------------------------------------

class TestVltReportParity:
    """Parity smoke test on the model built by the ``vlt_model`` fixture."""

    def test_bin_totals_consistent(self, vlt_model):
        """Report bin total must not exceed model bin total."""
        report_total = _report_total_bins(_build_report(vlt_model))
        model_total = vlt_model.get_summary()["total_bins"]
        # Report only walks INSTANCE → COVERGROUP; model counts everything.
        # They may differ but report total should be <= model total.
        # A model total of zero is tolerated as well.
        within_bound = report_total <= model_total
        assert within_bound or model_total == 0


# ---------------------------------------------------------------------------
# Helper import needed in test body
# ---------------------------------------------------------------------------

from tests.tui_fixtures import StubApp  # noqa: E402  (after class defs)
# ---------------------------------------------------------------------------
# GapsView
# ---------------------------------------------------------------------------

class TestGapsViewData:
    """Headless checks on the ``gaps`` list that ``GapsView`` builds from a model."""

    @staticmethod
    def _make_view(model):
        """Return a ``GapsView`` wired to *model* through a ``StubApp``."""
        from ucis.tui.views.gaps_view import GapsView
        return GapsView(StubApp(model))

    def test_all_gaps_below_threshold(self, partial_coverage):
        """Every GapItem must have coverage < threshold."""
        model, _ = partial_coverage
        view = self._make_view(model)
        for gap in view.gaps:
            assert gap.coverage < view.threshold, (
                f"Gap '{gap.name}' has {gap.coverage}% which is >= threshold {view.threshold}%"
            )

    def test_gap_count_matches_uncovered_coverpoints(self, partial_coverage):
        """There are 2 coverpoints in the partial fixture, both at 50 % → 2 gaps."""
        model, expected = partial_coverage
        view = self._make_view(model)
        assert len(view.gaps) == len(expected["gaps"]), (
            f"Expected {len(expected['gaps'])} gaps, got {len(view.gaps)}"
        )

    def test_gaps_sorted_ascending(self, partial_coverage):
        """GapsView sorts by coverage ascending."""
        model, _ = partial_coverage
        view = self._make_view(model)
        coverages = [g.coverage for g in view.gaps]
        assert coverages == sorted(coverages), f"Gaps not sorted: {coverages}"

    def test_gap_hits_and_goal_consistent(self, partial_coverage):
        """gap.coverage should equal hits/goal * 100."""
        model, _ = partial_coverage
        view = self._make_view(model)
        for gap in view.gaps:
            if gap.goal > 0:  # a zero goal has no defined percentage
                expected_pct = gap.hits / gap.goal * 100
                assert abs(gap.coverage - expected_pct) < 0.01, (
                    f"Gap '{gap.name}': coverage={gap.coverage} but hits/goal={expected_pct}"
                )

    def test_no_gaps_when_fully_covered(self, full_coverage):
        """A fully-covered database should have zero gaps."""
        model, _ = full_coverage
        view = self._make_view(model)
        assert len(view.gaps) == 0, f"Expected 0 gaps but got {len(view.gaps)}"

    def test_all_bins_are_gaps_when_zero_coverage(self, zero_coverage):
        """With zero coverage every coverpoint is a gap."""
        model, _ = zero_coverage
        view = self._make_view(model)
        assert len(view.gaps) >= 1
        for gap in view.gaps:
            assert gap.coverage == 0.0

    def test_gaps_coverage_values_correct(self, partial_coverage):
        """Coverage percentages must match what the text report would show."""
        from ucis.report.coverage_report_builder import CoverageReportBuilder
        model, _ = partial_coverage
        view = self._make_view(model)

        # Build oracle from same underlying db
        report = CoverageReportBuilder.build(model.db)
        report_coverages = {}
        for cg in report.covergroups:
            _collect_coverpoint_coverages(cg, report_coverages)

        for gap in view.gaps:
            if gap.name in report_coverages:
                oracle_pct = report_coverages[gap.name]
                assert abs(gap.coverage - oracle_pct) < 0.01, (
                    f"Gap '{gap.name}': TUI={gap.coverage:.2f}%, report={oracle_pct:.2f}%"
                )

    def test_gap_navigation_keys(self, partial_coverage):
        """Arrow keys must update selected_index correctly."""
        model, _ = partial_coverage
        view = self._make_view(model)
        # Moving down to index 1 requires a second entry; with zero or one gap
        # the assertion below cannot hold, so skip instead of failing.
        if len(view.gaps) < 2:
            pytest.skip("Need at least two gaps to navigate")
        view.selected_index = 0
        view.handle_key("down")
        assert view.selected_index == 1
        view.handle_key("up")
        assert view.selected_index == 0

    def test_gap_navigation_does_not_go_negative(self, partial_coverage):
        """Pressing "up" on the first entry leaves the selection at index 0."""
        model, _ = partial_coverage
        view = self._make_view(model)
        view.selected_index = 0
        view.handle_key("up")
        assert view.selected_index == 0
# ---------------------------------------------------------------------------
# HierarchyView
# ---------------------------------------------------------------------------

class TestHierarchyViewData:
    """Headless checks on the node structures that ``HierarchyView`` builds."""

    @staticmethod
    def _view_for(fixture):
        """Build a ``HierarchyView`` for the model half of a ``(model, expected)`` fixture."""
        from ucis.tui.views.hierarchy_view import HierarchyView
        model, _ = fixture
        return HierarchyView(StubApp(model))

    def test_root_nodes_non_empty(self, partial_coverage):
        """At least one root node is produced."""
        tree = self._view_for(partial_coverage)
        assert len(tree.root_nodes) >= 1

    def test_selected_node_is_set(self, partial_coverage):
        """A node is selected right after construction."""
        tree = self._view_for(partial_coverage)
        assert tree.selected_node is not None

    def test_all_nodes_list_non_empty(self, partial_coverage):
        """The flattened node list holds at least one node."""
        tree = self._view_for(partial_coverage)
        assert len(tree._all_nodes) >= 1

    def test_node_coverage_total_non_negative(self, partial_coverage):
        """Every node's total count must be >= 0."""
        tree = self._view_for(partial_coverage)
        for node in tree._all_nodes:
            assert node.total >= 0
            assert node.covered >= 0

    def test_node_covered_lte_total(self, partial_coverage):
        """No node may report more covered items than it has in total."""
        view = self._view_for(partial_coverage)
        for node in view._all_nodes:
            assert node.covered <= node.total, (
                f"Node '{node.name}': covered={node.covered} > total={node.total}"
            )

    def test_coverage_percent_calculation(self, partial_coverage):
        """``get_coverage_percent()`` is covered/total*100, or 0.0 for an empty node."""
        tree = self._view_for(partial_coverage)
        for node in tree._all_nodes:
            pct = node.get_coverage_percent()
            if node.total != 0:
                want = node.covered / node.total * 100
                assert abs(pct - want) < 0.01
                continue
            assert pct == 0.0

    def test_down_navigation_changes_selection(self, partial_coverage):
        """"down" moves the selection unless the view holds a single node."""
        tree = self._view_for(partial_coverage)
        before = tree.selected_node
        tree.handle_key("down")
        assert tree.selected_node != before or len(tree._all_nodes) == 1

    def test_up_after_down_returns_to_original(self, partial_coverage):
        """"down" followed by "up" restores the original selection."""
        tree = self._view_for(partial_coverage)
        before = tree.selected_node
        for key in ("down", "up"):
            tree.handle_key(key)
        assert tree.selected_node == before
# ---------------------------------------------------------------------------
# MetricsView
# ---------------------------------------------------------------------------

class TestMetricsViewData:
    """Headless checks on the ``metrics`` dict that ``MetricsView`` computes."""

    @staticmethod
    def _metrics_for(model):
        """Instantiate ``MetricsView`` for *model* and return its ``metrics`` dict."""
        from ucis.tui.views.metrics_view import MetricsView
        return MetricsView(StubApp(model)).metrics

    def test_covergroup_count(self, partial_coverage):
        """``total_covergroups`` equals the fixture's covergroup count."""
        model, want = partial_coverage
        assert self._metrics_for(model)["total_covergroups"] == want["covergroups"]

    def test_coverpoint_count(self, partial_coverage):
        """``total_coverpoints`` equals the fixture's coverpoint count."""
        model, want = partial_coverage
        assert self._metrics_for(model)["total_coverpoints"] == want["coverpoints"]

    def test_total_bins(self, partial_coverage):
        """``total_bins`` equals the fixture's bin count."""
        model, want = partial_coverage
        assert self._metrics_for(model)["total_bins"] == want["total_bins"]

    def test_covered_bins(self, partial_coverage):
        """``covered_bins`` equals the fixture's covered-bin count."""
        model, want = partial_coverage
        assert self._metrics_for(model)["covered_bins"] == want["covered_bins"]

    def test_bin_distribution_sums_to_total(self, partial_coverage):
        """The four histogram buckets must sum to total_bins."""
        model, expected = partial_coverage
        dist = self._metrics_for(model)["bin_hit_distribution"]
        bucket_total = sum(dist.values())
        assert bucket_total == expected["total_bins"], (
            f"Distribution sum {bucket_total} != total_bins {expected['total_bins']}: {dist}"
        )

    def test_zero_hit_bucket_correct(self, partial_coverage):
        """Partial fixture: 3 uncovered bins → bucket '0' must be 3."""
        model, want = partial_coverage
        metrics = self._metrics_for(model)
        uncovered = want["total_bins"] - want["covered_bins"]
        assert metrics["bin_hit_distribution"]["0"] == uncovered

    def test_zero_coverage_all_in_zero_bucket(self, zero_coverage):
        """With nothing hit, every bin lands in bucket '0'."""
        model, want = zero_coverage
        zero_bucket = self._metrics_for(model)["bin_hit_distribution"]["0"]
        assert zero_bucket == want["total_bins"]

    def test_full_coverage_zero_in_zero_bucket(self, full_coverage):
        """With everything hit, bucket '0' is empty."""
        model, _ = full_coverage
        zero_bucket = self._metrics_for(model)["bin_hit_distribution"]["0"]
        assert zero_bucket == 0
# ---------------------------------------------------------------------------
# TestHistoryView
# ---------------------------------------------------------------------------

class TestTestHistoryViewData:
    """Headless checks on the ``tests`` list and key handling of ``TestHistoryView``."""

    def _make_view(self, model):
        """Return a ``TestHistoryView`` for *model* after calling ``on_enter()``."""
        from ucis.tui.views.test_history_view import TestHistoryView
        history = TestHistoryView(StubApp(model))
        history.on_enter()  # triggers _load_tests()
        return history

    def test_test_count(self, partial_coverage):
        """The partial fixture shows exactly one test."""
        model, _ = partial_coverage
        assert len(self._make_view(model).tests) == 1

    def test_test_name(self, partial_coverage):
        """The single test of the partial fixture is named ``test1``."""
        model, _ = partial_coverage
        first_row = self._make_view(model).tests[0]
        assert first_row["name"] == "test1"

    def test_multi_test_count(self, multi_test):
        """One row is shown per expected test name."""
        model, want = multi_test
        rows = self._make_view(model).tests
        assert len(rows) == len(want["test_names"])

    def test_all_multi_test_names_present(self, multi_test):
        """Every expected test name shows up in the view."""
        model, want = multi_test
        shown = {row["name"] for row in self._make_view(model).tests}
        for name in want["test_names"]:
            assert name in shown

    def test_sort_by_name_ascending(self, multi_test):
        """After switching to date sort and back, names are in ascending order."""
        model, _ = multi_test
        history = self._make_view(model)
        history.handle_key("d")  # switch to date sort first
        history.handle_key("n")  # switch back to name → ascending
        shown = [row["name"] for row in history.tests]
        assert shown == sorted(shown)

    def test_sort_by_name_toggle_descending(self, multi_test):
        """A second "n" press flips the name sort to descending."""
        model, _ = multi_test
        history = self._make_view(model)
        history.handle_key("d")  # switch to date sort first
        history.handle_key("n")  # sort by name ascending
        history.handle_key("n")  # toggle → descending
        shown = [row["name"] for row in history.tests]
        assert shown == sorted(shown, reverse=True)

    def test_navigation_changes_selection(self, multi_test):
        """"down" from the first row selects the second row."""
        model, _ = multi_test
        history = self._make_view(model)
        history.selected_index = 0
        history.handle_key("down")
        assert history.selected_index == 1

    def test_navigation_clamped_at_zero(self, multi_test):
        """"up" from the first row leaves the selection at index 0."""
        model, _ = multi_test
        history = self._make_view(model)
        history.selected_index = 0
        history.handle_key("up")
        assert history.selected_index == 0

    def test_filter_by_test_sets_model_filter(self, multi_test):
        """Pressing 'f' should set the model test filter."""
        model, _ = multi_test
        history = self._make_view(model)
        history.selected_index = 0
        selected_name = history.tests[0]["name"]
        history.handle_key("f")
        assert model.get_test_filter() == selected_name
# ---------------------------------------------------------------------------
# CodeCoverageView
# ---------------------------------------------------------------------------

class TestCodeCoverageViewData:
    """Headless checks on ``CodeCoverageView.file_coverage`` for the ``vlt_model`` fixture."""

    @staticmethod
    def _file_rows(model):
        """Instantiate ``CodeCoverageView`` for *model* and return ``file_coverage``."""
        from ucis.tui.views.code_coverage_view import CodeCoverageView
        return CodeCoverageView(StubApp(model)).file_coverage

    def test_file_coverage_list_type(self, vlt_model):
        """file_coverage must be a list (possibly empty for non-code dbs)."""
        rows = self._file_rows(vlt_model)
        assert isinstance(rows, list)

    def test_covered_lte_total_per_file(self, vlt_model):
        """For every file, line_covered <= line_total."""
        for fi in self._file_rows(vlt_model):
            assert fi.line_covered <= fi.line_total, (
                f"{fi.file_path}: covered={fi.line_covered} > total={fi.line_total}"
            )

    def test_coverage_percentage_property(self, vlt_model):
        """line_coverage property must be arithmetically correct."""
        for fi in self._file_rows(vlt_model):
            if not fi.line_total > 0:
                continue  # files without lines have no percentage to verify
            want = fi.line_covered / fi.line_total * 100
            assert abs(fi.line_coverage - want) < 0.01
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------

def _collect_coverpoint_coverages(cg_node, result: dict):
    """Fill *result* with ``{coverpoint_name: coverage_pct}`` for a covergroup tree.

    Args:
        cg_node: object with a ``coverpoints`` iterable and, optionally, a
            ``covergroups`` iterable of nested groups.
        result: dict updated in place; nothing is returned.

    Groups are visited parent first, then children in their listed order, so
    a later coverpoint with an already-seen name overwrites the earlier value.
    """
    pending = [cg_node]
    while pending:
        node = pending.pop()
        result.update((cp.name, cp.coverage) for cp in node.coverpoints)
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(list(getattr(node, "covergroups", []))))


# ---------------------------------------------------------------------------
# StubApp – minimal app object required by BaseView
# ---------------------------------------------------------------------------

class StubApp:
    """Bare-bones application object for constructing views without a running TUI.

    Exposes the given model as ``coverage_model`` plus two independent
    ``MagicMock`` objects as ``status_bar`` and ``controller``.
    """

    def __init__(self, model: "CoverageModel"):
        self.coverage_model = model
        self.status_bar, self.controller = MagicMock(), MagicMock()
UCIS_HISTORYNODE_TEST) + node.setTestData(TestData( + teststatus=UCIS_TESTSTATUS_OK, + toolcategory="UCIS:simulator", + date="20240101000000", + )) + return node + + +def _add_instance(db): + """Add a minimal DU + instance and return the instance scope.""" + file_h = db.createFileHandle("design.sv", "/rtl") + du = db.createScope( + "work.top", SourceInfo(file_h, 1, 0), 1, + UCIS_OTHER, UCIS_DU_MODULE, + UCIS_ENABLED_STMT | UCIS_ENABLED_BRANCH | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU, + ) + inst = db.createInstance( + "tb", None, 1, UCIS_OTHER, UCIS_INSTANCE, du, UCIS_INST_ONCE + ) + return inst, file_h + + +def _make_partial_coverage_db(db): + """ + Build a database with known 50 % functional coverage. + + Structure + --------- + tb (instance) + cg1 (covergroup) + cp1 (coverpoint) bins: a(hit), b(hit), c(miss), d(miss) → 2/4 = 50 % + cg2 (covergroup) + cp2 (coverpoint) bins: x(hit), y(miss) → 1/2 = 50 % + + Overall: 3 covered / 6 total = 50 % + """ + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + + cg1 = inst.createCovergroup("cg1", src, 1, UCIS_OTHER) + cp1 = cg1.createCoverpoint("cp1", src, 1, UCIS_VLOG) + cp1.createBin("a", src, 1, 5, "a") # hit (5 >= 1) + cp1.createBin("b", src, 1, 3, "b") # hit (3 >= 1) + cp1.createBin("c", src, 1, 0, "c") # miss + cp1.createBin("d", src, 1, 0, "d") # miss + + cg2 = inst.createCovergroup("cg2", src, 1, UCIS_OTHER) + cp2 = cg2.createCoverpoint("cp2", src, 1, UCIS_VLOG) + cp2.createBin("x", src, 1, 10, "x") # hit + cp2.createBin("y", src, 1, 0, "y") # miss + + expected = { + "total_bins": 6, + "covered_bins": 3, + "overall_coverage": 50.0, + "covergroups": 2, + "coverpoints": 2, + "gaps": [ + # (name, coverage_pct) + ("cp1", 50.0), + ("cp2", 50.0), + ], + } + return expected + + +def _make_zero_coverage_db(db): + """Database where no bins have been hit.""" + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + + cg = inst.createCovergroup("cg_zero", src, 1, 
UCIS_OTHER) + cp = cg.createCoverpoint("cp_zero", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 0, "b0") + cp.createBin("b1", src, 1, 0, "b1") + cp.createBin("b2", src, 1, 0, "b2") + + expected = { + "total_bins": 3, + "covered_bins": 0, + "overall_coverage": 0.0, + "covergroups": 1, + "coverpoints": 1, + } + return expected + + +def _make_full_coverage_db(db): + """Database where every bin has been hit.""" + _add_test(db) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + + cg = inst.createCovergroup("cg_full", src, 1, UCIS_OTHER) + cp = cg.createCoverpoint("cp_full", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 1, "b0") + cp.createBin("b1", src, 1, 2, "b1") + cp.createBin("b2", src, 1, 7, "b2") + + expected = { + "total_bins": 3, + "covered_bins": 3, + "overall_coverage": 100.0, + "covergroups": 1, + "coverpoints": 1, + } + return expected + + +def _make_multi_test_db(db): + """ + Three tests each contributing unique bins. + + tb / cg_mt / cp_mt bins b0..b5 + test_a hits b0, b1 + test_b hits b2, b3 + test_c hits b4, b5 (not added → miss) + + Overall: 4 covered / 6 total ≈ 66.7 % + """ + for name in ("test_a", "test_b", "test_c"): + _add_test(db, name) + inst, file_h = _add_instance(db) + src = SourceInfo(file_h, 3, 0) + + cg = inst.createCovergroup("cg_mt", src, 1, UCIS_OTHER) + cp = cg.createCoverpoint("cp_mt", src, 1, UCIS_VLOG) + cp.createBin("b0", src, 1, 1, "b0") + cp.createBin("b1", src, 1, 1, "b1") + cp.createBin("b2", src, 1, 1, "b2") + cp.createBin("b3", src, 1, 1, "b3") + cp.createBin("b4", src, 1, 0, "b4") + cp.createBin("b5", src, 1, 0, "b5") + + expected = { + "total_bins": 6, + "covered_bins": 4, + "covergroups": 1, + "coverpoints": 1, + "test_names": ["test_a", "test_b", "test_c"], + } + return expected + + +# --------------------------------------------------------------------------- +# CoverageModel factory helpers (API path via XML, SQLite path) +# 
# ---------------------------------------------------------------------------

def _model_from_mem_db(db, tmp_path):
    """Write *db* to XML then load via CoverageModel (exercises API/XML path).

    Args:
        db: Populated database object handed to ``XmlFactory.write``.
        tmp_path: Directory object supporting ``/``; the XML file is written
            to ``tmp_path / "test.xml"``.

    Returns:
        A ``CoverageModel`` constructed from the path of the written XML file.
    """
    from ucis.xml.xml_factory import XmlFactory
    xml_path = str(tmp_path / "test.xml")
    XmlFactory.write(db, xml_path)
    return CoverageModel(xml_path)


def _model_from_sqlite(builder_fn, tmp_path):
    """Create a fresh SqliteUCIS, populate it, return CoverageModel (exercises SQLite path).

    Args:
        builder_fn: Callable taking the database and returning the
            ``expected`` dict for it.
        tmp_path: Directory object supporting ``/``; the database is created
            at ``tmp_path / "test.db"``.

    Returns:
        A ``(model, expected)`` tuple, where ``model`` is a ``CoverageModel``
        constructed from the database path and ``expected`` is whatever
        ``builder_fn`` returned.

    Raises:
        Whatever ``builder_fn`` raises; the database handle is closed first.
    """
    from ucis.sqlite.sqlite_ucis import SqliteUCIS
    db_path = str(tmp_path / "test.db")
    db = SqliteUCIS(db_path)
    try:
        expected = builder_fn(db)
    finally:
        # Close even when the builder raises so a failing builder does not
        # leak the handle; on success this still happens before the file is
        # reopened by CoverageModel, as before.
        db.close()
    model = CoverageModel(db_path)
    return model, expected


# ---------------------------------------------------------------------------
# Pytest fixtures
# ---------------------------------------------------------------------------

# Parametrize over (backend_label, builder_fn) pairs so the same
# test body exercises both the UCIS-API path (XML) and the SQLite fast path.
+ +PARTIAL_BUILDERS = [ + pytest.param("xml", _make_partial_coverage_db, id="xml"), + pytest.param("sqlite", _make_partial_coverage_db, id="sqlite"), +] + +ZERO_BUILDERS = [ + pytest.param("xml", _make_zero_coverage_db, id="xml"), + pytest.param("sqlite", _make_zero_coverage_db, id="sqlite"), +] + +FULL_BUILDERS = [ + pytest.param("xml", _make_full_coverage_db, id="xml"), + pytest.param("sqlite", _make_full_coverage_db, id="sqlite"), +] + +MULTI_TEST_BUILDERS = [ + pytest.param("xml", _make_multi_test_db, id="xml"), + pytest.param("sqlite", _make_multi_test_db, id="sqlite"), +] + + +def make_model_and_expected(backend: str, builder_fn, tmp_path): + """Create (CoverageModel, expected_dict) for the given backend and builder.""" + if backend == "sqlite": + return _model_from_sqlite(builder_fn, tmp_path) + else: # xml / mem + db = MemFactory.create() + expected = builder_fn(db) + model = _model_from_mem_db(db, tmp_path) + return model, expected + + +@pytest.fixture(params=["xml", "sqlite"]) +def partial_coverage(request, tmp_path): + """(model, expected) for a 50% coverage database.""" + model, expected = make_model_and_expected( + request.param, _make_partial_coverage_db, tmp_path + ) + yield model, expected + model.close() + + +@pytest.fixture(params=["xml", "sqlite"]) +def zero_coverage(request, tmp_path): + """(model, expected) for a zero-coverage database.""" + model, expected = make_model_and_expected( + request.param, _make_zero_coverage_db, tmp_path + ) + yield model, expected + model.close() + + +@pytest.fixture(params=["xml", "sqlite"]) +def full_coverage(request, tmp_path): + """(model, expected) for a 100% coverage database.""" + model, expected = make_model_and_expected( + request.param, _make_full_coverage_db, tmp_path + ) + yield model, expected + model.close() + + +@pytest.fixture(params=["xml", "sqlite"]) +def multi_test(request, tmp_path): + """(model, expected) for a multi-test database.""" + model, expected = make_model_and_expected( + 
request.param, _make_multi_test_db, tmp_path + ) + yield model, expected + model.close() + + +# File-based regression fixture (uses the committed test_vlt.cdb SQLite file) +VLT_CDB = os.path.join(os.path.dirname(__file__), "..", "test_vlt.cdb") + + +@pytest.fixture +def vlt_model(): + """CoverageModel loaded from the committed test_vlt.cdb SQLite fixture.""" + if not os.path.exists(VLT_CDB): + pytest.skip(f"test_vlt.cdb not found at {VLT_CDB}") + model = CoverageModel(VLT_CDB) + yield model + model.close() diff --git a/tests/unit/ncdb/test_bucket_index.py b/tests/unit/ncdb/test_bucket_index.py new file mode 100644 index 0000000..4db626a --- /dev/null +++ b/tests/unit/ncdb/test_bucket_index.py @@ -0,0 +1,113 @@ +"""Unit tests for BucketIndex (history/bucket_index.bin).""" +import pytest +from ucis.ncdb.bucket_index import BucketIndex + + +def _idx(*buckets): + """Build a BucketIndex from (seq, ts_start, ts_end, records, fails, min_nid, max_nid).""" + idx = BucketIndex() + for seq, ts_start, ts_end, records, fails, min_nid, max_nid in buckets: + idx.add_bucket(seq, ts_start, ts_end, records, fails, min_nid, max_nid) + return idx + + +def test_add_and_query_range(): + idx = _idx( + (0, 1000, 1999, 100, 10, 0, 5), + (1, 2000, 2999, 200, 5, 0, 7), + (2, 3000, 3999, 50, 0, 3, 9), + ) + hits = idx.buckets_in_range(1500, 2500) + seqs = [e.bucket_seq for e in hits] + assert 0 in seqs and 1 in seqs and 2 not in seqs + + +def test_buckets_for_name(): + idx = _idx( + (0, 1000, 1999, 100, 10, 0, 5), + (1, 2000, 2999, 200, 5, 6, 9), + ) + # name_id=3 is in bucket 0 only + hits = idx.buckets_for_name(3) + assert len(hits) == 1 and hits[0].bucket_seq == 0 + + # name_id=7 is in bucket 1 only + hits = idx.buckets_for_name(7) + assert len(hits) == 1 and hits[0].bucket_seq == 1 + + # name_id=10 is in neither + assert idx.buckets_for_name(10) == [] + + +def test_buckets_for_name_with_time_filter(): + idx = _idx( + (0, 1000, 1999, 100, 0, 0, 9), + (1, 2000, 2999, 100, 0, 0, 9), + ) + 
hits = idx.buckets_for_name(5, ts_from=2000) + assert len(hits) == 1 and hits[0].bucket_seq == 1 + + +def test_pass_rate_series(): + idx = _idx( + (0, 1000, 1999, 100, 10, 0, 5), + (1, 2000, 2999, 200, 0, 0, 5), + ) + series = idx.pass_rate_series() + assert len(series) == 2 + ts0, rate0 = series[0] + ts1, rate1 = series[1] + assert ts0 == 1000 + assert abs(rate0 - 0.90) < 1e-6 + assert abs(rate1 - 1.00) < 1e-6 + + +def test_serialize_deserialize_empty(): + idx = BucketIndex() + idx2 = BucketIndex.deserialize(idx.serialize()) + assert idx2.num_buckets == 0 + + +def test_serialize_deserialize_multiple(): + idx = _idx( + (0, 1000, 1999, 100, 10, 0, 5), + (1, 2000, 2999, 200, 5, 0, 7), + ) + data = idx.serialize() + idx2 = BucketIndex.deserialize(data) + assert idx2.num_buckets == 2 + e = idx2.buckets_in_range(1000, 1999) + assert len(e) == 1 and e[0].fail_count == 10 + + +def test_serialize_3650_entries_size(): + """10 years of buckets (one per day) should be well under 200 KB.""" + idx = BucketIndex() + for i in range(3650): + idx.add_bucket(i, 1700000000 + i * 86400, 1700000000 + (i + 1) * 86400 - 1, + 10000, 100, 0, 999) + data = idx.serialize() + assert len(data) < 200 * 1024, f"Index too large: {len(data)} bytes" + + +def test_next_seq(): + idx = BucketIndex() + assert idx.next_seq() == 0 + idx.add_bucket(0, 1000, 1999, 100, 0, 0, 0) + assert idx.next_seq() == 1 + idx.add_bucket(1, 2000, 2999, 100, 0, 0, 0) + assert idx.next_seq() == 2 + + +def test_add_bucket_replaces_existing(): + idx = BucketIndex() + idx.add_bucket(0, 1000, 1999, 100, 10, 0, 5) + idx.add_bucket(0, 1000, 1999, 200, 20, 0, 5) # update same seq + assert idx.num_buckets == 1 + assert idx._entries[0].num_records == 200 + + +def test_bad_magic_raises(): + data = b"\x00\x00\x00\x00" + b"\x00" * 8 + with pytest.raises(ValueError, match="Bad magic"): + BucketIndex.deserialize(data) diff --git a/tests/unit/ncdb/test_contrib_index.py b/tests/unit/ncdb/test_contrib_index.py new file mode 100644 index 
0000000..092b902 --- /dev/null +++ b/tests/unit/ncdb/test_contrib_index.py @@ -0,0 +1,94 @@ +"""Unit tests for ContribIndex (contrib_index.bin).""" +import pytest +from ucis.ncdb.contrib_index import ( + ContribIndex, + POLICY_ALL, POLICY_PASS_ONLY, POLICY_EXCLUDE_ERROR_RERUN, POLICY_STRICT, + FLAG_IS_RERUN, FLAG_FIRST_ATTEMPT_PASSED, +) +from ucis.ncdb.constants import HIST_STATUS_OK, HIST_STATUS_FAIL, HIST_STATUS_ERROR + + +def test_add_and_passing_run_ids_pass_only(): + ci = ContribIndex() + ci.add_entry(0, 0, HIST_STATUS_OK, 0) + ci.add_entry(1, 1, HIST_STATUS_FAIL, 0) + ci.add_entry(2, 0, HIST_STATUS_OK, 0) + assert ci.passing_run_ids(POLICY_PASS_ONLY) == [0, 2] + + +def test_policy_all(): + ci = ContribIndex() + ci.add_entry(0, 0, HIST_STATUS_OK, 0) + ci.add_entry(1, 0, HIST_STATUS_FAIL, 0) + assert ci.passing_run_ids(POLICY_ALL) == [0, 1] + + +def test_policy_strict_excludes_rerun_without_first_pass(): + ci = ContribIndex() + ci.add_entry(0, 0, HIST_STATUS_OK, 0) # normal pass → included + ci.add_entry(1, 0, HIST_STATUS_OK, FLAG_IS_RERUN) # rerun, first attempt failed → excluded + ci.add_entry(2, 0, HIST_STATUS_OK, + FLAG_IS_RERUN | FLAG_FIRST_ATTEMPT_PASSED) # rerun, first also passed → included + assert ci.passing_run_ids(POLICY_STRICT) == [0, 2] + + +def test_policy_exclude_error_rerun_same_as_pass_only(): + ci = ContribIndex() + ci.add_entry(0, 0, HIST_STATUS_OK, 0) + ci.add_entry(1, 0, HIST_STATUS_ERROR, 0) + assert ci.passing_run_ids(POLICY_EXCLUDE_ERROR_RERUN) == [0] + + +def test_squash_watermark_update(): + ci = ContribIndex(squash_watermark=0) + ci.set_squash_watermark(99) + assert ci.squash_watermark == 99 + + +def test_remove_entries_after_squash(): + ci = ContribIndex() + for run_id in range(10): + ci.add_entry(run_id, 0, HIST_STATUS_OK, 0) + ci.remove_entries_up_to(4) + assert ci.num_active == 5 + remaining = [e.run_id for e in ci._entries] + assert remaining == [5, 6, 7, 8, 9] + + +def test_max_run_id_from_entries(): + ci = ContribIndex() + 
ci.add_entry(0, 0, HIST_STATUS_OK, 0) + ci.add_entry(7, 0, HIST_STATUS_OK, 0) + assert ci.max_run_id() == 7 + + +def test_max_run_id_falls_back_to_watermark(): + ci = ContribIndex(squash_watermark=42) + assert ci.max_run_id() == 42 + + +def test_serialize_deserialize_empty(): + ci = ContribIndex() + ci2 = ContribIndex.deserialize(ci.serialize()) + assert ci2.num_active == 0 + assert ci2.merge_policy == POLICY_PASS_ONLY + + +def test_serialize_deserialize_with_entries(): + ci = ContribIndex(merge_policy=POLICY_STRICT, squash_watermark=10) + ci.add_entry(11, 0, HIST_STATUS_OK, FLAG_IS_RERUN) + ci.add_entry(12, 1, HIST_STATUS_FAIL, 0) + data = ci.serialize() + ci2 = ContribIndex.deserialize(data) + assert ci2.merge_policy == POLICY_STRICT + assert ci2.squash_watermark == 10 + assert ci2.num_active == 2 + assert ci2._entries[0].run_id == 11 + assert ci2._entries[0].is_rerun is True + assert ci2._entries[1].status == HIST_STATUS_FAIL + + +def test_bad_magic_raises(): + data = b"\x00\x00\x00\x00" + b"\x00" * 20 + with pytest.raises(ValueError, match="Bad magic"): + ContribIndex.deserialize(data) diff --git a/tests/unit/ncdb/test_history_buckets.py b/tests/unit/ncdb/test_history_buckets.py new file mode 100644 index 0000000..6206683 --- /dev/null +++ b/tests/unit/ncdb/test_history_buckets.py @@ -0,0 +1,144 @@ +"""Unit tests for BucketWriter / BucketReader (history/NNNNNN.bin).""" +import pytest +from ucis.ncdb.history_buckets import BucketWriter, BucketReader +from ucis.ncdb.constants import ( + HIST_STATUS_OK, HIST_STATUS_FAIL, + HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE, + HISTORY_BUCKET_MAX_RECORDS, +) + + +def _bucket(*records): + """Write records and return a BucketReader over them. + + Each record is a (name_id, seed_id, ts, status, flags) tuple. 
+ """ + w = BucketWriter() + for name_id, seed_id, ts, status, flags in records: + w.add(name_id, seed_id, ts, status, flags) + return BucketReader(w.seal_fast()) + + +def test_write_read_single_record(): + r = _bucket((0, 0, 1700000000, HIST_STATUS_OK, 0)) + assert r.num_records == 1 + recs = r.records_for_name(0) + assert len(recs) == 1 + assert recs[0].ts == 1700000000 + assert recs[0].status == HIST_STATUS_OK + assert recs[0].flags == 0 + + +def test_name_index_binary_search(): + records = [] + for nid in range(20): + for i in range(5): + records.append((nid, 0, 1700000000 + nid * 1000 + i * 100, HIST_STATUS_OK, 0)) + r = _bucket(*records) + for nid in range(20): + found = r.records_for_name(nid) + assert len(found) == 5, f"name_id {nid}: expected 5, got {len(found)}" + + +def test_records_for_name_not_present(): + r = _bucket((0, 0, 1700000000, HIST_STATUS_OK, 0)) + assert r.records_for_name(99) == [] + + +def test_seed_dict_mapping(): + # Two different global seed_ids should map back correctly + w = BucketWriter() + w.add(0, 42, 1700000000, HIST_STATUS_OK, 0) + w.add(0, 99, 1700000001, HIST_STATUS_OK, 0) + r = BucketReader(w.seal_fast()) + recs = r.records_for_name(0) + seed_ids = {rec.seed_id for rec in recs} + assert seed_ids == {42, 99} + + +def test_ts_delta_encoding(): + base = 1700000000 + timestamps = [base, base + 100, base + 250, base + 1000] + records = [(0, 0, ts, HIST_STATUS_OK, 0) for ts in timestamps] + r = _bucket(*records) + recs = r.records_for_name(0) + recovered = sorted(rec.ts for rec in recs) + assert recovered == timestamps + + +def test_status_flags_pack_unpack(): + """All status × flag combinations round-trip through the nibble-packed byte.""" + statuses = [HIST_STATUS_OK, HIST_STATUS_FAIL] + flags_list = [0, HIST_FLAG_IS_RERUN, HIST_FLAG_HAS_COVERAGE, + HIST_FLAG_IS_RERUN | HIST_FLAG_HAS_COVERAGE] + records = [] + ts = 1700000000 + for nid, (status, flags) in enumerate( + (s, f) for s in statuses for f in flags_list): + 
records.append((nid, 0, ts + nid * 100, status, flags)) + r = _bucket(*records) + for nid, (status, flags) in enumerate( + (s, f) for s in statuses for f in flags_list): + recs = r.records_for_name(nid) + assert len(recs) == 1 + assert recs[0].status == status, f"nid={nid} status mismatch" + assert recs[0].flags == flags, f"nid={nid} flags mismatch" + + +def test_multiple_names_correct_counts(): + w = BucketWriter() + for i in range(10): + w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0) + for i in range(5): + w.add(1, 0, 1700001000 + i * 100, HIST_STATUS_FAIL, 0) + r = BucketReader(w.seal_fast()) + assert r.num_records == 15 + assert len(r.records_for_name(0)) == 10 + assert len(r.records_for_name(1)) == 5 + + +def test_seal_deflate(): + w = BucketWriter() + for i in range(100): + w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0) + data = w.seal_fast() + r = BucketReader(data) + assert r.num_records == 100 + + +def test_seal_lzma_or_fallback(): + """seal() should succeed regardless of liblzma availability.""" + w = BucketWriter() + for i in range(100): + w.add(0, 0, 1700000000 + i * 100, HIST_STATUS_OK, 0) + data = w.seal(use_lzma=True) # lzma or deflate fallback + r = BucketReader(data) + assert r.num_records == 100 + + +def test_10k_records_compressed_size(): + """10K records should compress to ≤ 50 KB (well under 5 MB design target).""" + w = BucketWriter() + for i in range(HISTORY_BUCKET_MAX_RECORDS): + name_id = i % 100 + w.add(name_id, 0, 1700000000 + i * 10, HIST_STATUS_OK, 0) + data = w.seal_fast() + assert len(data) < 50 * 1024, f"Bucket too large: {len(data)} bytes" + + +def test_all_records_iteration(): + w = BucketWriter() + for nid in range(3): + for i in range(4): + w.add(nid, 0, 1700000000 + nid * 10000 + i * 100, HIST_STATUS_OK, 0) + r = BucketReader(w.seal_fast()) + all_recs = list(r.all_records()) + assert len(all_recs) == 12 + + +def test_is_full(): + w = BucketWriter() + assert not w.is_full() + for i in 
range(HISTORY_BUCKET_MAX_RECORDS): + w.add(0, 0, 1700000000 + i, HIST_STATUS_OK, 0) + assert w.is_full() diff --git a/tests/unit/ncdb/test_reports.py b/tests/unit/ncdb/test_reports.py new file mode 100644 index 0000000..f7b0e99 --- /dev/null +++ b/tests/unit/ncdb/test_reports.py @@ -0,0 +1,705 @@ +"""Unit tests for ucis.ncdb.reports.""" + +import json +import pytest +from unittest.mock import MagicMock + +from ucis.ncdb.testplan import Testplan, Testpoint +from ucis.ncdb.testplan_closure import TPStatus, TestpointResult +from ucis.ncdb.reports import ( + ClosureSummary, + StageGateReport, + CoveragePerTestpoint, + RegressionDelta, + StageProgression, + TestpointReliability, + UnexercisedCovergroups, + CoverageContribution, + TestBudget, + SafetyMatrix, + SeedReliability, + report_testpoint_closure, + format_testpoint_closure, + report_stage_gate, + format_stage_gate, + report_coverage_per_testpoint, + format_coverage_per_testpoint, + report_regression_delta, + format_regression_delta, + report_stage_progression, + format_stage_progression, + report_testpoint_reliability, + format_testpoint_reliability, + report_unexercised_covergroups, + format_unexercised_covergroups, + report_coverage_contribution, + format_coverage_contribution, + report_test_budget, + format_test_budget, + report_safety_matrix, + format_safety_matrix, + report_seed_reliability, + format_seed_reliability, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +def _make_tp(name, stage="V1", tests=None, na=False): + return Testpoint(name=name, stage=stage, tests=tests or [name], na=na) + + +def _make_result(tp, status, pass_count=0, fail_count=0, matched=None): + return TestpointResult( + testpoint=tp, + status=status, + matched_tests=matched or [], + pass_count=pass_count, + fail_count=fail_count, + ) + + +@pytest.fixture +def simple_results(): + tps = [ + 
_make_tp("tp_alpha", "V1"), + _make_tp("tp_beta", "V1"), + _make_tp("tp_gamma", "V2"), + _make_tp("tp_delta", "V2"), + _make_tp("tp_na", "V1", na=True), + ] + return [ + _make_result(tps[0], TPStatus.CLOSED, pass_count=5, matched=["tp_alpha"]), + _make_result(tps[1], TPStatus.FAILING, fail_count=3, matched=["tp_beta"]), + _make_result(tps[2], TPStatus.PARTIAL, pass_count=2, fail_count=1, + matched=["tp_gamma"]), + _make_result(tps[3], TPStatus.NOT_RUN), + _make_result(tps[4], TPStatus.NA), + ] + + +@pytest.fixture +def simple_plan(): + plan = Testplan(source_file="test.hjson") + plan.add_testpoint(Testpoint(name="tp_alpha", stage="V1", tests=["tp_alpha"])) + plan.add_testpoint(Testpoint(name="tp_beta", stage="V1", tests=["tp_beta"])) + plan.add_testpoint(Testpoint(name="tp_gamma", stage="V2", tests=["tp_gamma"])) + plan.add_testpoint(Testpoint(name="tp_delta", stage="V2", tests=["tp_delta"])) + return plan + + +# --------------------------------------------------------------------------- +# Report A — testpoint closure +# --------------------------------------------------------------------------- + +class TestReportTestpointClosure: + def test_returns_closure_summary(self, simple_results): + summary = report_testpoint_closure(simple_results) + assert isinstance(summary, ClosureSummary) + + def test_total_counts(self, simple_results): + summary = report_testpoint_closure(simple_results) + assert summary.total == 5 + assert summary.total_closed == 1 + assert summary.total_na == 1 + + def test_by_stage_keys(self, simple_results): + summary = report_testpoint_closure(simple_results) + assert "V1" in summary.by_stage + assert "V2" in summary.by_stage + + def test_by_stage_counts(self, simple_results): + summary = report_testpoint_closure(simple_results) + # V1: tp_alpha (closed) + tp_beta (failing) — tp_na is NA, excluded + assert summary.by_stage["V1"]["total"] == 2 + assert summary.by_stage["V1"]["closed"] == 1 + # V2: tp_gamma (partial) + tp_delta (not_run) + assert 
summary.by_stage["V2"]["total"] == 2 + assert summary.by_stage["V2"]["closed"] == 0 + + def test_stage_pct_calculation(self, simple_results): + summary = report_testpoint_closure(simple_results) + assert summary.by_stage["V1"]["pct"] == 50.0 + + def test_to_json_valid(self, simple_results): + summary = report_testpoint_closure(simple_results) + d = json.loads(summary.to_json()) + assert d["total"] == 5 + assert len(d["testpoints"]) == 5 + + def test_format_renders_header(self, simple_results): + summary = report_testpoint_closure(simple_results) + text = format_testpoint_closure(summary) + assert "Testpoint" in text + assert "Stage" in text + assert "Status" in text + + def test_format_contains_testpoint_name(self, simple_results): + summary = report_testpoint_closure(simple_results) + text = format_testpoint_closure(summary) + assert "tp_alpha" in text + assert "tp_beta" in text + + def test_format_skips_na_by_default(self, simple_results): + summary = report_testpoint_closure(simple_results) + text = format_testpoint_closure(summary) + # tp_na has TPStatus.NA — should not appear by default + assert "tp_na" not in text + + def test_format_show_all_includes_na(self, simple_results): + summary = report_testpoint_closure(simple_results) + text = format_testpoint_closure(summary, show_all=True) + assert "tp_na" in text + + def test_format_stage_rollup_present(self, simple_results): + summary = report_testpoint_closure(simple_results) + text = format_testpoint_closure(summary) + assert "Stage roll-up" in text + assert "V1" in text + assert "V2" in text + + +# --------------------------------------------------------------------------- +# Report B — stage gate +# --------------------------------------------------------------------------- + +class TestReportStageGate: + def test_pass_when_all_v1_closed(self): + tp = _make_tp("tp1", "V1") + results = [_make_result(tp, TPStatus.CLOSED, pass_count=3)] + plan = Testplan(source_file="x.hjson") + 
plan.add_testpoint(Testpoint(name="tp1", stage="V1", tests=["tp1"])) + gate = report_stage_gate(results, "V1", plan) + assert isinstance(gate, StageGateReport) + assert gate.passed is True + assert gate.blocking == [] + + def test_fail_when_v1_failing(self, simple_results, simple_plan): + gate = report_stage_gate(simple_results, "V1", simple_plan) + assert gate.passed is False + assert any(r.testpoint.name == "tp_beta" for r in gate.blocking) + + def test_to_json_valid(self, simple_results, simple_plan): + gate = report_stage_gate(simple_results, "V1", simple_plan) + d = json.loads(gate.to_json()) + assert "passed" in d + assert "blocking" in d + + def test_format_shows_verdict(self, simple_results, simple_plan): + gate = report_stage_gate(simple_results, "V1", simple_plan) + text = format_stage_gate(gate) + assert "V1" in text + assert "FAIL" in text or "PASS" in text + + def test_format_lists_blocking(self, simple_results, simple_plan): + gate = report_stage_gate(simple_results, "V2", simple_plan) + text = format_stage_gate(gate) + assert "tp_beta" in text or "tp_gamma" in text or "tp_delta" in text + + +# --------------------------------------------------------------------------- +# Report D — regression delta +# --------------------------------------------------------------------------- + +class TestReportRegressionDelta: + def test_returns_delta(self, simple_results): + # All results "old" → same results "new" → no change + delta = report_regression_delta(simple_results, simple_results) + assert isinstance(delta, RegressionDelta) + assert delta.newly_closed == [] + assert delta.newly_failing == [] + + def test_detects_newly_closed(self, simple_results): + old = [_make_result(_make_tp("tp_beta", "V1"), TPStatus.NOT_RUN)] + new = [_make_result(_make_tp("tp_beta", "V1"), TPStatus.CLOSED, + pass_count=1)] + delta = report_regression_delta(new, old) + assert len(delta.newly_closed) == 1 + assert delta.newly_closed[0].testpoint.name == "tp_beta" + + def 
test_detects_newly_failing(self): + old = [_make_result(_make_tp("tp_a", "V1"), TPStatus.PARTIAL, + pass_count=1, fail_count=1)] + new = [_make_result(_make_tp("tp_a", "V1"), TPStatus.FAILING, + fail_count=5)] + delta = report_regression_delta(new, old) + assert len(delta.newly_failing) == 1 + + def test_to_json_valid(self, simple_results): + delta = report_regression_delta(simple_results, simple_results) + d = json.loads(delta.to_json()) + assert "summary" in d + assert "newly_closed" in d + + def test_format_shows_summary(self, simple_results): + delta = report_regression_delta(simple_results, simple_results) + text = format_regression_delta(delta) + assert "delta" in text.lower() + + +# --------------------------------------------------------------------------- +# Report F — testpoint reliability +# --------------------------------------------------------------------------- + +class TestReportTestpointReliability: + def test_returns_dataclass(self, simple_results): + db = MagicMock() + db.get_test_stats.return_value = None + report = report_testpoint_reliability(simple_results, db) + assert isinstance(report, TestpointReliability) + + def test_uses_stats_when_available(self, simple_results): + stats = MagicMock() + stats.flake_score = 0.75 + stats.pass_count = 3 + stats.fail_count = 2 + + db = MagicMock() + db.get_test_stats.return_value = stats + + report = report_testpoint_reliability(simple_results, db) + # tp_alpha has matched_tests=["tp_alpha"] — should get flake 0.75 + alpha_row = next(r for r in report.rows if r[0] == "tp_alpha") + assert alpha_row[1] == pytest.approx(0.75) + + def test_sorted_by_flake_desc(self, simple_results): + stats_high = MagicMock() + stats_high.flake_score = 0.9 + stats_high.pass_count = 1 + stats_high.fail_count = 5 + + stats_low = MagicMock() + stats_low.flake_score = 0.1 + stats_low.pass_count = 9 + stats_low.fail_count = 1 + + db = MagicMock() + def _get(name): + return stats_high if "beta" in name else stats_low + 
db.get_test_stats.side_effect = _get + + report = report_testpoint_reliability(simple_results, db) + scores = [r[1] for r in report.rows] + assert scores == sorted(scores, reverse=True) + + def test_to_json_valid(self, simple_results): + db = MagicMock() + db.get_test_stats.return_value = None + report = report_testpoint_reliability(simple_results, db) + d = json.loads(report.to_json()) + assert "rows" in d + + def test_format_renders_table(self, simple_results): + db = MagicMock() + db.get_test_stats.return_value = None + report = report_testpoint_reliability(simple_results, db) + text = format_testpoint_reliability(report) + assert "Testpoint" in text + assert "Flake" in text + + +# --------------------------------------------------------------------------- +# Report G — unexercised covergroups +# --------------------------------------------------------------------------- + +class TestReportUnexercisedCovergroups: + def _make_db_with_cg(self, cg_hit_pct: dict): + """Build a mock db whose covergroup scopes reflect cg_hit_pct.""" + from ucis.scope_type_t import ScopeTypeT + from ucis.cover_type_t import CoverTypeT + + def make_scope(name, pct): + scope = MagicMock() + scope.getScopeName.return_value = name + n_bins = 10 + hit_bins = int(n_bins * pct / 100) + cp = MagicMock() + bins = [] + for i in range(n_bins): + b = MagicMock() + b.getData.return_value = (1 if i < hit_bins else 0,) + bins.append(b) + cp.getCoverItems.return_value = bins + scope.getScopes.return_value = [cp] + return scope + + scopes = [make_scope(name, pct) for name, pct in cg_hit_pct.items()] + db = MagicMock() + db.getScopes.return_value = scopes + return db + + def test_zero_hit_detected(self): + from ucis.ncdb.testplan import CovergroupEntry + plan = Testplan(source_file="x.hjson") + plan.covergroups.append(CovergroupEntry(name="cg_reset")) + db = self._make_db_with_cg({"cg_reset": 0}) + report = report_unexercised_covergroups(db, plan) + assert "cg_reset" in report.zero_hit + + def 
test_low_hit_detected(self): + from ucis.ncdb.testplan import CovergroupEntry + plan = Testplan(source_file="x.hjson") + plan.covergroups.append(CovergroupEntry(name="cg_x")) + db = self._make_db_with_cg({"cg_x": 30}) + report = report_unexercised_covergroups(db, plan, low_threshold=50.0) + assert any(n == "cg_x" for n, _ in report.low_hit) + + def test_fully_hit_not_reported(self): + from ucis.ncdb.testplan import CovergroupEntry + plan = Testplan(source_file="x.hjson") + plan.covergroups.append(CovergroupEntry(name="cg_full")) + db = self._make_db_with_cg({"cg_full": 100}) + report = report_unexercised_covergroups(db, plan) + assert "cg_full" not in report.zero_hit + assert not any(n == "cg_full" for n, _ in report.low_hit) + + def test_to_json_valid(self): + plan = Testplan(source_file="x.hjson") + db = MagicMock() + db.getScopes.return_value = [] + report = report_unexercised_covergroups(db, plan) + d = json.loads(report.to_json()) + assert "zero_hit" in d + + def test_format_shows_message(self): + plan = Testplan(source_file="x.hjson") + db = MagicMock() + db.getScopes.return_value = [] + report = report_unexercised_covergroups(db, plan) + text = format_unexercised_covergroups(report) + assert len(text) > 0 + + +# --------------------------------------------------------------------------- +# Report I — coverage contribution +# --------------------------------------------------------------------------- + +class TestReportCoverageContribution: + def test_returns_dataclass_empty_on_no_data(self): + db = MagicMock() + db.get_test_coverage_api.return_value = [] + report = report_coverage_contribution(db) + assert isinstance(report, CoverageContribution) + assert report.rows == [] + + def test_rows_sorted_by_unique_desc(self): + db = MagicMock() + db.get_test_coverage_api.return_value = [ + {"test": "t1", "unique_bins": 10, "total_hits": 20, "total_bins": 100}, + {"test": "t2", "unique_bins": 50, "total_hits": 80, "total_bins": 100}, + {"test": "t3", "unique_bins": 
def _make_testplan(self, stages):
    """Build a mocked Testplan with one mocked Testpoint per stage entry.

    ``stages`` is an iterable of ``(stage, tests)`` pairs.  Each testpoint
    is named ``tp_<stage>_<index>``, where the index is the pair's position
    in ``stages``, and is never marked ``na``.
    """
    def build_testpoint(position, stage_name, test_names):
        point = MagicMock(spec=Testpoint)
        point.name = f"tp_{stage_name}_{position}"
        point.stage = stage_name
        point.na = False
        point.tests = test_names
        return point

    plan = MagicMock(spec=Testplan)
    plan.testpoints = [
        build_testpoint(position, stage_name, test_names)
        for position, (stage_name, test_names) in enumerate(stages)
    ]
    return plan
def test_single_testpoint_with_stats(self):
    """One V1 testpoint with recorded stats yields one row and a V1 total."""
    plan = self._make_testplan([("V1", ["smoke"])])
    stats_db = self._make_db_with_stats({"smoke": (10, 30.0)})

    budget = report_test_budget(plan, stats_db)

    assert len(budget.rows) == 1
    # The second element of the row is unpacked but not checked by this test.
    row_stage, _row_name, row_cpu, row_runs = budget.rows[0]
    assert row_stage == "V1"
    assert row_runs == 10
    assert abs(row_cpu - 30.0) < 0.01
    assert "V1" in budget.stage_totals
def test_format_empty_budget(self):
    """Formatting an empty budget must not raise and must return a string.

    The assertion used to be ``"no" in text.lower() or text == "" or
    isinstance(text, str)``.  Because the last disjunct holds for every
    string, the first two never influenced the outcome; only the check
    that is really enforced is kept so the test reads honestly.
    """
    report = TestBudget(rows=[], stage_totals={}, missing_stats=[])
    text = format_test_budget(report)
    # TODO(review): pin the expected empty-budget message once its wording
    # is settled; until then only the return type is guaranteed.
    assert isinstance(text, str)
+ report = report_safety_matrix([r]) + assert report.rows[0][4] is False + + def test_to_json_valid(self): + r = self._make_result("tp_y", reqs=["R-A"]) + report = report_safety_matrix([r]) + data = json.loads(report.to_json()) + assert "rows" in data + assert data["rows"][0]["req_id"] == "R-A" + + def test_to_csv_header(self): + report = report_safety_matrix([]) + csv = report.to_csv() + assert csv.startswith("req_id,") + + def test_format_shows_req_and_testpoint(self): + r = self._make_result("tp_bus", reqs=["REQ-007"]) + report = report_safety_matrix([r]) + text = format_safety_matrix(report) + assert "REQ-007" in text + assert "tp_bus" in text + + def test_format_multiple_results(self): + results = [ + self._make_result("tp_a", reqs=["R1"]), + self._make_result("tp_b", status=TPStatus.FAILING, reqs=["R2"]), + ] + report = report_safety_matrix(results) + text = format_safety_matrix(report) + assert "R1" in text + assert "R2" in text + assert "tp_a" in text + assert "tp_b" in text + + +# --------------------------------------------------------------------------- +# Report M — seed reliability (P2) +# --------------------------------------------------------------------------- + +class TestReportSeedReliability: + """Tests for report_seed_reliability / format_seed_reliability.""" + + def _make_db_with_history(self, records): + """Build a mock db returning history records.""" + db = MagicMock() + db.query_test_history.return_value = records + return db + + def _rec(self, seed_id, status): + from ucis.ncdb.constants import HIST_STATUS_OK + rec = MagicMock() + rec.seed_id = seed_id + rec.status = status + return rec + + def test_empty_history_returns_empty_rows(self): + db = self._make_db_with_history([]) + report = report_seed_reliability(db, "uart_smoke") + assert isinstance(report, SeedReliability) + assert report.rows == [] + assert report.total_seeds == 0 + + def test_single_seed_all_pass(self): + from ucis.ncdb.constants import HIST_STATUS_OK + recs = 
def test_single_seed_all_fail(self):
    """A seed whose two runs both fail reports two failures and no passes."""
    # Imported locally, like the sibling tests in this class; the named
    # constant replaces the bare literal ``1`` used previously.
    from ucis.ncdb.constants import HIST_STATUS_FAIL

    recs = [self._rec(7, HIST_STATUS_FAIL), self._rec(7, HIST_STATUS_FAIL)]
    db = self._make_db_with_history(recs)
    report = report_seed_reliability(db, "t2")
    assert len(report.rows) == 1
    # Only the pass/fail counters are checked by this test.
    _sid, pc, fc, _flake = report.rows[0]
    assert fc == 2
    assert pc == 0
def test_append_multiple():
    """Five appended squash entries are all kept, in append order."""
    squash_log = SquashLog()
    # One entry per day, each covering a consecutive block of ten runs.
    for day, first_run in enumerate(range(0, 50, 10)):
        squash_log.append(
            ts=1700000000 + day * 86400,
            policy=1,
            from_run=first_run,
            to_run=first_run + 9,
            num_runs=10,
            pass_runs=10,
        )
    assert squash_log.num_squashes == 5
    assert squash_log.entries()[4].from_run == 40
def test_assign_run_id_survives_roundtrip():
    """The run-id counter continues from its old value after a round-trip."""
    original = TestRegistry()
    # Consume ids 0 and 1 before serializing.
    for _ in range(2):
        original.assign_run_id()

    payload = original.serialize()
    restored = TestRegistry.deserialize(payload)

    assert restored.assign_run_id() == 2
def test_serialize_deserialize_names_and_seeds():
    """Counter, names and seeds all survive a serialize/deserialize cycle."""
    expected_names = ["test_z", "test_a", "test_m"]
    expected_seeds = ["1", "2"]

    registry = TestRegistry(next_run_id=5)
    for name in expected_names:
        registry.lookup_name_id(name)
    for seed in expected_seeds:
        registry.lookup_seed_id(seed)

    restored = TestRegistry.deserialize(registry.serialize())

    assert restored.next_run_id == 5
    assert restored.num_names == 3
    assert restored.num_seeds == 2
    # Ids follow insertion order, so position in the list is the id.
    for name_id, name in enumerate(expected_names):
        assert restored.name_for_id(name_id) == name
    for seed_id, seed in enumerate(expected_seeds):
        assert restored.seed_for_id(seed_id) == seed
def _make_table(*statuses, cpu_times=None):
    """Build a TestStatsTable by replaying ``statuses`` against name_id 0.

    Runs are timestamped one day (86400 s) apart starting at 1700000000.
    When ``cpu_times`` is truthy, its i-th element is passed as the CPU
    time of the i-th run; otherwise ``None`` is passed.
    """
    table = TestStatsTable()
    timestamp = 1700000000
    for run_index, status in enumerate(statuses):
        if cpu_times:
            cpu_time = cpu_times[run_index]
        else:
            cpu_time = None
        table.update(0, status, timestamp, cpu_time=cpu_time)
        timestamp += 86400
    return table
def test_cusum_detects_change_point():
    """Smoke-test the CUSUM update across a pass-to-fail regime change.

    Ten passes are followed by twenty consecutive failures.  The test does
    not assert that a change point was recorded; it only checks that every
    failure was counted and that ``cusum_value`` never goes negative.
    """
    tbl = TestStatsTable()
    # Start with passes to establish baseline mean ≈ 0
    for i in range(10):
        tbl.update(0, HIST_STATUS_OK, 1700000000 + i * 86400)
    # Then many consecutive failures
    for i in range(10, 30):
        tbl.update(0, HIST_STATUS_FAIL, 1700000000 + i * 86400)
    # After reset, CUSUM can rise again — just check it doesn't blow up
    e = tbl.get(0)
    assert e.fail_count == 20
    assert e.cusum_value >= 0.0  # always non-negative
def test_top_flaky():
    """The alternating test (name_id 0) ranks above the always-passing one."""
    table = TestStatsTable()
    timestamps = [1700000000 + day * 86400 for day in range(10)]

    # name_id 0: strict OK/FAIL alternation, starting with OK.
    alternating = [HIST_STATUS_OK, HIST_STATUS_FAIL] * 5
    for ts, status in zip(timestamps, alternating):
        table.update(0, status, ts)

    # name_id 1: passes on every run.
    for ts in timestamps:
        table.update(1, HIST_STATUS_OK, ts)

    ranked = table.top_flaky(1)
    assert ranked[0].name_id == 0
def _make_plan() -> Testplan:
    """Return the shared UART fixture plan.

    The plan has three testpoints (one V1, one V2, and one V2 marked
    ``na`` with no tests) and a single covergroup entry.
    """
    testpoints = [
        Testpoint(name="uart_reset", stage="V1",
                  tests=["uart_smoke", "uart_init_*"]),
        Testpoint(name="uart_loopback", stage="V2",
                  tests=["uart_loopback_42", "uart_loopback_99"]),
        Testpoint(name="uart_na", stage="V2", na=True, tests=[]),
    ]
    plan = Testplan(source_file="uart.hjson")
    for testpoint in testpoints:
        plan.add_testpoint(testpoint)
    reset_cg = CovergroupEntry(name="cg_uart_reset", desc="Reset coverage")
    plan.covergroups.append(reset_cg)
    return plan
def test_wildcard_does_not_match_seed_strip_candidate(self):
    """Seed-stripped exact matching takes priority over wildcard matching.

    ``foo_bar_42`` could match the ``exact`` testpoint (after its trailing
    seed is stripped to ``foo_bar``) or the ``wild`` testpoint (through
    ``foo_*``).  The exact testpoint must win.
    """
    # Seed-strip (strategy 2) has higher priority than wildcard (strategy 3)
    plan = Testplan()
    plan.add_testpoint(Testpoint(name="exact", stage="V1",
                                 tests=["foo_bar"]))   # exact of stripped
    plan.add_testpoint(Testpoint(name="wild", stage="V1",
                                 tests=["foo_*"]))     # wildcard
    tp = plan.testpointForTest("foo_bar_42")  # strip→foo_bar wins
    # Guard first, like the sibling lookup tests, so a miss fails as a
    # clear assertion instead of an AttributeError on ``None``.
    assert tp is not None
    assert tp.name == "exact"
def test_from_dict_missing_optional_fields(self):
    """Fields absent from the input dict fall back to their defaults."""
    minimal = {"testpoints": [{"name": "tp", "stage": "V1"}]}

    loaded = Testplan.from_dict(minimal)

    # Plan-level defaults.
    assert loaded.format_version == 1
    assert loaded.source_file == ""
    # Testpoint-level defaults.
    only_testpoint = loaded.testpoints[0]
    assert only_testpoint.desc == ""
    assert only_testpoint.tests == []
    assert only_testpoint.na is False
───────────────────────────────────────────────────────── + +class TestStampImportTime: + def test_sets_non_empty_timestamp(self): + plan = Testplan() + assert plan.import_timestamp == "" + plan.stamp_import_time() + assert plan.import_timestamp != "" + assert "T" in plan.import_timestamp # ISO-8601 format + + +# ── module-level helpers ────────────────────────────────────────────────────── + +class TestModuleHelpers: + def test_get_testplan_from_duck_typed_db(self): + class FakeDB: + def getTestplan(self): + return "my_plan" + assert get_testplan(FakeDB()) == "my_plan" + + def test_get_testplan_returns_none_without_method(self): + assert get_testplan(object()) is None + + def test_set_testplan_duck_typed(self): + stored = [] + class FakeDB: + def setTestplan(self, tp): + stored.append(tp) + set_testplan(FakeDB(), "plan_obj") + assert stored == ["plan_obj"] + + def test_set_testplan_raises_without_method(self): + with pytest.raises(TypeError): + set_testplan(object(), "plan") diff --git a/tests/unit/ncdb/test_testplan_closure.py b/tests/unit/ncdb/test_testplan_closure.py new file mode 100644 index 0000000..8cf953b --- /dev/null +++ b/tests/unit/ncdb/test_testplan_closure.py @@ -0,0 +1,206 @@ +"""Unit tests for src/ucis/ncdb/testplan_closure.py.""" +from __future__ import annotations + +import pytest + +from ucis.ncdb.testplan import Testplan, Testpoint +from ucis.ncdb.testplan_closure import ( + TPStatus, + TestpointResult, + compute_closure, + stage_gate_status, +) + + +# ── stub DB ─────────────────────────────────────────────────────────────────── + +class _FakeStats: + def __init__(self, pass_count, fail_count): + self.pass_count = pass_count + self.fail_count = fail_count + + +class _FakeRegistry: + def __init__(self, names): + self._names = names + + +class _FakeDB: + """Minimal NcdbUCIS-like db using the v2 history path.""" + + def __init__(self, runs: dict): + """runs: {name: (pass_count, fail_count)}""" + names = list(runs.keys()) + self._test_registry = 
_FakeRegistry(names) + self._test_stats = _FakeStatsTable(runs) + + def historyNodes(self, _kind): + return [] + + +class _FakeStatsTable: + def __init__(self, runs): + self._runs = runs + self._names = list(runs.keys()) + + def get(self, nid): + name = self._names[nid] + p, f = self._runs[name] + return _FakeStats(p, f) + + +def _db_with(**kwargs): + """Helper: _db_with(uart_smoke=(3,1)) → fake db.""" + return _FakeDB(kwargs) + + +# ── plan helpers ────────────────────────────────────────────────────────────── + +def _make_plan(*testpoints) -> Testplan: + plan = Testplan() + for tp in testpoints: + plan.add_testpoint(tp) + return plan + + +# ── compute_closure ─────────────────────────────────────────────────────────── + +class TestComputeClosure: + def test_closed_when_all_pass(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", tests=["uart_smoke"])) + db = _db_with(uart_smoke=(5, 0)) + results = compute_closure(plan, db) + assert results[0].status == TPStatus.CLOSED + + def test_failing_when_all_fail(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", tests=["t"])) + db = _db_with(t=(0, 3)) + results = compute_closure(plan, db) + assert results[0].status == TPStatus.FAILING + + def test_partial_when_mixed(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", tests=["t"])) + db = _db_with(t=(2, 1)) + results = compute_closure(plan, db) + assert results[0].status == TPStatus.PARTIAL + + def test_not_run_when_absent(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", tests=["t"])) + db = _db_with() + results = compute_closure(plan, db) + assert results[0].status == TPStatus.NOT_RUN + + def test_na_testpoint(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", na=True)) + db = _db_with() + results = compute_closure(plan, db) + assert results[0].status == TPStatus.NA + + def test_unimplemented_empty_tests(self): + plan = _make_plan(Testpoint(name="tp", stage="V1", tests=[])) + db = _db_with() + results = compute_closure(plan, db) 
def test_pass_fail_counts_accurate(self):
    """Pass and fail counts are summed over every test the testpoint lists."""
    testpoint = Testpoint(name="tp", stage="V1", tests=["a", "b"])
    plan = _make_plan(testpoint)
    # a: 3 pass / 1 fail, b: 2 pass / 2 fail -> 5 pass / 3 fail in total.
    history_db = _db_with(a=(3, 1), b=(2, 2))

    first_result = compute_closure(plan, history_db)[0]

    assert first_result.pass_count == 5
    assert first_result.fail_count == 3
results.append(TestpointResult(tp, st, [], 1 if st == TPStatus.CLOSED else 0, 0)) + return plan, results + + def test_gate_passes_all_closed(self): + plan, results = self._plan_and_results({ + "v1_tp": ("V1", TPStatus.CLOSED), + "v2_tp": ("V2", TPStatus.CLOSED), + }) + gate = stage_gate_status(results, "V2", plan) + assert gate["passed"] is True + assert gate["blocking"] == [] + + def test_gate_fails_if_lower_stage_not_closed(self): + plan, results = self._plan_and_results({ + "v1_tp": ("V1", TPStatus.FAILING), + "v2_tp": ("V2", TPStatus.CLOSED), + }) + gate = stage_gate_status(results, "V2", plan) + assert gate["passed"] is False + assert any(r.testpoint.name == "v1_tp" for r in gate["blocking"]) + + def test_gate_passes_na_testpoints_ignored(self): + plan, results = self._plan_and_results({ + "v1_tp": ("V1", TPStatus.CLOSED), + "v1_na": ("V1", TPStatus.NA), + }) + gate = stage_gate_status(results, "V1", plan) + assert gate["passed"] is True + + def test_gate_ignores_higher_stage(self): + plan, results = self._plan_and_results({ + "v1_tp": ("V1", TPStatus.CLOSED), + "v3_tp": ("V3", TPStatus.FAILING), # V3 not evaluated for V2 gate + }) + gate = stage_gate_status(results, "V2", plan) + assert gate["passed"] is True + + def test_message_includes_stage(self): + plan, results = self._plan_and_results({ + "tp": ("V1", TPStatus.CLOSED), + }) + gate = stage_gate_status(results, "V1", plan) + assert "V1" in gate["message"] + + def test_gate_returns_stage_key(self): + plan, results = self._plan_and_results({"tp": ("V1", TPStatus.CLOSED)}) + gate = stage_gate_status(results, "V1", plan) + assert gate["stage"] == "V1" diff --git a/tests/unit/ncdb/test_testplan_export.py b/tests/unit/ncdb/test_testplan_export.py new file mode 100644 index 0000000..fb18829 --- /dev/null +++ b/tests/unit/ncdb/test_testplan_export.py @@ -0,0 +1,229 @@ +"""Unit tests for ucis.ncdb.testplan_export.""" + +import io +import json +import os +import tempfile +from xml.etree import ElementTree as ET + 
+import pytest + +from ucis.ncdb.testplan import Testplan, Testpoint +from ucis.ncdb.testplan_closure import TPStatus, TestpointResult +from ucis.ncdb.testplan_export import ( + export_junit_xml, + export_github_annotations, + export_summary_markdown, +) +from ucis.ncdb.reports import report_stage_gate + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +def _make_tp(name, stage="V1", desc=""): + return Testpoint(name=name, stage=stage, tests=[name], desc=desc) + + +def _make_result(tp, status, pass_count=0, fail_count=0, matched=None): + return TestpointResult( + testpoint=tp, + status=status, + matched_tests=matched or [], + pass_count=pass_count, + fail_count=fail_count, + ) + + +@pytest.fixture +def mixed_results(): + return [ + _make_result(_make_tp("tp_pass", "V1"), TPStatus.CLOSED, + pass_count=5, matched=["tp_pass"]), + _make_result(_make_tp("tp_fail", "V1"), TPStatus.FAILING, + fail_count=3, matched=["tp_fail"]), + _make_result(_make_tp("tp_skip", "V2"), TPStatus.NOT_RUN), + _make_result(_make_tp("tp_partial", "V2"), TPStatus.PARTIAL, + pass_count=2, fail_count=2, matched=["tp_partial"]), + _make_result(_make_tp("tp_na", "V1"), TPStatus.NA), + ] + + +@pytest.fixture +def simple_plan(): + plan = Testplan(source_file="test.hjson") + for name, stage in [("tp_pass", "V1"), ("tp_fail", "V1"), + ("tp_skip", "V2"), ("tp_partial", "V2"), + ("tp_na", "V1")]: + plan.add_testpoint(Testpoint(name=name, stage=stage, tests=[name])) + return plan + + +# --------------------------------------------------------------------------- +# JUnit XML +# --------------------------------------------------------------------------- + +class TestExportJunitXml: + def test_creates_file(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + assert os.path.exists(out) + + def test_valid_xml(self, mixed_results, 
tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + root = tree.getroot() + assert root.tag == "testsuite" + + def test_testcase_count(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + cases = tree.findall(".//testcase") + assert len(cases) == len(mixed_results) + + def test_failure_element_for_failing(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + fail_tc = next( + tc for tc in tree.findall(".//testcase") + if tc.attrib["name"] == "tp_fail" + ) + assert fail_tc.find("failure") is not None + + def test_skipped_element_for_not_run(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + skip_tc = next( + tc for tc in tree.findall(".//testcase") + if tc.attrib["name"] == "tp_skip" + ) + assert skip_tc.find("skipped") is not None + + def test_no_failure_for_closed(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + pass_tc = next( + tc for tc in tree.findall(".//testcase") + if tc.attrib["name"] == "tp_pass" + ) + assert pass_tc.find("failure") is None + assert pass_tc.find("skipped") is None + + def test_suite_name_attribute(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out, suite_name="my_suite") + tree = ET.parse(out) + assert tree.getroot().attrib["name"] == "my_suite" + + def test_failure_count_in_suite(self, mixed_results, tmp_path): + out = str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + # tp_fail (FAILING) + tp_partial (PARTIAL) = 2 failures + assert tree.getroot().attrib["failures"] == "2" + + def test_partial_gets_failure_element(self, mixed_results, tmp_path): + out = 
str(tmp_path / "results.xml") + export_junit_xml(mixed_results, out) + tree = ET.parse(out) + partial_tc = next( + tc for tc in tree.findall(".//testcase") + if tc.attrib["name"] == "tp_partial" + ) + assert partial_tc.find("failure") is not None + + +# --------------------------------------------------------------------------- +# GitHub Annotations +# --------------------------------------------------------------------------- + +class TestExportGithubAnnotations: + def test_error_for_failing(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, output=buf) + text = buf.getvalue() + assert "::error" in text + assert "tp_fail" in text + + def test_warning_for_not_run(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, output=buf) + text = buf.getvalue() + assert "::warning" in text + assert "tp_skip" in text + + def test_warning_for_partial(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, output=buf) + text = buf.getvalue() + assert "tp_partial" in text + + def test_no_output_for_closed(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, output=buf) + text = buf.getvalue() + # tp_pass (CLOSED) should NOT produce any annotation + lines_with_pass = [l for l in text.splitlines() if "tp_pass" in l] + assert lines_with_pass == [] + + def test_no_output_for_na(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, output=buf) + text = buf.getvalue() + lines_with_na = [l for l in text.splitlines() if "tp_na" in l] + assert lines_with_na == [] + + def test_custom_file_field(self, mixed_results): + buf = io.StringIO() + export_github_annotations(mixed_results, file="uart.hjson", output=buf) + text = buf.getvalue() + assert "file=uart.hjson" in text + + +# --------------------------------------------------------------------------- +# Markdown summary +# 
--------------------------------------------------------------------------- + +class TestExportSummaryMarkdown: + def test_returns_string(self, mixed_results): + md = export_summary_markdown(mixed_results) + assert isinstance(md, str) + + def test_contains_headline(self, mixed_results): + md = export_summary_markdown(mixed_results) + assert "## Testplan Closure Report" in md + + def test_contains_stage_table(self, mixed_results): + md = export_summary_markdown(mixed_results) + assert "| Stage" in md + assert "| V1" in md or "V1" in md + + def test_contains_testpoint_table(self, mixed_results): + md = export_summary_markdown(mixed_results) + assert "| Testpoint" in md + assert "tp_pass" in md + + def test_gate_verdict_included(self, mixed_results, simple_plan): + gate = report_stage_gate(mixed_results, "V1", simple_plan) + md = export_summary_markdown(mixed_results, stage_gate=gate) + assert "Stage gate" in md + assert "V1" in md + + def test_blocking_section_when_gate_fails(self, mixed_results, simple_plan): + gate = report_stage_gate(mixed_results, "V2", simple_plan) + md = export_summary_markdown(mixed_results, stage_gate=gate) + if not gate.passed: + assert "Blocking testpoints" in md + + def test_na_testpoints_excluded_from_table(self, mixed_results): + md = export_summary_markdown(mixed_results) + # tp_na (N/A) and UNIMPLEMENTED should not appear in testpoint table rows + # The heading line "| Testpoint" is present but tp_na row should not be + rows = [l for l in md.splitlines() if "tp_na" in l and "|" in l] + assert rows == [] diff --git a/tests/unit/ncdb/test_testplan_hjson.py b/tests/unit/ncdb/test_testplan_hjson.py new file mode 100644 index 0000000..d1b54c6 --- /dev/null +++ b/tests/unit/ncdb/test_testplan_hjson.py @@ -0,0 +1,180 @@ +"""Unit tests for src/ucis/ncdb/testplan_hjson.py.""" +from __future__ import annotations + +import json +import os +import pytest + +from ucis.ncdb.testplan_hjson import ( + _expand_template, + _expand_tests, + import_hjson, 
+) +from ucis.ncdb.testplan import Testplan + + +# ── _expand_template ────────────────────────────────────────────────────────── + +class TestExpandTemplate: + def test_no_placeholders(self): + assert _expand_template("uart_smoke", {}) == ["uart_smoke"] + + def test_scalar_substitution(self): + assert _expand_template("test_{baud}", {"baud": "9600"}) == ["test_9600"] + + def test_list_substitution_cartesian(self): + result = _expand_template("test_{baud}", {"baud": ["9600", "115200"]}) + assert result == ["test_9600", "test_115200"] + + def test_multiple_keys_cartesian_product(self): + result = _expand_template("{mod}_{type}_test", + {"mod": ["a", "b"], "type": ["x", "y"]}) + assert len(result) == 4 + assert "a_x_test" in result + assert "b_y_test" in result + + def test_unknown_key_left_verbatim(self): + result = _expand_template("test_{unknown}", {}) + assert result == ["test_{unknown}"] + + def test_mixed_known_unknown(self): + result = _expand_template("{a}_{b}", {"a": "hello"}) + assert result == ["hello_{b}"] + + def test_duplicate_key_in_template(self): + # {a} appears twice — should expand both consistently + result = _expand_template("{a}_{a}", {"a": ["x", "y"]}) + assert set(result) == {"x_x", "y_y"} + + def test_no_subs_empty_dict(self): + result = _expand_template("{x}", {}) + assert result == ["{x}"] + + +# ── _expand_tests ───────────────────────────────────────────────────────────── + +class TestExpandTests: + def test_flat_list_no_expansion(self): + result = _expand_tests(["a", "b", "c"], {}) + assert result == ["a", "b", "c"] + + def test_with_expansion(self): + result = _expand_tests(["{m}_test"], {"m": ["u", "v"]}) + assert result == ["u_test", "v_test"] + + def test_mixed_plain_and_template(self): + result = _expand_tests(["plain", "{x}_test"], {"x": ["a", "b"]}) + assert result == ["plain", "a_test", "b_test"] + + +# ── import_hjson ────────────────────────────────────────────────────────────── + +def _write_hjson(tmp_path, data: dict) -> str: 
+ path = str(tmp_path / "plan.json") + with open(path, "w") as f: + json.dump(data, f) + return path + + +class TestImportHjson: + def test_basic_import(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "uart_reset", "stage": "V1", + "tests": ["uart_smoke", "uart_init"]}, + ], + }) + plan = import_hjson(path) + assert isinstance(plan, Testplan) + assert len(plan.testpoints) == 1 + tp = plan.testpoints[0] + assert tp.name == "uart_reset" + assert tp.stage == "V1" + assert tp.tests == ["uart_smoke", "uart_init"] + assert tp.na is False + + def test_na_testpoint(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "not_impl", "stage": "V2", "tests": ["N/A"]}, + ], + }) + plan = import_hjson(path) + tp = plan.testpoints[0] + assert tp.na is True + assert tp.tests == [] + + def test_wildcard_expansion(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "tp", "stage": "V1", + "tests": ["{baud}_test"]}, + ], + }) + plan = import_hjson(path, substitutions={"baud": ["9600", "115200"]}) + assert plan.testpoints[0].tests == ["9600_test", "115200_test"] + + def test_cartesian_expansion(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "tp", "stage": "V1", + "tests": ["{mod}_{intf}_test"]}, + ], + }) + plan = import_hjson(path, substitutions={ + "mod": ["uart", "spi"], + "intf": ["a", "b"], + }) + assert len(plan.testpoints[0].tests) == 4 + + def test_source_file_set(self, tmp_path): + path = _write_hjson(tmp_path, {"testpoints": []}) + plan = import_hjson(path) + assert os.path.isabs(plan.source_file) + assert plan.source_file.endswith(".json") + + def test_covergroups_imported(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [], + "covergroups": [ + {"name": "cg_reset", "desc": "Reset coverage"}, + ], + }) + plan = import_hjson(path) + assert len(plan.covergroups) == 1 + assert plan.covergroups[0].name == "cg_reset" + + def 
test_optional_fields_defaults(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [{"name": "tp", "stage": "V1", "tests": ["t"]}], + }) + plan = import_hjson(path) + tp = plan.testpoints[0] + assert tp.desc == "" + assert tp.tags == [] + + def test_tags_preserved(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "tp", "stage": "V1", "tests": ["t"], + "tags": ["smoke", "regression"]}, + ], + }) + plan = import_hjson(path, {}) + assert plan.testpoints[0].tags == ["smoke", "regression"] + + def test_source_template_recorded(self, tmp_path): + path = _write_hjson(tmp_path, { + "testpoints": [ + {"name": "tp", "stage": "V1", "tests": ["{x}_test"]}, + ], + }) + plan = import_hjson(path, {"x": ["a", "b"]}) + # source_template captures the original template + assert "{x}_test" in plan.testpoints[0].source_template + + def test_empty_testplan(self, tmp_path): + path = _write_hjson(tmp_path, {}) + plan = import_hjson(path) + assert plan.testpoints == [] + assert plan.covergroups == [] diff --git a/tests/unit/ncdb/test_waivers.py b/tests/unit/ncdb/test_waivers.py new file mode 100644 index 0000000..a702da6 --- /dev/null +++ b/tests/unit/ncdb/test_waivers.py @@ -0,0 +1,140 @@ +"""Unit tests for src/ucis/ncdb/waivers.py.""" +from __future__ import annotations + +import pytest + +from ucis.ncdb.waivers import Waiver, WaiverSet, _glob_match + + +# ── _glob_match ─────────────────────────────────────────────────────────────── + +class TestGlobMatch: + def test_exact_match(self): + assert _glob_match("foo/bar", "foo/bar") + + def test_exact_no_match(self): + assert not _glob_match("foo/bar", "foo/baz") + + def test_single_star_matches_segment(self): + assert _glob_match("foo/*/baz", "foo/bar/baz") + + def test_single_star_does_not_cross_slash(self): + assert not _glob_match("foo/*/baz", "foo/x/y/baz") + + def test_double_star_crosses_segments(self): + assert _glob_match("foo/**/baz", "foo/x/y/baz") + + def 
test_double_star_matches_zero_segments(self): + assert _glob_match("foo/**/baz", "foo/baz") + + def test_trailing_single_star(self): + assert _glob_match("scope/*", "scope/uart") + assert not _glob_match("scope/*", "scope/uart/sub") + + def test_leading_double_star(self): + assert _glob_match("**/uart", "top/mid/uart") + + +# ── Waiver ──────────────────────────────────────────────────────────────────── + +class TestWaiver: + def test_matches_exact_scope(self): + w = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="*") + assert w.matches("top/uart") + + def test_no_match_wrong_scope(self): + w = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="*") + assert not w.matches("top/spi") + + def test_matches_with_bin_wildcard(self): + w = Waiver(id="W1", scope_pattern="top/*", bin_pattern="*") + assert w.matches("top/uart", "some_bin") + + def test_matches_specific_bin(self): + w = Waiver(id="W1", scope_pattern="top/uart", bin_pattern="reset_bin") + assert w.matches("top/uart", "reset_bin") + assert not w.matches("top/uart", "other_bin") + + def test_glob_scope_pattern(self): + w = Waiver(id="W1", scope_pattern="**/uart", bin_pattern="*") + assert w.matches("top/mid/uart") + assert not w.matches("top/spi") + + +# ── WaiverSet ───────────────────────────────────────────────────────────────── + +class TestWaiverSet: + def test_empty_no_match(self): + ws = WaiverSet() + assert not ws.matches_scope("any/scope") + + def test_add_and_match(self): + ws = WaiverSet() + ws.add(Waiver(id="W1", scope_pattern="top/uart")) + assert ws.matches_scope("top/uart") + + def test_get_by_id(self): + ws = WaiverSet() + ws.add(Waiver(id="W1", scope_pattern="a")) + ws.add(Waiver(id="W2", scope_pattern="b")) + assert ws.get("W1").scope_pattern == "a" + assert ws.get("W2").scope_pattern == "b" + assert ws.get("W3") is None + + def test_active_at_excludes_expired(self): + ws = WaiverSet([ + Waiver(id="W1", scope_pattern="a", expires_at="2024-01-01T00:00:00"), + Waiver(id="W2", 
scope_pattern="b", expires_at="2030-01-01T00:00:00"), + ]) + active = ws.active_at("2025-06-01T00:00:00") + assert len(active.waivers) == 1 + assert active.waivers[0].id == "W2" + + def test_active_at_includes_never_expires(self): + ws = WaiverSet([ + Waiver(id="W1", scope_pattern="a", expires_at=""), + ]) + active = ws.active_at("9999-12-31T00:00:00") + assert len(active.waivers) == 1 + + def test_active_at_excludes_revoked(self): + ws = WaiverSet([ + Waiver(id="W1", scope_pattern="a", status="revoked"), + ]) + active = ws.active_at("2025-01-01T00:00:00") + assert len(active.waivers) == 0 + + def test_serialize_roundtrip(self): + ws = WaiverSet([ + Waiver(id="W1", scope_pattern="top/uart", bin_pattern="reset_*", + rationale="Known issue", approver="eng", + approved_at="2025-01-01T00:00:00", + expires_at="2026-01-01T00:00:00", + status="active"), + ]) + data = ws.serialize() + ws2 = WaiverSet.from_bytes(data) + assert len(ws2.waivers) == 1 + w = ws2.waivers[0] + assert w.id == "W1" + assert w.scope_pattern == "top/uart" + assert w.bin_pattern == "reset_*" + assert w.rationale == "Known issue" + assert w.approver == "eng" + assert w.expires_at == "2026-01-01T00:00:00" + + def test_save_and_load(self, tmp_path): + ws = WaiverSet([Waiver(id="W1", scope_pattern="**")]) + path = str(tmp_path / "waivers.json") + ws.save(path) + ws2 = WaiverSet.load(path) + assert len(ws2.waivers) == 1 + assert ws2.waivers[0].id == "W1" + + def test_from_dict_missing_optional_fields(self): + d = {"waivers": [{"id": "W1", "scope_pattern": "a"}]} + ws = WaiverSet.from_dict(d) + w = ws.waivers[0] + assert w.bin_pattern == "*" + assert w.status == "active" + assert w.expires_at == ""