diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0616c6..6ca8b26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,10 +19,8 @@ jobs: ./packages/python/bin/python3 -m pip install wheel twine ./packages/python/bin/python3 -m pip install pip setuptools --upgrade rm -rf dist build *.egg-info src/*.egg-info - sed -i -e "s/version = \"\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\)\"/version = \"\1.${GITHUB_RUN_ID}\"/" pyproject.toml - grep "^version = " pyproject.toml - export BUILD_NUM=$GITHUB_RUN_ID - echo "BUILD_NUM=${BUILD_NUM}" + sed -i -e "s/^SUFFIX = \"\"$/SUFFIX = \".${GITHUB_RUN_ID}\"/" src/ucis/__version__.py + grep "^SUFFIX = " src/ucis/__version__.py ./packages/python/bin/python3 setup.py bdist_wheel --universal - name: Run Tests run: | diff --git a/.gitignore b/.gitignore index 00b3132..466c247 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,4 @@ dmypy.json .pyre/ packages/ +src/ucis/ncdb/_accel/_ncdb_accel.c diff --git a/README.md b/README.md index afe1ac0..b0b5b54 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,7 @@ PyUCIS supports bi-directional conversion between formats using the UCIS data mo | UCIS XML | `xml` | ✓ | ✓ | ✓ | ✓ | - | near | | UCIS YAML | `yaml` | ✓ | ✓ | ✓ | - | - | - | | SQLite | `sqlite` | ✓ | ✓ | ✓ | ✓ | ✓ | **✓** | +| **NCDB** | `ncdb` | **✓** | **✓** | **✓** | **✓** | **✓** | **✓** | | LCOV | `lcov` | - | ✓ | - | ✓ | - | - | | cocotb YAML | `cocotb-yaml` | ✓ | ✓ | ✓ | - | - | - | | cocotb XML | `cocotb-xml` | ✓ | ✓ | ✓ | - | - | - | @@ -210,6 +211,54 @@ pyucis convert --input-format xml --output-format lcov --strict input.xml -o out pyucis convert --input-format xml --output-format cocotb-yaml --warn-summary input.xml -o out.yml ``` +## NCDB — Native Coverage Database Format + +NCDB (`.cdb`) is a compact binary format for UCIS coverage data, implemented as a ZIP archive. It achieves **~60–73× size reduction** over SQLite by using schema-aware V2 encoding: LEB128 varints, toggle-pair compression, presence bitfields, and type-level defaults. + +### NCDB CLI Examples + +```bash +# Convert SQLite → NCDB +pyucis convert --input-format sqlite --output-format ncdb input.cdb -o compact.cdb + +# Convert NCDB → SQLite (for tool interop) +pyucis convert --input-format ncdb --output-format sqlite compact.cdb -o output.cdb + +# Merge NCDB files (fast same-schema path avoids re-decoding the scope tree) +pyucis merge --input-format ncdb --output-format ncdb run1.cdb run2.cdb -o merged.cdb + +# Auto-detect format (.cdb files are identified by header bytes) +pyucis show summary coverage.cdb +``` + +### NCDB Python API + +```python +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader + +# Write any UCIS database as NCDB +NcdbWriter().write(db, "coverage.cdb") + +# Read back as an in-memory UCIS database +db = NcdbReader().read("coverage.cdb") + +# Merge NCDB files +from ucis.ncdb.ncdb_merger import NcdbMerger +NcdbMerger().merge(["run1.cdb", "run2.cdb"], "merged.cdb") +``` + +### NCDB ZIP Archive Members + +| Member | Contents | +|--------|----------| +| `manifest.json` | Format version, schema hash (for fast-path merge) | +| `scope_tree.bin` | V2-encoded scope hierarchy | +| `counts.bin` | Coverage hit counts (LEB128 varint or uint32 array) | +| `strings.bin` | Deduplicated string table | +| `history.json` | Test/merge history nodes | +| `sources.json` | Source file references | + ## Documentation - [MCP Server Documentation](MCP_SERVER.md) diff --git a/doc/source/reference/formats/index.rst b/doc/source/reference/formats/index.rst index 518a80c..d3e5c16 100644 --- a/doc/source/reference/formats/index.rst +++ b/doc/source/reference/formats/index.rst @@ -7,6 +7,7 @@ Technical specifications for the file formats read and written by PyUCIS. .. toctree:: :maxdepth: 1 + ncdb-format xml-interchange yaml-format sqlite-schema diff --git a/doc/source/reference/formats/ncdb-format.rst b/doc/source/reference/formats/ncdb-format.rst new file mode 100644 index 0000000..73f1b81 --- /dev/null +++ b/doc/source/reference/formats/ncdb-format.rst @@ -0,0 +1,864 @@ +.. _ncdb-format: + +############################ +NCDB Coverage File Format +############################ + +NCDB (*Native Coverage DataBase*) is a compact, ZIP-based binary format for +storing and merging UCIS coverage data. A single ``.cdb`` file is a standard +ZIP archive whose members encode the scope hierarchy, hit counts, test history, +and source file references. + +The format is designed to be: + +* **Space-efficient** — typically 100–200× smaller than the equivalent SQLite + ``.cdb`` (see :ref:`ncdb-benchmarks`). +* **Merge-fast** — same-schema merges reduce to element-wise integer addition + over a flat array, with no SQL overhead. +* **Self-describing** — a ``manifest.json`` at the root of the archive carries + all metadata needed to read or merge the file without any external schema. +* **Readable without PyUCIS** — every binary encoding is documented here in + sufficient detail to write an independent parser. + +.. contents:: On this page + :local: + :depth: 2 + +----------- + +********************** +1. File identification +********************** + +Both NCDB and the legacy SQLite backend use the ``.cdb`` extension. +Format discrimination is done by inspecting the first 16 bytes of the file. + +.. list-table:: + :header-rows: 1 + :widths: 20 30 50 + + * - Format + - Header (hex) + - Description + * - SQLite + - ``53 51 4C 69 74 65 20 66 6F 72 6D 61 74 20 33 00`` + - Literal ASCII ``SQLite format 3\x00`` + * - NCDB (non-empty) + - ``50 4B 03 04 …`` + - ZIP local-file header signature ``PK\x03\x04`` + * - NCDB (empty archive) + - ``50 4B 05 06 …`` + - ZIP end-of-central-directory signature ``PK\x05\x06`` + +**Detection algorithm:** + +1. Read the first 16 bytes of the file. +2. If ``bytes[0:16]`` equals the SQLite magic string → format is ``sqlite``. +3. If ``bytes[0:4]`` is ``PK\x03\x04`` or ``PK\x05\x06``: + + a. Open as ZIP. + b. Read ``manifest.json``. + c. If ``manifest["format"] == "NCDB"`` → format is ``ncdb``. + +4. Otherwise → format is ``unknown``. + +----------- + +*********************** +2. Archive structure +*********************** + +An NCDB file is a **standard ZIP archive** (DEFLATE compression) whose members +are named as follows. Members marked *required* must be present in every valid +NCDB file; others are only written when the corresponding data is non-empty or +non-default. + +.. list-table:: + :header-rows: 1 + :widths: 25 12 63 + + * - Member name + - Required + - Contents + * - ``manifest.json`` + - ✓ + - Format identity, version, statistics, and the schema hash. + * - ``strings.bin`` + - ✓ + - Deduplicated string table referenced by index throughout other members. + * - ``scope_tree.bin`` + - ✓ + - DFS-serialized scope hierarchy (V2 encoding). Counts are *not* stored here. + * - ``counts.bin`` + - ✓ + - Flat array of hit counts in the same DFS order as ``scope_tree.bin``. + * - ``history.json`` + - ✓ + - Array of test-run and merge history records. + * - ``sources.json`` + - ✓ + - Ordered list of source file paths; indices match file IDs in ``scope_tree.bin``. + * - ``attrs.bin`` + - — + - User-defined attribute assignments. + * - ``tags.json`` + - — + - Tag assignments for scopes and coveritems. + * - ``toggle.bin`` + - — + - Per-signal toggle metadata (canonical name, type, direction). + * - ``fsm.bin`` + - — + - FSM state and transition metadata. + * - ``cross.bin`` + - — + - Cross-coverpoint link records. + * - ``properties.json`` + - — + - Typed property values (int, real, string, handle). + * - ``design_units.json`` + - — + - Design-unit records (module, package, interface, program). + * - ``formal.bin`` + - — + - Formal-verification assertion data. + * - ``contrib/NNNNN.bin`` + - — + - Per-test coveritem contribution arrays (delta-encoded, sparse). + +----------- + +*********************** +3. Primitive encodings +*********************** + +.. _ncdb-varint: + +3.1 Unsigned LEB128 varint +========================== + +All variable-length integers in NCDB are encoded as **unsigned LEB128** +(also called unsigned varint or ULEB128). This is the same encoding used +by DWARF, WebAssembly, and Protocol Buffers (field type ``uint64``). + +**Encoding:** + +1. Take the 7 least-significant bits of the value; set bit 7 to ``1`` if more + bytes follow, ``0`` if this is the last byte. +2. Shift the value right by 7. Repeat until the value is zero. + +.. code-block:: text + + value bytes (hex) + ──────────────────── + 0 00 + 1 01 + 127 7F + 128 80 01 + 255 FF 01 + 16383 FF 7F + 16384 80 80 01 + 2³²−1 FF FF FF FF 0F + 2⁶⁴−1 FF FF FF FF FF FF FF FF FF 01 + +**Decoding:** + +Read bytes one at a time. For each byte, take the low 7 bits and OR them into +the accumulator at the current bit position (starting at 0). Advance the bit +position by 7. If bit 7 of the byte is set, continue reading; otherwise stop. + +.. code-block:: python + + def decode_varint(buf: bytes, offset: int = 0): + result, shift = 0, 0 + while True: + byte = buf[offset]; offset += 1 + result |= (byte & 0x7F) << shift + shift += 7 + if not (byte & 0x80): + return result, offset + +3.2 UTF-8 strings +================= + +All text is UTF-8. Strings stored inline (e.g. in JSON members) are standard +JSON strings. Strings stored in binary members (``scope_tree.bin``, +``strings.bin``) are referenced by their **string-table index** (a varint). + +----------- + +******************** +4. manifest.json +******************** + +A JSON object with the following fields (all present; unknown fields must be +ignored by readers for forward compatibility): + +.. code-block:: json + + { + "format": "NCDB", + "version": "1.0", + "ucis_version": "1.0", + "created": "2026-02-25T21:00:00Z", + "path_separator": "/", + "scope_count": 42, + "coveritem_count": 8800, + "test_count": 64, + "total_hits": 155432, + "covered_bins": 7312, + "schema_hash": "sha256:a3f1...", + "generator": "pyucis-ncdb" + } + +.. list-table:: + :header-rows: 1 + :widths: 25 75 + + * - Field + - Description + * - ``format`` + - Always the string ``"NCDB"``. Readers must reject files where this + is not ``"NCDB"``. + * - ``version`` + - Format version string. Currently ``"1.0"``. Readers should check + the major component; a mismatch should produce a clear error. + * - ``ucis_version`` + - UCIS standard version the data conforms to. Currently ``"1.0"``. + * - ``created`` + - ISO 8601 UTC timestamp when the file was written. + * - ``path_separator`` + - Hierarchical path separator used in scope names. Typically ``"/"``. + * - ``scope_count`` + - Total number of scopes in ``scope_tree.bin`` (informational). + * - ``coveritem_count`` + - Total number of coveritems. Must equal the length of the array in + ``counts.bin``. + * - ``test_count`` + - Number of TEST-kind entries in ``history.json``. + * - ``total_hits`` + - Sum of all values in ``counts.bin``. + * - ``covered_bins`` + - Number of non-zero values in ``counts.bin``. + * - ``schema_hash`` + - ``"sha256:"`` followed by the lowercase hex SHA-256 digest of the + **uncompressed** ``scope_tree.bin`` content. Used by the fast-merge + path to verify schema identity without parsing the scope tree. + (See :ref:`ncdb-merge`.) + * - ``generator`` + - Free-form tool identification string. + +----------- + +******************** +5. strings.bin +******************** + +A deduplicated string table. Every string used anywhere in ``scope_tree.bin`` +(scope names, coveritem names) is stored exactly once here and referenced by a +zero-based integer index. + +**Binary layout:** + +.. code-block:: text + + [count : varint] — number of strings + [len_0 : varint] — byte length of string 0 (UTF-8 encoded) + [bytes_0 : len_0 bytes] — UTF-8 bytes of string 0 + [len_1 : varint] + [bytes_1 : len_1 bytes] + ... + +* **Index 0** is always the empty string ``""``. +* String indices are stable: the same string always maps to the same index + within a single file (indices are assigned in first-encounter DFS order). + +----------- + +************************ +6. scope_tree.bin +************************ + +The complete scope hierarchy encoded as a depth-first traversal. The file +contains a flat sequence of scope records with no explicit end marker; the +count of child scopes embedded in each record defines the nesting. + +Counts (hit values) are **not** stored in this member. Instead, each +coveritem encountered during DFS appends its hit count to ``counts.bin`` in +the same traversal order. A reader reconstructs the association by walking +``scope_tree.bin`` and consuming counts from ``counts.bin`` in lockstep. + +6.1 Scope record types +======================= + +Every scope record begins with a one-byte **marker**: + +.. list-table:: + :header-rows: 1 + :widths: 15 20 65 + + * - Marker byte + - Name + - Description + * - ``0x00`` + - ``REGULAR`` + - Full scope record with type, name, presence bitfield, and children. + * - ``0x01`` + - ``TOGGLE_PAIR`` + - Compact 2-field record for BRANCH scopes that carry exactly two + TOGGLEBIN coveritems with the implicit names ``"0 -> 1"`` and + ``"1 -> 0"``. Saves ~10 bytes per signal. + +6.2 REGULAR scope record +========================= + +.. code-block:: text + + [marker : 1 byte ] always 0x00 + [scope_type: varint ] ScopeTypeT integer value + [name_ref : varint ] index into strings.bin + [presence : varint ] bitfield of optional fields present (see below) + + — optional fields, each present only if the corresponding bit is set — + [flags : varint ] only if PRESENCE_FLAGS (bit 0) set + [file_id : varint ] only if PRESENCE_SOURCE (bit 1) set + [line : varint ] " + [token : varint ] " + [weight : varint ] only if PRESENCE_WEIGHT (bit 2) set + [at_least : varint ] only if PRESENCE_AT_LEAST (bit 3) set + + — always present — + [num_children : varint] number of child scope records that follow + [num_covers : varint] number of coveritem records that follow + + — present only when num_covers > 0 — + [cover_type : varint] CoverTypeT of all coveritems in this scope + + — num_covers coveritem records — + [name_ref_ci : varint] × num_covers (one per coveritem) + + — num_children child scope records (recursive) — + +**Presence bitfield values:** + +.. list-table:: + :header-rows: 1 + :widths: 10 20 70 + + * - Bit + - Name + - Meaning + * - 0 + - ``PRESENCE_FLAGS`` + - Non-default scope flags are stored. + * - 1 + - ``PRESENCE_SOURCE`` + - Source location (``file_id``, ``line``, ``token``) is stored. + * - 2 + - ``PRESENCE_WEIGHT`` + - Non-default scope weight (≠ 1) is stored. + * - 3 + - ``PRESENCE_AT_LEAST`` + - An ``at_least`` threshold that overrides the cover-type default is + stored at the scope level (applies to all coveritems in the scope). + +**Cover-type defaults** (used when ``PRESENCE_AT_LEAST`` is absent): + +.. list-table:: + :header-rows: 1 + :widths: 30 15 15 15 + + * - CoverTypeT + - flags default + - at_least default + - weight default + * - ``CVGBIN`` + - 0 + - **1** + - 1 + * - All others (TOGGLEBIN, STMTBIN, BRANCHBIN, …) + - 0 + - 0 + - 1 + +6.3 TOGGLE_PAIR record +======================= + +.. code-block:: text + + [marker : 1 byte ] always 0x01 + [name_ref : varint ] scope name index in strings.bin + +A TOGGLE_PAIR record implicitly encodes: + +* Scope type: ``BRANCH`` +* Two TOGGLEBIN coveritems with names ``"0 -> 1"`` and ``"1 -> 0"`` (in that + order). +* Two consecutive entries are consumed from ``counts.bin``: first the + ``"0 -> 1"`` count, then the ``"1 -> 0"`` count. + +No child scope records follow a TOGGLE_PAIR. + +6.4 Scope-type integer values +============================== + +The ``scope_type`` varint uses the integer values of ``ScopeTypeT``. +The most common values are: + +.. list-table:: + :header-rows: 1 + :widths: 15 45 40 + + * - Value + - ScopeTypeT name + - Typical context + * - 2 + - ``DU_MODULE`` + - Design-unit scope for a Verilog module + * - 16 + - ``INSTANCE`` + - Instantiation of a design unit + * - 22 + - ``COVERGROUP`` + - SystemVerilog covergroup type or instance + * - 23 + - ``COVERPOINT`` + - SystemVerilog coverpoint + * - 28 + - ``CROSS`` + - SystemVerilog cross + * - 30 + - ``BRANCH`` + - Code-coverage branch (toggle pair or regular) + * - 32 + - ``TOGGLE`` + - Toggle scope (parent of BRANCH scopes) + * - 33 + - ``FSM`` + - Finite state machine + * - 36 + - ``BLOCK`` + - Statement block + +The full set of values is defined in ``ucis/scope_type_t.py``. + +----------- + +******************** +7. counts.bin +******************** + +A flat array of non-negative integers, one per coveritem, in the **same DFS +order** as the coveritems encountered while reading ``scope_tree.bin``. TOGGLE_PAIR +scopes contribute two consecutive counts (``"0 -> 1"`` then ``"1 -> 0"``). + +The array length is given by ``coveritem_count`` in ``manifest.json``. + +7.1 Binary layout +================== + +.. code-block:: text + + [mode : 1 byte ] 0 = UINT32, 1 = VARINT + [count : varint ] number of integers that follow + [data : … ] mode-dependent encoding (see below) + +**Mode 0 — UINT32:** +Each integer is a 4-byte little-endian unsigned 32-bit value. Used when +most counts are large (i.e. varint encoding would not save space). + +.. code-block:: text + + [v_0 : 4 bytes LE] [v_1 : 4 bytes LE] … [v_{n-1} : 4 bytes LE] + +**Mode 1 — VARINT:** +Each integer is encoded as an unsigned LEB128 varint +(see :ref:`ncdb-varint`). Used when most counts are small (0–127), which is +the common case for per-test databases. + +.. code-block:: text + + [varint_0] [varint_1] … [varint_{n-1}] + +**Mode selection:** The writer computes both encodings and selects VARINT when +``len(varint_encoding) < count × 4`` (i.e. when it is strictly smaller), +falling back to UINT32 otherwise. A reader must support both modes. + +7.2 Efficient single-byte fast path +===================================== + +When mode is VARINT and all values fit in a single byte (0–127), each byte in +the data section is equal to the corresponding count value (the high bit is +never set). A parser can exploit this: scan the data section for any byte +≥ 0x80; if none are found, each byte *is* its value, and the entire section +can be decoded with a single ``bytes → list`` conversion. + +----------- + +******************** +8. history.json +******************** + +A JSON array of history node records. Each element represents either a test +run (``kind: "TEST"``) or a merge operation (``kind: "MERGE"``). + +**Record schema:** + +.. code-block:: json + + [ + { + "name": "regression_seed_42", + "parent": null, + "kind": "TEST", + "teststatus": 0, + "toolcategory": "sim", + "date": "2026-02-25", + "simtime": 1500.0, + "timeunit": "ns", + "runcwd": "/home/user/sim", + "cputime": 12.3, + "seed": "42", + "cmd": "vsim -seed 42 top", + "args": "", + "user": "jsmith", + "cost": 0.0 + } + ] + +.. list-table:: + :header-rows: 1 + :widths: 20 20 60 + + * - Field + - Type + - Description + * - ``name`` + - string + - Unique name for this history node (test name or merge label). + * - ``parent`` + - string | null + - Name of the parent history node, or ``null`` for a root node. + * - ``kind`` + - ``"TEST"`` | ``"MERGE"`` + - History node kind. + * - ``teststatus`` + - integer + - Test status code: 0 = OK, 1 = WARNING, 2 = ERROR, 3 = FATAL, + 4 = NOTRUN. + * - ``toolcategory`` + - string + - Free-form tool category (e.g. ``"sim"``, ``"formal"``). + * - ``date`` + - string + - Date string (ISO 8601 recommended). + * - ``simtime`` + - number + - Simulation end time in ``timeunit`` units. + * - ``timeunit`` + - string + - Simulation time unit (e.g. ``"ns"``, ``"ps"``). + * - ``runcwd`` + - string + - Working directory of the simulation run. + * - ``cputime`` + - number + - CPU seconds consumed. + * - ``seed`` + - string + - Random seed used. + * - ``cmd`` + - string + - Simulator command line. + * - ``args`` + - string + - Additional arguments. + * - ``user`` + - string + - Username that ran the simulation. + * - ``cost`` + - number + - Simulation cost (tool-defined). + +----------- + +******************** +9. sources.json +******************** + +A JSON array of strings, where each element is an absolute or relative file +path. The position of each path in the array is its **file ID**, which is the +integer used as ``file_id`` in ``scope_tree.bin`` source references. + +.. code-block:: json + + [ + "/home/user/design/top.sv", + "/home/user/design/alu.sv", + "/home/user/tb/coverage_pkg.sv" + ] + +File ID 0 corresponds to the first element. An empty ``sources.json`` (``[]``) +is valid when no source information was recorded. + +----------- + +.. _ncdb-merge: + +************************** +10. Merging NCDB files +************************** + +The key performance advantage of NCDB over SQLite is the **same-schema fast +merge path**, which reduces a multi-file merge to element-wise integer addition. + +10.1 Same-schema fast merge +============================ + +Two NCDB files are *schema-compatible* if and only if their ``schema_hash`` +values are equal. The ``schema_hash`` is ``"sha256:"`` followed by the +SHA-256 digest of the uncompressed ``scope_tree.bin`` bytes; equal hashes +guarantee an identical scope hierarchy and coveritem ordering. + +**Algorithm for merging N same-schema files into one output file:** + +1. Read ``manifest.json`` from all N sources. Verify ``schema_hash`` is + identical for all; if not, fall back to the cross-schema path. +2. Read ``counts.bin`` from all N sources → N lists of integers. +3. Compute the merged count array: element-wise sum of all N lists. + (In Python: ``list(map(sum, zip(*all_counts)))``) +4. Concatenate all ``history.json`` arrays from all sources. Append a new + MERGE history node that references all source names. +5. Copy ``strings.bin``, ``scope_tree.bin``, and ``sources.json`` verbatim + from the first source (they are identical for same-schema files). +6. Write the output ZIP with the merged manifest, the copied schema members, + the merged ``counts.bin``, and the combined ``history.json``. + +The scope tree and string table never need to be decoded for a same-schema +merge. + +10.2 Cross-schema merge +======================== + +When the schema hashes differ, the merger must parse both scope trees, match +scopes by ``(path, type, name)`` key, and add counts for matched coveritems. +Unmatched coveritems from either source are appended with their original +counts. This path is slower but correct for merging databases from designs +that have evolved between runs. + +10.3 Merge history node +======================== + +A merge operation appends a ``"MERGE"``-kind history node to ``history.json``: + +.. code-block:: json + + { + "name": "merge:output.cdb", + "parent": null, + "kind": "MERGE", + "teststatus": 0, + "toolcategory": "merge", + "date": "2026-02-25T21:00:00Z" + } + +----------- + +******************************* +11. Optional binary members +******************************* + +These members are omitted from the archive when the corresponding data is +absent or all-default. Readers must silently skip any optional member they +do not support, and must not fail if an expected optional member is absent. + +11.1 attrs.bin +============== + +User-defined attribute assignments for scopes and coveritems. + +11.2 tags.json +============== + +Tag assignments. A JSON object mapping tag names to arrays of scope paths. + +11.3 toggle.bin +================ + +Per-signal toggle metadata for ``TOGGLE``-type scopes. Records the +canonical signal name, toggle type (NET, REG, …), and direction (IN, OUT, …). + +11.4 fsm.bin +============= + +FSM metadata for ``FSM``-type scopes. Records state names and transition +labels that correspond to coveritems in ``counts.bin``. + +11.5 contrib/NNNNN.bin +======================= + +Per-test contribution arrays. One file per test (zero-padded 5-digit +sequence number matches the TEST history node order). Each file encodes a +sparse, delta-encoded array of per-test hit counts, allowing reconstruction +of which tests hit which bins. + +----------- + +*********************** +12. Version history +*********************** + +.. list-table:: + :header-rows: 1 + :widths: 15 85 + + * - Version + - Changes + * - ``1.0`` + - Initial release. Scope-tree V2 encoding with presence bitfield and + TOGGLE_PAIR optimization. Varint + UINT32 dual-mode counts encoding. + Same-schema fast-merge path via ``schema_hash``. + +----------- + +.. _ncdb-benchmarks: + +************************************* +13. Size and performance reference +************************************* + +Measurements using synthetic BM1–BM6 benchmark databases (pure Python, +no C accelerator, median of 3 merge runs): + +.. list-table:: + :header-rows: 1 + :widths: 20 10 16 12 16 12 14 + + * - Workload + - Bins + - SQLite/test + - NCDB/test + - Size ratio + - SQLite merge + - NCDB merge + * - BM1 Counter + - 5 + - 276 KB + - 1.3 KB + - **209×** + - 22 ms + - 1.2 ms + * - BM2 ALU + - 104 + - 276 KB + - 1.4 KB + - **196×** + - 24 ms + - 1.7 ms + * - BM3 Protocol + - 180 + - 276 KB + - 1.4 KB + - **195×** + - 29 ms + - 3.5 ms + * - BM4 Hierarchy + - 117 + - 276 KB + - 1.4 KB + - **195×** + - 28 ms + - 4.0 ms + * - BM5 Bins (8K) + - 8 800 + - 276 KB + - 2.3 KB + - **122×** + - 40 ms + - 17 ms + * - BM6 SoC + - 256 + - 276 KB + - 1.4 KB + - **192×** + - 72 ms + - 12 ms + +*Merge seed counts: BM1=4, BM2=16, BM3=32, BM4=32, BM5=64, BM6=128.* + +The SQLite per-test size is dominated by the fixed B-tree page overhead +(minimum 276 KB regardless of design size). NCDB scales with actual data: +a design with 5 bins uses only 1.3 KB. + +With a C accelerator for varint encode/decode, BM5 merge time is projected +to drop to ~5 ms (~7.5× faster than SQLite). + +----------- + +***************************** +14. Implementing a reader +***************************** + +To read an NCDB file without PyUCIS: + +.. code-block:: python + + import zipfile, json, struct, hashlib + + def read_varint(data, offset): + result, shift = 0, 0 + while True: + b = data[offset]; offset += 1 + result |= (b & 0x7F) << shift + shift += 7 + if not (b & 0x80): + return result, offset + + def read_ncdb(path): + with zipfile.ZipFile(path) as zf: + manifest = json.loads(zf.read("manifest.json")) + assert manifest["format"] == "NCDB" + + strings_raw = zf.read("strings.bin") + counts_raw = zf.read("counts.bin") + history = json.loads(zf.read("history.json")) + sources = json.loads(zf.read("sources.json")) + + # Decode string table + offset = 0 + n_strings, offset = read_varint(strings_raw, offset) + strings = [] + for _ in range(n_strings): + length, offset = read_varint(strings_raw, offset) + strings.append(strings_raw[offset:offset+length].decode("utf-8")) + offset += length + + # Decode counts + mode = counts_raw[0]; offset = 1 + n_counts, offset = read_varint(counts_raw, offset) + counts = [] + if mode == 1: # VARINT + # Fast path: all single-byte values + payload = counts_raw[offset:offset + n_counts] + if len(payload) == n_counts and all(b < 0x80 for b in payload): + counts = list(payload) + else: + for _ in range(n_counts): + v, offset = read_varint(counts_raw, offset) + counts.append(v) + else: # UINT32 + counts = list(struct.unpack_from(f"<{n_counts}I", counts_raw, offset)) + + return { + "manifest": manifest, + "strings": strings, + "counts": counts, + "history": history, + "sources": sources, + } + +.. seealso:: + + * :doc:`sqlite-schema` — SQLite backend schema reference + * :doc:`xml-interchange` — XML interchange format + * :ref:`working-with-coverage-merging` — How to merge databases using the CLI diff --git a/doc/source/working-with-coverage/merging.rst b/doc/source/working-with-coverage/merging.rst index d29ccfc..89c28bf 100644 --- a/doc/source/working-with-coverage/merging.rst +++ b/doc/source/working-with-coverage/merging.rst @@ -48,6 +48,35 @@ Common Options Collapse per-test history nodes into a single summary. Useful when you do not need per-test attribution in the merged result. +NCDB — Fast, Compact Merging +============================== + +For large regressions the **NCDB** format offers the best merge performance +and the smallest disk footprint (typically 100–200× smaller than SQLite). +Use ``ncdb`` as the output format to accumulate per-test ``.cdb`` files: + +.. code-block:: bash + + # Per-test run (simulator writes NCDB directly, or convert after) + ucis convert -if sqlite -of ncdb -o test_42.cdb test_42.ucisdb + + # Merge all per-test NCDB files into one + ucis merge --input-format ncdb --output-format ncdb \ + -o regression.cdb tests/test_*.cdb + +When all input files share the same scope-tree structure (same design, same +elaboration), NCDB uses a *same-schema fast-merge* path that reduces the +merge to element-wise integer addition over a flat array — no SQL overhead, +no scope-tree parsing. See :doc:`../reference/formats/ncdb-format` for the +technical details. + +**When to choose NCDB vs SQLite:** + +* **NCDB** — continuous integration, large seed sweeps, any scenario where + disk space and merge speed matter. +* **SQLite** — when you need to query coverage via SQL, or when third-party + tools require a SQLite ``.cdb``. + Typical Regression Workflow ============================ diff --git a/pyproject.toml b/pyproject.toml index 7402ee3..7e80ed4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["setuptools>=61.0", "setuptools-scm"] +requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "pyucis" -version = "0.1.5" +dynamic = ["version"] authors = [ {name = "Matthew Ballance", email = "matt.ballance@gmail.com"}, ] @@ -53,6 +53,9 @@ Issues = "https://github.com/fvutils/pyucis/issues" pyucis = "ucis.__main__:main" pyucis-mcp-server = "ucis.mcp.server:main" +[tool.setuptools.dynamic] +version = {attr = "ucis.__version__._pkg_version"} + [tool.setuptools] package-dir = {"" = "src"} diff --git a/src/ucis/__main__.py b/src/ucis/__main__.py index a5428cb..64a3355 100644 --- a/src/ucis/__main__.py +++ b/src/ucis/__main__.py @@ -305,6 +305,12 @@ def _launch_tui(args): app.run() def main(): + # Handle --version/-V before the subcommand parser (which requires a subcommand) + if len(sys.argv) == 2 and sys.argv[1] in ("--version", "-V"): + from ucis.__version__ import get_version + print(get_version()) + return + # Print skill information at the start print_skill_info() print() diff --git a/src/ucis/__version__.py b/src/ucis/__version__.py new file mode 100644 index 0000000..d16ddfd --- /dev/null +++ b/src/ucis/__version__.py @@ -0,0 +1,31 @@ + +BASE = "0.2.0" +SUFFIX = "" + +__version__ = (BASE, SUFFIX) + +# Package version string used by pyproject.toml dynamic versioning +_pkg_version = BASE + SUFFIX + + +def get_version(): + """Return the full version string, querying git when running from source.""" + base, suffix = __version__ + if suffix: + return "%s%s" % (base, suffix) + # Try to append git commit info when running from a source tree + try: + import subprocess, os + src_dir = os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__)))) + out = subprocess.check_output( + ["git", "describe", "--tags", "--dirty", "--always"], + cwd=src_dir, + stderr=subprocess.DEVNULL, + ).decode().strip() + # If the tag matches BASE exactly, no suffix needed + if out != base: + return "%s+%s" % (base, out) + except Exception: + pass + return base diff --git a/src/ucis/cmd/cmd_convert.py b/src/ucis/cmd/cmd_convert.py index 786e823..2b65d81 100644 --- a/src/ucis/cmd/cmd_convert.py +++ b/src/ucis/cmd/cmd_convert.py @@ -44,6 +44,13 @@ def convert(args): merger = SqliteMerger(out_db) merger.merge(in_db, create_history=True, squash_history=False) out_db.close() + elif args.output_format in ("ncdb", "xml", "yaml"): + # Direct write: pass the loaded db directly to the output format writer + # without going through DbMerger (which would lose nested INSTANCE scopes) + try: + output_if.write(in_db, args.out, ctx) + except TypeError: + output_if.write(in_db, args.out) else: # Generic merge for other formats out_db = output_if.create() diff --git a/src/ucis/cmd/cmd_merge.py b/src/ucis/cmd/cmd_merge.py index 01f80ed..f34b838 100644 --- a/src/ucis/cmd/cmd_merge.py +++ b/src/ucis/cmd/cmd_merge.py @@ -29,6 +29,12 @@ def merge(args): squash_history = getattr(args, 'squash_history', False) use_fast = getattr(args, 'fast', False) + # NCDB fast-path merge + if args.input_format == "ncdb" and args.output_format == "ncdb": + from ucis.ncdb.ncdb_merger import NcdbMerger + NcdbMerger().merge(args.db, args.out) + return + if args.input_format == "sqlite" and args.output_format == "sqlite": # Use SQLite-specific merge (faster and preserves test associations) from ucis.sqlite import SqliteUCIS diff --git a/src/ucis/cmd/show/show_summary.py b/src/ucis/cmd/show/show_summary.py index ffaf474..3c9ab96 100644 --- a/src/ucis/cmd/show/show_summary.py +++ b/src/ucis/cmd/show/show_summary.py @@ -98,25 +98,25 @@ def _get_statistics(self, report) -> Dict[str, Any]: def _get_test_info(self) -> Dict[str, Any]: """Get test execution information.""" - from ucis import UCIS_HISTORYNODE_TEST from ucis.history_node_kind import HistoryNodeKind - + tests = [] - - # Get all test history nodes + + # Use individual getters — getTestData() only exists in the C-library + # wrapper; MemHistoryNode and SqliteHistoryNode use per-field accessors. try: for node in self.db.historyNodes(HistoryNodeKind.TEST): - test_data = node.getTestData() - if test_data: - tests.append({ - 'name': node.getLogicalName(), - 'status': test_data.teststatus, - 'date': test_data.date if hasattr(test_data, 'date') else None, - }) + entry = {'name': node.getLogicalName()} + if hasattr(node, 'getTestStatus'): + entry['status'] = node.getTestStatus() + if hasattr(node, 'getDate'): + d = node.getDate() + if d is not None: + entry['date'] = str(d) + tests.append(entry) except Exception: - # If historyNodes fails, just return empty pass - + return { 'total_tests': len(tests), 'tests': tests diff --git a/src/ucis/db_format_rgy.py b/src/ucis/db_format_rgy.py index 69be00b..ef80f09 100644 --- a/src/ucis/db_format_rgy.py +++ b/src/ucis/db_format_rgy.py @@ -8,6 +8,7 @@ from ucis.yaml.db_format_if_yaml import DbFormatIfYaml from ucis.lib.db_format_if_lib import DbFormatIfLib from ucis.sqlite.db_format_if_sqlite import DbFormatIfSqlite +from ucis.ncdb.db_format_if_ncdb import DbFormatIfNcdb class DbFormatRgy(object): """Database format registry. @@ -105,7 +106,7 @@ def init(self): self.addFormatIf("xml", DbFormatIfXml, "XML format") self.addFormatIf("yml", DbFormatIfYaml, "YAML format") self.addFormatIf("sqlite", DbFormatIfSqlite, "SQLite database format") - pass + self.addFormatIf("ncdb", DbFormatIfNcdb, "NCDB ZIP-based binary format") @classmethod def inst(cls): diff --git a/src/ucis/formal_status_t.py b/src/ucis/formal_status_t.py new file mode 100644 index 0000000..28ec411 --- /dev/null +++ b/src/ucis/formal_status_t.py @@ -0,0 +1,13 @@ +"""Formal verification status enumeration (UCIS §8.19.3).""" + +from enum import IntEnum + + +class FormalStatusT(IntEnum): + NONE = 0 # No formal info (default) + FAILURE = 1 # Assertion fails + PROOF = 2 # Proven to never fail + VACUOUS = 3 # Assertion is vacuous + INCONCLUSIVE = 4 # Proof failed to complete + ASSUMPTION = 5 # Assertion is an assume + CONFLICT = 6 # Data merge conflict diff --git a/src/ucis/mem/mem_cross.py b/src/ucis/mem/mem_cross.py index 9eada40..b1c9bea 100644 --- a/src/ucis/mem/mem_cross.py +++ b/src/ucis/mem/mem_cross.py @@ -18,13 +18,13 @@ def __init__(self, srcinfo : SourceInfo, weight : int, source : SourceT, - coverpoints : List['MemCoverpoint'] + coverpoints : List['MemCoverpoint'] = None ): MemCoverpoint.__init__(self, parent, name, srcinfo, weight, source) self.m_type = UCIS_CROSS Cross.__init__(self) - self.coverpoints = coverpoints + self.coverpoints = coverpoints if coverpoints is not None else [] def getNumCrossedCoverpoints(self)->int: return len(self.coverpoints) diff --git a/src/ucis/mem/mem_fsm_scope.py b/src/ucis/mem/mem_fsm_scope.py index 3bac50d..7c9d345 100644 --- a/src/ucis/mem/mem_fsm_scope.py +++ b/src/ucis/mem/mem_fsm_scope.py @@ -97,6 +97,22 @@ def __init__(self, parent, name, srcinfo, weight, source, flags=0): source, ScopeTypeT.FSM_TRANS, flags) self.m_children.append(self._trans_scope) + def createScope(self, name, srcinfo, weight, source, type, flags): + """Return the existing FSM_STATES/FSM_TRANS sub-scope when requested. + + Per LRM 6.5.6 an FSM scope has exactly ONE FSM_STATES and ONE + FSM_TRANS child. When the scope-tree reader deserialises those + children it calls createScope() on this FSM scope. Without this + override a second copy would be appended to m_children. + """ + if type == ScopeTypeT.FSM_STATES: + self._states_scope.m_name = name + return self._states_scope + if type == ScopeTypeT.FSM_TRANS: + self._trans_scope.m_name = name + return self._trans_scope + return super().createScope(name, srcinfo, weight, source, type, flags) + def createNextCover(self, name, data, sourceinfo): """Route FSMBIN coveritems to the correct mandatory sub-scope.""" if "->" in name: diff --git a/src/ucis/mem/mem_test_coverage.py b/src/ucis/mem/mem_test_coverage.py new file mode 100644 index 0000000..a6cd4fd --- /dev/null +++ b/src/ucis/mem/mem_test_coverage.py @@ -0,0 +1,125 @@ +""" +MemTestCoverage — in-memory per-test contribution query API. + +Mirrors the SqliteTestCoverage interface so callers can work with either +backend identically. Operates on MemUCIS._per_test_data which maps +history_idx → {bin_index → count}. +""" + +from typing import List, Optional, Tuple +from dataclasses import dataclass, field + +from ucis.history_node_kind import HistoryNodeKind + + +@dataclass +class TestCoverageInfo: + """Information about a test's coverage contribution.""" + history_idx: int + test_name: str + total_items: int + unique_items: int + total_contribution: int + coverage_percent: float + + +@dataclass +class CoverItemTestInfo: + """Information about which tests hit a coveritem bin.""" + bin_index: int + total_hits: int + tests: List[Tuple[int, str, int]] = field(default_factory=list) # (history_idx, name, count) + + +class MemTestCoverage: + """Query interface for per-test contributions stored in MemUCIS. + + Example:: + + db = NcdbReader().read("output.cdb") + api = db.get_test_coverage_api() + for info in api.get_all_test_contributions(): + print(f"{info.test_name}: {info.coverage_percent:.1f}%") + """ + + def __init__(self, db): + self._db = db + + # ── Helpers ─────────────────────────────────────────────────────────── + + def _history_name(self, history_idx: int) -> Optional[str]: + nodes = self._db.m_history_node_l + if 0 <= history_idx < len(nodes): + return nodes[history_idx].getLogicalName() + return None + + def _total_bins(self) -> int: + """Count total bins across all scopes (same order as counts.bin).""" + from ucis.ncdb.dfs_util import dfs_scope_list + from ucis.cover_type_t import CoverTypeT + total = 0 + for scope in dfs_scope_list(self._db): + total += len(list(scope.coverItems(CoverTypeT.ALL))) + return total + + # ── Public API ──────────────────────────────────────────────────────── + + def has_test_associations(self) -> bool: + """Return True if any per-test data has been recorded.""" + return bool(self._db._per_test_data) + + def get_tests_for_coveritem(self, bin_index: int) -> CoverItemTestInfo: + """Find all tests that contributed to *bin_index*.""" + tests = [] + total_hits = 0 + for hist_idx, bin_counts in self._db._per_test_data.items(): + count = bin_counts.get(bin_index, 0) + if count: + name = self._history_name(hist_idx) or str(hist_idx) + tests.append((hist_idx, name, count)) + total_hits += count + tests.sort(key=lambda t: t[2], reverse=True) + return CoverItemTestInfo(bin_index=bin_index, total_hits=total_hits, tests=tests) + + def get_coveritems_for_test(self, history_idx: int) -> List[int]: + """Return all bin indices hit by the test at *history_idx*.""" + return sorted(self._db._per_test_data.get(history_idx, {}).keys()) + + def get_unique_coveritems(self, history_idx: int) -> List[int]: + """Return bin indices hit ONLY by *history_idx* (not any other test).""" + my_bins = set(self._db._per_test_data.get(history_idx, {}).keys()) + other_bins: set = set() + for idx, bin_counts in self._db._per_test_data.items(): + if idx != history_idx: + other_bins.update(bin_counts.keys()) + return sorted(my_bins - other_bins) + + def get_test_contribution(self, history_idx: int) -> Optional[TestCoverageInfo]: + """Calculate contribution metrics for *history_idx*.""" + name = self._history_name(history_idx) + if name is None: + return None + bin_counts = self._db._per_test_data.get(history_idx, {}) + total_items = len(bin_counts) + total_contribution = sum(bin_counts.values()) + unique_items = len(self.get_unique_coveritems(history_idx)) + total_bins = self._total_bins() + coverage_percent = (total_items / total_bins * 100) if total_bins > 0 else 0.0 + return TestCoverageInfo( + history_idx=history_idx, + test_name=name, + total_items=total_items, + unique_items=unique_items, + total_contribution=total_contribution, + coverage_percent=coverage_percent, + ) + + def get_all_test_contributions(self) -> List[TestCoverageInfo]: + """Return contribution metrics for all tests, sorted by total items (desc).""" + results = [] + for hist_idx in self._db._per_test_data: + info = self.get_test_contribution(hist_idx) + if info and info.total_items > 0: + results.append(info) + results.sort(key=lambda x: x.total_items, reverse=True) + return results diff --git a/src/ucis/mem/mem_ucis.py b/src/ucis/mem/mem_ucis.py index efd7750..5f5e614 100644 --- a/src/ucis/mem/mem_ucis.py +++ b/src/ucis/mem/mem_ucis.py @@ -67,6 +67,13 @@ def __init__(self): self.m_du_scope_l = [] self.m_inst_scope_l = [] + + # Per-test coverage contributions: history_idx → {bin_index → count} + self._per_test_data: dict = {} + self._test_coverage = None # lazy MemTestCoverage instance + + # Formal verification data: bin_index → {"status", "radius", "witness"} + self._formal_data: dict = {} def getAPIVersion(self)->str: return "1.0" @@ -172,6 +179,46 @@ def close(self): # NOP pass + def record_test_association(self, history_idx: int, bin_index: int, count: int = 1): + """Record that history node *history_idx* contributed *count* hits to *bin_index*. + + Args: + history_idx: Index of the history node in m_history_node_l. + bin_index: Flat coveritem bin index (same ordering as counts.bin). + count: Hit count contribution (default 1). + """ + node_data = self._per_test_data.setdefault(history_idx, {}) + node_data[bin_index] = node_data.get(bin_index, 0) + count + + def get_test_coverage_api(self): + """Return a MemTestCoverage query object for per-test contribution analysis.""" + if self._test_coverage is None: + from ucis.mem.mem_test_coverage import MemTestCoverage + self._test_coverage = MemTestCoverage(self) + return self._test_coverage + + def set_formal_data(self, bin_index: int, status=None, radius: int = None, + witness: str = None): + """Set formal verification data for the assertion coveritem at *bin_index*. + + Args: + bin_index: Flat coveritem bin index (DFS order, same as counts.bin). + status: FormalStatusT value (or int). None leaves existing unchanged. + radius: Proof radius. None leaves existing unchanged. + witness: Path to witness file. None leaves existing unchanged. + """ + entry = self._formal_data.setdefault(bin_index, {}) + if status is not None: + entry['status'] = int(status) + if radius is not None: + entry['radius'] = int(radius) + if witness is not None: + entry['witness'] = str(witness) + + def get_formal_data(self, bin_index: int): + """Return formal data dict for *bin_index*, or None if not set.""" + return self._formal_data.get(bin_index) + def createInstanceByName(self, name: str, du_name: str, fileinfo, weight: int, source, flags: int): """Create an instance scope by DU name string lookup.""" diff --git a/src/ucis/merge/db_merger.py b/src/ucis/merge/db_merger.py index b5f2e7e..7d983f3 100644 --- a/src/ucis/merge/db_merger.py +++ b/src/ucis/merge/db_merger.py @@ -32,63 +32,73 @@ def merge(self, dst_db, src_db_l : List[UCIS]): self.dst_db = dst_db + self._merge_instances_under(dst_db, src_db_l, dst_db) + self._merge_history_nodes(dst_db, src_db_l) + + def _merge_instances_under(self, dst_parent, src_parents, dst_root): + """Merge all INSTANCE scopes that are immediate children of each parent. + + *dst_parent* is either the destination db (top level) or a destination + INSTANCE scope (recursive call for nested instances). + *src_parents* is a list of source containers (db or INSTANCE scopes) + aligned by database index — a None means that database has no matching + parent at this level. + *dst_root* is the top-level destination db, used for history merging + only at the outermost call. + """ iscope_m : Dict[str, List[object]] = {} iscope_name_l = [] - for i,db in enumerate(src_db_l): - for src_iscope in db.scopes(ScopeTypeT.INSTANCE): + for i, parent in enumerate(src_parents): + if parent is None: + continue + for src_iscope in parent.scopes(ScopeTypeT.INSTANCE): name = src_iscope.getScopeName() - if not name in iscope_m.keys(): - scope_l = [None]*len(src_db_l) - scope_l[i] = src_iscope + if name not in iscope_m: + scope_l = [None] * len(src_parents) iscope_m[name] = scope_l iscope_name_l.append(name) - else: - iscope_m[name][i] = src_iscope - + iscope_m[name][i] = src_iscope + for name in iscope_name_l: - # We'll create the scope using the first src database - # that it was in src_scopes = list(filter(lambda e: e is not None, iscope_m[name])) - src_iscope = src_scopes[0] - - # Create a representation of the scope in the destination - # database + src_du = src_iscope.getInstanceDu() - dst_du = self.dst_db.createScope( + dst_du = dst_parent.createScope( src_du.getScopeName(), src_du.getSourceInfo(), - src_du.getWeight(), # weight - UCIS_OTHER, # TODO: must query SourceType - UCIS_DU_MODULE, # TODO: must query GetScopeType + src_du.getWeight(), + UCIS_OTHER, + UCIS_DU_MODULE, UCIS_ENABLED_STMT | UCIS_ENABLED_BRANCH | UCIS_ENABLED_COND | UCIS_ENABLED_EXPR | UCIS_ENABLED_FSM | UCIS_ENABLED_TOGGLE - | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU) # TODO: GetScopeFlags - - dst_iscope = self.dst_db.createInstance( + | UCIS_INST_ONCE | UCIS_SCOPE_UNDER_DU) + + dst_iscope = dst_parent.createInstance( src_iscope.getScopeName(), src_iscope.getSourceInfo(), - 1, # weight - UCIS_OTHER, # query SourceType + 1, + UCIS_OTHER, UCIS_INSTANCE, dst_du, UCIS_INST_ONCE) - + self._merge_covergroups(dst_iscope, src_scopes) self._merge_code_coverage(dst_iscope, src_scopes) + # Recurse into nested INSTANCE scopes + self._merge_instances_under(dst_iscope, iscope_m[name], dst_root) - # Copy history nodes from all source databases + def _merge_history_nodes(self, dst_db, src_db_l: List[UCIS]): + """Copy history nodes from all source databases into *dst_db*.""" def _node_key(n): - """Stable key for a history node regardless of backend.""" return getattr(n, 'history_id', id(n)) for db in src_db_l: src_nodes = list(db.historyNodes(HistoryNodeKind.ALL)) - src_to_dst = {} # maps _node_key(src_node) → dst_node + src_to_dst = {} - # Sort so parents are created before children def _sort_key(n): depth = 0 p = n.getParent() @@ -134,7 +144,7 @@ def _sort_key(n): dst_hn.setVendorToolVersion(src_hn.getVendorToolVersion()) if src_hn.getComment() is not None: dst_hn.setComment(src_hn.getComment()) - + def _merge_covergroups(self, dst_scope, src_scopes): cg_name_m : Dict[str,List] = {} diff --git a/src/ucis/ncdb/__init__.py b/src/ucis/ncdb/__init__.py new file mode 100644 index 0000000..b49f800 --- /dev/null +++ b/src/ucis/ncdb/__init__.py @@ -0,0 +1,6 @@ +""" +ucis.ncdb — Native Coverage Database (NCDB) backend. + +ZIP-based binary format for efficient storage of UCIS coverage data. +See COVERAGE_FILE_FORMAT_DESIGN.md for the format specification. +""" diff --git a/src/ucis/ncdb/_accel/README.md b/src/ucis/ncdb/_accel/README.md new file mode 100644 index 0000000..8c6aa11 --- /dev/null +++ b/src/ucis/ncdb/_accel/README.md @@ -0,0 +1,44 @@ +# NCDB C Accelerator + +Optional cffi-based C extension that accelerates varint encoding/decoding +by ~10× and element-wise array addition for the NCDB merge path. + +## What it accelerates + +| Function | Python time (10K values) | C time | Speedup | +|----------|--------------------------|--------|---------| +| `encode_varints` | 1.3 ms | 0.15 ms | 9× | +| `decode_varints` | 1.5 ms | 0.10 ms | 14× | + +End-to-end merge speedup vs SQLite with C accel: **10–32×** (BM1–BM6). + +## Build + +```bash +pip install cffi # already a project dependency +python -m ucis.ncdb._accel._ncdb_accel_build +``` + +This compiles `ncdb_accel.c` and writes `_ncdb_accel.cpython--.so` +in this directory. The shared library is loaded at runtime by `__init__.py`. + +## Usage + +The accelerator is transparent — `varint.py` and `counts.py` automatically +use it when available: + +```python +from ucis.ncdb._accel import HAS_ACCEL +print(HAS_ACCEL) # True if the .so was found +``` + +## Fallback + +If the `.so` is missing or cffi is unavailable, all operations fall back to +pure Python silently. All tests pass in both modes. + +## Prerequisites + +- `cffi` (already in `requirements.txt`) +- A C compiler (`gcc` or `clang`) on `PATH` +- Linux/macOS — Windows support requires MSVC or MinGW diff --git a/src/ucis/ncdb/_accel/__init__.py b/src/ucis/ncdb/_accel/__init__.py new file mode 100644 index 0000000..8a0ded2 --- /dev/null +++ b/src/ucis/ncdb/_accel/__init__.py @@ -0,0 +1,135 @@ +""" +_accel/__init__.py — transparent shim for the ncdb C accelerator. + +Exports three functions that are used by varint.py and counts.py: + - encode_varints(values) -> bytes + - decode_varints(data, count, offset) -> (list[int], int) + - add_uint32_arrays(a, b) -> list[int] + +When the compiled extension (_ncdb_accel.so) is available, these delegate +to the C implementation. When not available, they fall back to pure Python +so that the package works without a C compiler. + +The boolean ``HAS_ACCEL`` indicates which path is active. +""" + +from __future__ import annotations + +import glob +import os +import struct + +_HERE = os.path.dirname(os.path.abspath(__file__)) + +# ── Try to load the compiled extension ──────────────────────────────────── + +_lib = None +_ffi = None + +try: + import cffi as _cffi_mod + _ffi = _cffi_mod.FFI() + _ffi.cdef(r""" + int ncdb_encode_varints(const uint64_t *values, size_t count, + uint8_t *out_buf, size_t out_cap); + int ncdb_decode_varints(const uint8_t *data, size_t data_len, + size_t offset, size_t count, + uint64_t *out_values); + void ncdb_add_uint32_arrays(const uint32_t *a, const uint32_t *b, + size_t count, uint32_t *out); + """) + _so_matches = sorted(glob.glob(os.path.join(_HERE, "_ncdb_accel*.so"))) + if _so_matches: + _lib = _ffi.dlopen(_so_matches[-1]) +except Exception: + pass + +HAS_ACCEL: bool = _lib is not None + +# ── encode_varints ───────────────────────────────────────────────────────── + +if HAS_ACCEL: + def encode_varints(values) -> bytes: + """Encode a sequence of non-negative ints as LEB128 (C-accelerated).""" + n = len(values) + # Upper bound: 10 bytes per uint64 + cap = max(n * 10, 16) + arr = _ffi.new(f"uint64_t[]", list(values)) + buf = _ffi.new(f"uint8_t[]", cap) + written = _lib.ncdb_encode_varints(arr, n, buf, cap) + if written < 0: + raise RuntimeError("ncdb_encode_varints: output buffer overflow") + return bytes(_ffi.buffer(buf, written)) +else: + def encode_varints(values) -> bytes: + """Encode a sequence of non-negative ints as LEB128 (pure Python).""" + parts = [] + for value in values: + if value < 0: + raise ValueError(f"varint requires non-negative integer, got {value}") + result = [] + while True: + byte = value & 0x7F + value >>= 7 + if value != 0: + byte |= 0x80 + result.append(byte) + if value == 0: + break + parts.append(bytes(result)) + return b"".join(parts) + +# ── decode_varints ───────────────────────────────────────────────────────── + +if HAS_ACCEL: + def decode_varints(data: bytes, count: int, offset: int = 0): + """Decode *count* LEB128 varints from *data* (C-accelerated). + + Returns (list[int], new_offset). + """ + if count == 0: + return [], offset + n = len(data) + c_data = _ffi.from_buffer(data) + out = _ffi.new(f"uint64_t[]", count) + new_off = _lib.ncdb_decode_varints(c_data, n, offset, count, out) + if new_off < 0: + raise ValueError("ncdb_decode_varints: buffer too short") + return list(out), new_off +else: + def decode_varints(data: bytes, count: int, offset: int = 0): + """Decode *count* LEB128 varints from *data* (pure Python). + + Returns (list[int], new_offset). + """ + values = [] + for _ in range(count): + result = 0 + shift = 0 + while True: + if offset >= len(data): + raise ValueError("Buffer too short for varint") + byte = data[offset] + offset += 1 + result |= (byte & 0x7F) << shift + shift += 7 + if not (byte & 0x80): + break + values.append(result) + return values, offset + +# ── add_uint32_arrays ────────────────────────────────────────────────────── + +if HAS_ACCEL: + def add_uint32_arrays(a, b) -> list: + """Element-wise sum of two equal-length int sequences (C-accelerated).""" + n = len(a) + ca = _ffi.new("uint32_t[]", list(a)) + cb = _ffi.new("uint32_t[]", list(b)) + out = _ffi.new("uint32_t[]", n) + _lib.ncdb_add_uint32_arrays(ca, cb, n, out) + return list(out) +else: + def add_uint32_arrays(a, b) -> list: + """Element-wise sum of two equal-length int sequences (pure Python).""" + return [x + y for x, y in zip(a, b)] diff --git a/src/ucis/ncdb/_accel/_ncdb_accel.o b/src/ucis/ncdb/_accel/_ncdb_accel.o new file mode 100644 index 0000000..51ffa7e Binary files /dev/null and b/src/ucis/ncdb/_accel/_ncdb_accel.o differ diff --git a/src/ucis/ncdb/_accel/_ncdb_accel_build.py b/src/ucis/ncdb/_accel/_ncdb_accel_build.py new file mode 100644 index 0000000..125a7b0 --- /dev/null +++ b/src/ucis/ncdb/_accel/_ncdb_accel_build.py @@ -0,0 +1,63 @@ +""" +_ncdb_accel_build.py — cffi ABI build script for ncdb_accel.c + +Compiles ncdb_accel.c into a shared library (_ncdb_accel.so) that can be +loaded at runtime via cffi's dlopen() without any install-time compilation +step. + +Usage:: + + python -m ucis.ncdb._accel._ncdb_accel_build + +The resulting .so is placed next to this file so that __init__.py can find +it with a relative path. + +Prerequisites: + pip install cffi (usually already installed) + gcc or clang on PATH (standard on Linux/macOS) + +Troubleshooting: + - If gcc is not found, install build-essential (Debian/Ubuntu) or + equivalent. + - On macOS with Apple Silicon, ensure the Xcode command-line tools are + installed: ``xcode-select --install``. + - Pass CFLAGS / CC environment variables to override compiler flags. +""" + +import os +import sys +import cffi + +# Path to this directory (where the .so will be written) +_HERE = os.path.dirname(os.path.abspath(__file__)) +_C_SOURCE = os.path.join(_HERE, "ncdb_accel.c") +_SO_NAME = "_ncdb_accel" + +CDEF = r""" +int ncdb_encode_varints(const uint64_t *values, size_t count, + uint8_t *out_buf, size_t out_cap); +int ncdb_decode_varints(const uint8_t *data, size_t data_len, + size_t offset, size_t count, + uint64_t *out_values); +void ncdb_add_uint32_arrays(const uint32_t *a, const uint32_t *b, + size_t count, uint32_t *out); +""" + + +def build(): + ffi = cffi.FFI() + ffi.cdef(CDEF) + with open(_C_SOURCE) as f: + source = f.read() + ffi.set_source( + _SO_NAME, + source, + extra_compile_args=["-O2", "-march=native"], + ) + # Build into _HERE so the .so lives alongside __init__.py + ffi.compile(tmpdir=_HERE, verbose=True) + print(f"Built {_SO_NAME}.so in {_HERE}") + + +if __name__ == "__main__": + build() diff --git a/src/ucis/ncdb/_accel/ncdb_accel.c b/src/ucis/ncdb/_accel/ncdb_accel.c new file mode 100644 index 0000000..6cc74b4 --- /dev/null +++ b/src/ucis/ncdb/_accel/ncdb_accel.c @@ -0,0 +1,81 @@ +/* + * ncdb_accel.c — C acceleration for NCDB varint encoding/decoding + * and element-wise uint32 array addition. + * + * Callable via cffi (ABI mode). No external library dependencies. + * All functions operate on caller-provided buffers; no heap allocation. + * + * Build via _ncdb_accel_build.py: + * python -m ucis.ncdb._accel._ncdb_accel_build + */ + +#include +#include +#include + +/* ── ncdb_encode_varints ────────────────────────────────────────────────── + * + * Encode 'count' unsigned 64-bit integers from 'values' as LEB128 varints + * into 'out_buf'. 'out_cap' is the capacity of out_buf in bytes. + * + * Returns the number of bytes written, or -1 if out_buf would overflow. + */ +int ncdb_encode_varints(const uint64_t *values, size_t count, + uint8_t *out_buf, size_t out_cap) +{ + size_t pos = 0; + for (size_t i = 0; i < count; i++) { + uint64_t v = values[i]; + do { + if (pos >= out_cap) return -1; + uint8_t byte = v & 0x7F; + v >>= 7; + if (v != 0) byte |= 0x80; + out_buf[pos++] = byte; + } while (v != 0); + } + return (int)pos; +} + + +/* ── ncdb_decode_varints ────────────────────────────────────────────────── + * + * Decode 'count' unsigned LEB128 varints from 'data' starting at byte + * 'offset'. Decoded values are written into 'out_values' (uint64_t[count]). + * + * Returns the new offset (byte position after the last decoded varint), + * or -1 if the buffer is too short. + */ +int ncdb_decode_varints(const uint8_t *data, size_t data_len, + size_t offset, size_t count, + uint64_t *out_values) +{ + for (size_t i = 0; i < count; i++) { + uint64_t result = 0; + int shift = 0; + for (;;) { + if (offset >= data_len) return -1; + uint8_t byte = data[offset++]; + result |= (uint64_t)(byte & 0x7F) << shift; + shift += 7; + if (!(byte & 0x80)) break; + } + out_values[i] = result; + } + return (int)offset; +} + + +/* ── ncdb_add_uint32_arrays ─────────────────────────────────────────────── + * + * Element-wise addition: out[i] = a[i] + b[i] for i in [0, count). + * 'out' may alias 'a' or 'b' (safe: only reads before writing same slot). + * Overflow wraps modulo 2^32. + */ +void ncdb_add_uint32_arrays(const uint32_t *a, const uint32_t *b, + size_t count, uint32_t *out) +{ + for (size_t i = 0; i < count; i++) { + out[i] = a[i] + b[i]; + } +} diff --git a/src/ucis/ncdb/attrs.py b/src/ucis/ncdb/attrs.py new file mode 100644 index 0000000..bb50238 --- /dev/null +++ b/src/ucis/ncdb/attrs.py @@ -0,0 +1,52 @@ +""" +attrs.json — user-defined attribute serialization. + +Format: JSON object + {"version": 1, "entries": [{"idx": , "attrs": {: }}, ...]} + +Only scopes that have at least one attribute are included (sparse). +""" + +import json + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + + +class AttrsWriter: + """Serialize user-defined scope attributes to attrs.json bytes.""" + + def serialize(self, db) -> bytes: + scopes = dfs_scope_list(db) + entries = [] + for idx, scope in enumerate(scopes): + if not hasattr(scope, 'getAttributes'): + continue + attrs = scope.getAttributes() + if attrs: + entries.append({"idx": idx, "attrs": attrs}) + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + +class AttrsReader: + """Deserialize attrs.json bytes and apply attributes to scope tree.""" + + def deserialize(self, data: bytes, db) -> None: + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError(f"Unsupported attrs.json version: {payload.get('version')}") + entries = payload.get("entries", []) + if not entries: + return + scopes = dfs_scope_list(db) + for entry in entries: + idx = entry["idx"] + if idx < len(scopes): + scope = scopes[idx] + for key, val in entry.get("attrs", {}).items(): + if hasattr(scope, 'setAttribute'): + scope.setAttribute(key, val) diff --git a/src/ucis/ncdb/constants.py b/src/ucis/ncdb/constants.py new file mode 100644 index 0000000..db67fee --- /dev/null +++ b/src/ucis/ncdb/constants.py @@ -0,0 +1,106 @@ +""" +NCDB format constants: magic bytes, member names, type enums, defaults. + +All values defined here must match the COVERAGE_FILE_FORMAT_DESIGN.md spec +(including the Addendum for V2 encoding). +""" + +from ucis.cover_type_t import CoverTypeT +from ucis.scope_type_t import ScopeTypeT + +# ── Format identity ──────────────────────────────────────────────────────── + +NCDB_FORMAT = "NCDB" +NCDB_VERSION = "1.0" +NCDB_GENERATOR = "pyucis-ncdb" + +# ── CDB file header magic ────────────────────────────────────────────────── + +SQLITE_MAGIC = b"SQLite format 3\x00" # 16 bytes +ZIP_SIGNATURES = (b"PK\x03\x04", b"PK\x05\x06") # normal and empty ZIP + +# ── ZIP member names ─────────────────────────────────────────────────────── + +MEMBER_MANIFEST = "manifest.json" +MEMBER_STRINGS = "strings.bin" +MEMBER_SCOPE_TREE = "scope_tree.bin" +MEMBER_COUNTS = "counts.bin" +MEMBER_HISTORY = "history.json" +MEMBER_SOURCES = "sources.json" +MEMBER_ATTRS = "attrs.bin" +MEMBER_TAGS = "tags.json" +MEMBER_TOGGLE = "toggle.bin" +MEMBER_FSM = "fsm.bin" +MEMBER_CROSS = "cross.bin" +MEMBER_FORMAL = "formal.bin" +MEMBER_DESIGN_UNITS = "design_units.json" +MEMBER_PROPERTIES = "properties.json" +MEMBER_CONTRIB_DIR = "contrib/" + +# ── V2 scope_tree.bin encoding markers ──────────────────────────────────── + +SCOPE_MARKER_REGULAR = 0x00 +SCOPE_MARKER_TOGGLE_PAIR = 0x01 + +# ── V2 presence bitfield bits ───────────────────────────────────────────── + +PRESENCE_FLAGS = 0x01 # has non-default flags +PRESENCE_SOURCE = 0x02 # has source info (file_id, line, token) +PRESENCE_WEIGHT = 0x04 # has non-default weight (≠1) +PRESENCE_AT_LEAST = 0x08 # coveritem at_least override at scope level +PRESENCE_CVG_OPTS = 0x10 # has covergroup options + +# ── counts.bin encoding modes ───────────────────────────────────────────── + +COUNTS_MODE_UINT32 = 0 # fixed 4-byte little-endian per count +COUNTS_MODE_VARINT = 1 # LEB128 varint per count + +# ── Toggle pair implicit bin names ──────────────────────────────────────── + +TOGGLE_BIN_0_TO_1 = "0 -> 1" +TOGGLE_BIN_1_TO_0 = "1 -> 0" + +# ── V2 type-level defaults table ────────────────────────────────────────── +# Maps CoverTypeT → (flags, at_least, weight) +# Used by reader to reconstruct coveritem objects without per-item storage. + +COVER_TYPE_DEFAULTS: dict = { + CoverTypeT.TOGGLEBIN: (0, 0, 1), + CoverTypeT.STMTBIN: (0, 0, 1), + CoverTypeT.BRANCHBIN: (0, 0, 1), + CoverTypeT.CONDBIN: (0, 0, 1), + CoverTypeT.EXPRBIN: (0, 0, 1), + CoverTypeT.FSMBIN: (0, 0, 1), + CoverTypeT.CVGBIN: (0, 1, 1), + CoverTypeT.DEFAULTBIN: (0, 0, 1), + CoverTypeT.IGNOREBIN: (0, 0, 1), + CoverTypeT.ILLEGALBIN: (0, 0, 1), + CoverTypeT.BLOCKBIN: (0, 0, 1), + CoverTypeT.COVERBIN: (0, 0, 1), + CoverTypeT.ASSERTBIN: (0, 0, 1), + CoverTypeT.PASSBIN: (0, 0, 1), + CoverTypeT.FAILBIN: (0, 0, 1), +} + +# ── Scope-type → implicit child cover type mapping ──────────────────────── +# When a REGULAR scope has children whose cover type is structurally implied +# by the parent scope type (e.g. BRANCH → TOGGLEBIN). + +SCOPE_TO_COVER_TYPE: dict = { + ScopeTypeT.TOGGLE: CoverTypeT.TOGGLEBIN, + ScopeTypeT.BRANCH: CoverTypeT.TOGGLEBIN, + ScopeTypeT.EXPR: CoverTypeT.BRANCHBIN, + ScopeTypeT.COND: CoverTypeT.CONDBIN, + ScopeTypeT.BLOCK: CoverTypeT.STMTBIN, + ScopeTypeT.COVBLOCK: CoverTypeT.BLOCKBIN, + ScopeTypeT.FSM: CoverTypeT.FSMBIN, + ScopeTypeT.FSM_STATES: CoverTypeT.FSMBIN, + ScopeTypeT.FSM_TRANS: CoverTypeT.FSMBIN, + ScopeTypeT.COVERPOINT: CoverTypeT.CVGBIN, + ScopeTypeT.CROSS: CoverTypeT.DEFAULTBIN, + ScopeTypeT.CVGBINSCOPE: CoverTypeT.CVGBIN, + ScopeTypeT.ILLEGALBINSCOPE: CoverTypeT.ILLEGALBIN, + ScopeTypeT.IGNOREBINSCOPE: CoverTypeT.IGNOREBIN, + ScopeTypeT.COVER: CoverTypeT.COVERBIN, + ScopeTypeT.ASSERT: CoverTypeT.ASSERTBIN, +} diff --git a/src/ucis/ncdb/contrib.py b/src/ucis/ncdb/contrib.py new file mode 100644 index 0000000..ab222d2 --- /dev/null +++ b/src/ucis/ncdb/contrib.py @@ -0,0 +1,86 @@ +""" +contrib/ — per-test coverage contribution serialization. + +Each history node with non-empty contributions gets one ZIP member: + contrib/{history_idx}.bin + +Binary layout of each member: + num_entries : varint + For each entry (sorted by bin_index, delta-encoded): + delta_bin_index : varint (bin_index - previous_bin_index) + count : varint + +Delta encoding exploits spatial locality; entries must be in ascending +bin_index order. The file is absent when the history node has no +contributions. + +The ContribWriter produces a dict {member_name: bytes} for the ZIP writer. +The ContribReader reads all contrib/* members from the ZIP and calls +db.record_test_association() to populate db._per_test_data. +""" + +import io + +from .constants import MEMBER_CONTRIB_DIR +from .varint import encode_varint, decode_varint + + +class ContribWriter: + """Serialize per-test contributions from MemUCIS._per_test_data.""" + + def serialize(self, db) -> dict: + """Return a dict of {member_name: bytes} for all history nodes with contributions. + + Returns an empty dict when no per-test data is present. + """ + per_test = getattr(db, '_per_test_data', {}) + members = {} + for hist_idx, bin_counts in per_test.items(): + if not bin_counts: + continue + sorted_entries = sorted(bin_counts.items()) # ascending bin_index + buf = io.BytesIO() + buf.write(encode_varint(len(sorted_entries))) + prev_bin = 0 + for bin_idx, count in sorted_entries: + buf.write(encode_varint(bin_idx - prev_bin)) + buf.write(encode_varint(count)) + prev_bin = bin_idx + members[f"{MEMBER_CONTRIB_DIR}{hist_idx}.bin"] = buf.getvalue() + return members + + +class ContribReader: + """Deserialize per-test contributions and populate MemUCIS._per_test_data.""" + + def apply(self, db, contrib_members: dict) -> None: + """Read all contrib/*.bin members and call db.record_test_association(). + + Args: + db: MemUCIS (or NcdbUCIS) instance to populate. + contrib_members: Dict mapping member name → bytes for all + contrib/* entries extracted from the ZIP. + """ + if not contrib_members: + return + + for member_name, data in contrib_members.items(): + if not member_name.startswith(MEMBER_CONTRIB_DIR): + continue + # Parse history_idx from filename: "contrib/{idx}.bin" + basename = member_name[len(MEMBER_CONTRIB_DIR):] + try: + hist_idx = int(basename.rstrip(".bin").split(".")[0]) + except ValueError: + continue # skip malformed names + + buf = data + offset = 0 + num_entries, offset = decode_varint(buf, offset) + prev_bin = 0 + for _ in range(num_entries): + delta, offset = decode_varint(buf, offset) + count, offset = decode_varint(buf, offset) + bin_idx = prev_bin + delta + db.record_test_association(hist_idx, bin_idx, count) + prev_bin = bin_idx diff --git a/src/ucis/ncdb/counts.py b/src/ucis/ncdb/counts.py new file mode 100644 index 0000000..fbd6564 --- /dev/null +++ b/src/ucis/ncdb/counts.py @@ -0,0 +1,84 @@ +""" +counts.bin — packed hit-count array serialization. + +Two encoding modes: + Mode 0 (COUNTS_MODE_UINT32): fixed 4-byte little-endian per count. + Mode 1 (COUNTS_MODE_VARINT): LEB128 varint per count. + +Binary layout: + [mode: 1 byte] + [count: varint] + [data: mode-dependent encoding of *count* integers] + +The writer chooses varint mode when the varint-encoded size is smaller +than the fixed uint32 size (typically true when most counts are 0 or small). +""" + +import array +import io + +from .varint import encode_varint, decode_varint, encode_varints, decode_varints +from .constants import COUNTS_MODE_UINT32, COUNTS_MODE_VARINT + + +class CountsWriter: + """Serialize a list of hit counts to counts.bin bytes.""" + + def serialize(self, counts: list) -> bytes: + """Choose the best encoding and return the serialized bytes.""" + n = len(counts) + # Estimate varint size + varint_bytes = encode_varints(counts) + fixed_bytes = n * 4 + + # Force varint mode if any count exceeds uint32 max (4294967295) + has_large = any(c > 0xFFFFFFFF for c in counts) + + if has_large or len(varint_bytes) < fixed_bytes: + mode = COUNTS_MODE_VARINT + else: + mode = COUNTS_MODE_UINT32 + + buf = io.BytesIO() + buf.write(bytes([mode])) + buf.write(encode_varint(n)) + + if mode == COUNTS_MODE_VARINT: + buf.write(varint_bytes) + else: + arr = array.array("I", counts) # unsigned int (4 bytes) + if arr.itemsize != 4: # guard against exotic platforms + for c in counts: + buf.write(c.to_bytes(4, "little")) + else: + arr.byteswap() if __import__("sys").byteorder == "big" else None + buf.write(arr.tobytes()) + + return buf.getvalue() + + +class CountsReader: + """Deserialize hit counts from counts.bin bytes.""" + + def deserialize(self, data: bytes) -> list: + """Return a list of integers decoded from *data*.""" + if not data: + return [] + offset = 0 + mode = data[offset] + offset += 1 + count, offset = decode_varint(data, offset) + + if mode == COUNTS_MODE_VARINT: + # Fast path: if all values fit in one byte (0–127), each byte IS + # the value (high bit clear). Covers the common small-count case. + payload = data[offset:offset + count] + if len(payload) == count and all(b < 0x80 for b in payload): + return list(payload) + # General path — use bulk decode (C-accelerated when available) + values, _ = decode_varints(data, count, offset) + return values + else: # UINT32 + import struct + values = list(struct.unpack_from(f"<{count}I", data, offset)) + return values diff --git a/src/ucis/ncdb/cross.py b/src/ucis/ncdb/cross.py new file mode 100644 index 0000000..b3491cb --- /dev/null +++ b/src/ucis/ncdb/cross.py @@ -0,0 +1,94 @@ +""" +cross.json — CROSS scope coverpoint-link serialization. + +A UCIS CROSS scope records which coverpoints it crosses. The coverpoint +references (by object) are lost after scope-tree deserialization because +scope_tree.bin only stores names and structure. This module persists the +links and restores them by navigating the reconstructed scope tree by path. + +Format: + {"version": 1, "entries": [ + {"idx": , "crossed": ["", ...]}, + ... + ]} + +*idx* is the DFS index of the CROSS scope. *crossed* contains the +getScopeName() value of each crossed coverpoint (they must be siblings of +the CROSS scope inside the same COVERINSTANCE or COVERGROUP). +""" + +import json + +from ucis.scope_type_t import ScopeTypeT + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + + +class CrossWriter: + """Serialize CROSS scope coverpoint links to cross.json bytes.""" + + def serialize(self, db) -> bytes: + entries = [] + for idx, scope in enumerate(dfs_scope_list(db)): + if scope.getScopeType() != ScopeTypeT.CROSS: + continue + n = 0 + try: + n = scope.getNumCrossedCoverpoints() + except Exception: + pass + if n == 0: + continue + crossed = [scope.getIthCrossedCoverpoint(i).getScopeName() + for i in range(n)] + entries.append({"idx": idx, "crossed": crossed}) + + if not entries: + return b"" + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + +class CrossReader: + """Rebuild CROSS scope coverpoint links from cross.json bytes.""" + + def apply(self, db, data: bytes) -> None: + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError( + f"Unsupported cross.json version: {payload.get('version')}") + entries = payload.get("entries", []) + if not entries: + return + + scopes = dfs_scope_list(db) + for entry in entries: + idx = entry["idx"] + if idx >= len(scopes): + continue + cross_scope = scopes[idx] + if cross_scope.getScopeType() != ScopeTypeT.CROSS: + continue + + crossed_names = entry.get("crossed", []) + if not crossed_names: + continue + + # Find the parent container and resolve siblings by name + parent = cross_scope.m_parent if hasattr(cross_scope, 'm_parent') else None + if parent is None: + continue + + # Build name → scope map from siblings + sibling_map = {} + for sib in parent.scopes(ScopeTypeT.ALL): + sibling_map[sib.getScopeName()] = sib + + resolved = [sibling_map[name] for name in crossed_names + if name in sibling_map] + if hasattr(cross_scope, 'coverpoints'): + cross_scope.coverpoints = resolved diff --git a/src/ucis/ncdb/db_format_if_ncdb.py b/src/ucis/ncdb/db_format_if_ncdb.py new file mode 100644 index 0000000..7ca512c --- /dev/null +++ b/src/ucis/ncdb/db_format_if_ncdb.py @@ -0,0 +1,69 @@ +""" +DbFormatIfNcdb — UCIS format registry interface for the NCDB backend. + +Registers the 'ncdb' format so that existing pyucis CLI commands and +utilities can use NCDB files via -if ncdb / -of ncdb. +""" + +from ucis.rgy.format_if_db import FormatIfDb, FormatDescDb, FormatDbFlags, FormatCapabilities +from ucis.ucis import UCIS + + +class DbFormatIfNcdb(FormatIfDb): + """NCDB ZIP-based format interface.""" + + def __init__(self): + self.options = {} + + def init(self, options): + self.options = options or {} + + def create(self, filename=None) -> UCIS: + """Create a new NCDB database. + + Returns an in-memory MemUCIS that can later be written to disk + via write(). If *filename* is given, the file is created on close(). + """ + from ucis.mem.mem_ucis import MemUCIS + db = MemUCIS() + db._ncdb_filename = filename # stash for write() + return db + + def read(self, file_or_filename) -> UCIS: + """Read an NCDB .cdb file into a MemUCIS.""" + from .ncdb_reader import NcdbReader + if isinstance(file_or_filename, str): + path = file_or_filename + elif hasattr(file_or_filename, 'name'): + path = file_or_filename.name + else: + raise ValueError("NCDB format requires a file path") + return NcdbReader().read(path) + + def write(self, db: UCIS, file_or_filename) -> None: + """Write *db* to an NCDB .cdb file.""" + from .ncdb_writer import NcdbWriter + if isinstance(file_or_filename, str): + path = file_or_filename + elif hasattr(file_or_filename, 'name'): + path = file_or_filename.name + else: + raise ValueError("NCDB format requires a file path") + NcdbWriter().write(db, path) + + @classmethod + def register(cls, rgy): + """Register NCDB format with the format registry.""" + rgy.addDatabaseFormat(FormatDescDb( + fmt_if=cls, + name='ncdb', + flags=FormatDbFlags.Create | FormatDbFlags.Read | FormatDbFlags.Write, + description='NCDB — Native Coverage Database (ZIP-based binary format)', + capabilities=FormatCapabilities( + can_read=True, can_write=True, + functional_coverage=True, cross_coverage=True, + ignore_illegal_bins=True, code_coverage=True, + toggle_coverage=True, fsm_coverage=True, + assertions=True, history_nodes=True, + design_hierarchy=True, lossless=True, + ))) diff --git a/src/ucis/ncdb/design_units.py b/src/ucis/ncdb/design_units.py new file mode 100644 index 0000000..fbcec1d --- /dev/null +++ b/src/ucis/ncdb/design_units.py @@ -0,0 +1,69 @@ +""" +design_units.json — design unit (DU) name-to-scope index. + +Provides a fast lookup table mapping DU scope names to their DFS indices so +tools can locate design units without scanning the full scope tree. + +Format: + {"version": 1, "units": [ + {"name": "", "idx": , "type": }, + ... + ]} + +Only DU_ANY scopes are included. *idx* is the DFS index from dfs_scope_list(). +""" + +import json + +from ucis.scope_type_t import ScopeTypeT + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + + +class DesignUnitsWriter: + """Serialize DU scope index to design_units.json bytes.""" + + def serialize(self, db) -> bytes: + units = [] + for idx, scope in enumerate(dfs_scope_list(db)): + scope_type = scope.getScopeType() + if ScopeTypeT.DU_ANY(scope_type): + units.append({ + "name": scope.getScopeName(), + "idx": idx, + "type": int(scope_type), + }) + if not units: + return b"" + payload = {"version": _VERSION, "units": units} + return json.dumps(payload, separators=(',', ':')).encode() + + +class DesignUnitsReader: + """Deserialize design_units.json and build a name → scope lookup.""" + + def build_index(self, data: bytes, db) -> dict: + """Return a {name: scope} dict from design_units.json *data*. + + Falls back to scanning dfs_scope_list() when *data* is empty so the + method always returns a usable index. + """ + if data: + payload = json.loads(data.decode()) + if payload.get("version") == _VERSION: + scopes = dfs_scope_list(db) + index = {} + for entry in payload.get("units", []): + idx = entry["idx"] + if idx < len(scopes): + index[entry["name"]] = scopes[idx] + return index + + # Fallback: linear scan (used when design_units.json absent) + index = {} + for scope in dfs_scope_list(db): + if ScopeTypeT.DU_ANY(scope.getScopeType()): + index[scope.getScopeName()] = scope + return index diff --git a/src/ucis/ncdb/dfs_util.py b/src/ucis/ncdb/dfs_util.py new file mode 100644 index 0000000..92969fa --- /dev/null +++ b/src/ucis/ncdb/dfs_util.py @@ -0,0 +1,46 @@ +""" +DFS traversal utility — shared between attrs/tags/properties serializers. + +Produces a flat list of (scope, dfs_index) pairs in the same DFS order +that scope_tree.py uses for encoding, so that index-based serializers +(attrs.json, tags.json, properties.bin) can map directly to scope_tree +offsets without re-reading the binary. +""" + +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_type_t import CoverTypeT + +from .constants import TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0 + + +def _is_toggle_pair(scope) -> bool: + """Match the toggle-pair detection logic in scope_tree.py.""" + if scope.getScopeType() != ScopeTypeT.BRANCH: + return False + cover_items = list(scope.coverItems(CoverTypeT.ALL)) + if len(cover_items) != 2: + return False + if list(scope.scopes(ScopeTypeT.ALL)): + return False + names = {ci.getName() for ci in cover_items} + return names == {TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0} + + +def dfs_scope_list(db) -> list: + """Return all scopes in DFS order (matches scope_tree.bin encoding). + + Toggle-pair BRANCH scopes are included (they appear in scope_tree.bin + as TOGGLE_PAIR records but still occupy one DFS slot each). + """ + result = [] + + def _visit(scope): + result.append(scope) + if not _is_toggle_pair(scope): + for child in scope.scopes(ScopeTypeT.ALL): + _visit(child) + + for top_scope in db.scopes(ScopeTypeT.ALL): + _visit(top_scope) + + return result diff --git a/src/ucis/ncdb/formal.py b/src/ucis/ncdb/formal.py new file mode 100644 index 0000000..af5c8c5 --- /dev/null +++ b/src/ucis/ncdb/formal.py @@ -0,0 +1,81 @@ +""" +formal.bin — formal verification data serialization. + +Stores per-coveritem formal results (status, proof radius, witness file path) +for assertion scopes. Only non-default entries are serialized (sparse). + +Format (JSON, gzip-compressed): + {"version": 1, "entries": [ + {"idx": , "status": , "radius": , "witness": ""}, + ... + ]} + +Default values (omitted from JSON): + status = 0 (FormalStatusT.NONE) + radius = 0 + witness = null (absent) + +The bin_index is the flat DFS coveritem index — same ordering as counts.bin. +""" + +import json + +from .constants import MEMBER_FORMAL + +_VERSION = 1 +_DEFAULT_STATUS = 0 # FormalStatusT.NONE +_DEFAULT_RADIUS = 0 + + +class FormalWriter: + """Serialize formal data from MemUCIS._formal_data to formal.bin bytes.""" + + def serialize(self, db) -> bytes: + """Return JSON bytes for all non-default formal entries. + + Returns empty bytes when no formal data is present. + """ + formal_data = getattr(db, '_formal_data', {}) + entries = [] + for bin_index, fd in sorted(formal_data.items()): + status = fd.get('status', _DEFAULT_STATUS) + radius = fd.get('radius', _DEFAULT_RADIUS) + witness = fd.get('witness', None) + # Skip fully-default entries + if status == _DEFAULT_STATUS and radius == _DEFAULT_RADIUS and witness is None: + continue + entry = {"idx": bin_index} + if status != _DEFAULT_STATUS: + entry["status"] = status + if radius != _DEFAULT_RADIUS: + entry["radius"] = radius + if witness is not None: + entry["witness"] = witness + entries.append(entry) + if not entries: + return b"" + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + +class FormalReader: + """Deserialize formal.bin and populate MemUCIS._formal_data.""" + + def apply(self, db, data: bytes) -> None: + """Parse *data* (formal.bin bytes) and call db.set_formal_data(). + + No-op when *data* is empty. + """ + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + return + for entry in payload.get("entries", []): + bin_index = entry["idx"] + db.set_formal_data( + bin_index, + status = entry.get("status"), + radius = entry.get("radius"), + witness = entry.get("witness"), + ) diff --git a/src/ucis/ncdb/format_detect.py b/src/ucis/ncdb/format_detect.py new file mode 100644 index 0000000..3350876 --- /dev/null +++ b/src/ucis/ncdb/format_detect.py @@ -0,0 +1,42 @@ +""" +format_detect.py — CDB file format discrimination. + +Detects whether a .cdb file is a SQLite database or an NCDB ZIP archive. +""" + +import zipfile +import json + +from .constants import SQLITE_MAGIC, ZIP_SIGNATURES, NCDB_FORMAT, MEMBER_MANIFEST + + +def detect_cdb_format(path: str) -> str: + """Return 'sqlite', 'ncdb', or 'unknown' for the file at *path*. + + Detection algorithm: + 1. Read first 16 bytes. + 2. If bytes[0:16] == SQLITE_MAGIC → 'sqlite' + 3. If bytes[0:4] in ZIP_SIGNATURES: + open as ZIP, look for manifest.json with "format": "NCDB" → 'ncdb' + 4. Otherwise → 'unknown' + """ + try: + with open(path, "rb") as f: + header = f.read(16) + except OSError: + return "unknown" + + if len(header) >= 16 and header[:16] == SQLITE_MAGIC: + return "sqlite" + + if len(header) >= 4 and header[:4] in ZIP_SIGNATURES: + try: + with zipfile.ZipFile(path, "r") as zf: + with zf.open(MEMBER_MANIFEST) as mf: + manifest = json.load(mf) + if manifest.get("format") == NCDB_FORMAT: + return "ncdb" + except (zipfile.BadZipFile, KeyError, json.JSONDecodeError, Exception): + pass + + return "unknown" diff --git a/src/ucis/ncdb/fsm.py b/src/ucis/ncdb/fsm.py new file mode 100644 index 0000000..6f9d5a8 --- /dev/null +++ b/src/ucis/ncdb/fsm.py @@ -0,0 +1,144 @@ +""" +fsm.json — FSM scope metadata serialization. + +The FSM state/transition names and hit counts are already encoded in +scope_tree.bin as FSMBIN cover items inside FSM_STATES / FSM_TRANS sub-scopes. +What is NOT encoded there: + + 1. The numeric state-value/index for each state + (UCIS_INTPROP_FSM_STATEVAL — maps state names to RTL enumeration values). + 2. The MemFSMScope._states and ._transitions dicts are empty after + deserialisation because the scope-tree reader only creates cover items. + +This module handles both: + + * FsmWriter — serializes non-sequential state indices to fsm.json (sparse; + absent when all indices match the default 0, 1, 2, … sequence). + * FsmReader — rebuilds _states/_transitions from the existing cover items + in FSM_STATES/FSM_TRANS sub-scopes, then applies any stored index overrides. + +Format (sparse): + {"version": 1, "entries": [ + {"fsm_idx": , "states": [{"name": "", "index": }, ...]}, + ... + ]} + +Only FSM scopes whose state indices differ from 0, 1, 2, … are included. +DFS index (fsm_idx) corresponds to dfs_scope_list() order. +""" + +import json + +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_type_t import CoverTypeT + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + + +def _fsm_scopes(db): + """Yield (dfs_idx, scope) for every FSM scope in DFS order.""" + for idx, scope in enumerate(dfs_scope_list(db)): + if scope.getScopeType() == ScopeTypeT.FSM: + yield idx, scope + + +class FsmWriter: + """Serialize FSM state-index overrides to fsm.json bytes. + + Returns empty bytes when all state indices follow the default + 0, 1, 2, … sequence (the common case). + """ + + def serialize(self, db) -> bytes: + entries = [] + for fsm_idx, scope in _fsm_scopes(db): + # Collect state indices from the _states dict (MemFSMScope). + states_dict = getattr(scope, '_states', None) + if not states_dict: + continue + state_entries = [] + for i, (name, state) in enumerate(states_dict.items()): + idx = getattr(state, 'index', i) + if idx != i: # non-sequential → store + state_entries.append({"name": name, "index": idx}) + if state_entries: + entries.append({"fsm_idx": fsm_idx, "states": state_entries}) + + if not entries: + return b"" + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + +class FsmReader: + """Rebuild MemFSMScope._states/_transitions and apply stored index overrides. + + Called after the scope tree has been fully deserialized. Requires that + FSM_STATES and FSM_TRANS sub-scopes already hold the correct FSMBIN cover + items (guaranteed by scope_tree.py). + """ + + def apply(self, db, data: bytes) -> None: + # Build index override map from stored JSON (may be empty) + index_overrides: dict = {} # fsm_idx -> {state_name -> index} + if data: + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError( + f"Unsupported fsm.json version: {payload.get('version')}") + for entry in payload.get("entries", []): + idx_map = {s["name"]: s["index"] + for s in entry.get("states", [])} + index_overrides[entry["fsm_idx"]] = idx_map + + # Rebuild _states / _transitions for every MemFSMScope + for fsm_idx, scope in _fsm_scopes(db): + if not (hasattr(scope, '_states') and hasattr(scope, '_transitions')): + continue + self._rebuild(scope, index_overrides.get(fsm_idx, {})) + + def _rebuild(self, fsm_scope, index_overrides: dict) -> None: + """Populate _states/_transitions from existing cover items.""" + from ucis.mem.mem_fsm_scope import MemFSMState, MemFSMTransition + + # Locate the mandatory sub-scopes + states_scope = getattr(fsm_scope, '_states_scope', None) + trans_scope = getattr(fsm_scope, '_trans_scope', None) + + if states_scope is None or trans_scope is None: + # Fall back: find by scope type + for child in fsm_scope.scopes(ScopeTypeT.ALL): + if child.getScopeType() == ScopeTypeT.FSM_STATES: + states_scope = child + elif child.getScopeType() == ScopeTypeT.FSM_TRANS: + trans_scope = child + + if states_scope is None: + return + + # Rebuild _states from FSM_STATES cover items + fsm_scope._states = {} + for i, ci in enumerate(states_scope.coverItems(CoverTypeT.ALL)): + name = ci.getName() + override_idx = index_overrides.get(name, i) + state = MemFSMState(name, override_idx) + state.visit_count = ci.getCoverData().data + fsm_scope._states[name] = state + + # Rebuild _transitions from FSM_TRANS cover items + fsm_scope._transitions = {} + if trans_scope is not None: + for ci in trans_scope.coverItems(CoverTypeT.ALL): + name = ci.getName() + if "->" not in name: + continue + from_name, to_name = name.split("->", 1) + from_state = fsm_scope._states.get(from_name) + to_state = fsm_scope._states.get(to_name) + if from_state is None or to_state is None: + continue + trans = MemFSMTransition(from_state, to_state) + trans.count = ci.getCoverData().data + fsm_scope._transitions[(from_name, to_name)] = trans diff --git a/src/ucis/ncdb/history.py b/src/ucis/ncdb/history.py new file mode 100644 index 0000000..ba6b99d --- /dev/null +++ b/src/ucis/ncdb/history.py @@ -0,0 +1,129 @@ +""" +history.json — test and merge history serialization. + +JSON array of history node records. Each record encodes the fields +available via the MemHistoryNode API. +""" + +import json +from ucis.mem.mem_history_node import MemHistoryNode +from ucis.history_node_kind import HistoryNodeKind +from ucis.test_status_t import TestStatusT + + +def _kind_to_str(kind) -> str: + if kind is None: + return "TEST" + if isinstance(kind, HistoryNodeKind): + return kind.name + # SQLite backend may return bare int + try: + return HistoryNodeKind(int(kind)).name + except (ValueError, TypeError): + return "TEST" + + +def _kind_from_str(s: str) -> HistoryNodeKind: + try: + return HistoryNodeKind[s] + except KeyError: + return HistoryNodeKind.TEST + + +def _status_to_int(status) -> int: + if status is None: + return int(TestStatusT.OK) + return int(status) + + +def _status_from_int(v: int): + try: + return TestStatusT(v) + except Exception: + return TestStatusT.OK + + +class HistoryWriter: + """Serialize UCIS history nodes to a JSON bytes object.""" + + def serialize(self, history_nodes: list) -> bytes: + records = [] + for node in history_nodes: + rec = { + "logical_name": node.getLogicalName(), + "physical_name": node.getPhysicalName(), + "kind": _kind_to_str(node.getKind()), + "test_status": _status_to_int(node.getTestStatus()), + "sim_time": node.getSimTime(), + "time_unit": node.getTimeUnit(), + "run_cwd": node.getRunCwd(), + "cpu_time": node.getCpuTime(), + "seed": node.getSeed(), + "cmd": node.getCmd(), + "args": node.getArgs(), + "compulsory": node.getCompulsory(), + "date": node.getDate(), + "user_name": node.getUserName(), + "cost": node.getCost(), + "tool_category": node.getToolCategory(), + "ucis_version": node.getUCISVersion(), + "vendor_id": node.getVendorId(), + "vendor_tool": node.getVendorTool(), + "vendor_tool_version": node.getVendorToolVersion(), + "same_tests": node.getSameTests(), + "comment": node.getComment(), + } + records.append(rec) + return json.dumps(records, indent=2).encode("utf-8") + + +class HistoryReader: + """Deserialize history nodes from history.json bytes.""" + + def deserialize(self, data: bytes) -> list: + records = json.loads(data.decode("utf-8")) + nodes = [] + for rec in records: + node = MemHistoryNode( + parent=None, + logicalname=rec.get("logical_name", ""), + physicalname=rec.get("physical_name"), + kind=_kind_from_str(rec.get("kind", "TEST")), + ) + node.setTestStatus(_status_from_int(rec.get("test_status", 0))) + if rec.get("sim_time") is not None: + node.setSimTime(rec["sim_time"]) + if rec.get("time_unit") is not None: + node.setTimeUnit(rec["time_unit"]) + if rec.get("run_cwd") is not None: + node.setRunCwd(rec["run_cwd"]) + if rec.get("cpu_time") is not None: + node.setCpuTime(rec["cpu_time"]) + if rec.get("seed") is not None: + node.setSeed(rec["seed"]) + if rec.get("cmd") is not None: + node.setCmd(rec["cmd"]) + if rec.get("args") is not None: + node.setArgs(rec["args"]) + if rec.get("compulsory") is not None: + node.setCompulsory(rec["compulsory"]) + if rec.get("date") is not None: + node.setDate(rec["date"]) + if rec.get("user_name") is not None: + node.setUserName(rec["user_name"]) + if rec.get("cost") is not None: + node.setCost(rec["cost"]) + if rec.get("tool_category") is not None: + node.setToolCategory(rec["tool_category"]) + if rec.get("vendor_id") is not None: + node.setVendorId(rec["vendor_id"]) + if rec.get("vendor_tool") is not None: + node.setVendorTool(rec["vendor_tool"]) + if rec.get("vendor_tool_version") is not None: + node.setVendorToolVersion(rec["vendor_tool_version"]) + if rec.get("same_tests") is not None: + node.setSameTests(rec["same_tests"]) + if rec.get("comment") is not None: + node.setComment(rec["comment"]) + nodes.append(node) + return nodes diff --git a/src/ucis/ncdb/manifest.py b/src/ucis/ncdb/manifest.py new file mode 100644 index 0000000..67065c9 --- /dev/null +++ b/src/ucis/ncdb/manifest.py @@ -0,0 +1,77 @@ +""" +manifest.json — NCDB archive manifest. + +Stores format identity, version, statistics, and the schema hash that +enables the same-schema fast-merge path. +""" + +import hashlib +import json +from dataclasses import dataclass, field, asdict +from datetime import datetime, timezone +from typing import Optional + +from .constants import NCDB_FORMAT, NCDB_VERSION, NCDB_GENERATOR + + +@dataclass +class Manifest: + format: str = NCDB_FORMAT + version: str = NCDB_VERSION + ucis_version: str = "1.0" + created: str = "" + path_separator: str = "/" + scope_count: int = 0 + coveritem_count:int = 0 + test_count: int = 0 + total_hits: int = 0 + covered_bins: int = 0 + schema_hash: str = "" + generator: str = NCDB_GENERATOR + + def serialize(self) -> bytes: + d = asdict(self) + return json.dumps(d, indent=2).encode("utf-8") + + @classmethod + def from_bytes(cls, data: bytes) -> "Manifest": + d = json.loads(data.decode("utf-8")) + m = cls() + for k, v in d.items(): + if hasattr(m, k): + setattr(m, k, v) + return m + + @staticmethod + def compute_schema_hash(scope_tree_bytes: bytes) -> str: + """SHA-256 of the *uncompressed* scope_tree.bin content.""" + digest = hashlib.sha256(scope_tree_bytes).hexdigest() + return f"sha256:{digest}" + + @classmethod + def build(cls, db, scope_tree_bytes: bytes, + counts: list, history_nodes: list) -> "Manifest": + """Build a Manifest from a UCIS database and serialized members.""" + from ucis.scope_type_t import ScopeTypeT + from ucis.cover_type_t import CoverTypeT + + total_hits = sum(counts) + covered_bins = sum(1 for c in counts if c > 0) + + # Count history TEST nodes + from ucis.history_node_kind import HistoryNodeKind + test_count = sum( + 1 for n in history_nodes + if n.getKind() == HistoryNodeKind.TEST + ) + + return cls( + created=datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + path_separator=db.getPathSeparator() + if hasattr(db, 'getPathSeparator') else "/", + coveritem_count=len(counts), + test_count=test_count, + total_hits=total_hits, + covered_bins=covered_bins, + schema_hash=cls.compute_schema_hash(scope_tree_bytes), + ) diff --git a/src/ucis/ncdb/ncdb_merger.py b/src/ucis/ncdb/ncdb_merger.py new file mode 100644 index 0000000..a322a17 --- /dev/null +++ b/src/ucis/ncdb/ncdb_merger.py @@ -0,0 +1,192 @@ +""" +NcdbMerger — merge one or more NCDB .cdb files into a target .cdb file. + +Two merge paths: + + Same-schema fast merge (all sources share the same schema_hash): + Counts arrays are added element-wise. The scope tree and string + table from the first source are reused verbatim. This is O(bins) + and requires no scope-tree parsing beyond reading the manifest. + + Cross-schema merge (schemas differ): + Each source is loaded into memory via NcdbReader → MemUCIS. + The existing generic DbMerger handles the structural union. + The result is written as a new NCDB file via NcdbWriter. + +History nodes from all sources are accumulated in the output. A new +MERGE HistoryNode is appended to record the operation. +""" + +import zipfile +import json +import struct +from datetime import datetime, timezone +from typing import List + +from .ncdb_reader import NcdbReader +from .ncdb_writer import NcdbWriter +from .manifest import Manifest +from .counts import CountsReader, CountsWriter +from .history import HistoryWriter, HistoryReader +from .constants import ( + MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE, + MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, +) +from ucis.ncdb._accel import add_uint32_arrays as _add_arrays, HAS_ACCEL as _HAS_ACCEL + +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_history_node import MemHistoryNode + + +class NcdbMerger: + """Merge N NCDB source files into a single NCDB target file.""" + + def merge(self, sources: List[str], target: str) -> None: + """Merge *sources* into *target*. + + Args: + sources: List of input .cdb (NCDB) file paths. + target: Output .cdb file path (will be overwritten). + """ + if not sources: + raise ValueError("No source files provided") + + # Read manifests to determine merge path + manifests = [self._read_manifest(s) for s in sources] + hashes = [m.schema_hash for m in manifests] + + all_same = len(set(hashes)) == 1 + + if all_same: + self._merge_same_schema(sources, manifests, target) + else: + self._merge_cross_schema(sources, target) + + # ── Same-schema fast path ───────────────────────────────────────────── + + def _merge_same_schema(self, sources, manifests, target): + """Element-wise counts addition; reuse scope tree from first source.""" + # Load counts from all sources and add them + all_counts = [self._read_counts(s) for s in sources] + n = len(all_counts[0]) + for counts in all_counts: + if len(counts) != n: + raise ValueError( + f"Count array length mismatch: expected {n}, got {len(counts)}") + merged_counts = list(map(sum, zip(*all_counts))) + if _HAS_ACCEL and len(all_counts) == 2: + # For two-source merges use the C element-wise adder + merged_counts = _add_arrays(all_counts[0], all_counts[1]) + + # Gather all history nodes from all sources + all_history = [] + for s in sources: + all_history.extend(self._read_history(s)) + + # Add a MERGE history node + merge_node = self._make_merge_node(target, sources) + all_history.append(merge_node) + + # Build new manifest using first source's schema data + first_manifest = manifests[0] + new_manifest = Manifest( + format=first_manifest.format, + version=first_manifest.version, + ucis_version=first_manifest.ucis_version, + created=datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + path_separator=first_manifest.path_separator, + scope_count=first_manifest.scope_count, + coveritem_count=n, + test_count=sum(1 for h in all_history + if h.getKind() == HistoryNodeKind.TEST), + total_hits=sum(merged_counts), + covered_bins=sum(1 for c in merged_counts if c > 0), + schema_hash=first_manifest.schema_hash, + generator=first_manifest.generator, + ) + + # Read schema members verbatim from first source + with zipfile.ZipFile(sources[0], "r") as zf: + strings_bytes = zf.read(MEMBER_STRINGS) + scope_tree_bytes = zf.read(MEMBER_SCOPE_TREE) + sources_bytes = zf.read(MEMBER_SOURCES) + + counts_bytes = CountsWriter().serialize(merged_counts) + history_bytes = HistoryWriter().serialize(all_history) + + with zipfile.ZipFile(target, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr(MEMBER_MANIFEST, new_manifest.serialize()) + zf.writestr(MEMBER_STRINGS, strings_bytes) + zf.writestr(MEMBER_SCOPE_TREE, scope_tree_bytes) + zf.writestr(MEMBER_COUNTS, counts_bytes) + zf.writestr(MEMBER_HISTORY, history_bytes) + zf.writestr(MEMBER_SOURCES, sources_bytes) + + # ── Cross-schema fallback ───────────────────────────────────────────── + + def _merge_cross_schema(self, sources, target): + """Load all sources into MemUCIS and use generic DbMerger.""" + from ucis.merge.db_merger import DbMerger + from ucis.mem.mem_ucis import MemUCIS + + dbs = [NcdbReader().read(s) for s in sources] + out_db = MemUCIS() + + # Collect all history from sources for the output + all_history_nodes = [] + for db in dbs: + try: + all_history_nodes.extend( + list(db.historyNodes(HistoryNodeKind.TEST)) + + list(db.historyNodes(HistoryNodeKind.MERGE)) + ) + except Exception: + all_history_nodes.extend(list(db.historyNodes(HistoryNodeKind.TEST))) + + DbMerger().merge(out_db, dbs) + + # Re-add history nodes to out_db + for node in all_history_nodes: + hn = out_db.createHistoryNode( + None, node.getLogicalName(), node.getPhysicalName(), node.getKind()) + hn.setTestStatus(node.getTestStatus()) + + # Add MERGE node + merge_node = self._make_merge_node(target, sources) + out_db.createHistoryNode( + None, + merge_node.getLogicalName(), + merge_node.getPhysicalName(), + merge_node.getKind(), + ) + + NcdbWriter().write(out_db, target) + + for db in dbs: + db.close() + + # ── Helpers ─────────────────────────────────────────────────────────── + + def _read_manifest(self, path: str) -> Manifest: + with zipfile.ZipFile(path, "r") as zf: + return Manifest.from_bytes(zf.read(MEMBER_MANIFEST)) + + def _read_counts(self, path: str) -> list: + with zipfile.ZipFile(path, "r") as zf: + return CountsReader().deserialize(zf.read(MEMBER_COUNTS)) + + def _read_history(self, path: str) -> list: + with zipfile.ZipFile(path, "r") as zf: + return HistoryReader().deserialize(zf.read(MEMBER_HISTORY)) + + def _make_merge_node(self, target: str, sources: List[str]) -> MemHistoryNode: + node = MemHistoryNode( + parent=None, + logicalname=target, + physicalname=target, + kind=HistoryNodeKind.MERGE, + ) + node.setDate(int(datetime.now(timezone.utc).timestamp())) + node.setToolCategory("ncdb-merger") + node.setComment(f"Merged from: {', '.join(sources)}") + return node diff --git a/src/ucis/ncdb/ncdb_reader.py b/src/ucis/ncdb/ncdb_reader.py new file mode 100644 index 0000000..83d8873 --- /dev/null +++ b/src/ucis/ncdb/ncdb_reader.py @@ -0,0 +1,194 @@ +""" +NcdbReader — deserialize an NCDB .cdb ZIP file into a MemUCIS model. +""" + +import zipfile +import json + +from .string_table import StringTable +from .scope_tree import ScopeTreeReader +from .counts import CountsReader +from .history import HistoryReader +from .sources import SourcesReader +from .attrs import AttrsReader +from .tags import TagsReader +from .properties import PropertiesReader +from .toggle import ToggleReader +from .fsm import FsmReader +from .cross import CrossReader +from .contrib import ContribReader +from .formal import FormalReader +from .design_units import DesignUnitsReader +from .manifest import Manifest +from .constants import ( + MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE, + MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, + MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, MEMBER_TOGGLE, MEMBER_FSM, + MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_CONTRIB_DIR, MEMBER_FORMAL, + NCDB_FORMAT, +) + +from ucis.mem.mem_ucis import MemUCIS +from ucis.history_node_kind import HistoryNodeKind +from ucis.scope_type_t import ScopeTypeT + + +def _fixup_instance_du_links(db: MemUCIS) -> None: + """Link INSTANCE scope DU references to real DU scopes in the same parent. + + When scope_tree decodes INSTANCE scopes whose DU was serialized *after* + them, a detached placeholder DU is used temporarily. This function + replaces those placeholders with the actual DU sibling scopes. + """ + from ucis.mem.mem_instance_scope import MemInstanceScope + + def _fix_parent(parent): + # Build name → DU map from real (attached) children + du_map = {} + for child in parent.scopes(ScopeTypeT.ALL): + if ScopeTypeT.DU_ANY(child.getScopeType()): + du_map[child.getScopeName()] = child + + # Replace placeholder DU refs on INSTANCE scopes + for child in parent.scopes(ScopeTypeT.ALL): + if isinstance(child, MemInstanceScope): + du = child.m_du_scope + if du is not None and du.m_parent is None: + # Detached placeholder — replace with real DU if available + real_du = du_map.get(child.getScopeName()) + if real_du is not None: + child.m_du_scope = real_du + # Recurse + _fix_parent(child) + + _fix_parent(db) + + +class NcdbReader: + """Read an NCDB .cdb ZIP file and return a populated MemUCIS.""" + + def read(self, path: str) -> MemUCIS: + with zipfile.ZipFile(path, "r") as zf: + names = zf.namelist() + manifest_bytes = zf.read(MEMBER_MANIFEST) + strings_bytes = zf.read(MEMBER_STRINGS) + scope_tree_bytes = zf.read(MEMBER_SCOPE_TREE) + counts_bytes = zf.read(MEMBER_COUNTS) + history_bytes = zf.read(MEMBER_HISTORY) + sources_bytes = zf.read(MEMBER_SOURCES) + attrs_bytes = zf.read(MEMBER_ATTRS) if MEMBER_ATTRS in names else b'' + tags_bytes = zf.read(MEMBER_TAGS) if MEMBER_TAGS in names else b'' + props_bytes = zf.read(MEMBER_PROPERTIES) if MEMBER_PROPERTIES in names else b'' + toggle_bytes = zf.read(MEMBER_TOGGLE) if MEMBER_TOGGLE in names else b'' + fsm_bytes = zf.read(MEMBER_FSM) if MEMBER_FSM in names else b'' + cross_bytes = zf.read(MEMBER_CROSS) if MEMBER_CROSS in names else b'' + du_bytes = zf.read(MEMBER_DESIGN_UNITS) if MEMBER_DESIGN_UNITS in names else b'' + formal_bytes = zf.read(MEMBER_FORMAL) if MEMBER_FORMAL in names else b'' + # Collect all contrib/* members + contrib_members = { + n: zf.read(n) for n in names if n.startswith(MEMBER_CONTRIB_DIR) + } + + manifest = Manifest.from_bytes(manifest_bytes) + if manifest.format != NCDB_FORMAT: + raise ValueError( + f"Expected NCDB format, got '{manifest.format}'") + + # Strings + string_table = StringTable.from_bytes(strings_bytes) + + # Source file handles + file_handles = SourcesReader().deserialize(sources_bytes) + + # Counts (as a flat iterator) + counts = CountsReader().deserialize(counts_bytes) + counts_iter = iter(counts) + + # Build MemUCIS + db = MemUCIS() + db.setPathSeparator(manifest.path_separator) + + # Rebuild scope tree + st_reader = ScopeTreeReader(string_table, file_handles) + st_reader.read(scope_tree_bytes, db, counts_iter) + + # Fix up INSTANCE scope DU links: replace detached placeholder DUs with + # real DU scopes that were written as separate top-level entries. + _fixup_instance_du_links(db) + + # Apply optional attrs, tags, typed properties, toggle and FSM metadata + if attrs_bytes: + AttrsReader().deserialize(attrs_bytes, db) + if tags_bytes: + TagsReader().deserialize(tags_bytes, db) + if props_bytes: + PropertiesReader().apply(db, props_bytes) + if toggle_bytes: + ToggleReader().apply(db, toggle_bytes) + # FSM reader always runs (rebuilds _states/_transitions from cover items) + FsmReader().apply(db, fsm_bytes) + if cross_bytes: + CrossReader().apply(db, cross_bytes) + + # Build design-unit index (available via db._du_index after this) + db._du_index = DesignUnitsReader().build_index(du_bytes, db) + + # Per-test contributions (optional) + ContribReader().apply(db, contrib_members) + + # Formal verification data (optional) + if formal_bytes: + FormalReader().apply(db, formal_bytes) + + # Register source files as file handles in db + for fh in file_handles: + db.createFileHandle(fh.getFileName(), None) + + # History nodes + history_nodes = HistoryReader().deserialize(history_bytes) + for node in history_nodes: + hn = db.createHistoryNode( + None, + node.getLogicalName(), + node.getPhysicalName(), + node.getKind(), + ) + # Copy all fields + if node.getTestStatus() is not None: + hn.setTestStatus(node.getTestStatus()) + if node.getSimTime() is not None and node.getSimTime() >= 0: + hn.setSimTime(node.getSimTime()) + if node.getTimeUnit() is not None: + hn.setTimeUnit(node.getTimeUnit()) + if node.getRunCwd() is not None: + hn.setRunCwd(node.getRunCwd()) + if node.getCpuTime() is not None and node.getCpuTime() >= 0: + hn.setCpuTime(node.getCpuTime()) + if node.getSeed() is not None: + hn.setSeed(node.getSeed()) + if node.getCmd() is not None: + hn.setCmd(node.getCmd()) + if node.getArgs() is not None: + hn.setArgs(node.getArgs()) + if node.getCompulsory() is not None: + hn.setCompulsory(node.getCompulsory()) + if node.getDate() is not None: + hn.setDate(node.getDate()) + if node.getUserName() is not None: + hn.setUserName(node.getUserName()) + if node.getCost() is not None and node.getCost() >= 0: + hn.setCost(node.getCost()) + if node.getToolCategory() is not None: + hn.setToolCategory(node.getToolCategory()) + if node.getVendorId() is not None: + hn.setVendorId(node.getVendorId()) + if node.getVendorTool() is not None: + hn.setVendorTool(node.getVendorTool()) + if node.getVendorToolVersion() is not None: + hn.setVendorToolVersion(node.getVendorToolVersion()) + if node.getSameTests() is not None and node.getSameTests() >= 0: + hn.setSameTests(node.getSameTests()) + if node.getComment() is not None: + hn.setComment(node.getComment()) + + return db diff --git a/src/ucis/ncdb/ncdb_ucis.py b/src/ucis/ncdb/ncdb_ucis.py new file mode 100644 index 0000000..18c8016 --- /dev/null +++ b/src/ucis/ncdb/ncdb_ucis.py @@ -0,0 +1,227 @@ +""" +NcdbUCIS — lazy-loading UCIS wrapper for NCDB files. + +NcdbUCIS defers all scope-tree and count parsing until the database is first +accessed. This avoids upfront parsing cost when only a subset of the data +is needed (e.g. reading history nodes without loading 60K+ scope records). + +Usage:: + + db = NcdbUCIS("coverage.cdb") + # No parsing yet + for hn in db.historyNodes(HistoryNodeKind.TEST): + ... # only history.json is parsed here + for scope in db.scopes(...): + ... # scope_tree + counts parsed on first call +""" + +import zipfile +import json + +from ucis.mem.mem_ucis import MemUCIS +from ucis.history_node_kind import HistoryNodeKind + +from .constants import ( + MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE, + MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, + MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, + MEMBER_TOGGLE, MEMBER_FSM, MEMBER_CROSS, MEMBER_DESIGN_UNITS, + MEMBER_CONTRIB_DIR, MEMBER_FORMAL, + NCDB_FORMAT, +) +from .manifest import Manifest + + +class NcdbUCIS(MemUCIS): + """Lazy-loading UCIS backed by an NCDB .cdb file. + + The file is kept closed until first access. After loading, the decoded + MemUCIS state is merged into *self* so that all existing MemUCIS methods + work transparently. + + The lazy-loading covers two independent units: + - **history**: loaded when ``historyNodes()`` is first called. + - **scopes**: loaded when ``scopes()`` or any scope-creation method is + called for the first time. + + Once loaded, a unit is never re-read. + """ + + def __init__(self, path: str): + super().__init__() + self._ncdb_path = path + self._loaded_history = False + self._loaded_scopes = False + self._loaded_attrs = False + self._du_index: dict = {} # name → DU scope (populated after _ensure_scopes) + self._zf_cache: dict = {} # member name → bytes (populated on first open) + + # ── Public extra API ────────────────────────────────────────────────── + + @property + def path(self) -> str: + return self._ncdb_path + + def preload(self) -> 'NcdbUCIS': + """Eagerly load all data from the NCDB file. Returns self.""" + self._ensure_history() + self._ensure_scopes() + return self + + def getDesignUnit(self, name: str): + """Return the DU scope with *name*, or None if not found.""" + self._ensure_scopes() + return self._du_index.get(name) + + # ── MemUCIS overrides — trigger lazy loads ───────────────────────── + + def historyNodes(self, kind: HistoryNodeKind): + self._ensure_history() + return super().historyNodes(kind) + + def createHistoryNode(self, *args, **kwargs): + self._ensure_history() + return super().createHistoryNode(*args, **kwargs) + + def scopes(self, mask): + self._ensure_scopes() + return super().scopes(mask) + + def createScope(self, *args, **kwargs): + self._ensure_scopes() + return super().createScope(*args, **kwargs) + + def createInstance(self, *args, **kwargs): + self._ensure_scopes() + return super().createInstance(*args, **kwargs) + + # ── Internal loading helpers ─────────────────────────────────────── + + def _read_zip(self) -> None: + """Read all ZIP members into the byte cache (called at most once).""" + if self._zf_cache: + return + with zipfile.ZipFile(self._ncdb_path, "r") as zf: + names = zf.namelist() + for name in names: + self._zf_cache[name] = zf.read(name) + + def _ensure_history(self) -> None: + if self._loaded_history: + return + self._loaded_history = True + self._read_zip() + _load_history(self, self._zf_cache.get(MEMBER_HISTORY, b'')) + + def _ensure_scopes(self) -> None: + if self._loaded_scopes: + return + self._loaded_scopes = True + self._read_zip() + data = self._zf_cache + + manifest = Manifest.from_bytes(data[MEMBER_MANIFEST]) + if manifest.format != NCDB_FORMAT: + raise ValueError( + f"Expected NCDB format, got '{manifest.format}'") + self.setPathSeparator(manifest.path_separator) + + from .string_table import StringTable + from .scope_tree import ScopeTreeReader + from .counts import CountsReader + from .sources import SourcesReader + from .ncdb_reader import _fixup_instance_du_links + + string_table = StringTable.from_bytes(data[MEMBER_STRINGS]) + file_handles = SourcesReader().deserialize(data.get(MEMBER_SOURCES, b'[]')) + counts_iter = iter(CountsReader().deserialize(data[MEMBER_COUNTS])) + + ScopeTreeReader(string_table, file_handles).read( + data[MEMBER_SCOPE_TREE], self, counts_iter) + + for fh in file_handles: + self.createFileHandle(fh.getFileName(), None) + + _fixup_instance_du_links(self) + + # Attrs / tags / properties (optional) + attrs_data = data.get(MEMBER_ATTRS, b'') + tags_data = data.get(MEMBER_TAGS, b'') + props_data = data.get(MEMBER_PROPERTIES, b'') + toggle_data = data.get(MEMBER_TOGGLE, b'') + fsm_data = data.get(MEMBER_FSM, b'') + cross_data = data.get(MEMBER_CROSS, b'') + du_data = data.get(MEMBER_DESIGN_UNITS, b'') + + if attrs_data: + from .attrs import AttrsReader + AttrsReader().deserialize(attrs_data, self) + if tags_data: + from .tags import TagsReader + TagsReader().deserialize(tags_data, self) + if props_data: + from .properties import PropertiesReader + PropertiesReader().apply(self, props_data) + if toggle_data: + from .toggle import ToggleReader + ToggleReader().apply(self, toggle_data) + from .fsm import FsmReader + FsmReader().apply(self, fsm_data) + if cross_data: + from .cross import CrossReader + CrossReader().apply(self, cross_data) + from .design_units import DesignUnitsReader + self._du_index = DesignUnitsReader().build_index(du_data, self) + + # Per-test contributions (optional) + contrib_members = { + name: data[name] for name in data if name.startswith(MEMBER_CONTRIB_DIR) + } + if contrib_members: + from .contrib import ContribReader + ContribReader().apply(self, contrib_members) + + # Formal verification data (optional) + formal_data = data.get(MEMBER_FORMAL, b'') + if formal_data: + from .formal import FormalReader + FormalReader().apply(self, formal_data) + + +def _load_history(db: MemUCIS, history_bytes: bytes) -> None: + """Deserialize history.json and populate *db* with history nodes.""" + from .history import HistoryReader + nodes = HistoryReader().deserialize(history_bytes) + for node in nodes: + hn = db.createHistoryNode( + None, node.getLogicalName(), node.getPhysicalName(), node.getKind()) + if node.getTestStatus() is not None: + hn.setTestStatus(node.getTestStatus()) + if node.getSimTime() is not None and node.getSimTime() >= 0: + hn.setSimTime(node.getSimTime()) + if node.getTimeUnit() is not None: + hn.setTimeUnit(node.getTimeUnit()) + if node.getRunCwd() is not None: + hn.setRunCwd(node.getRunCwd()) + if node.getCpuTime() is not None and node.getCpuTime() >= 0: + hn.setCpuTime(node.getCpuTime()) + if node.getSeed() is not None: + hn.setSeed(node.getSeed()) + if node.getCmd() is not None: + hn.setCmd(node.getCmd()) + if node.getArgs() is not None: + hn.setArgs(node.getArgs()) + if node.getDate() is not None: + hn.setDate(node.getDate()) + if node.getUserName() is not None: + hn.setUserName(node.getUserName()) + if node.getToolCategory() is not None: + hn.setToolCategory(node.getToolCategory()) + if node.getVendorId() is not None: + hn.setVendorId(node.getVendorId()) + if node.getVendorTool() is not None: + hn.setVendorTool(node.getVendorTool()) + if node.getVendorToolVersion() is not None: + hn.setVendorToolVersion(node.getVendorToolVersion()) + if node.getComment() is not None: + hn.setComment(node.getComment()) diff --git a/src/ucis/ncdb/ncdb_writer.py b/src/ucis/ncdb/ncdb_writer.py new file mode 100644 index 0000000..50acf96 --- /dev/null +++ b/src/ucis/ncdb/ncdb_writer.py @@ -0,0 +1,115 @@ +""" +NcdbWriter — serialize a UCIS model to a ZIP .cdb (NCDB) file. +""" + +import zipfile +from datetime import datetime, timezone + +from .string_table import StringTable +from .scope_tree import ScopeTreeWriter +from .counts import CountsWriter +from .history import HistoryWriter +from .sources import SourcesWriter +from .attrs import AttrsWriter +from .tags import TagsWriter +from .properties import PropertiesWriter +from .toggle import ToggleWriter +from .fsm import FsmWriter +from .cross import CrossWriter +from .contrib import ContribWriter +from .formal import FormalWriter +from .design_units import DesignUnitsWriter +from .manifest import Manifest +from .constants import ( + MEMBER_MANIFEST, MEMBER_STRINGS, MEMBER_SCOPE_TREE, + MEMBER_COUNTS, MEMBER_HISTORY, MEMBER_SOURCES, + MEMBER_ATTRS, MEMBER_TAGS, MEMBER_PROPERTIES, MEMBER_TOGGLE, MEMBER_FSM, + MEMBER_CROSS, MEMBER_DESIGN_UNITS, MEMBER_FORMAL, +) + +from ucis.history_node_kind import HistoryNodeKind + + +class NcdbWriter: + """Write a UCIS database to an NCDB .cdb ZIP file.""" + + def write(self, db, path: str) -> None: + """Serialize *db* (UCIS) to the file at *path*.""" + string_table = StringTable() + file_handles: list = [] + + # 1. Serialize scope tree (populates string_table, file_handles, counts) + st_writer = ScopeTreeWriter(string_table, file_handles) + scope_tree_bytes = st_writer.write(db) + counts = st_writer.counts_list + + # 2. Serialize counts + counts_bytes = CountsWriter().serialize(counts) + + # 3. Serialize strings + strings_bytes = string_table.serialize() + + # 4. History nodes + history_nodes = list(db.historyNodes(HistoryNodeKind.ALL + if hasattr(HistoryNodeKind, 'ALL') + else HistoryNodeKind.TEST)) + # Collect both TEST and MERGE nodes + try: + all_nodes = ( + list(db.historyNodes(HistoryNodeKind.TEST)) + + list(db.historyNodes(HistoryNodeKind.MERGE)) + ) + except Exception: + all_nodes = list(db.historyNodes(HistoryNodeKind.TEST)) + history_bytes = HistoryWriter().serialize(all_nodes) + + # 5. Source files + # Use file handles discovered during scope_tree walk; fall back to db.getSourceFiles() + if not file_handles: + try: + file_handles = list(db.getSourceFiles()) + except Exception: + file_handles = [] + sources_bytes = SourcesWriter().serialize(file_handles) + + # 6. Sparse optional members + attrs_bytes = AttrsWriter().serialize(db) + tags_bytes = TagsWriter().serialize(db) + props_bytes = PropertiesWriter().serialize(db) + toggle_bytes = ToggleWriter().serialize(db) + fsm_bytes = FsmWriter().serialize(db) + cross_bytes = CrossWriter().serialize(db) + du_bytes = DesignUnitsWriter().serialize(db) + contrib_members = ContribWriter().serialize(db) + formal_bytes = FormalWriter().serialize(db) + + # 7. Manifest + manifest = Manifest.build(db, scope_tree_bytes, counts, all_nodes) + manifest_bytes = manifest.serialize() + + # 8. Write ZIP + with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr(MEMBER_MANIFEST, manifest_bytes) + zf.writestr(MEMBER_STRINGS, strings_bytes) + zf.writestr(MEMBER_SCOPE_TREE, scope_tree_bytes) + zf.writestr(MEMBER_COUNTS, counts_bytes) + zf.writestr(MEMBER_HISTORY, history_bytes) + zf.writestr(MEMBER_SOURCES, sources_bytes) + if attrs_bytes != b'{"version":1,"entries":[]}': + zf.writestr(MEMBER_ATTRS, attrs_bytes) + if tags_bytes != b'{"version":1,"entries":[]}': + zf.writestr(MEMBER_TAGS, tags_bytes) + if props_bytes: + zf.writestr(MEMBER_PROPERTIES, props_bytes) + if toggle_bytes: + zf.writestr(MEMBER_TOGGLE, toggle_bytes) + if fsm_bytes: + zf.writestr(MEMBER_FSM, fsm_bytes) + if cross_bytes: + zf.writestr(MEMBER_CROSS, cross_bytes) + if du_bytes: + zf.writestr(MEMBER_DESIGN_UNITS, du_bytes) + for member_name, member_bytes in contrib_members.items(): + zf.writestr(member_name, member_bytes) + if formal_bytes: + zf.writestr(MEMBER_FORMAL, formal_bytes) diff --git a/src/ucis/ncdb/properties.py b/src/ucis/ncdb/properties.py new file mode 100644 index 0000000..98dcc90 --- /dev/null +++ b/src/ucis/ncdb/properties.py @@ -0,0 +1,101 @@ +""" +properties.json — typed UCIS scope/coveritem/history property serialization. + +Format: JSON object + {"version": 1, "entries": [ + {"kind": "scope", "idx": , "key": , "type": "str"|"int"|"real", "value": }, + ... + ]} + +Only objects that have at least one explicitly-set property are included (sparse). + +Scope entries use the DFS index from dfs_scope_list(). +Coveritem and history entries are not yet implemented. + +String properties are serialized from MemObj._str_properties when the scope +is a MemObj subclass. For non-MemObj scopes (e.g. SqliteScope), only +StrProperty.COMMENT is queried via the standard getStringProperty() API. +""" + +import json + +from ucis.str_property import StrProperty +from ucis.ncdb.dfs_util import dfs_scope_list + +_VERSION = 1 + +# Fallback: non-MemObj scopes — probe only COMMENT (the common case). +_PROBE_STR_PROPERTIES = (StrProperty.COMMENT,) + + +class PropertiesWriter: + """Serialize scope properties to properties.json bytes.""" + + def serialize(self, db) -> bytes: + scopes = dfs_scope_list(db) + entries = [] + for idx, scope in enumerate(scopes): + sp = self._get_str_properties(scope) + for key, val in sp: + entries.append({ + "kind": "scope", + "idx": idx, + "key": int(key), + "type": "str", + "value": val, + }) + if not entries: + return b"" + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + def _get_str_properties(self, scope): + """Yield (StrProperty, value) pairs that are explicitly set on *scope*.""" + # Fast path: MemObj subclasses store string properties in a plain dict. + sp_dict = getattr(scope, '_str_properties', None) + if sp_dict is not None: + for k, v in sp_dict.items(): + if v is not None: + yield (k, v) + return + # Slow path: probe via public API for a small set of common properties. + for prop in _PROBE_STR_PROPERTIES: + try: + val = scope.getStringProperty(-1, prop) + except Exception: + val = None + if val is not None: + yield (prop, val) + + +class PropertiesReader: + """Deserialize properties.json and apply properties to the scope tree.""" + + def apply(self, db, data: bytes) -> None: + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError( + f"Unsupported properties.json version: {payload.get('version')}") + entries = payload.get("entries", []) + if not entries: + return + scopes = dfs_scope_list(db) + for entry in entries: + kind = entry.get("kind") + if kind != "scope": + continue # coveritem / history not yet supported + idx = entry["idx"] + if idx >= len(scopes): + continue + scope = scopes[idx] + prop_type = entry.get("type", "str") + key_int = entry["key"] + value = entry["value"] + if prop_type == "str": + try: + prop = StrProperty(key_int) + scope.setStringProperty(-1, prop, value) + except (ValueError, Exception): + pass diff --git a/src/ucis/ncdb/scope_tree.py b/src/ucis/ncdb/scope_tree.py new file mode 100644 index 0000000..40cce4e --- /dev/null +++ b/src/ucis/ncdb/scope_tree.py @@ -0,0 +1,334 @@ +""" +scope_tree.bin — scope hierarchy V2 encoding/decoding. + +Writer: DFS walk of UCIS scope tree → binary bytes + populates StringTable. +Reader: binary bytes + StringTable → reconstructed MemUCIS scope tree. + +V2 encoding (from Addendum §A.2): + - BRANCH scopes with exactly 2 TOGGLEBIN children encoded as TOGGLE_PAIR + (2-byte record: marker + name_ref varint). + - REGULAR scopes use a presence bitfield to omit default fields. + - Coveritem records contain only the name_ref (type from parent scope). + - Counts are NOT stored here; they are written separately to counts.bin + in DFS order (toggle pairs contribute 2 counts each). +""" + +import io +import struct + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT +from ucis.source_info import SourceInfo + +from .varint import encode_varint, decode_varint +from .constants import ( + SCOPE_MARKER_REGULAR, SCOPE_MARKER_TOGGLE_PAIR, + PRESENCE_FLAGS, PRESENCE_SOURCE, PRESENCE_WEIGHT, PRESENCE_AT_LEAST, + TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0, + COVER_TYPE_DEFAULTS, +) + + +# ────────────────────────────────────────────────────────────────────────── +# Helpers +# ────────────────────────────────────────────────────────────────────────── + +def _is_toggle_pair(scope) -> bool: + """True if *scope* is a BRANCH with exactly 2 TOGGLEBIN children.""" + if scope.getScopeType() != ScopeTypeT.BRANCH: + return False + cover_items = list(scope.coverItems(CoverTypeT.ALL)) + if len(cover_items) != 2: + return False + child_scopes = list(scope.scopes(ScopeTypeT.ALL)) + if len(child_scopes) != 0: + return False + names = {ci.getName() for ci in cover_items} + return names == {TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0} + + +# ────────────────────────────────────────────────────────────────────────── +# Writer +# ────────────────────────────────────────────────────────────────────────── + +class ScopeTreeWriter: + """Serialize a UCIS scope tree to scope_tree.bin bytes. + + Also populates the *string_table* and *counts_list* as a side-effect. + After calling write(), use string_table for strings.bin and + counts_list for counts.bin. + + Also tracks file handles so that sources.json can be written + consistently with the source IDs embedded in scope_tree.bin. + """ + + def __init__(self, string_table, file_handles: list = None): + """ + Args: + string_table: StringTable instance to populate with names. + file_handles: Mutable list; file handles will be appended in the + order they are first encountered. The index in this list + becomes the file_id stored in scope_tree.bin. + """ + self._st = string_table + self._file_handles = file_handles if file_handles is not None else [] + self._fh_index: dict = {} # filename → int id + self.counts_list: list = [] # hit counts in DFS order + self._buf = io.BytesIO() + + # ── Public API ──────────────────────────────────────────────────────── + + def write(self, db) -> bytes: + """Walk *db* (UCIS root) and return the serialized scope_tree.bin bytes.""" + self._buf = io.BytesIO() + for scope in db.scopes(ScopeTypeT.ALL): + self._write_scope(scope) + return self._buf.getvalue() + + # ── Internal DFS ────────────────────────────────────────────────────── + + def _write_scope(self, scope): + if _is_toggle_pair(scope): + self._write_toggle_pair(scope) + else: + self._write_regular_scope(scope) + + def _write_toggle_pair(self, scope): + name_ref = self._st.add(scope.getScopeName()) + self._buf.write(bytes([SCOPE_MARKER_TOGGLE_PAIR])) + self._buf.write(encode_varint(name_ref)) + # Two implicit coveritems: "0 -> 1" then "1 -> 0" + cover_items = {ci.getName(): ci for ci in scope.coverItems(CoverTypeT.ALL)} + for name in (TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0): + ci = cover_items.get(name) + self.counts_list.append(ci.getCoverData().data if ci else 0) + + def _write_regular_scope(self, scope): + scope_type = scope.getScopeType() + name_ref = self._st.add(scope.getScopeName()) + + # Collect source info + srcinfo = scope.getSourceInfo() + has_src = (srcinfo is not None + and srcinfo.file is not None + and srcinfo.line >= 0) + has_flags = (hasattr(scope, 'm_flags') and scope.m_flags != 0) + weight = scope.getWeight() if hasattr(scope, 'getWeight') else 1 + has_weight = (weight is not None and weight != 1) + + # Cover items under this scope + cover_items = list(scope.coverItems(CoverTypeT.ALL)) + num_coveritems = len(cover_items) + + # Determine child cover type — always read from the actual first cover + # item. SCOPE_TO_COVER_TYPE was an optimisation hint but BRANCH scopes + # can carry either BRANCHBIN (regular branch) or TOGGLEBIN (toggle pair, + # which is handled by _write_toggle_pair before reaching here), so using + # a fixed mapping would misidentify the type. + child_cover_type_val = 0 + if num_coveritems > 0: + child_cover_type_val = int(cover_items[0].getCoverData().type) + + # at_least override (non-default for this scope's cover type) + at_least_override = None + if num_coveritems > 0 and cover_items: + defaults = COVER_TYPE_DEFAULTS.get( + CoverTypeT(child_cover_type_val), + (0, 0, 1)) + default_at_least = defaults[1] + first_cd = cover_items[0].getCoverData() + if hasattr(first_cd, 'at_least') and first_cd.at_least != default_at_least: + at_least_override = first_cd.at_least + has_at_least = (at_least_override is not None) + + # Presence bitfield + presence = 0 + if has_flags: presence |= PRESENCE_FLAGS + if has_src: presence |= PRESENCE_SOURCE + if has_weight: presence |= PRESENCE_WEIGHT + if has_at_least: presence |= PRESENCE_AT_LEAST + + # Count child sub-scopes + child_scopes = list(scope.scopes(ScopeTypeT.ALL)) + + w = self._buf.write + w(bytes([SCOPE_MARKER_REGULAR])) + w(encode_varint(int(scope_type))) + w(encode_varint(name_ref)) + w(encode_varint(presence)) + + if has_flags: + w(encode_varint(int(scope.m_flags))) + if has_src: + file_id = self._get_file_id(srcinfo.file) + w(encode_varint(file_id)) + w(encode_varint(max(0, srcinfo.line))) + w(encode_varint(max(0, srcinfo.token))) + if has_weight: + w(encode_varint(weight)) + if has_at_least: + w(encode_varint(at_least_override)) + + w(encode_varint(len(child_scopes))) + w(encode_varint(num_coveritems)) + + if num_coveritems > 0: + w(encode_varint(child_cover_type_val)) + # Write coveritem names and accumulate counts + for ci in cover_items: + name_ref_ci = self._st.add(ci.getName()) + w(encode_varint(name_ref_ci)) + self.counts_list.append(ci.getCoverData().data) + + # Recurse into child scopes + for child in child_scopes: + self._write_scope(child) + + def _get_file_id(self, file_handle) -> int: + if file_handle is None: + return 0 + fname = file_handle.getFileName() + if fname not in self._fh_index: + fid = len(self._file_handles) + self._file_handles.append(file_handle) + self._fh_index[fname] = fid + return self._fh_index[fname] + + +# ────────────────────────────────────────────────────────────────────────── +# Reader +# ────────────────────────────────────────────────────────────────────────── + +class ScopeTreeReader: + """Deserialize scope_tree.bin bytes into UCIS scope tree under *parent*. + + Reconstructs MemScope-based objects. Cover item data (hit counts) come + from the separate *counts_iter* iterator so that scope_tree.bin and + counts.bin stay decoupled. + """ + + def __init__(self, string_table, file_handles: list): + self._st = string_table + self._fh = file_handles # indexed by file_id + + def read(self, data: bytes, parent, counts_iter) -> int: + """Populate *parent* with decoded scopes; return total coveritems read.""" + offset = 0 + total = 0 + while offset < len(data): + consumed, n = self._read_scope(data, offset, parent, counts_iter) + offset = consumed + total += n + return total + + # ── Internal ────────────────────────────────────────────────────────── + + def _read_scope(self, data: bytes, offset: int, parent, counts_iter): + """Decode one scope record at *offset*, attach to *parent*. + + Returns (new_offset, coveritems_count). + """ + marker = data[offset] + offset += 1 + + if marker == SCOPE_MARKER_TOGGLE_PAIR: + return self._read_toggle_pair(data, offset, parent, counts_iter) + else: + return self._read_regular_scope(data, offset, parent, counts_iter) + + def _read_toggle_pair(self, data: bytes, offset: int, parent, counts_iter): + name_ref, offset = decode_varint(data, offset) + name = self._st.get(name_ref) + + # Consume two counts + count_0to1 = next(counts_iter, 0) + count_1to0 = next(counts_iter, 0) + + scope = parent.createScope( + name, None, 1, SourceT.NONE, ScopeTypeT.BRANCH, 0) + + # Create the two implicit TOGGLEBIN coveritems + for (bin_name, count) in ((TOGGLE_BIN_0_TO_1, count_0to1), + (TOGGLE_BIN_1_TO_0, count_1to0)): + cd = CoverData(CoverTypeT.TOGGLEBIN, 0) + cd.data = count + scope.createNextCover(bin_name, cd, None) + + return offset, 2 + + def _read_regular_scope(self, data: bytes, offset: int, parent, counts_iter): + scope_type_val, offset = decode_varint(data, offset) + name_ref, offset = decode_varint(data, offset) + presence, offset = decode_varint(data, offset) + + name = self._st.get(name_ref) + scope_type = ScopeTypeT(scope_type_val) + + flags = 0 + srcinfo = None + weight = 1 + at_least_override = None + + if presence & PRESENCE_FLAGS: + flags, offset = decode_varint(data, offset) + if presence & PRESENCE_SOURCE: + file_id, offset = decode_varint(data, offset) + line, offset = decode_varint(data, offset) + token, offset = decode_varint(data, offset) + fh = self._fh[file_id] if file_id < len(self._fh) else None + srcinfo = SourceInfo(fh, line, token) + if presence & PRESENCE_WEIGHT: + weight, offset = decode_varint(data, offset) + if presence & PRESENCE_AT_LEAST: + at_least_override, offset = decode_varint(data, offset) + + num_children, offset = decode_varint(data, offset) + num_coveritems, offset = decode_varint(data, offset) + + child_cover_type = None + if num_coveritems > 0: + ctv, offset = decode_varint(data, offset) + child_cover_type = CoverTypeT(ctv) + defaults = COVER_TYPE_DEFAULTS.get(child_cover_type, (0, 0, 1)) + at_least = at_least_override if at_least_override is not None else defaults[1] + + if scope_type == ScopeTypeT.INSTANCE: + # createInstance() requires a DU reference; find the matching DU + # that was already serialized (DU scopes precede INSTANCE in DFS). + du_scope = None + for sibling in parent.scopes(ScopeTypeT.ALL): + if (ScopeTypeT.DU_ANY(sibling.getScopeType()) + and sibling.getScopeName() == name): + du_scope = sibling + break + if du_scope is None: + # DU not yet in parent (INSTANCE precedes DU in source ordering). + # Create a detached placeholder so createInstance() can succeed + # without adding an extra scope to parent's children. + from ucis.mem.mem_scope import MemScope as _MemScope + du_scope = _MemScope( + None, name, srcinfo, weight, SourceT.NONE, ScopeTypeT.DU_MODULE, flags) + scope = parent.createInstance( + name, srcinfo, weight, SourceT.NONE, scope_type, du_scope, flags) + else: + scope = parent.createScope(name, srcinfo, weight, SourceT.NONE, scope_type, flags) + + # Coveritems + for _ in range(num_coveritems): + ci_name_ref, offset = decode_varint(data, offset) + ci_name = self._st.get(ci_name_ref) + count = next(counts_iter, 0) + cd = CoverData(child_cover_type, 0) + cd.data = count + if at_least_override is not None or (child_cover_type and + COVER_TYPE_DEFAULTS.get(child_cover_type, (0,0,1))[1] != 0): + cd.at_least = at_least if 'at_least' in dir() else 0 + scope.createNextCover(ci_name, cd, None) + + # Child scopes + for _ in range(num_children): + offset, _ = self._read_scope(data, offset, scope, counts_iter) + + return offset, num_coveritems diff --git a/src/ucis/ncdb/sources.py b/src/ucis/ncdb/sources.py new file mode 100644 index 0000000..0716139 --- /dev/null +++ b/src/ucis/ncdb/sources.py @@ -0,0 +1,30 @@ +""" +sources.json — source file table serialization. + +JSON array mapping integer IDs to file paths. The order of entries +matches the file IDs used in scope_tree.bin source references. +""" + +import json +from ucis.mem.mem_file_handle import MemFileHandle + + +class SourcesWriter: + """Serialize source file handles to sources.json bytes.""" + + def serialize(self, file_handles) -> bytes: + records = [] + for fh in file_handles: + records.append(fh.getFileName()) + return json.dumps(records, indent=2).encode("utf-8") + + +class SourcesReader: + """Deserialize source file handles from sources.json bytes.""" + + def deserialize(self, data: bytes) -> list: + records = json.loads(data.decode("utf-8")) + handles = [] + for filename in records: + handles.append(MemFileHandle(filename)) + return handles diff --git a/src/ucis/ncdb/string_table.py b/src/ucis/ncdb/string_table.py new file mode 100644 index 0000000..c69d9bf --- /dev/null +++ b/src/ucis/ncdb/string_table.py @@ -0,0 +1,72 @@ +""" +Deduplicated string table for the NCDB format. + +Strings are stored once in strings.bin; all other members reference them +by integer index. The table is length-prefixed, null-terminated UTF-8. + +Binary layout of strings.bin: + [count: varint] + [len_0: varint][bytes_0: UTF-8] + [len_1: varint][bytes_1: UTF-8] + ... + +Indices are 0-based. Index 0 is always the empty string "". +""" + +import io +from .varint import encode_varint, decode_varint + + +class StringTable: + """Build, serialize and deserialize the NCDB string table.""" + + def __init__(self): + self._strings: list[str] = [] + self._index: dict[str, int] = {} + + # ── Building ────────────────────────────────────────────────────────── + + def add(self, s: str) -> int: + """Return the index for *s*, adding it if not already present.""" + if s is None: + s = "" + if s not in self._index: + idx = len(self._strings) + self._strings.append(s) + self._index[s] = idx + return self._index[s] + + def get(self, idx: int) -> str: + """Return the string at *idx*.""" + return self._strings[idx] + + def __len__(self) -> int: + return len(self._strings) + + def __iter__(self): + return iter(self._strings) + + # ── Serialization ───────────────────────────────────────────────────── + + def serialize(self) -> bytes: + """Encode the string table to bytes.""" + buf = io.BytesIO() + buf.write(encode_varint(len(self._strings))) + for s in self._strings: + encoded = s.encode("utf-8") + buf.write(encode_varint(len(encoded))) + buf.write(encoded) + return buf.getvalue() + + @classmethod + def from_bytes(cls, data: bytes) -> "StringTable": + """Decode a string table from bytes.""" + table = cls() + offset = 0 + count, offset = decode_varint(data, offset) + for _ in range(count): + length, offset = decode_varint(data, offset) + s = data[offset: offset + length].decode("utf-8") + offset += length + table.add(s) + return table diff --git a/src/ucis/ncdb/tags.py b/src/ucis/ncdb/tags.py new file mode 100644 index 0000000..6d9b36c --- /dev/null +++ b/src/ucis/ncdb/tags.py @@ -0,0 +1,52 @@ +""" +tags.json — scope tag serialization. + +Format: JSON object + {"version": 1, "entries": [{"idx": , "tags": [, ...]}, ...]} + +Only scopes that have at least one tag are included (sparse). +""" + +import json + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + + +class TagsWriter: + """Serialize scope tags to tags.json bytes.""" + + def serialize(self, db) -> bytes: + scopes = dfs_scope_list(db) + entries = [] + for idx, scope in enumerate(scopes): + if not hasattr(scope, 'getTags'): + continue + tags = list(scope.getTags()) if scope.getTags() is not None else [] + if tags: + entries.append({"idx": idx, "tags": tags}) + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + +class TagsReader: + """Deserialize tags.json bytes and apply tags to scope tree.""" + + def deserialize(self, data: bytes, db) -> None: + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError(f"Unsupported tags.json version: {payload.get('version')}") + entries = payload.get("entries", []) + if not entries: + return + scopes = dfs_scope_list(db) + for entry in entries: + idx = entry["idx"] + if idx < len(scopes): + scope = scopes[idx] + if hasattr(scope, 'addTag'): + for tag in entry.get("tags", []): + scope.addTag(tag) diff --git a/src/ucis/ncdb/toggle.py b/src/ucis/ncdb/toggle.py new file mode 100644 index 0000000..6c61ea6 --- /dev/null +++ b/src/ucis/ncdb/toggle.py @@ -0,0 +1,152 @@ +""" +toggle.json — TOGGLE scope metadata serialization. + +Persists per-toggle-scope fields that are not encoded in scope_tree.bin: + - canonical name (full hierarchical signal path) + - toggle metric (ToggleMetricT enum value) + - toggle type (ToggleTypeT enum value) + - toggle direction (ToggleDirT enum value) + +Format: + {"version": 1, "entries": [ + {"idx": , "canonical": "", "metric": , + "type": , "dir": }, + ... + ]} + +Only TOGGLE scopes with at least one non-default value are included (sparse). +DFS index corresponds to dfs_scope_list() order (same as scope_tree.bin). +""" + +import json + +from ucis.scope_type_t import ScopeTypeT +from ucis.toggle_dir_t import ToggleDirT +from ucis.toggle_metric_t import ToggleMetricT +from ucis.toggle_type_t import ToggleTypeT + +from .dfs_util import dfs_scope_list + +_VERSION = 1 + +# MemToggleScope defaults — matching __init__ in mem_toggle_scope.py +_DEFAULT_METRIC = int(ToggleMetricT._2STOGGLE) +_DEFAULT_TYPE = int(ToggleTypeT.NET) +_DEFAULT_DIR = int(ToggleDirT.INTERNAL) + + +class ToggleWriter: + """Serialize TOGGLE-scope metadata to toggle.json bytes.""" + + def serialize(self, db) -> bytes: + scopes = dfs_scope_list(db) + entries = [] + for idx, scope in enumerate(scopes): + if scope.getScopeType() != ScopeTypeT.TOGGLE: + continue + entry = self._build_entry(idx, scope) + if entry: + entries.append(entry) + if not entries: + return b"" + payload = {"version": _VERSION, "entries": entries} + return json.dumps(payload, separators=(',', ':')).encode() + + def _build_entry(self, idx, scope) -> dict: + entry = {"idx": idx} + changed = False + + # Canonical name — stored on MemToggleScope as _canonical_name + canonical = None + if hasattr(scope, '_canonical_name'): + canonical = scope._canonical_name + elif hasattr(scope, 'getCanonicalName'): + canonical = scope.getCanonicalName() + scope_name = scope.getScopeName() + if canonical and canonical != scope_name: + entry["canonical"] = canonical + changed = True + + # Toggle metric + metric = None + if hasattr(scope, '_toggle_metric') and scope._toggle_metric is not None: + metric = int(scope._toggle_metric) + elif hasattr(scope, 'getToggleMetric'): + try: + val = scope.getToggleMetric() + metric = int(val) if val is not None else None + except Exception: + pass + if metric is not None and metric != _DEFAULT_METRIC: + entry["metric"] = metric + changed = True + + # Toggle type + ttype = None + if hasattr(scope, '_toggle_type') and scope._toggle_type is not None: + ttype = int(scope._toggle_type) + elif hasattr(scope, 'getToggleType'): + try: + val = scope.getToggleType() + ttype = int(val) if val is not None else None + except Exception: + pass + if ttype is not None and ttype != _DEFAULT_TYPE: + entry["type"] = ttype + changed = True + + # Toggle direction + tdir = None + if hasattr(scope, '_toggle_dir') and scope._toggle_dir is not None: + tdir = int(scope._toggle_dir) + elif hasattr(scope, 'getToggleDir'): + try: + val = scope.getToggleDir() + tdir = int(val) if val is not None else None + except Exception: + pass + if tdir is not None and tdir != _DEFAULT_DIR: + entry["dir"] = tdir + changed = True + + return entry if changed else None + + +class ToggleReader: + """Deserialize toggle.json bytes and apply metadata to TOGGLE scopes.""" + + def apply(self, db, data: bytes) -> None: + if not data: + return + payload = json.loads(data.decode()) + if payload.get("version") != _VERSION: + raise ValueError( + f"Unsupported toggle.json version: {payload.get('version')}") + entries = payload.get("entries", []) + if not entries: + return + scopes = dfs_scope_list(db) + for entry in entries: + idx = entry["idx"] + if idx >= len(scopes): + continue + scope = scopes[idx] + if scope.getScopeType() != ScopeTypeT.TOGGLE: + continue + if "canonical" in entry and hasattr(scope, 'setCanonicalName'): + scope.setCanonicalName(entry["canonical"]) + if "metric" in entry and hasattr(scope, 'setToggleMetric'): + try: + scope.setToggleMetric(ToggleMetricT(entry["metric"])) + except (ValueError, Exception): + pass + if "type" in entry and hasattr(scope, 'setToggleType'): + try: + scope.setToggleType(ToggleTypeT(entry["type"])) + except (ValueError, Exception): + pass + if "dir" in entry and hasattr(scope, 'setToggleDir'): + try: + scope.setToggleDir(ToggleDirT(entry["dir"])) + except (ValueError, Exception): + pass diff --git a/src/ucis/ncdb/varint.py b/src/ucis/ncdb/varint.py new file mode 100644 index 0000000..b6aa05a --- /dev/null +++ b/src/ucis/ncdb/varint.py @@ -0,0 +1,81 @@ +""" +LEB128 (unsigned) varint encoding/decoding. + +Used throughout the NCDB format for compact integer serialization. + +The bulk functions encode_varints() and decode_varints() are accelerated by +a C extension when available (see ucis.ncdb._accel). The single-value +functions encode_varint() and decode_varint() remain in pure Python for use +by scope_tree.py which needs them inline. +""" + + +def encode_varint(value: int) -> bytes: + """Encode a non-negative integer as unsigned LEB128 bytes.""" + if value < 0: + raise ValueError(f"varint requires non-negative integer, got {value}") + result = [] + while True: + byte = value & 0x7F + value >>= 7 + if value != 0: + byte |= 0x80 + result.append(byte) + if value == 0: + break + return bytes(result) + + +def decode_varint(buf: bytes, offset: int = 0): + """Decode an unsigned LEB128 varint from buf starting at offset. + + Returns: + (value, new_offset) — the decoded integer and the offset of the + first byte after the varint. + """ + result = 0 + shift = 0 + while True: + if offset >= len(buf): + raise ValueError("Buffer too short for varint") + byte = buf[offset] + offset += 1 + result |= (byte & 0x7F) << shift + shift += 7 + if not (byte & 0x80): + break + return result, offset + + +# ── Bulk encode/decode — use C accelerator when available ───────────────── + +try: + from ucis.ncdb._accel import ( + encode_varints as _accel_encode, + decode_varints as _accel_decode, + ) + _HAVE_ACCEL = True +except Exception: + _HAVE_ACCEL = False + + +def encode_varints(values) -> bytes: + """Encode a sequence of non-negative integers as concatenated LEB128.""" + if _HAVE_ACCEL: + return _accel_encode(values) + return b"".join(encode_varint(v) for v in values) + + +def decode_varints(buf: bytes, count: int, offset: int = 0): + """Decode *count* consecutive LEB128 varints from buf. + + Returns: + (list_of_values, new_offset) + """ + if _HAVE_ACCEL: + return _accel_decode(buf, count, offset) + values = [] + for _ in range(count): + v, offset = decode_varint(buf, offset) + values.append(v) + return values, offset diff --git a/src/ucis/rgy/format_rgy.py b/src/ucis/rgy/format_rgy.py index cfc9fe6..e91180f 100644 --- a/src/ucis/rgy/format_rgy.py +++ b/src/ucis/rgy/format_rgy.py @@ -85,6 +85,14 @@ def detectDatabaseFormat(self, path): elif ext in ['.yaml', '.yml']: return 'yaml' elif ext in ['.cdb', '.db', '.sqlite', '.sqlite3']: + # Use header-byte detection to distinguish SQLite vs NCDB + try: + from ucis.ncdb.format_detect import detect_cdb_format + fmt = detect_cdb_format(path) + if fmt in ('sqlite', 'ncdb'): + return fmt + except Exception: + pass return 'sqlite' elif ext == '.dat': return 'vltcov' @@ -134,6 +142,10 @@ def _init_rgy(self): # Register LCOV format from ucis.formatters.db_format_if_lcov import DbFormatIfLcov DbFormatIfLcov.register(self) + + # Register NCDB format + from ucis.ncdb.db_format_if_ncdb import DbFormatIfNcdb + DbFormatIfNcdb.register(self) FormatRptJson.register(self) FormatRptText.register(self) diff --git a/src/ucis/sqlite/sqlite_merge.py b/src/ucis/sqlite/sqlite_merge.py index 233082a..137ed02 100644 --- a/src/ucis/sqlite/sqlite_merge.py +++ b/src/ucis/sqlite/sqlite_merge.py @@ -679,27 +679,29 @@ def _merge_scope_recursive_api(self, src_scope, tgt_scope): elif hasattr(src_scope, 'getScopes'): child_scopes = list(src_scope.getScopes()) + if not child_scopes: + return + + # Build target children map ONCE for this parent (avoid O(N²) SQL fetches) + tgt_children_map: dict = {} + if hasattr(tgt_scope, 'scopes'): + for tgt_c in tgt_scope.scopes(-1): + key = (tgt_c.getScopeName(), tgt_c.getScopeType()) + tgt_children_map[key] = tgt_c + elif hasattr(tgt_scope, 'getScopes'): + for tgt_c in tgt_scope.getScopes(): + key = (tgt_c.getScopeName(), tgt_c.getScopeType()) + tgt_children_map[key] = tgt_c + for src_child in child_scopes: src_name = src_child.getScopeName() src_type = src_child.getScopeType() - - # Find or create matching child in target - tgt_child = None - tgt_children = [] - if hasattr(tgt_scope, 'scopes'): - # SqliteUCIS also uses scopes(mask) - tgt_children = list(tgt_scope.scopes(-1)) - elif hasattr(tgt_scope, 'getScopes'): - tgt_children = list(tgt_scope.getScopes()) - - for tgt_c in tgt_children: - if tgt_c.getScopeName() == src_name and tgt_c.getScopeType() == src_type: - tgt_child = tgt_c - break - + key = (src_name, src_type) + + tgt_child = tgt_children_map.get(key) + if tgt_child is None: # Create new scope in target - # Try to get flags, but default to 0 if not implemented flags = 0 try: if hasattr(src_child, 'getFlags'): @@ -707,7 +709,6 @@ def _merge_scope_recursive_api(self, src_scope, tgt_scope): except NotImplementedError: flags = 0 - # Try to get source type, default to 0 if not available source_type = 0 try: if hasattr(src_child, 'getSourceType'): @@ -715,7 +716,6 @@ def _merge_scope_recursive_api(self, src_scope, tgt_scope): except (NotImplementedError, AttributeError): source_type = 0 - # Create scope on parent (not passing parent as argument) tgt_child = tgt_scope.createScope( src_name, src_child.getSourceInfo(), @@ -725,6 +725,7 @@ def _merge_scope_recursive_api(self, src_scope, tgt_scope): flags ) tgt_child.setGoal(src_child.getGoal()) + tgt_children_map[key] = tgt_child self.stats.scopes_added += 1 else: self.stats.scopes_matched += 1 @@ -1260,124 +1261,87 @@ def _merge_test_associations(self, source_ucis, create_history: bool): def _merge_test_associations_mem_to_sqlite(self, source_ucis, create_history: bool): """ Create test associations when merging from MemUCIS to SQLite. - - Assumes all coveritems in the source were covered by all tests in the source, - which is the typical case when importing coverage data. + + Two paths: + 1. If source has _per_test_data (e.g. from NCDB), use those exact bin-index + mappings resolved against the DFS-ordered coveritem list. + 2. Otherwise, assume all tests covered all coveritems (legacy behavior) + — but skip the expensive walk if there is no per-test data to write. """ # Get all test history nodes from source source_tests = list(source_ucis.historyNodes(HistoryNodeKind.TEST)) if not source_tests: - # No tests to associate return - - # Find matching tests in target by logical name - target_tests = {} - for src_test in source_tests: - src_name = src_test.getLogicalName() - for tgt_test in self.target.historyNodes(HistoryNodeKind.TEST): - if tgt_test.getLogicalName() == src_name: - target_tests[src_name] = tgt_test.history_id + + # Fast path: use NCDB-style per_test_data when present + per_test = getattr(source_ucis, '_per_test_data', {}) + if not per_test: + # No per-test data recorded — skip the expensive scope-walk entirely. + # (Importing merged coverage without per-test granularity is the common + # case; associating every coveritem with every test would be misleading.) + return + + # Build history_idx → target history_id map + target_history_ids: dict = {} + for tgt_test in self.target.historyNodes(HistoryNodeKind.TEST): + for i, src_test in enumerate(source_tests): + if src_test.getLogicalName() == tgt_test.getLogicalName(): + target_history_ids[i] = tgt_test.history_id break - - if not target_tests: - # No matching tests found + + if not target_history_ids: return - - # Get all coveritems in target that were just merged - # We need to find coveritems that match the source structure - associations_to_insert = [] - - # Iterate through source scopes and find matching target scopes - def process_scope(src_scope): - # Get scope path + + # Build flat DFS coveritem → cover_id mapping in the target. + # We need to match bin_index (from per_test_data) to cover_id in SQLite. + # Approach: query coveritems ordered by scope DFS order. + # For simplicity, build the list from the source DFS order and look up + # matching coveritems in the target by scope path + cover_index. + from ucis.ncdb.dfs_util import dfs_scope_list + from ucis.cover_type_t import CoverTypeT + + dfs_scopes = dfs_scope_list(source_ucis) + + # Pre-build bin_index → cover_id map using a single SQL query per scope + bin_to_cover_id: dict = {} + bin_offset = 0 + for src_scope in dfs_scopes: + src_items = list(src_scope.coverItems(CoverTypeT.ALL)) + if not src_items: + continue + # Find the matching target scope by path src_path = self._get_scope_path(src_scope) - - # Find matching target scope tgt_scope = self._find_scope_by_path(self.target, src_path) - if not tgt_scope: - return - - # Get coveritems from source - try both methods - src_coveritems = [] - if hasattr(src_scope, 'coverItems'): - src_coveritems = list(src_scope.coverItems(-1)) # -1 = all types - elif hasattr(src_scope, 'getCoverage'): - src_coveritems = list(src_scope.getCoverage()) - - # For each coveritem in source scope, find matching in target - for src_cover in src_coveritems: - # Get source item name - src_name = src_cover.getName() - - # Get target coveritems - try both methods - tgt_coveritems = [] - if hasattr(tgt_scope, 'coverItems'): - tgt_coveritems = list(tgt_scope.coverItems(-1)) # -1 = all types - elif hasattr(tgt_scope, 'getCoverage'): - tgt_coveritems = list(tgt_scope.getCoverage()) - - # Find target coveritem by name (more reliable than cover_index for MemUCIS) - tgt_cover = None - for tc in tgt_coveritems: - if tc.getName() == src_name: - tgt_cover = tc - break - - if tgt_cover and hasattr(tgt_cover, 'cover_id'): - # Get hit count as contribution - cover_data = src_cover.getCoverData() - contribution = cover_data.data if cover_data else 1 - - # Associate with all tests - for test_name, tgt_history_id in target_tests.items(): - associations_to_insert.append(( - tgt_cover.cover_id, - tgt_history_id, - contribution - )) - - # Process child scopes - try both methods - child_scopes = [] - if hasattr(src_scope, 'scopes'): - child_scopes = list(src_scope.scopes(-1)) # -1 = all types - elif hasattr(src_scope, 'getScopes'): - child_scopes = list(src_scope.getScopes()) - - for child in child_scopes: - process_scope(child) - - # Start from root - MemUCIS IS the root scope - src_root = source_ucis - if hasattr(source_ucis, 'getRoot'): - src_root = source_ucis.getRoot() - - if src_root: - process_scope(src_root) - - # Batch insert associations - if associations_to_insert: - # Remove duplicates and handle existing associations - for cover_id, history_id, contribution in associations_to_insert: - existing = self.target.conn.execute(""" - SELECT count_contribution FROM coveritem_tests - WHERE cover_id = ? AND history_id = ? - """, (cover_id, history_id)).fetchone() - - if existing: - # Update existing - new_count = existing[0] + contribution - self.target.conn.execute(""" - UPDATE coveritem_tests - SET count_contribution = ? - WHERE cover_id = ? AND history_id = ? - """, (new_count, cover_id, history_id)) - else: - # Insert new - self.target.conn.execute(""" - INSERT INTO coveritem_tests (cover_id, history_id, count_contribution) - VALUES (?, ?, ?) - """, (cover_id, history_id, contribution)) - + if tgt_scope and hasattr(tgt_scope, 'scope_id'): + # Fetch all coveritems for this scope in cover_index order + rows = self.target.conn.execute( + "SELECT cover_id, cover_index FROM coveritems WHERE scope_id = ? ORDER BY cover_index", + (tgt_scope.scope_id,) + ).fetchall() + for row in rows: + cover_id, cover_index = row + if cover_index < len(src_items): + bin_to_cover_id[bin_offset + cover_index] = cover_id + bin_offset += len(src_items) + + # Insert associations from per_test_data + inserts = [] + for hist_idx, bin_counts in per_test.items(): + tgt_history_id = target_history_ids.get(hist_idx) + if tgt_history_id is None: + continue + for bin_index, count in bin_counts.items(): + cover_id = bin_to_cover_id.get(bin_index) + if cover_id is not None: + inserts.append((cover_id, tgt_history_id, count)) + + if inserts: + self.target.conn.executemany( + """INSERT OR REPLACE INTO coveritem_tests (cover_id, history_id, count_contribution) + VALUES (?, ?, ?)""", + inserts + ) + def _merge_test_associations_sqlite_to_sqlite(self, source_ucis, create_history: bool): """ Copy test associations when merging from SQLite to SQLite. diff --git a/src/ucis/xml/xml_reader.py b/src/ucis/xml/xml_reader.py index 84dc0cf..b9d4f91 100644 --- a/src/ucis/xml/xml_reader.py +++ b/src/ucis/xml/xml_reader.py @@ -742,8 +742,14 @@ def getInstScope(self, def getAttrDateTime(self, e, name): """Converts ISO time used by XML to the YYYYMMDDHHMMSS format used by the library""" val = e.get(name) - dateVal = datetime.strptime(val,"%Y-%m-%dT%H:%M:%S") - return dateVal.strftime("%Y%m%d%H%M%S") + # Try with and without fractional seconds + for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"): + try: + dateVal = datetime.strptime(val, fmt) + return dateVal.strftime("%Y%m%d%H%M%S") + except ValueError: + continue + raise ValueError(f"Cannot parse datetime: {val!r}") def getAttr(self, node, name, default): if name in node.attrib: diff --git a/tests/unit/ncdb/__init__.py b/tests/unit/ncdb/__init__.py new file mode 100644 index 0000000..c8bf6ef --- /dev/null +++ b/tests/unit/ncdb/__init__.py @@ -0,0 +1 @@ +# tests/unit/ncdb/__init__.py diff --git a/tests/unit/ncdb/test_attrs.py b/tests/unit/ncdb/test_attrs.py new file mode 100644 index 0000000..2c33f4a --- /dev/null +++ b/tests/unit/ncdb/test_attrs.py @@ -0,0 +1,133 @@ +""" +Tests for ucis.ncdb.attrs — user-defined attribute round-trip via NCDB ZIP. +""" + +import json +import os +import tempfile +import zipfile + +import pytest + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_ucis import MemUCIS +from ucis.ncdb.attrs import AttrsReader, AttrsWriter +from ucis.ncdb.constants import MEMBER_ATTRS +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _make_db_with_attrs(attr_map): + """Build a MemUCIS with a single BLOCK scope; apply *attr_map* to it.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + block = db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = 7 + block.createNextCover("s0", cd, None) + for k, v in attr_map.items(): + block.setAttribute(k, v) + return db, block + + +def _write_read(db): + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + return NcdbReader().read(path), path + + +# ── Unit tests: AttrsWriter / AttrsReader ───────────────────────────────── + +def test_attrs_writer_empty(): + """No attrs → empty entries list.""" + db = MemUCIS() + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + data = AttrsWriter().serialize(db) + payload = json.loads(data) + assert payload["version"] == 1 + assert payload["entries"] == [] + + +def test_attrs_writer_single(): + """One scope with one attr → one entry.""" + db, block = _make_db_with_attrs({"author": "alice"}) + data = AttrsWriter().serialize(db) + payload = json.loads(data) + assert len(payload["entries"]) == 1 + assert payload["entries"][0]["attrs"] == {"author": "alice"} + + +def test_attrs_writer_multiple_keys(): + """Multiple attrs on one scope → all keys present.""" + db, block = _make_db_with_attrs({"k1": "v1", "k2": "v2", "k3": "v3"}) + data = AttrsWriter().serialize(db) + payload = json.loads(data) + assert payload["entries"][0]["attrs"] == {"k1": "v1", "k2": "v2", "k3": "v3"} + + +def test_attrs_reader_applies_attrs(): + """AttrsReader must set attributes back onto scopes.""" + db, block = _make_db_with_attrs({"foo": "bar"}) + data = AttrsWriter().serialize(db) + + # Fresh DB, re-create same scope tree structure + db2 = MemUCIS() + block2 = db2.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + AttrsReader().deserialize(data, db2) + assert block2.getAttribute("foo") == "bar" + + +def test_attrs_reader_empty_data(): + """Empty bytes must not raise.""" + db = MemUCIS() + AttrsReader().deserialize(b'', db) # should not raise + + +# ── Integration: NCDB round-trip ───────────────────────────────────────── + +def test_attrs_round_trip_single(): + """Attribute survives NCDB write → read.""" + db, _ = _make_db_with_attrs({"tool": "pytest"}) + rdb, _ = _write_read(db) + blocks = list(rdb.scopes(ScopeTypeT.BLOCK)) + assert len(blocks) == 1 + assert blocks[0].getAttribute("tool") == "pytest" + + +def test_attrs_round_trip_multiple(): + """Multiple attributes all survive round-trip.""" + attrs = {"a": "1", "b": "hello world", "c": "true"} + db, _ = _make_db_with_attrs(attrs) + rdb, _ = _write_read(db) + block = list(rdb.scopes(ScopeTypeT.BLOCK))[0] + for k, v in attrs.items(): + assert block.getAttribute(k) == v, f"attr '{k}' mismatch" + + +def test_attrs_absent_from_zip_when_empty(): + """No attrs → attrs member must be absent from ZIP.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_ATTRS not in zf.namelist() + + +def test_attrs_present_in_zip_when_set(): + """If any attr is set, attrs member must appear in ZIP.""" + db, _ = _make_db_with_attrs({"x": "y"}) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_ATTRS in zf.namelist() diff --git a/tests/unit/ncdb/test_cli.py b/tests/unit/ncdb/test_cli.py new file mode 100644 index 0000000..32c5b9d --- /dev/null +++ b/tests/unit/ncdb/test_cli.py @@ -0,0 +1,148 @@ +""" +CLI integration tests for NCDB format. + +Tests `pyucis convert`, `pyucis merge`, and `pyucis show summary` commands +with NCDB input/output via subprocess so that auto-detect and format +registry paths are exercised end-to-end. +""" + +import os +import json +import subprocess +import sys +import tempfile + +import pytest + +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.mem.mem_ucis import MemUCIS +from ucis.source_t import SourceT +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind + +_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +_TESTS_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +_SRC = os.path.join(_REPO_ROOT, "src") +_PYTHON = sys.executable + +_SKIP_IF_NO_MERGED = pytest.mark.skipif( + not os.path.exists(os.path.join(_TESTS_ROOT, "merged.cdb")), + reason="merged.cdb not present", +) + + +def _run(*args, check=True, **kw): + env = dict(os.environ) + env["PYTHONPATH"] = _SRC + return subprocess.run( + [_PYTHON, "-m", "ucis"] + list(args), + capture_output=True, text=True, env=env, **kw + ) + + +def _make_simple_ncdb(path: str, scope_name: str = "top", + bins: int = 3, test_name: str = "t1"): + db = MemUCIS() + db.createHistoryNode(None, test_name, None, HistoryNodeKind.TEST) + blk = db.createScope(scope_name, None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + for i in range(bins): + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = i + 1 + blk.createNextCover(f"s{i}", cd, None) + NcdbWriter().write(db, path) + + +# ── Tests ───────────────────────────────────────────────────────────────── + +def test_convert_ncdb_to_ncdb(): + """convert --if ncdb --of ncdb → valid output file.""" + with tempfile.TemporaryDirectory() as d: + src = os.path.join(d, "src.cdb") + dst = os.path.join(d, "dst.cdb") + _make_simple_ncdb(src) + r = _run("convert", "--input-format", "ncdb", "--output-format", "ncdb", + src, "-o", dst) + assert r.returncode == 0, r.stderr + assert os.path.exists(dst) + rt = NcdbReader().read(dst) + assert list(rt.scopes(ScopeTypeT.ALL)) + + +def test_convert_ncdb_to_xml(): + """convert --if ncdb --of xml → non-empty XML file.""" + with tempfile.TemporaryDirectory() as d: + src = os.path.join(d, "src.cdb") + dst = os.path.join(d, "out.xml") + _make_simple_ncdb(src) + r = _run("convert", "--input-format", "ncdb", "--output-format", "xml", + src, "-o", dst) + assert r.returncode == 0, r.stderr + assert os.path.exists(dst) + assert os.path.getsize(dst) > 0 + + +def test_merge_same_schema_ncdb(): + """merge --if ncdb --of ncdb → counts doubled.""" + with tempfile.TemporaryDirectory() as d: + a = os.path.join(d, "a.cdb") + b = os.path.join(d, "b.cdb") + out = os.path.join(d, "merged.cdb") + _make_simple_ncdb(a, bins=3) + _make_simple_ncdb(b, bins=3) + r = _run("merge", "--input-format", "ncdb", "--output-format", "ncdb", + a, b, "-o", out) + assert r.returncode == 0, r.stderr + merged = NcdbReader().read(out) + scopes = list(merged.scopes(ScopeTypeT.ALL)) + assert scopes + items = list(scopes[0].coverItems(CoverTypeT.ALL)) + assert items[0].getCoverData().data == 2 # 1+1 + assert items[1].getCoverData().data == 4 # 2+2 + assert items[2].getCoverData().data == 6 # 3+3 + + +def test_show_summary_ncdb_auto_detect(): + """show summary with auto-detect should succeed for NCDB files.""" + with tempfile.TemporaryDirectory() as d: + src = os.path.join(d, "cov.cdb") + _make_simple_ncdb(src) + r = _run("show", "summary", src) + assert r.returncode == 0, r.stderr + # Output should be valid JSON + out = r.stdout + # Strip any preamble text before the JSON object + start = out.find("{") + if start >= 0: + data = json.loads(out[start:]) + assert "overall_coverage" in data + + +def test_format_registry_ncdb(): + """DbFormatRgy must have 'ncdb' registered.""" + from ucis.db_format_rgy import DbFormatRgy + rgy = DbFormatRgy.inst() + assert rgy.hasFormatType("ncdb") + fmt_if = rgy.getFormatIf("ncdb") + assert fmt_if is not None + + +def test_format_registry_ncdb_rgy(): + """FormatRgy must have 'ncdb' registered.""" + from ucis.rgy.format_rgy import FormatRgy + rgy = FormatRgy.inst() + assert rgy.hasDatabaseFormat("ncdb") + + +@_SKIP_IF_NO_MERGED +def test_convert_sqlite_to_ncdb_cli(): + """CLI convert --if sqlite --of ncdb → smaller file than input.""" + merged = os.path.join(_TESTS_ROOT, "merged.cdb") + with tempfile.TemporaryDirectory() as d: + out = os.path.join(d, "out.cdb") + r = _run("convert", "--input-format", "sqlite", + "--output-format", "ncdb", merged, "-o", out) + assert r.returncode == 0, r.stderr + assert os.path.getsize(out) < os.path.getsize(merged) / 10 diff --git a/tests/unit/ncdb/test_contrib.py b/tests/unit/ncdb/test_contrib.py new file mode 100644 index 0000000..b9524f9 --- /dev/null +++ b/tests/unit/ncdb/test_contrib.py @@ -0,0 +1,215 @@ +"""Tests for contrib.py — per-test contribution round-trip.""" + +import os +import tempfile + +import pytest + +from ucis.mem.mem_ucis import MemUCIS +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind + +from ucis.ncdb.contrib import ContribWriter, ContribReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def _make_db(): + """Build a MemUCIS with one covergroup/coverpoint/3 bins and 2 history nodes.""" + db = MemUCIS() + cg_du = db.createScope("cg_du", None, 1, None, ScopeTypeT.DU_MODULE, 0) + cp = cg_du.createScope("cp", None, 1, None, ScopeTypeT.COVERPOINT, 0) + cp.createBin("b0", None, 1, 0, "0") # bin index 0 + cp.createBin("b1", None, 1, 0, "1") # bin index 1 + cp.createBin("b2", None, 1, 0, "2") # bin index 2 + + db.createHistoryNode(None, "test_a", None, HistoryNodeKind.TEST) # hist_idx 0 + db.createHistoryNode(None, "test_b", None, HistoryNodeKind.TEST) # hist_idx 1 + return db + + +# ── MemUCIS API ─────────────────────────────────────────────────────────────── + +def test_record_test_association_basic(): + db = MemUCIS() + db.record_test_association(0, 5, 3) + assert db._per_test_data == {0: {5: 3}} + + +def test_record_test_association_accumulates(): + db = MemUCIS() + db.record_test_association(0, 5, 2) + db.record_test_association(0, 5, 3) + assert db._per_test_data[0][5] == 5 + + +def test_record_test_association_multiple_tests(): + db = MemUCIS() + db.record_test_association(0, 1, 1) + db.record_test_association(1, 2, 4) + assert db._per_test_data[0] == {1: 1} + assert db._per_test_data[1] == {2: 4} + + +def test_get_test_coverage_api_returns_instance(): + db = _make_db() + api = db.get_test_coverage_api() + assert api is not None + assert api._db is db + + +def test_get_test_coverage_api_cached(): + db = _make_db() + api1 = db.get_test_coverage_api() + api2 = db.get_test_coverage_api() + assert api1 is api2 + + +# ── MemTestCoverage queries ─────────────────────────────────────────────────── + +def test_has_test_associations_false(): + db = _make_db() + assert not db.get_test_coverage_api().has_test_associations() + + +def test_has_test_associations_true(): + db = _make_db() + db.record_test_association(0, 0, 1) + assert db.get_test_coverage_api().has_test_associations() + + +def test_get_tests_for_coveritem(): + db = _make_db() + db.record_test_association(0, 1, 2) + db.record_test_association(1, 1, 5) + info = db.get_test_coverage_api().get_tests_for_coveritem(1) + assert info.total_hits == 7 + names = {t[1] for t in info.tests} + assert names == {"test_a", "test_b"} + + +def test_get_tests_for_coveritem_empty(): + db = _make_db() + info = db.get_test_coverage_api().get_tests_for_coveritem(99) + assert info.total_hits == 0 + assert info.tests == [] + + +def test_get_coveritems_for_test(): + db = _make_db() + db.record_test_association(0, 0, 1) + db.record_test_association(0, 2, 3) + bins = db.get_test_coverage_api().get_coveritems_for_test(0) + assert bins == [0, 2] + + +def test_get_unique_coveritems(): + db = _make_db() + db.record_test_association(0, 0, 1) + db.record_test_association(0, 1, 1) + db.record_test_association(1, 1, 1) + unique = db.get_test_coverage_api().get_unique_coveritems(0) + assert unique == [0] + + +def test_get_all_test_contributions(): + db = _make_db() + db.record_test_association(0, 0, 1) + db.record_test_association(0, 1, 1) + db.record_test_association(1, 2, 1) + contribs = db.get_test_coverage_api().get_all_test_contributions() + assert len(contribs) == 2 + # Sorted by total_items descending → test_a (2 bins) first + assert contribs[0].test_name == "test_a" + assert contribs[0].total_items == 2 + + +# ── ContribWriter / ContribReader round-trip ────────────────────────────────── + +def test_contrib_writer_empty(): + db = MemUCIS() + members = ContribWriter().serialize(db) + assert members == {} + + +def test_contrib_writer_produces_members(): + db = MemUCIS() + db.record_test_association(0, 10, 3) + db.record_test_association(0, 20, 1) + members = ContribWriter().serialize(db) + assert "contrib/0.bin" in members + assert len(members) == 1 + + +def test_contrib_round_trip(): + db = MemUCIS() + db.record_test_association(0, 5, 2) + db.record_test_association(0, 15, 4) + db.record_test_association(1, 7, 1) + + members = ContribWriter().serialize(db) + + db2 = MemUCIS() + ContribReader().apply(db2, members) + + assert db2._per_test_data[0] == {5: 2, 15: 4} + assert db2._per_test_data[1] == {7: 1} + + +def test_contrib_round_trip_large_indices(): + """Delta encoding should handle large sparse bin indices.""" + db = MemUCIS() + db.record_test_association(0, 0, 1) + db.record_test_association(0, 100_000, 99) + + members = ContribWriter().serialize(db) + db2 = MemUCIS() + ContribReader().apply(db2, members) + + assert db2._per_test_data[0] == {0: 1, 100_000: 99} + + +# ── Full NCDB round-trip ────────────────────────────────────────────────────── + +def _write_read(db): + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + return NcdbReader().read(path) + finally: + os.unlink(path) + + +def test_ncdb_round_trip_no_contrib(): + db = _make_db() + db2 = _write_read(db) + assert db2._per_test_data == {} + + +def test_ncdb_round_trip_with_contrib(): + db = _make_db() + db.record_test_association(0, 0, 1) + db.record_test_association(0, 2, 3) + db.record_test_association(1, 1, 2) + + db2 = _write_read(db) + + assert db2._per_test_data[0] == {0: 1, 2: 3} + assert db2._per_test_data[1] == {1: 2} + + +def test_ncdb_round_trip_api_after_read(): + db = _make_db() + db.record_test_association(0, 0, 5) + db.record_test_association(1, 0, 3) + + db2 = _write_read(db) + api = db2.get_test_coverage_api() + + info = api.get_tests_for_coveritem(0) + assert info.total_hits == 8 + assert len(info.tests) == 2 diff --git a/tests/unit/ncdb/test_counts.py b/tests/unit/ncdb/test_counts.py new file mode 100644 index 0000000..a7dcc70 --- /dev/null +++ b/tests/unit/ncdb/test_counts.py @@ -0,0 +1,47 @@ +"""Unit tests for ucis.ncdb.counts.""" + +import pytest +from ucis.ncdb.counts import CountsWriter, CountsReader + + +def _rt(values): + """Round-trip helper.""" + data = CountsWriter().serialize(values) + return CountsReader().deserialize(data) + + +def test_empty(): + assert _rt([]) == [] + + +def test_all_zeros(): + result = _rt([0] * 100) + assert result == [0] * 100 + + +def test_all_ones(): + result = _rt([1] * 100) + assert result == [1] * 100 + + +def test_mixed(): + values = [0, 1, 2, 127, 128, 255, 256, 65535, 2**32 - 1] + assert _rt(values) == values + + +def test_large_counts_use_uint32_mode(): + """When counts are large, uint32 mode is often more compact.""" + values = [2**31] * 200 # all near max uint32 + result = _rt(values) + assert result == values + + +def test_small_counts_prefer_varint_mode(): + """Mostly-zero data should use varint (smaller).""" + values = [0] * 1000 + [1, 2, 3] + writer = CountsWriter() + data = writer.serialize(values) + # varint encoding: 1000 zeros at 1 byte each + 3 small = 1003 bytes + # uint32: 1003 * 4 = 4012 bytes + assert len(data) < 4012 + assert CountsReader().deserialize(data) == values diff --git a/tests/unit/ncdb/test_cross.py b/tests/unit/ncdb/test_cross.py new file mode 100644 index 0000000..3a25393 --- /dev/null +++ b/tests/unit/ncdb/test_cross.py @@ -0,0 +1,209 @@ +""" +Tests for ucis.ncdb.cross — CROSS scope coverpoint link round-trip via NCDB ZIP. +""" + +import json +import os +import tempfile +import zipfile + +import pytest + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_ucis import MemUCIS +from ucis.ncdb.constants import MEMBER_CROSS +from ucis.ncdb.cross import CrossReader, CrossWriter +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _make_cross_db(num_bins=4): + """Build a MemUCIS with a COVERGROUP → COVERINSTANCE → 2 COVERPOINTs + CROSS.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + cg = db.createScope("cg", None, 1, SourceT.SV, ScopeTypeT.COVERGROUP, 0) + ci = cg.createScope("cg", None, 1, SourceT.SV, ScopeTypeT.COVERINSTANCE, 0) + + cp1 = ci.createScope("cp_a", None, 1, SourceT.SV, ScopeTypeT.COVERPOINT, 0) + cp2 = ci.createScope("cp_b", None, 1, SourceT.SV, ScopeTypeT.COVERPOINT, 0) + + for i in range(2): + cd = CoverData(CoverTypeT.CVGBIN, 0); cd.data = i + 1 + cp1.createNextCover(f"a{i}", cd, None) + for i in range(2): + cd = CoverData(CoverTypeT.CVGBIN, 0); cd.data = i + 1 + cp2.createNextCover(f"b{i}", cd, None) + + cross = ci.createCross("cp_a X cp_b", None, 1, SourceT.SV, [cp1, cp2]) + for i in range(num_bins): + cd = CoverData(CoverTypeT.DEFAULTBIN, 0); cd.data = i + 1 + cross.createNextCover(f"cross_{i}", cd, None) + + return db, cross, cp1, cp2 + + +def _write_read(db): + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + return NcdbReader().read(path), path + + +def _first_cross(db): + def _walk(scope): + if scope.getScopeType() == ScopeTypeT.CROSS: + return scope + for c in scope.scopes(ScopeTypeT.ALL): + r = _walk(c) + if r: + return r + return None + for s in db.scopes(ScopeTypeT.ALL): + r = _walk(s) + if r: + return r + return None + + +# ── Unit tests: CrossWriter / CrossReader ───────────────────────────────── + +def test_cross_writer_empty_when_no_cross(): + """DB without CROSS scopes → empty bytes.""" + db = MemUCIS() + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + assert CrossWriter().serialize(db) == b"" + + +def test_cross_writer_captures_crossed_names(): + """Writer must record crossed coverpoint names.""" + db, cross, cp1, cp2 = _make_cross_db() + data = CrossWriter().serialize(db) + payload = json.loads(data) + assert payload["version"] == 1 + assert len(payload["entries"]) == 1 + crossed = payload["entries"][0]["crossed"] + assert "cp_a" in crossed + assert "cp_b" in crossed + + +def test_cross_reader_empty_data(): + """Empty bytes must not raise.""" + db = MemUCIS() + CrossReader().apply(db, b"") + + +def test_cross_reader_restores_links(): + """CrossReader must populate coverpoints list on CROSS scope.""" + db, cross, cp1, cp2 = _make_cross_db() + data = CrossWriter().serialize(db) + + # Simulate a freshly-deserialized cross scope with no coverpoints + cross.coverpoints = [] + assert cross.getNumCrossedCoverpoints() == 0 + + CrossReader().apply(db, data) + assert cross.getNumCrossedCoverpoints() == 2 + + +# ── Integration: NCDB round-trip ───────────────────────────────────────── + +def test_cross_round_trip_num_crossed(): + """Number of crossed coverpoints survives NCDB write → read.""" + db, _, _, _ = _make_cross_db() + rdb, _ = _write_read(db) + cross = _first_cross(rdb) + assert cross is not None + assert cross.getNumCrossedCoverpoints() == 2 + + +def test_cross_round_trip_crossed_names(): + """Crossed coverpoint names are correct after round-trip.""" + db, _, _, _ = _make_cross_db() + rdb, _ = _write_read(db) + cross = _first_cross(rdb) + names = {cross.getIthCrossedCoverpoint(i).getScopeName() + for i in range(cross.getNumCrossedCoverpoints())} + assert names == {"cp_a", "cp_b"} + + +def test_cross_round_trip_bin_counts(): + """Cross bin counts survive round-trip.""" + db, _, _, _ = _make_cross_db(num_bins=4) + rdb, _ = _write_read(db) + cross = _first_cross(rdb) + counts = [ci.getCoverData().data + for ci in cross.coverItems(CoverTypeT.ALL)] + assert counts == [1, 2, 3, 4] + + +def test_cross_absent_from_zip_when_no_cross(): + """No CROSS scopes → cross.json must be absent from ZIP.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_CROSS not in zf.namelist() + + +def test_cross_present_in_zip_when_cross_exists(): + """CROSS scope present → cross.json must appear in ZIP.""" + db, _, _, _ = _make_cross_db() + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_CROSS in zf.namelist() + + +def test_cross_coverpoints_are_actual_siblings(): + """Resolved coverpoints must be the same objects as the sibling scopes.""" + db, _, _, _ = _make_cross_db() + rdb, _ = _write_read(db) + cross = _first_cross(rdb) + + # Find the parent COVERINSTANCE + parent = cross.m_parent + sibling_names = {s.getScopeName(): s + for s in parent.scopes(ScopeTypeT.ALL)} + + for i in range(cross.getNumCrossedCoverpoints()): + cp = cross.getIthCrossedCoverpoint(i) + assert cp.getScopeName() in sibling_names + assert cp is sibling_names[cp.getScopeName()] + + +def test_cross_three_way(): + """Three-way cross: all three coverpoints resolved.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + cg = db.createScope("cg", None, 1, SourceT.SV, ScopeTypeT.COVERGROUP, 0) + ci = cg.createScope("cg", None, 1, SourceT.SV, ScopeTypeT.COVERINSTANCE, 0) + + cps = [] + for letter in ("x", "y", "z"): + cp = ci.createScope(f"cp_{letter}", None, 1, SourceT.SV, + ScopeTypeT.COVERPOINT, 0) + cd = CoverData(CoverTypeT.CVGBIN, 0); cd.data = 1 + cp.createNextCover("b0", cd, None) + cps.append(cp) + + cross = ci.createCross("x_y_z", None, 1, SourceT.SV, cps) + for i in range(8): + cd = CoverData(CoverTypeT.DEFAULTBIN, 0); cd.data = i + cross.createNextCover(f"c{i}", cd, None) + + rdb, _ = _write_read(db) + rcross = _first_cross(rdb) + assert rcross.getNumCrossedCoverpoints() == 3 + names = {rcross.getIthCrossedCoverpoint(i).getScopeName() + for i in range(3)} + assert names == {"cp_x", "cp_y", "cp_z"} diff --git a/tests/unit/ncdb/test_design_units.py b/tests/unit/ncdb/test_design_units.py new file mode 100644 index 0000000..aaef3b0 --- /dev/null +++ b/tests/unit/ncdb/test_design_units.py @@ -0,0 +1,140 @@ +"""Tests for design_units.py serialization and round-trip.""" + +import io +import os +import tempfile +import zipfile + +import pytest + +from ucis.mem.mem_ucis import MemUCIS +from ucis.scope_type_t import ScopeTypeT + +from ucis.ncdb.design_units import DesignUnitsWriter, DesignUnitsReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def _make_db_with_dus(): + """Build a MemUCIS with several design units.""" + db = MemUCIS() + db.createScope("pkg_a", None, 1, None, ScopeTypeT.DU_PACKAGE, 0) + db.createScope("mod_b", None, 1, None, ScopeTypeT.DU_MODULE, 0) + db.createScope("ifc_c", None, 1, None, ScopeTypeT.DU_INTERFACE, 0) + return db + + +# ── serialize / deserialize ─────────────────────────────────────────────────── + +def test_serialize_produces_bytes(): + db = _make_db_with_dus() + data = DesignUnitsWriter().serialize(db) + assert isinstance(data, bytes) + assert len(data) > 0 + + +def test_serialize_empty_when_no_dus(): + db = MemUCIS() + data = DesignUnitsWriter().serialize(db) + assert data == b"" + + +def test_round_trip_names(): + db = _make_db_with_dus() + data = DesignUnitsWriter().serialize(db) + index = DesignUnitsReader().build_index(data, db) + assert set(index.keys()) == {"pkg_a", "mod_b", "ifc_c"} + + +def test_round_trip_scope_objects(): + db = _make_db_with_dus() + data = DesignUnitsWriter().serialize(db) + index = DesignUnitsReader().build_index(data, db) + for name, scope in index.items(): + assert scope.getScopeName() == name + assert ScopeTypeT.DU_ANY(scope.getScopeType()) + + +def test_fallback_when_empty_data(): + """build_index returns a usable index even without serialized data.""" + db = _make_db_with_dus() + index = DesignUnitsReader().build_index(b"", db) + assert set(index.keys()) == {"pkg_a", "mod_b", "ifc_c"} + + +def test_fallback_matches_serialized(): + db = _make_db_with_dus() + data = DesignUnitsWriter().serialize(db) + idx_from_data = DesignUnitsReader().build_index(data, db) + idx_from_scan = DesignUnitsReader().build_index(b"", db) + assert set(idx_from_data.keys()) == set(idx_from_scan.keys()) + + +# ── full NCDB round-trip ────────────────────────────────────────────────────── + +def test_ncdb_round_trip(): + db = _make_db_with_dus() + import tempfile, os + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + db2 = NcdbReader().read(path) + assert hasattr(db2, "_du_index") + assert set(db2._du_index.keys()) == {"pkg_a", "mod_b", "ifc_c"} + finally: + os.unlink(path) + + +def test_ncdb_round_trip_du_type(): + db = _make_db_with_dus() + import tempfile, os + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + db2 = NcdbReader().read(path) + mod = db2._du_index.get("mod_b") + assert mod is not None + assert mod.getScopeType() == ScopeTypeT.DU_MODULE + finally: + os.unlink(path) + + +# ── merged.cdb regression ───────────────────────────────────────────────────── + +_MERGED_CDB = os.path.join(os.path.dirname(__file__), "..", "..", "merged.cdb") + + +@pytest.mark.skipif(not os.path.exists(_MERGED_CDB), reason="merged.cdb not found") +def test_merged_cdb_du_index(): + from ucis.sqlite.sqlite_ucis import SqliteUCIS + sqlite_db = SqliteUCIS(_MERGED_CDB) + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + ncdb_path = f.name + try: + NcdbWriter().write(sqlite_db, ncdb_path) + sqlite_db.close() + db = NcdbReader().read(ncdb_path) + assert hasattr(db, "_du_index") + assert len(db._du_index) > 0 + finally: + os.unlink(ncdb_path) + + +@pytest.mark.skipif(not os.path.exists(_MERGED_CDB), reason="merged.cdb not found") +def test_merged_cdb_du_are_du_any(): + from ucis.sqlite.sqlite_ucis import SqliteUCIS + sqlite_db = SqliteUCIS(_MERGED_CDB) + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + ncdb_path = f.name + try: + NcdbWriter().write(sqlite_db, ncdb_path) + sqlite_db.close() + db = NcdbReader().read(ncdb_path) + for scope in db._du_index.values(): + assert ScopeTypeT.DU_ANY(scope.getScopeType()) + finally: + os.unlink(ncdb_path) diff --git a/tests/unit/ncdb/test_formal.py b/tests/unit/ncdb/test_formal.py new file mode 100644 index 0000000..e54f929 --- /dev/null +++ b/tests/unit/ncdb/test_formal.py @@ -0,0 +1,213 @@ +"""Tests for formal.py — formal verification data round-trip.""" + +import os +import tempfile + +import pytest + +from ucis.formal_status_t import FormalStatusT +from ucis.mem.mem_ucis import MemUCIS +from ucis.scope_type_t import ScopeTypeT +from ucis.history_node_kind import HistoryNodeKind + +from ucis.ncdb.formal import FormalWriter, FormalReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def _make_db(): + db = MemUCIS() + du = db.createScope("top", None, 1, None, ScopeTypeT.DU_MODULE, 0) + cp = du.createScope("assert_cp", None, 1, None, ScopeTypeT.COVERPOINT, 0) + cp.createBin("pass", None, 1, 0, "0") # bin 0 + cp.createBin("fail", None, 1, 0, "1") # bin 1 + cp.createBin("vacuous", None, 1, 0, "2") # bin 2 + return db + + +# ── FormalStatusT enum ──────────────────────────────────────────────────────── + +def test_formal_status_values(): + assert FormalStatusT.NONE == 0 + assert FormalStatusT.FAILURE == 1 + assert FormalStatusT.PROOF == 2 + assert FormalStatusT.VACUOUS == 3 + assert FormalStatusT.INCONCLUSIVE == 4 + assert FormalStatusT.ASSUMPTION == 5 + assert FormalStatusT.CONFLICT == 6 + + +# ── MemUCIS formal API ──────────────────────────────────────────────────────── + +def test_set_formal_data_status(): + db = MemUCIS() + db.set_formal_data(0, status=FormalStatusT.PROOF) + assert db._formal_data[0]['status'] == FormalStatusT.PROOF + + +def test_set_formal_data_radius(): + db = MemUCIS() + db.set_formal_data(5, radius=42) + assert db._formal_data[5]['radius'] == 42 + + +def test_set_formal_data_witness(): + db = MemUCIS() + db.set_formal_data(3, witness="/tmp/wit.vcd") + assert db._formal_data[3]['witness'] == "/tmp/wit.vcd" + + +def test_set_formal_data_all_fields(): + db = MemUCIS() + db.set_formal_data(7, status=FormalStatusT.FAILURE, radius=10, witness="w.vcd") + fd = db._formal_data[7] + assert fd['status'] == FormalStatusT.FAILURE + assert fd['radius'] == 10 + assert fd['witness'] == "w.vcd" + + +def test_get_formal_data_present(): + db = MemUCIS() + db.set_formal_data(2, status=FormalStatusT.VACUOUS) + fd = db.get_formal_data(2) + assert fd is not None + assert fd['status'] == FormalStatusT.VACUOUS + + +def test_get_formal_data_absent(): + db = MemUCIS() + assert db.get_formal_data(99) is None + + +def test_set_formal_data_partial_update(): + """Multiple calls to set_formal_data merge fields.""" + db = MemUCIS() + db.set_formal_data(1, status=FormalStatusT.PROOF) + db.set_formal_data(1, radius=5) + fd = db.get_formal_data(1) + assert fd['status'] == FormalStatusT.PROOF + assert fd['radius'] == 5 + + +# ── FormalWriter / FormalReader ─────────────────────────────────────────────── + +def test_writer_empty_when_no_data(): + db = MemUCIS() + assert FormalWriter().serialize(db) == b"" + + +def test_writer_empty_when_all_default(): + """Entries with all-default values are not serialized.""" + db = MemUCIS() + db.set_formal_data(0, status=FormalStatusT.NONE) + assert FormalWriter().serialize(db) == b"" + + +def test_writer_produces_bytes(): + db = MemUCIS() + db.set_formal_data(0, status=FormalStatusT.PROOF) + data = FormalWriter().serialize(db) + assert len(data) > 0 + + +def test_round_trip_status(): + db = MemUCIS() + db.set_formal_data(4, status=FormalStatusT.INCONCLUSIVE) + data = FormalWriter().serialize(db) + + db2 = MemUCIS() + FormalReader().apply(db2, data) + assert db2._formal_data[4]['status'] == FormalStatusT.INCONCLUSIVE + + +def test_round_trip_radius(): + db = MemUCIS() + db.set_formal_data(3, radius=100) + data = FormalWriter().serialize(db) + + db2 = MemUCIS() + FormalReader().apply(db2, data) + assert db2._formal_data[3]['radius'] == 100 + + +def test_round_trip_witness(): + db = MemUCIS() + db.set_formal_data(2, witness="/evidence/witness.vcd") + data = FormalWriter().serialize(db) + + db2 = MemUCIS() + FormalReader().apply(db2, data) + assert db2._formal_data[2]['witness'] == "/evidence/witness.vcd" + + +def test_round_trip_multiple_entries(): + db = MemUCIS() + db.set_formal_data(0, status=FormalStatusT.PROOF) + db.set_formal_data(1, status=FormalStatusT.FAILURE, witness="fail.vcd") + db.set_formal_data(5, status=FormalStatusT.VACUOUS, radius=3) + data = FormalWriter().serialize(db) + + db2 = MemUCIS() + FormalReader().apply(db2, data) + assert db2._formal_data[0]['status'] == FormalStatusT.PROOF + assert db2._formal_data[1]['witness'] == "fail.vcd" + assert db2._formal_data[5]['radius'] == 3 + + +def test_reader_noop_on_empty(): + db = MemUCIS() + FormalReader().apply(db, b"") + assert db._formal_data == {} + + +def test_all_status_values_round_trip(): + db = MemUCIS() + for i, st in enumerate(FormalStatusT): + if st != FormalStatusT.NONE: + db.set_formal_data(i, status=st) + data = FormalWriter().serialize(db) + + db2 = MemUCIS() + FormalReader().apply(db2, data) + for i, st in enumerate(FormalStatusT): + if st != FormalStatusT.NONE: + assert db2._formal_data[i]['status'] == int(st) + + +# ── Full NCDB round-trip ────────────────────────────────────────────────────── + +def _write_read(db): + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + return NcdbReader().read(path) + finally: + os.unlink(path) + + +def test_ncdb_round_trip_no_formal(): + db = _make_db() + db2 = _write_read(db) + assert db2._formal_data == {} + + +def test_ncdb_round_trip_with_formal(): + db = _make_db() + db.set_formal_data(0, status=FormalStatusT.PROOF) + db.set_formal_data(1, status=FormalStatusT.FAILURE, witness="fail.vcd") + + db2 = _write_read(db) + assert db2.get_formal_data(0)['status'] == FormalStatusT.PROOF + assert db2.get_formal_data(1)['witness'] == "fail.vcd" + + +def test_ncdb_round_trip_formal_not_present_for_unset(): + db = _make_db() + db.set_formal_data(0, status=FormalStatusT.VACUOUS) + + db2 = _write_read(db) + assert db2.get_formal_data(1) is None + assert db2.get_formal_data(2) is None diff --git a/tests/unit/ncdb/test_format_detect.py b/tests/unit/ncdb/test_format_detect.py new file mode 100644 index 0000000..f743520 --- /dev/null +++ b/tests/unit/ncdb/test_format_detect.py @@ -0,0 +1,70 @@ +"""Unit tests for ucis.ncdb.format_detect.""" + +import os +import tempfile +import zipfile +import json +import pytest + +from ucis.ncdb.format_detect import detect_cdb_format +from ucis.ncdb.constants import SQLITE_MAGIC, MEMBER_MANIFEST, NCDB_FORMAT + + +def _make_sqlite_file(path): + with open(path, "wb") as f: + f.write(SQLITE_MAGIC) + f.write(b"\x00" * 100) + + +def _make_ncdb_file(path): + with zipfile.ZipFile(path, "w") as zf: + manifest = {"format": NCDB_FORMAT, "version": "1.0"} + zf.writestr(MEMBER_MANIFEST, json.dumps(manifest)) + + +def _make_plain_zip(path): + with zipfile.ZipFile(path, "w") as zf: + zf.writestr("data.txt", "hello") + + +def test_detect_sqlite(): + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + _make_sqlite_file(f.name) + path = f.name + try: + assert detect_cdb_format(path) == "sqlite" + finally: + os.unlink(path) + + +def test_detect_ncdb(): + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + _make_ncdb_file(path) + try: + assert detect_cdb_format(path) == "ncdb" + finally: + os.unlink(path) + + +def test_detect_unknown_plain_zip(): + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + _make_plain_zip(path) + try: + assert detect_cdb_format(path) == "unknown" + finally: + os.unlink(path) + + +def test_detect_nonexistent(): + assert detect_cdb_format("/no/such/file.cdb") == "unknown" + + +def test_detect_real_merged_cdb(): + """The repo's merged.cdb should be detected as SQLite.""" + repo_root = os.path.join(os.path.dirname(__file__), "..", "..") + cdb_path = os.path.join(repo_root, "merged.cdb") + if not os.path.exists(cdb_path): + pytest.skip("merged.cdb not present") + assert detect_cdb_format(cdb_path) == "sqlite" diff --git a/tests/unit/ncdb/test_fsm.py b/tests/unit/ncdb/test_fsm.py new file mode 100644 index 0000000..7fae44c --- /dev/null +++ b/tests/unit/ncdb/test_fsm.py @@ -0,0 +1,310 @@ +""" +Tests for ucis.ncdb.fsm — FSM scope metadata round-trip via NCDB ZIP. +""" + +import json +import os +import tempfile +import zipfile + +import pytest + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_fsm_scope import MemFSMScope +from ucis.mem.mem_ucis import MemUCIS +from ucis.ncdb.constants import MEMBER_FSM +from ucis.ncdb.fsm import FsmReader, FsmWriter +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _make_fsm_db(state_data=None, transition_data=None): + """ + Build a MemUCIS with one FSM scope. + + *state_data* is a list of (name, index, visit_count) triples. + *transition_data* is a list of (from_name, to_name, count) triples. + If None, defaults to IDLE(0,1) / RUN(1,2) with IDLE->RUN(1). + """ + if state_data is None: + state_data = [("IDLE", 0, 1), ("RUN", 1, 2)] + if transition_data is None: + transition_data = [("IDLE", "RUN", 1), ("RUN", "IDLE", 0)] + + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + du = db.createScope("mod", None, 1, SourceT.SV, ScopeTypeT.DU_MODULE, 0) + inst = db.createInstance("mod", None, 1, SourceT.SV, ScopeTypeT.INSTANCE, du, 0) + + fsm = MemFSMScope(inst, "fsm1", None, 1, SourceT.SV) + inst.m_children.append(fsm) + fsm.m_parent = inst + + states = {} + for (name, idx, cnt) in state_data: + s = fsm.createState(name, idx) + s.visit_count = cnt + # Update the matching cover item count + for ci in fsm._states_scope.coverItems(CoverTypeT.ALL): + if ci.getName() == name: + ci.getCoverData().data = cnt + states[name] = s + + for (from_name, to_name, cnt) in transition_data: + t = fsm.createTransition(states[from_name], states[to_name]) + t.count = cnt + for ci in fsm._trans_scope.coverItems(CoverTypeT.ALL): + if ci.getName() == f"{from_name}->{to_name}": + ci.getCoverData().data = cnt + + return db, fsm + + +def _write_read(db): + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + return NcdbReader().read(path), path + + +def _first_fsm(db): + """Find the first FSM scope in DFS order.""" + def _walk(scope): + if scope.getScopeType() == ScopeTypeT.FSM: + return scope + for child in scope.scopes(ScopeTypeT.ALL): + result = _walk(child) + if result: + return result + return None + for s in db.scopes(ScopeTypeT.ALL): + result = _walk(s) + if result: + return result + return None + + +# ── Unit tests: FsmWriter ───────────────────────────────────────────────── + +def test_fsm_writer_empty_when_sequential_indices(): + """Sequential 0,1,2,… state indices → empty bytes (no fsm.json needed).""" + db, _ = _make_fsm_db() + data = FsmWriter().serialize(db) + assert data == b"" + + +def test_fsm_writer_stores_nonsequential_index(): + """Non-sequential state index is captured in fsm.json.""" + db, _ = _make_fsm_db( + state_data=[("IDLE", 0, 0), ("RUN", 5, 0)], # index 5 != sequential 1 + transition_data=[("IDLE", "RUN", 0)], + ) + data = FsmWriter().serialize(db) + assert data != b"" + payload = json.loads(data) + assert payload["version"] == 1 + entry = payload["entries"][0] + names_to_idx = {s["name"]: s["index"] for s in entry["states"]} + assert names_to_idx["RUN"] == 5 + assert "IDLE" not in names_to_idx # IDLE index 0 == sequential → omitted + + +def test_fsm_reader_empty_data(): + """Empty bytes must not raise; still rebuilds dicts from cover items.""" + db, _ = _make_fsm_db() + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm is not None + # Dicts must be rebuilt even with empty fsm.json + FsmReader().apply(rdb, b"") + + +# ── Integration: NCDB round-trip ───────────────────────────────────────── + +def test_fsm_states_dict_rebuilt(): + """_states dict must be populated after NCDB round-trip.""" + db, _ = _make_fsm_db() + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm is not None + assert fsm.getNumStates() == 2 + assert "IDLE" in fsm._states + assert "RUN" in fsm._states + + +def test_fsm_transitions_dict_rebuilt(): + """_transitions dict must be populated after NCDB round-trip.""" + db, _ = _make_fsm_db() + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm.getNumTransitions() == 2 + assert ("IDLE", "RUN") in fsm._transitions + assert ("RUN", "IDLE") in fsm._transitions + + +def test_fsm_state_visit_counts(): + """State visit counts survive NCDB round-trip.""" + db, _ = _make_fsm_db( + state_data=[("IDLE", 0, 7), ("RUN", 1, 3)], + transition_data=[("IDLE", "RUN", 4), ("RUN", "IDLE", 0)], + ) + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm._states["IDLE"].visit_count == 7 + assert fsm._states["RUN"].visit_count == 3 + + +def test_fsm_transition_counts(): + """Transition counts survive NCDB round-trip.""" + db, _ = _make_fsm_db( + state_data=[("IDLE", 0, 0), ("RUN", 1, 0)], + transition_data=[("IDLE", "RUN", 11), ("RUN", "IDLE", 5)], + ) + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm._transitions[("IDLE", "RUN")].count == 11 + assert fsm._transitions[("RUN", "IDLE")].count == 5 + + +def test_fsm_state_index_round_trip(): + """Non-sequential state index survives round-trip via fsm.json.""" + db, _ = _make_fsm_db( + state_data=[("IDLE", 0, 0), ("RUN", 100, 0)], + transition_data=[("IDLE", "RUN", 0)], + ) + rdb, _ = _write_read(db) + fsm = _first_fsm(rdb) + assert fsm._states["RUN"].getIndex() == 100 + + +def test_fsm_absent_from_zip_when_sequential(): + """Sequential indices → fsm.json must be absent from ZIP.""" + db, _ = _make_fsm_db() + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_FSM not in zf.namelist() + + +def test_fsm_present_in_zip_when_nonsequential(): + """Non-sequential index → fsm.json must appear in ZIP.""" + db, _ = _make_fsm_db( + state_data=[("IDLE", 0, 0), ("RUN", 99, 0)], + transition_data=[("IDLE", "RUN", 0)], + ) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_FSM in zf.namelist() + + +def test_fsm_multiple_scopes(): + """Multiple FSM scopes in same DB each have dicts rebuilt.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + + for fname in ("fsm_a", "fsm_b", "fsm_c"): + du = db.createScope(fname, None, 1, SourceT.SV, ScopeTypeT.DU_MODULE, 0) + inst = db.createInstance(fname, None, 1, SourceT.SV, + ScopeTypeT.INSTANCE, du, 0) + fsm = MemFSMScope(inst, fname, None, 1, SourceT.SV) + inst.m_children.append(fsm) + fsm.m_parent = inst + s0 = fsm.createState("S0", 0) + s1 = fsm.createState("S1", 1) + fsm.createTransition(s0, s1) + + rdb, _ = _write_read(db) + count = 0 + def _walk(scope): + nonlocal count + if scope.getScopeType() == ScopeTypeT.FSM: + count += 1 + assert scope.getNumStates() == 2 + assert scope.getNumTransitions() == 1 + for child in scope.scopes(ScopeTypeT.ALL): + _walk(child) + for s in rdb.scopes(ScopeTypeT.ALL): + _walk(s) + assert count == 3 + + +# ── Regression: merged.cdb FSM round-trip ──────────────────────────────── + +_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +MERGED_CDB = os.path.join(os.path.dirname(__file__), "..", "..", "merged.cdb") + +needs_merged_cdb = pytest.mark.skipif( + not os.path.exists(MERGED_CDB), + reason="merged.cdb not present in tests/", +) + + +@needs_merged_cdb +def test_fsm_merged_cdb_states_transitions_intact(): + """FSM scopes from merged.cdb have correct state and transition counts.""" + import shutil + from ucis.sqlite.sqlite_ucis import SqliteUCIS + + with tempfile.TemporaryDirectory() as d: + cdb_copy = os.path.join(d, "merged_copy.cdb") + shutil.copy2(MERGED_CDB, cdb_copy) + sqlite_db = SqliteUCIS(cdb_copy) + + # Count total FSM states and transitions in SQLite + sqlite_states = 0 + sqlite_trans = 0 + + def _count_sqlite(scope): + nonlocal sqlite_states, sqlite_trans + if scope.getScopeType() == ScopeTypeT.FSM: + for child in scope.scopes(ScopeTypeT.ALL): + ct = child.getScopeType() + items = list(child.coverItems(CoverTypeT.ALL)) + if ct == ScopeTypeT.FSM_STATES: + sqlite_states += len(items) + elif ct == ScopeTypeT.FSM_TRANS: + sqlite_trans += len(items) + return + for c in scope.scopes(ScopeTypeT.ALL): + _count_sqlite(c) + + for s in sqlite_db.scopes(ScopeTypeT.ALL): + _count_sqlite(s) + sqlite_db.close() + + # Convert to NCDB and read back + ncdb_path = os.path.join(d, "out.cdb") + from ucis.sqlite.sqlite_ucis import SqliteUCIS as _SU + sqlite_db2 = _SU(cdb_copy) + NcdbWriter().write(sqlite_db2, ncdb_path) + sqlite_db2.close() + ncdb_db = NcdbReader().read(ncdb_path) + + ncdb_states = 0 + ncdb_trans = 0 + + def _count_ncdb(scope): + nonlocal ncdb_states, ncdb_trans + if scope.getScopeType() == ScopeTypeT.FSM: + ncdb_states += scope.getNumStates() + ncdb_trans += scope.getNumTransitions() + return + for c in scope.scopes(ScopeTypeT.ALL): + _count_ncdb(c) + + for s in ncdb_db.scopes(ScopeTypeT.ALL): + _count_ncdb(s) + + assert ncdb_states == sqlite_states, ( + f"state count mismatch: ncdb={ncdb_states} sqlite={sqlite_states}") + assert ncdb_trans == sqlite_trans, ( + f"transition count mismatch: ncdb={ncdb_trans} sqlite={sqlite_trans}") diff --git a/tests/unit/ncdb/test_manifest.py b/tests/unit/ncdb/test_manifest.py new file mode 100644 index 0000000..b302d06 --- /dev/null +++ b/tests/unit/ncdb/test_manifest.py @@ -0,0 +1,107 @@ +""" +Manifest round-trip and validation tests. +""" + +import json +import hashlib +import tempfile +import os + +import pytest + +from ucis.mem.mem_ucis import MemUCIS +from ucis.source_t import SourceT +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind + +from ucis.ncdb.manifest import Manifest +from ucis.ncdb.string_table import StringTable +from ucis.ncdb.scope_tree import ScopeTreeWriter +from ucis.ncdb.constants import NCDB_FORMAT + + +def _make_db(): + db = MemUCIS() + db.createHistoryNode(None, "t1", None, HistoryNodeKind.TEST) + blk = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = 5 + blk.createNextCover("s0", cd, None) + return db + + +def _scope_tree_bytes(db): + st = StringTable() + writer = ScopeTreeWriter(st) + return writer.write(db) + + +def test_manifest_format_field(): + db = _make_db() + tree = _scope_tree_bytes(db) + m = Manifest.build(db, tree, [], []) + data = json.loads(m.serialize()) + assert data["format"] == NCDB_FORMAT + + +def test_manifest_schema_hash_sha256(): + db = _make_db() + tree = _scope_tree_bytes(db) + m = Manifest.build(db, tree, [], []) + data = json.loads(m.serialize()) + expected = "sha256:" + hashlib.sha256(tree).hexdigest() + assert data["schema_hash"] == expected + + +def test_manifest_schema_hash_deterministic(): + """Same DB → same schema hash.""" + db1 = _make_db() + db2 = _make_db() + tree1 = _scope_tree_bytes(db1) + tree2 = _scope_tree_bytes(db2) + m1 = Manifest.build(db1, tree1, [], []) + m2 = Manifest.build(db2, tree2, [], []) + assert m1.schema_hash == m2.schema_hash + + +def test_manifest_different_schema_different_hash(): + """DBs with different scope trees get different hashes.""" + db1 = _make_db() + db2 = MemUCIS() + db2.createScope("other", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + tree1 = _scope_tree_bytes(db1) + tree2 = _scope_tree_bytes(db2) + m1 = Manifest.build(db1, tree1, [], []) + m2 = Manifest.build(db2, tree2, [], []) + assert m1.schema_hash != m2.schema_hash + + +def test_manifest_round_trip(): + """Manifest serializes and deserializes cleanly.""" + db = _make_db() + tree = _scope_tree_bytes(db) + m = Manifest.build(db, tree, [], []) + data = m.serialize() + m2 = Manifest.from_bytes(data) + assert m2.format == m.format + assert m2.schema_hash == m.schema_hash + assert m2.path_separator == m.path_separator + + +def test_manifest_version_present(): + db = _make_db() + tree = _scope_tree_bytes(db) + m = Manifest.build(db, tree, [], []) + data = json.loads(m.serialize()) + assert "version" in data + + +def test_manifest_path_separator_stored(): + db = MemUCIS() + db.setPathSeparator(".") + tree = _scope_tree_bytes(db) + m = Manifest.build(db, tree, [], []) + data = json.loads(m.serialize()) + assert data.get("path_separator") == "." diff --git a/tests/unit/ncdb/test_merger.py b/tests/unit/ncdb/test_merger.py new file mode 100644 index 0000000..a2a688e --- /dev/null +++ b/tests/unit/ncdb/test_merger.py @@ -0,0 +1,336 @@ +""" +Tests for ucis.ncdb.ncdb_merger — same-schema and cross-schema merge. +""" + +import os +import tempfile +import pytest + +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_merger import NcdbMerger +from ucis.ncdb.manifest import Manifest +from ucis.cover_type_t import CoverTypeT +from ucis.scope_type_t import ScopeTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_ucis import MemUCIS +from ucis.source_t import SourceT +from ucis.cover_data import CoverData + + +# ── Helper: build a simple UCIS DB ──────────────────────────────────────── + +def _make_simple_db(counts, name="test1"): + """Build a MemUCIS with a single BLOCK scope containing STMTBIN items.""" + db = MemUCIS() + hn = db.createHistoryNode(None, name, None, HistoryNodeKind.TEST) + block = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + for i, c in enumerate(counts): + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = c + block.createNextCover(f"stmt_{i}", cd, None) + return db + + +def _write_ncdb(db, path): + NcdbWriter().write(db, path) + + +def _collect_counts(db): + counts = [] + def _walk(scope): + for ci in scope.coverItems(CoverTypeT.ALL): + counts.append(ci.getCoverData().data) + for child in scope.scopes(ScopeTypeT.ALL): + _walk(child) + for top in db.scopes(ScopeTypeT.ALL): + _walk(top) + return counts + + +# ── Same-schema merge ───────────────────────────────────────────────────── + +def test_same_schema_merge_counts(): + """Merged counts must equal element-wise sum of sources.""" + counts_a = [1, 0, 3, 0, 5] + counts_b = [0, 2, 0, 4, 0] + expected = [1, 2, 3, 4, 5] + + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + + _write_ncdb(_make_simple_db(counts_a, "test_a"), pa) + _write_ncdb(_make_simple_db(counts_b, "test_b"), pb) + + NcdbMerger().merge([pa, pb], pm) + + merged_db = NcdbReader().read(pm) + result = _collect_counts(merged_db) + assert result == expected + + +def test_same_schema_merge_item_count(): + """Merged database must have same number of items as each source.""" + counts = [10, 20, 30] + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + _write_ncdb(_make_simple_db(counts, "a"), pa) + _write_ncdb(_make_simple_db(counts, "b"), pb) + NcdbMerger().merge([pa, pb], pm) + merged_db = NcdbReader().read(pm) + result = _collect_counts(merged_db) + assert len(result) == len(counts) + + +def test_multi_source_merge(): + """Merge 4 sources: result counts == sum of all.""" + import random + rng = random.Random(42) + N = 20 + source_counts = [[rng.randint(0, 10) for _ in range(N)] for _ in range(4)] + expected = [sum(source_counts[j][i] for j in range(4)) for i in range(N)] + + with tempfile.TemporaryDirectory() as d: + paths = [] + for k, cnts in enumerate(source_counts): + p = os.path.join(d, f"src_{k}.cdb") + _write_ncdb(_make_simple_db(cnts, f"t{k}"), p) + paths.append(p) + pm = os.path.join(d, "merged.cdb") + NcdbMerger().merge(paths, pm) + merged_db = NcdbReader().read(pm) + assert _collect_counts(merged_db) == expected + + +def test_same_schema_preserves_schema_hash(): + """Same-schema merge output should share the schema_hash of sources.""" + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + db = _make_simple_db([1, 2, 3], "t") + _write_ncdb(db, pa) + _write_ncdb(db, pb) + NcdbMerger().merge([pa, pb], pm) + + import zipfile, json + with zipfile.ZipFile(pa) as zf: + mf_a = json.loads(zf.read("manifest.json")) + with zipfile.ZipFile(pm) as zf: + mf_m = json.loads(zf.read("manifest.json")) + assert mf_a["schema_hash"] == mf_m["schema_hash"] + + +# ── History accumulation ────────────────────────────────────────────────── + +def test_merge_history_accumulation(): + """All source TEST history nodes must appear in merged output.""" + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + _write_ncdb(_make_simple_db([1], "test_alpha"), pa) + _write_ncdb(_make_simple_db([2], "test_beta"), pb) + NcdbMerger().merge([pa, pb], pm) + merged_db = NcdbReader().read(pm) + names = {n.getLogicalName() + for n in merged_db.historyNodes(HistoryNodeKind.TEST)} + assert "test_alpha" in names + assert "test_beta" in names + + +def test_merge_adds_merge_history_node(): + """A MERGE HistoryNode should be present in the merged output.""" + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + _write_ncdb(_make_simple_db([1], "t1"), pa) + _write_ncdb(_make_simple_db([1], "t2"), pb) + NcdbMerger().merge([pa, pb], pm) + merged_db = NcdbReader().read(pm) + merge_nodes = list(merged_db.historyNodes(HistoryNodeKind.MERGE)) + assert len(merge_nodes) >= 1 + + +# ── Nested INSTANCE merge (DbMerger recursive walk) ────────────────────── + +def _make_nested_db(counts, hist_name): + """DB with top -> mid -> leaf nested INSTANCE hierarchy; leaf has a BLOCK + scope containing STMTBIN items whose hit counts are *counts*.""" + db = MemUCIS() + db.createHistoryNode(None, hist_name, None, HistoryNodeKind.TEST) + + def _make_level(parent, inst_name): + du = parent.createScope(inst_name, None, 1, SourceT.SV, ScopeTypeT.DU_MODULE, 0) + return parent.createInstance(inst_name, None, 1, SourceT.SV, + ScopeTypeT.INSTANCE, du, 0) + + top = _make_level(db, "top") + mid = _make_level(top, "mid") + leaf = _make_level(mid, "leaf") + + block = leaf.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + for i, c in enumerate(counts): + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = c + block.createNextCover(f"stmt_{i}", cd, None) + return db + + +def test_nested_instance_merge_structure(): + """Merged DB must contain the same nested INSTANCE hierarchy.""" + from ucis.mem.mem_ucis import MemUCIS + from ucis.merge.db_merger import DbMerger + + db_a = _make_nested_db([1, 2, 3], "a") + db_b = _make_nested_db([4, 5, 6], "b") + + dst = MemUCIS() + DbMerger().merge(dst, [db_a, db_b]) + + # Navigate top -> mid -> leaf + tops = list(dst.scopes(ScopeTypeT.INSTANCE)) + assert len(tops) == 1 and tops[0].getScopeName() == "top" + mids = list(tops[0].scopes(ScopeTypeT.INSTANCE)) + assert len(mids) == 1 and mids[0].getScopeName() == "mid" + leafs = list(mids[0].scopes(ScopeTypeT.INSTANCE)) + assert len(leafs) == 1 and leafs[0].getScopeName() == "leaf" + + +def test_nested_instance_merge_counts(): + """Coverage counts in nested INSTANCE must be summed.""" + from ucis.mem.mem_ucis import MemUCIS + from ucis.merge.db_merger import DbMerger + + counts_a = [1, 2, 3] + counts_b = [10, 20, 30] + expected = [11, 22, 33] + + db_a = _make_nested_db(counts_a, "a") + db_b = _make_nested_db(counts_b, "b") + + dst = MemUCIS() + DbMerger().merge(dst, [db_a, db_b]) + + tops = list(dst.scopes(ScopeTypeT.INSTANCE)) + mids = list(tops[0].scopes(ScopeTypeT.INSTANCE)) + leafs = list(mids[0].scopes(ScopeTypeT.INSTANCE)) + blocks = list(leafs[0].scopes(ScopeTypeT.BLOCK)) + result = [ci.getCoverData().data + for ci in blocks[0].coverItems(CoverTypeT.ALL)] + assert result == expected + + +def test_nested_instance_merge_history(): + """Both source history nodes must appear in merged DB.""" + from ucis.mem.mem_ucis import MemUCIS + from ucis.merge.db_merger import DbMerger + + dst = MemUCIS() + DbMerger().merge(dst, [ + _make_nested_db([1], "test_alpha"), + _make_nested_db([2], "test_beta"), + ]) + names = {n.getLogicalName() + for n in dst.historyNodes(HistoryNodeKind.TEST)} + assert "test_alpha" in names + assert "test_beta" in names + + +# ── Cross-schema merge ──────────────────────────────────────────────────── + +def _make_db_with_scope(scope_name, bin_count, hist_name): + """DB with a BLOCK scope under a INSTANCE->DU structure.""" + from ucis.mem.mem_ucis import MemUCIS + db = MemUCIS() + db.createHistoryNode(None, hist_name, None, HistoryNodeKind.TEST) + # Create a DU + INSTANCE to satisfy DbMerger's INSTANCE-based walk + du = db.createScope(scope_name, None, 1, SourceT.SV, ScopeTypeT.DU_MODULE, 0) + inst = db.createInstance(scope_name, None, 1, SourceT.SV, + ScopeTypeT.INSTANCE, du, 0) + block = inst.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + for i in range(bin_count): + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = i + 1 + block.createNextCover(f"stmt_{i}", cd, None) + return db + + +def test_cross_schema_merge_union(): + """Cross-schema merge should contain bins from both sources.""" + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pb = os.path.join(d, "b.cdb") + pm = os.path.join(d, "merged.cdb") + _write_ncdb(_make_db_with_scope("module_A", 3, "test_a"), pa) + _write_ncdb(_make_db_with_scope("module_B", 4, "test_b"), pb) + NcdbMerger().merge([pa, pb], pm) + merged_db = NcdbReader().read(pm) + + def _count_all(db): + total = 0 + def _walk(scope): + nonlocal total + total += sum(1 for _ in scope.coverItems(CoverTypeT.ALL)) + for child in scope.scopes(ScopeTypeT.ALL): + _walk(child) + for top in db.scopes(ScopeTypeT.ALL): + _walk(top) + return total + + # module_A has 3 bins, module_B has 4 bins; union must have at least 7 + assert _count_all(merged_db) >= 7 + + +# ── Idempotency: merge of single source ────────────────────────────────── + +def test_single_source_merge_is_identity(): + """Merging a single NCDB into itself should preserve counts exactly.""" + counts = [5, 0, 3, 7, 1] + with tempfile.TemporaryDirectory() as d: + pa = os.path.join(d, "a.cdb") + pm = os.path.join(d, "merged.cdb") + _write_ncdb(_make_simple_db(counts, "only"), pa) + NcdbMerger().merge([pa], pm) + merged_db = NcdbReader().read(pm) + assert _collect_counts(merged_db) == counts + + +# ── merged.cdb round-trip via NCDB merge ───────────────────────────────── + +_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +MERGED_CDB = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "merged.cdb")) + +needs_merged_cdb = pytest.mark.skipif( + not os.path.exists(MERGED_CDB), + reason="merged.cdb not present in tests/", +) + + +@needs_merged_cdb +def test_merge_merged_cdb_same_schema(): + """Merging merged.cdb with itself should double all counts.""" + from ucis.sqlite.sqlite_ucis import SqliteUCIS + + sqlite_db = SqliteUCIS(MERGED_CDB) + with tempfile.TemporaryDirectory() as d: + p1 = os.path.join(d, "copy1.cdb") + p2 = os.path.join(d, "copy2.cdb") + pm = os.path.join(d, "double.cdb") + NcdbWriter().write(sqlite_db, p1) + NcdbWriter().write(sqlite_db, p2) + NcdbMerger().merge([p1, p2], pm) + + merged = NcdbReader().read(pm) + orig_counts = _collect_counts(NcdbReader().read(p1)) + merged_counts = _collect_counts(merged) + + assert len(orig_counts) == len(merged_counts) + for o, m in zip(orig_counts, merged_counts): + assert m == o * 2, f"Expected {o*2}, got {m}" + sqlite_db.close() diff --git a/tests/unit/ncdb/test_properties.py b/tests/unit/ncdb/test_properties.py new file mode 100644 index 0000000..e8fbdad --- /dev/null +++ b/tests/unit/ncdb/test_properties.py @@ -0,0 +1,160 @@ +""" +Tests for PropertiesWriter / PropertiesReader round-trip. +""" + +import tempfile +import os + +import pytest + +from ucis.mem.mem_ucis import MemUCIS +from ucis.source_t import SourceT +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.str_property import StrProperty + +from ucis.ncdb.properties import PropertiesWriter, PropertiesReader +from ucis.ncdb.dfs_util import dfs_scope_list +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader + + +def _make_db_with_comment(comment="hello"): + db = MemUCIS() + db.createHistoryNode(None, "t1", None, HistoryNodeKind.TEST) + blk = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = 1 + blk.createNextCover("s0", cd, None) + blk.setStringProperty(-1, StrProperty.COMMENT, comment) + return db + + +# ── PropertiesWriter unit tests ──────────────────────────────────────────── + +def test_no_properties_returns_empty(): + """DB with no string properties → empty bytes.""" + db = MemUCIS() + db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + result = PropertiesWriter().serialize(db) + assert result == b"" + + +def test_single_comment_serializes(): + """A scope with COMMENT → non-empty JSON.""" + db = _make_db_with_comment("my comment") + result = PropertiesWriter().serialize(db) + assert result != b"" + import json + payload = json.loads(result) + assert payload["version"] == 1 + assert len(payload["entries"]) == 1 + entry = payload["entries"][0] + assert entry["kind"] == "scope" + assert entry["key"] == int(StrProperty.COMMENT) + assert entry["type"] == "str" + assert entry["value"] == "my comment" + + +def test_multiple_scopes_multiple_comments(): + """Two scopes with different comments.""" + db = MemUCIS() + s1 = db.createScope("a", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + s2 = db.createScope("b", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + s1.setStringProperty(-1, StrProperty.COMMENT, "comment A") + s2.setStringProperty(-1, StrProperty.COMMENT, "comment B") + result = PropertiesWriter().serialize(db) + import json + payload = json.loads(result) + assert len(payload["entries"]) == 2 + values = {e["idx"]: e["value"] for e in payload["entries"]} + # DFS order: a=0, b=1 + assert values[0] == "comment A" + assert values[1] == "comment B" + + +# ── Round-trip via apply() ───────────────────────────────────────────────── + +def test_properties_round_trip_via_apply(): + """serialize + apply restores COMMENT on the scope.""" + db_src = _make_db_with_comment("test comment") + data = PropertiesWriter().serialize(db_src) + + # New empty DB with same scope structure + db_dst = MemUCIS() + db_dst.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + + PropertiesReader().apply(db_dst, data) + + scopes = dfs_scope_list(db_dst) + val = scopes[0].getStringProperty(-1, StrProperty.COMMENT) + assert val == "test comment" + + +def test_apply_empty_bytes_is_noop(): + """apply() with empty bytes does nothing.""" + db = MemUCIS() + db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + PropertiesReader().apply(db, b"") # must not raise + + +def test_apply_out_of_range_index_ignored(): + """apply() silently ignores entries whose idx > scope count.""" + import json + payload = {"version": 1, "entries": [ + {"kind": "scope", "idx": 9999, "key": int(StrProperty.COMMENT), + "type": "str", "value": "x"} + ]} + db = MemUCIS() + db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + PropertiesReader().apply(db, json.dumps(payload).encode()) # must not raise + + +# ── Full NCDB round-trip ─────────────────────────────────────────────────── + +def test_ncdb_round_trip_preserves_comment(): + """NcdbWriter → NcdbReader preserves COMMENT string property.""" + db = _make_db_with_comment("round-trip comment") + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + rt = NcdbReader().read(path) + scopes = dfs_scope_list(rt) + val = scopes[0].getStringProperty(-1, StrProperty.COMMENT) + assert val == "round-trip comment" + finally: + os.unlink(path) + + +def test_ncdb_round_trip_no_properties_member_when_empty(): + """NcdbWriter doesn't add properties.json when there are no properties.""" + import zipfile + from ucis.ncdb.constants import MEMBER_PROPERTIES + db = MemUCIS() + db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_PROPERTIES not in zf.namelist() + finally: + os.unlink(path) + + +def test_ncdb_round_trip_properties_member_present_when_set(): + """NcdbWriter writes properties.json when a scope has a comment.""" + import zipfile + from ucis.ncdb.constants import MEMBER_PROPERTIES + db = _make_db_with_comment("x") + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as f: + path = f.name + try: + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_PROPERTIES in zf.namelist() + finally: + os.unlink(path) diff --git a/tests/unit/ncdb/test_roundtrip.py b/tests/unit/ncdb/test_roundtrip.py new file mode 100644 index 0000000..fb8ccd3 --- /dev/null +++ b/tests/unit/ncdb/test_roundtrip.py @@ -0,0 +1,324 @@ +""" +Round-trip test: SQLite merged.cdb → NCDB → compare. + +Loads the existing merged.cdb SQLite database, writes it as NCDB, +reads it back, and validates that scope tree structure, coveritem +counts, and history are preserved. +""" + +import os +import tempfile +import pytest + +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.format_detect import detect_cdb_format +from ucis.cover_type_t import CoverTypeT +from ucis.scope_type_t import ScopeTypeT +from ucis.history_node_kind import HistoryNodeKind + +# Path to repo's merged.cdb SQLite file +_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +MERGED_CDB = os.path.join(_REPO_ROOT, "merged.cdb") + +needs_merged_cdb = pytest.mark.skipif( + not os.path.exists(MERGED_CDB), + reason="merged.cdb not present in tests/", +) + + +def _load_sqlite(path): + """Load the SQLite CDB via the SqliteUCIS backend.""" + from ucis.sqlite.sqlite_ucis import SqliteUCIS + return SqliteUCIS(path) + + +def _collect_coveritems(db): + """DFS collect all (scope_path, cover_name, count) tuples (ordered).""" + results = [] + + def _walk(scope, path): + for ci in scope.coverItems(CoverTypeT.ALL): + count = ci.getCoverData().data + results.append((path, ci.getName(), count)) + for child in scope.scopes(ScopeTypeT.ALL): + _walk(child, path + "/" + child.getScopeName()) + + for top in db.scopes(ScopeTypeT.ALL): + _walk(top, top.getScopeName()) + + return results + + +@needs_merged_cdb +def test_format_detect_merged_cdb(): + assert detect_cdb_format(MERGED_CDB) == "sqlite" + + +@needs_merged_cdb +def test_round_trip_counts(): + """Hit counts must be identical after SQLite → NCDB → read back.""" + sqlite_db = _load_sqlite(MERGED_CDB) + orig_items = _collect_coveritems(sqlite_db) + + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + + try: + NcdbWriter().write(sqlite_db, out_path) + assert detect_cdb_format(out_path) == "ncdb" + + ncdb_db = NcdbReader().read(out_path) + rt_items = _collect_coveritems(ncdb_db) + + assert len(orig_items) == len(rt_items), ( + f"Item count mismatch: {len(orig_items)} → {len(rt_items)}") + + for i, ((op, on, oc), (rp, rn, rc)) in enumerate(zip(orig_items, rt_items)): + assert op == rp, f"Path mismatch at index {i}: {op!r} vs {rp!r}" + assert on == rn, f"Name mismatch at index {i}: {on!r} vs {rn!r}" + assert oc == rc, ( + f"Count mismatch at index {i} ({op}/{on}): " + f"expected {oc}, got {rc}") + finally: + os.unlink(out_path) + sqlite_db.close() + + +@needs_merged_cdb +def test_round_trip_scope_structure(): + """Top-level scope names and types must be preserved.""" + sqlite_db = _load_sqlite(MERGED_CDB) + orig_scopes = [(s.getScopeName(), s.getScopeType()) + for s in sqlite_db.scopes(ScopeTypeT.ALL)] + + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(sqlite_db, out_path) + ncdb_db = NcdbReader().read(out_path) + rt_scopes = [(s.getScopeName(), s.getScopeType()) + for s in ncdb_db.scopes(ScopeTypeT.ALL)] + assert orig_scopes == rt_scopes + finally: + os.unlink(out_path) + sqlite_db.close() + + +@needs_merged_cdb +def test_round_trip_history(): + """History node logical names must be preserved.""" + sqlite_db = _load_sqlite(MERGED_CDB) + orig_names = sorted( + n.getLogicalName() + for n in sqlite_db.historyNodes(HistoryNodeKind.TEST) + ) + + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(sqlite_db, out_path) + ncdb_db = NcdbReader().read(out_path) + rt_names = sorted( + n.getLogicalName() + for n in ncdb_db.historyNodes(HistoryNodeKind.TEST) + ) + assert orig_names == rt_names + finally: + os.unlink(out_path) + sqlite_db.close() + + +@needs_merged_cdb +def test_ncdb_smaller_than_sqlite(): + """NCDB file should be substantially smaller than the SQLite original.""" + sqlite_size = os.path.getsize(MERGED_CDB) + sqlite_db = _load_sqlite(MERGED_CDB) + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(sqlite_db, out_path) + ncdb_size = os.path.getsize(out_path) + # Expect at least 10× size reduction (design doc claims 73×) + assert ncdb_size < sqlite_size / 10, ( + f"NCDB ({ncdb_size} B) not much smaller than SQLite ({sqlite_size} B)") + finally: + os.unlink(out_path) + sqlite_db.close() + + +def _make_db_with_attrs_and_tags(): + """Build a MemUCIS with attributes and tags on some scopes.""" + from ucis.mem.mem_ucis import MemUCIS + from ucis.source_t import SourceT + from ucis.cover_data import CoverData + from ucis.history_node_kind import HistoryNodeKind + db = MemUCIS() + db.createHistoryNode(None, "attrs_test", None, HistoryNodeKind.TEST) + block = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + block.setAttribute("author", "tester") + block.setAttribute("reviewed", "yes") + block.addTag("important") + block.addTag("regression") + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = 5 + block.createNextCover("stmt_0", cd, None) + child = block.createScope("inner", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + child.setAttribute("notes", "inner block") + child.addTag("skip") + return db + + +def test_attrs_round_trip(): + """User-defined attributes must be preserved across write/read.""" + db = _make_db_with_attrs_and_tags() + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(db, out_path) + rt_db = NcdbReader().read(out_path) + top = list(rt_db.scopes(ScopeTypeT.BLOCK))[0] + assert top.getAttribute("author") == "tester" + assert top.getAttribute("reviewed") == "yes" + children = list(top.scopes(ScopeTypeT.BLOCK)) + inner = children[0] + assert inner.getAttribute("notes") == "inner block" + finally: + os.unlink(out_path) + + +def test_tags_round_trip(): + """Scope tags must be preserved across write/read.""" + db = _make_db_with_attrs_and_tags() + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(db, out_path) + rt_db = NcdbReader().read(out_path) + top = list(rt_db.scopes(ScopeTypeT.BLOCK))[0] + tags = set(top.getTags()) + assert "important" in tags + assert "regression" in tags + inner = list(top.scopes(ScopeTypeT.BLOCK))[0] + assert "skip" in set(inner.getTags()) + finally: + os.unlink(out_path) + + +@needs_merged_cdb +def test_ncdb_ucis_lazy_loading(): + """NcdbUCIS loads history without parsing scope tree, then loads scopes lazily.""" + import tempfile, os + from ucis.db_format_rgy import DbFormatRgy + from ucis.ncdb.ncdb_ucis import NcdbUCIS + + sqlite_db = _load_sqlite(MERGED_CDB) + with tempfile.NamedTemporaryFile(suffix=".cdb", delete=False) as tf: + out_path = tf.name + try: + NcdbWriter().write(sqlite_db, out_path) + sqlite_db.close() + + lazy_db = NcdbUCIS(out_path) + assert not lazy_db._loaded_scopes, "Scopes should not be loaded yet" + + # Read history without triggering scope load + hn_names = [hn.getLogicalName() + for hn in lazy_db.historyNodes(HistoryNodeKind.TEST)] + assert len(hn_names) > 0 + assert not lazy_db._loaded_scopes, "Scopes should still not be loaded" + + # Accessing scopes triggers scope load + scope_list = list(lazy_db.scopes(ScopeTypeT.ALL)) + assert len(scope_list) > 0 + assert lazy_db._loaded_scopes + + # Coveritem count should match original + rt_items = _collect_coveritems(lazy_db) + assert len(rt_items) == 131923 # known count from merged.cdb + finally: + os.unlink(out_path) + + +# ─── Regression tests: FSM scope duplication and BRANCHBIN type ─────────── + +@needs_merged_cdb +def test_fsm_scope_counts_not_doubled(): + """MemFSMScope must not duplicate FSM_STATES/FSM_TRANS on NCDB round-trip. + + Regression for bug where createScope(FSM_STATES) on MemFSMScope created a + second FSM_STATES child instead of returning the pre-existing one, doubling + FSM_STATES and FSM_TRANS counts (16 → 32). + """ + import shutil + import tempfile + from ucis.db_format_rgy import DbFormatRgy + from ucis.ncdb.ncdb_writer import NcdbWriter + from ucis.ncdb.ncdb_reader import NcdbReader + + def _count_type(db, scope_type): + n = [0] + def _walk(scope): + for child in scope.scopes(ScopeTypeT.ALL): + if child.getScopeType() == scope_type: + n[0] += 1 + _walk(child) + for s in db.scopes(ScopeTypeT.ALL): + if s.getScopeType() == scope_type: + n[0] += 1 + _walk(s) + return n[0] + + rgy = DbFormatRgy.inst() + with tempfile.TemporaryDirectory() as d: + src_copy = os.path.join(d, "src.cdb") + out_path = os.path.join(d, "out.cdb") + shutil.copy2(MERGED_CDB, src_copy) + src_db = rgy.getFormatIf('sqlite').read(src_copy) + orig_states = _count_type(src_db, ScopeTypeT.FSM_STATES) + orig_trans = _count_type(src_db, ScopeTypeT.FSM_TRANS) + orig_fsm = _count_type(src_db, ScopeTypeT.FSM) + NcdbWriter().write(src_db, out_path) + rt = NcdbReader().read(out_path) + assert _count_type(rt, ScopeTypeT.FSM) == orig_fsm, "FSM count mismatch" + assert _count_type(rt, ScopeTypeT.FSM_STATES) == orig_states, "FSM_STATES doubled" + assert _count_type(rt, ScopeTypeT.FSM_TRANS) == orig_trans, "FSM_TRANS doubled" + + +@needs_merged_cdb +def test_branchbin_not_mistyped_as_togglebin(): + """Non-toggle BRANCH scopes must keep BRANCHBIN type through NCDB round-trip. + + Regression for bug where SCOPE_TO_COVER_TYPE mapped BRANCH→TOGGLEBIN, + causing BRANCHBIN cover items to be serialised and read back as TOGGLEBIN. + """ + import shutil + import tempfile + from ucis.db_format_rgy import DbFormatRgy + from ucis.ncdb.ncdb_writer import NcdbWriter + from ucis.ncdb.ncdb_reader import NcdbReader + from ucis.cover_type_t import CoverTypeT + + def _count_cover(db, cover_type): + n = [0] + def _walk(scope): + for _ci in scope.coverItems(cover_type): + n[0] += 1 + for child in scope.scopes(ScopeTypeT.ALL): + _walk(child) + for s in db.scopes(ScopeTypeT.ALL): + _walk(s) + return n[0] + + rgy = DbFormatRgy.inst() + with tempfile.TemporaryDirectory() as d: + src_copy = os.path.join(d, "src.cdb") + out_path = os.path.join(d, "out.cdb") + shutil.copy2(MERGED_CDB, src_copy) + src_db = rgy.getFormatIf('sqlite').read(src_copy) + orig_branch = _count_cover(src_db, CoverTypeT.BRANCHBIN) + assert orig_branch > 0, "merged.cdb should have BRANCHBIN items" + NcdbWriter().write(src_db, out_path) + rt = NcdbReader().read(out_path) + assert _count_cover(rt, CoverTypeT.BRANCHBIN) == orig_branch, \ + "BRANCHBIN items were lost or mistyped as TOGGLEBIN" diff --git a/tests/unit/ncdb/test_scope_tree.py b/tests/unit/ncdb/test_scope_tree.py new file mode 100644 index 0000000..7902870 --- /dev/null +++ b/tests/unit/ncdb/test_scope_tree.py @@ -0,0 +1,229 @@ +""" +Scope-tree encoder/decoder tests. + +Covers: single scope, deep hierarchy, wide hierarchy, +toggle-pair encoding, mixed scope types, presence bitfield edge cases. +""" + +import io +import pytest + +from ucis.mem.mem_ucis import MemUCIS +from ucis.source_t import SourceT +from ucis.scope_type_t import ScopeTypeT +from ucis.cover_type_t import CoverTypeT +from ucis.cover_data import CoverData +from ucis.history_node_kind import HistoryNodeKind + +from ucis.ncdb.string_table import StringTable +from ucis.ncdb.scope_tree import ScopeTreeWriter, ScopeTreeReader +from ucis.ncdb.counts import CountsWriter, CountsReader +from ucis.ncdb.constants import TOGGLE_BIN_0_TO_1, TOGGLE_BIN_1_TO_0 + + +# ── Helpers ──────────────────────────────────────────────────────────────── + +def _roundtrip(db_in: MemUCIS) -> MemUCIS: + """Encode *db_in* scope tree → bytes, decode into a fresh MemUCIS.""" + st = StringTable() + fh = [] + writer = ScopeTreeWriter(st, fh) + tree_bytes = writer.write(db_in) + counts_bytes = CountsWriter().serialize(writer.counts_list) + + db_out = MemUCIS() + counts_iter = iter(CountsReader().deserialize(counts_bytes)) + ScopeTreeReader(st, fh).read(tree_bytes, db_out, counts_iter) + return db_out + + +def _add_stmtbin(scope, name: str, count: int = 0): + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = count + scope.createNextCover(name, cd, None) + + +def _add_toggle_pair(parent, name: str, c0=0, c1=0): + """Create a BRANCH scope with 2 TOGGLEBIN coveritems (toggle-pair pattern).""" + branch = parent.createScope(name, None, 1, SourceT.SV, ScopeTypeT.BRANCH, 0) + for bin_name, count in ((TOGGLE_BIN_0_TO_1, c0), (TOGGLE_BIN_1_TO_0, c1)): + cd = CoverData(CoverTypeT.TOGGLEBIN, 0) + cd.data = count + branch.createNextCover(bin_name, cd, None) + return branch + + +# ── Tests ────────────────────────────────────────────────────────────────── + +def test_single_scope_no_children(): + db = MemUCIS() + db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + rt = _roundtrip(db) + scopes = list(rt.scopes(ScopeTypeT.ALL)) + assert len(scopes) == 1 + assert scopes[0].getScopeName() == "top" + assert scopes[0].getScopeType() == ScopeTypeT.BLOCK + + +def test_single_scope_with_coveritems(): + db = MemUCIS() + block = db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + _add_stmtbin(block, "s0", 3) + _add_stmtbin(block, "s1", 7) + + rt = _roundtrip(db) + blk = list(rt.scopes(ScopeTypeT.ALL))[0] + items = list(blk.coverItems(CoverTypeT.ALL)) + assert len(items) == 2 + assert items[0].getName() == "s0" + assert items[0].getCoverData().data == 3 + assert items[1].getName() == "s1" + assert items[1].getCoverData().data == 7 + + +def test_deep_hierarchy_100_levels(): + """100-level deep hierarchy should encode/decode correctly.""" + db = MemUCIS() + cur = db + for i in range(100): + scope_type = ScopeTypeT.BLOCK + child = cur.createScope(f"level_{i}", None, 1, SourceT.SV, scope_type, 0) + cur = child + + rt = _roundtrip(db) + + # Walk down 100 levels + cur = rt + for i in range(100): + children = list(cur.scopes(ScopeTypeT.ALL)) + assert len(children) == 1, f"Level {i}: expected 1 child, got {len(children)}" + assert children[0].getScopeName() == f"level_{i}" + cur = children[0] + + +def test_wide_hierarchy_1000_children(): + """Single scope with 1000 BLOCK children.""" + db = MemUCIS() + top = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + for i in range(1000): + child = top.createScope(f"c{i}", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + _add_stmtbin(child, "s0", i) + + rt = _roundtrip(db) + children = list(list(rt.scopes(ScopeTypeT.ALL))[0].scopes(ScopeTypeT.ALL)) + assert len(children) == 1000 + for i, child in enumerate(children): + assert child.getScopeName() == f"c{i}" + items = list(child.coverItems(CoverTypeT.ALL)) + assert items[0].getCoverData().data == i + + +def test_toggle_pair_encoding(): + """BRANCH+2×TOGGLEBIN must be encoded as TOGGLE_PAIR (2 bytes + varint).""" + db = MemUCIS() + top = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + _add_toggle_pair(top, "sig_a", c0=5, c1=3) + _add_toggle_pair(top, "sig_b", c0=0, c1=0) + + st = StringTable() + fh = [] + writer = ScopeTreeWriter(st, fh) + tree_bytes = writer.write(db) + + # TOGGLE_PAIR (0x01) records are much smaller than REGULAR (0x00) records + # The writer should have emitted 2 TOGGLE_PAIR records + assert writer.counts_list == [5, 3, 0, 0] # two pairs + + rt = _roundtrip(db) + top_rt = list(rt.scopes(ScopeTypeT.ALL))[0] + branches = list(top_rt.scopes(ScopeTypeT.BRANCH)) + assert len(branches) == 2 + assert branches[0].getScopeName() == "sig_a" + items_a = {ci.getName(): ci.getCoverData().data + for ci in branches[0].coverItems(CoverTypeT.ALL)} + assert items_a[TOGGLE_BIN_0_TO_1] == 5 + assert items_a[TOGGLE_BIN_1_TO_0] == 3 + + +def test_non_toggle_branch_not_compressed(): + """BRANCH scope with non-toggle bins must NOT use TOGGLE_PAIR encoding.""" + db = MemUCIS() + top = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + branch = top.createScope("br", None, 1, SourceT.SV, ScopeTypeT.BRANCH, 0) + cd = CoverData(CoverTypeT.BRANCHBIN, 0) + cd.data = 2 + branch.createNextCover("taken", cd, None) + cd2 = CoverData(CoverTypeT.BRANCHBIN, 0) + cd2.data = 0 + branch.createNextCover("not_taken", cd2, None) + + rt = _roundtrip(db) + top_rt = list(rt.scopes(ScopeTypeT.ALL))[0] + branches = list(top_rt.scopes(ScopeTypeT.BRANCH)) + assert len(branches) == 1 + items = {ci.getName(): ci.getCoverData().data + for ci in branches[0].coverItems(CoverTypeT.ALL)} + assert items["taken"] == 2 + assert items["not_taken"] == 0 + + +def test_presence_bitfield_no_source_info(): + """Scopes without source info should decode with file=None in srcinfo.""" + db = MemUCIS() + db.createScope("no_src", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + rt = _roundtrip(db) + scope = list(rt.scopes(ScopeTypeT.ALL))[0] + # MemScope always stores a SourceInfo even for None; check file is None + srcinfo = scope.getSourceInfo() + assert srcinfo is None or srcinfo.file is None + + +def test_mixed_scope_types(): + """Block + Branch + Toggle + Covergroup children all round-trip.""" + db = MemUCIS() + top = db.createScope("top", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + + # BLOCK with stmt bins + blk = top.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + _add_stmtbin(blk, "s0", 10) + + # BRANCH (non-toggle) + br = top.createScope("br", None, 1, SourceT.SV, ScopeTypeT.BRANCH, 0) + cd = CoverData(CoverTypeT.BRANCHBIN, 0) + cd.data = 1 + br.createNextCover("taken", cd, None) + + # Toggle pair + _add_toggle_pair(top, "sig", c0=1, c1=1) + + rt = _roundtrip(db) + top_rt = list(rt.scopes(ScopeTypeT.ALL))[0] + children = list(top_rt.scopes(ScopeTypeT.ALL)) + assert len(children) == 3 + types = {c.getScopeName(): c.getScopeType() for c in children} + assert types["blk"] == ScopeTypeT.BLOCK + assert types["br"] == ScopeTypeT.BRANCH + assert types["sig"] == ScopeTypeT.BRANCH # toggle pair decoded as BRANCH + + +def test_large_count_values(): + """Hit counts exceeding 32 bits should round-trip correctly.""" + db = MemUCIS() + blk = db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + big_count = 2**32 + 1 + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = big_count + blk.createNextCover("big", cd, None) + + rt = _roundtrip(db) + items = list(list(rt.scopes(ScopeTypeT.ALL))[0].coverItems(CoverTypeT.ALL)) + assert items[0].getCoverData().data == big_count + + +def test_zero_coveritems_scope(): + """Scope with 0 coveritems encodes num_coveritems=0 (no child_cover_type field).""" + db = MemUCIS() + db.createScope("empty", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + rt = _roundtrip(db) + scope = list(rt.scopes(ScopeTypeT.ALL))[0] + assert list(scope.coverItems(CoverTypeT.ALL)) == [] diff --git a/tests/unit/ncdb/test_string_table.py b/tests/unit/ncdb/test_string_table.py new file mode 100644 index 0000000..70cd075 --- /dev/null +++ b/tests/unit/ncdb/test_string_table.py @@ -0,0 +1,55 @@ +"""Unit tests for ucis.ncdb.string_table.""" + +import pytest +from ucis.ncdb.string_table import StringTable + + +def test_empty_table(): + st = StringTable() + data = st.serialize() + st2 = StringTable.from_bytes(data) + assert len(st2) == 0 + + +def test_single_string(): + st = StringTable() + idx = st.add("hello") + assert idx == 0 + assert st.get(0) == "hello" + st2 = StringTable.from_bytes(st.serialize()) + assert st2.get(0) == "hello" + + +def test_deduplication(): + st = StringTable() + i1 = st.add("foo") + i2 = st.add("bar") + i3 = st.add("foo") + assert i1 == i3 + assert i1 != i2 + + +def test_large_table(): + st = StringTable() + strings = [f"signal_{i}" for i in range(1000)] + ["signal_0"] * 500 + indices = [st.add(s) for s in strings] + # Unique strings + unique = list(dict.fromkeys(strings)) + assert len(st) == len(unique) + # Round-trip + st2 = StringTable.from_bytes(st.serialize()) + for s in unique: + assert st2.get(st.add(s)) == s + + +def test_unicode(): + st = StringTable() + idx = st.add("信号_test") + st2 = StringTable.from_bytes(st.serialize()) + assert st2.get(idx) == "信号_test" + + +def test_none_becomes_empty(): + st = StringTable() + idx = st.add(None) + assert st.get(idx) == "" diff --git a/tests/unit/ncdb/test_tags.py b/tests/unit/ncdb/test_tags.py new file mode 100644 index 0000000..9175ddf --- /dev/null +++ b/tests/unit/ncdb/test_tags.py @@ -0,0 +1,140 @@ +""" +Tests for ucis.ncdb.tags — scope tag round-trip via NCDB ZIP. +""" + +import json +import os +import tempfile +import zipfile + +import pytest + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_ucis import MemUCIS +from ucis.ncdb.constants import MEMBER_TAGS +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.tags import TagsReader, TagsWriter +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _make_db_with_tags(tag_set): + """Build a MemUCIS with one BLOCK scope tagged with *tag_set*.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + block = db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + cd = CoverData(CoverTypeT.STMTBIN, 0) + cd.data = 1 + block.createNextCover("s0", cd, None) + for tag in tag_set: + block.addTag(tag) + return db, block + + +def _write_read(db): + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + return NcdbReader().read(path), path + + +# ── Unit tests: TagsWriter / TagsReader ─────────────────────────────────── + +def test_tags_writer_empty(): + """No tags → empty entries list.""" + db = MemUCIS() + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + data = TagsWriter().serialize(db) + payload = json.loads(data) + assert payload["version"] == 1 + assert payload["entries"] == [] + + +def test_tags_writer_single(): + """One tag on one scope → one entry.""" + db, _ = _make_db_with_tags({"critical"}) + data = TagsWriter().serialize(db) + payload = json.loads(data) + assert len(payload["entries"]) == 1 + assert "critical" in payload["entries"][0]["tags"] + + +def test_tags_writer_multiple(): + """Multiple tags on one scope → all present.""" + tags = {"a", "b", "c"} + db, _ = _make_db_with_tags(tags) + data = TagsWriter().serialize(db) + payload = json.loads(data) + assert set(payload["entries"][0]["tags"]) == tags + + +def test_tags_reader_applies_tags(): + """TagsReader must restore tags on scopes.""" + db, block = _make_db_with_tags({"important"}) + data = TagsWriter().serialize(db) + + db2 = MemUCIS() + block2 = db2.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + TagsReader().deserialize(data, db2) + assert block2.hasTag("important") + + +def test_tags_reader_empty_data(): + """Empty bytes must not raise.""" + db = MemUCIS() + TagsReader().deserialize(b'', db) + + +# ── Integration: NCDB round-trip ───────────────────────────────────────── + +def test_tags_round_trip_single(): + """Single tag survives NCDB write → read.""" + db, _ = _make_db_with_tags({"regression"}) + rdb, _ = _write_read(db) + blocks = list(rdb.scopes(ScopeTypeT.BLOCK)) + assert blocks[0].hasTag("regression") + + +def test_tags_round_trip_multiple(): + """Multiple tags all survive round-trip.""" + tags = {"tag1", "tag2", "tag3"} + db, _ = _make_db_with_tags(tags) + rdb, _ = _write_read(db) + block = list(rdb.scopes(ScopeTypeT.BLOCK))[0] + for t in tags: + assert block.hasTag(t), f"tag '{t}' missing after round-trip" + + +def test_tags_absent_from_zip_when_empty(): + """No tags → tags member must be absent from ZIP.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + db.createScope("blk", None, 1, SourceT.SV, ScopeTypeT.BLOCK, 0) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_TAGS not in zf.namelist() + + +def test_tags_present_in_zip_when_set(): + """If any tag is set, tags member must appear in ZIP.""" + db, _ = _make_db_with_tags({"check"}) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_TAGS in zf.namelist() + + +def test_tags_hasTag_false_for_unset(): + """hasTag must return False for a tag that was never set.""" + db, _ = _make_db_with_tags({"present"}) + rdb, _ = _write_read(db) + block = list(rdb.scopes(ScopeTypeT.BLOCK))[0] + assert not block.hasTag("absent") diff --git a/tests/unit/ncdb/test_toggle.py b/tests/unit/ncdb/test_toggle.py new file mode 100644 index 0000000..68e5fc5 --- /dev/null +++ b/tests/unit/ncdb/test_toggle.py @@ -0,0 +1,209 @@ +""" +Tests for ucis.ncdb.toggle — TOGGLE scope metadata round-trip via NCDB ZIP. +""" + +import json +import os +import tempfile +import zipfile + +import pytest + +from ucis.cover_data import CoverData +from ucis.cover_type_t import CoverTypeT +from ucis.history_node_kind import HistoryNodeKind +from ucis.mem.mem_ucis import MemUCIS +from ucis.ncdb.constants import MEMBER_TOGGLE +from ucis.ncdb.ncdb_reader import NcdbReader +from ucis.ncdb.ncdb_writer import NcdbWriter +from ucis.ncdb.toggle import ToggleReader, ToggleWriter +from ucis.scope_type_t import ScopeTypeT +from ucis.source_t import SourceT +from ucis.toggle_dir_t import ToggleDirT +from ucis.toggle_metric_t import ToggleMetricT +from ucis.toggle_type_t import ToggleTypeT + + +# ── Helpers ─────────────────────────────────────────────────────────────── + +def _make_toggle_db(canonical, metric, ttype, tdir): + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + t = db.createToggle("sig", canonical, 0, metric, ttype, tdir) + cd = CoverData(CoverTypeT.TOGGLEBIN, 0); cd.data = 3 + t.createNextCover("0->1", cd, None) + cd2 = CoverData(CoverTypeT.TOGGLEBIN, 0); cd2.data = 2 + t.createNextCover("1->0", cd2, None) + return db, t + + +def _write_read(db): + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + return NcdbReader().read(path), path + + +# ── Unit tests: ToggleWriter / ToggleReader ─────────────────────────────── + +def test_toggle_writer_empty_when_defaults(): + """No toggle metadata when all values are defaults — returns empty bytes.""" + db, _ = _make_toggle_db( + "sig", # canonical == scope name → omitted + ToggleMetricT._2STOGGLE, # default + ToggleTypeT.NET, # default + ToggleDirT.INTERNAL, # default + ) + data = ToggleWriter().serialize(db) + assert data == b"" + + +def test_toggle_writer_captures_canonical(): + """Non-default canonical name is included in entry.""" + db, _ = _make_toggle_db( + "tb.dut.sig", + ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, + ToggleDirT.INTERNAL, + ) + data = ToggleWriter().serialize(db) + payload = json.loads(data) + assert payload["version"] == 1 + assert len(payload["entries"]) == 1 + assert payload["entries"][0]["canonical"] == "tb.dut.sig" + + +def test_toggle_writer_captures_type(): + """Non-default toggle type is included.""" + db, _ = _make_toggle_db( + "sig", + ToggleMetricT._2STOGGLE, + ToggleTypeT.REG, # non-default (default is NET) + ToggleDirT.INTERNAL, + ) + data = ToggleWriter().serialize(db) + payload = json.loads(data) + assert payload["entries"][0]["type"] == int(ToggleTypeT.REG) + + +def test_toggle_writer_captures_dir(): + """Non-default toggle direction is included.""" + db, _ = _make_toggle_db( + "sig", + ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, + ToggleDirT.IN, # non-default (default is INTERNAL) + ) + data = ToggleWriter().serialize(db) + payload = json.loads(data) + assert payload["entries"][0]["dir"] == int(ToggleDirT.IN) + + +def test_toggle_reader_restores_canonical(): + """ToggleReader must set canonical name back on scope.""" + db, _ = _make_toggle_db("tb.dut.sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.INTERNAL) + data = ToggleWriter().serialize(db) + + db2 = MemUCIS() + t2 = db2.createToggle("sig", "sig", 0, ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.INTERNAL) + ToggleReader().apply(db2, data) + assert t2.getCanonicalName() == "tb.dut.sig" + + +def test_toggle_reader_empty_data(): + """Empty bytes must not raise.""" + db = MemUCIS() + ToggleReader().apply(db, b"") + + +# ── Integration: NCDB round-trip ───────────────────────────────────────── + +def test_toggle_canonical_round_trip(): + """Canonical name survives NCDB write → read.""" + db, _ = _make_toggle_db("tb.dut.my_signal", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.INTERNAL) + rdb, _ = _write_read(db) + t = list(rdb.scopes(ScopeTypeT.TOGGLE))[0] + assert t.getCanonicalName() == "tb.dut.my_signal" + + +def test_toggle_type_round_trip(): + """Toggle type survives NCDB write → read.""" + db, _ = _make_toggle_db("sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.REG, ToggleDirT.INTERNAL) + rdb, _ = _write_read(db) + t = list(rdb.scopes(ScopeTypeT.TOGGLE))[0] + assert t.getToggleType() == ToggleTypeT.REG + + +def test_toggle_dir_round_trip(): + """Toggle direction survives NCDB write → read.""" + db, _ = _make_toggle_db("sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.OUT) + rdb, _ = _write_read(db) + t = list(rdb.scopes(ScopeTypeT.TOGGLE))[0] + assert t.getToggleDir() == ToggleDirT.OUT + + +def test_toggle_default_values_no_zip_member(): + """All-default toggle values → toggle.json absent from ZIP.""" + db, _ = _make_toggle_db("sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.INTERNAL) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_TOGGLE not in zf.namelist() + + +def test_toggle_present_in_zip_when_canonical_differs(): + """Non-default canonical → toggle.json present in ZIP.""" + db, _ = _make_toggle_db("tb.dut.sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.INTERNAL) + with tempfile.TemporaryDirectory() as d: + path = os.path.join(d, "out.cdb") + NcdbWriter().write(db, path) + with zipfile.ZipFile(path) as zf: + assert MEMBER_TOGGLE in zf.namelist() + + +def test_toggle_counts_preserved(): + """Toggle hit counts are unaffected by toggle metadata serialization.""" + db, _ = _make_toggle_db("tb.sig", ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, ToggleDirT.IN) + rdb, _ = _write_read(db) + t = list(rdb.scopes(ScopeTypeT.TOGGLE))[0] + items = list(t.coverItems(CoverTypeT.ALL)) + counts = [ci.getCoverData().data for ci in items] + assert counts == [3, 2] + + +def test_multiple_toggle_scopes(): + """Multiple TOGGLE scopes each preserve their own metadata.""" + db = MemUCIS() + db.createHistoryNode(None, "t", None, HistoryNodeKind.TEST) + + sigs = [ + ("sig_a", "tb.a", ToggleDirT.IN), + ("sig_b", "tb.b", ToggleDirT.OUT), + ("sig_c", "tb.c", ToggleDirT.INOUT), + ] + for name, canon, tdir in sigs: + t = db.createToggle(name, canon, 0, ToggleMetricT._2STOGGLE, + ToggleTypeT.NET, tdir) + cd = CoverData(CoverTypeT.TOGGLEBIN, 0); cd.data = 1 + t.createNextCover("0->1", cd, None) + cd2 = CoverData(CoverTypeT.TOGGLEBIN, 0); cd2.data = 0 + t.createNextCover("1->0", cd2, None) + + rdb, _ = _write_read(db) + toggles = {s.getScopeName(): s + for s in rdb.scopes(ScopeTypeT.TOGGLE)} + assert toggles["sig_a"].getCanonicalName() == "tb.a" + assert toggles["sig_a"].getToggleDir() == ToggleDirT.IN + assert toggles["sig_b"].getCanonicalName() == "tb.b" + assert toggles["sig_b"].getToggleDir() == ToggleDirT.OUT + assert toggles["sig_c"].getCanonicalName() == "tb.c" + assert toggles["sig_c"].getToggleDir() == ToggleDirT.INOUT diff --git a/tests/unit/ncdb/test_varint.py b/tests/unit/ncdb/test_varint.py new file mode 100644 index 0000000..8e99fb2 --- /dev/null +++ b/tests/unit/ncdb/test_varint.py @@ -0,0 +1,57 @@ +"""Unit tests for ucis.ncdb.varint (LEB128 encode/decode).""" + +import pytest +from ucis.ncdb.varint import encode_varint, decode_varint, encode_varints, decode_varints + + +ROUND_TRIP_CASES = [ + 0, 1, 127, 128, 255, 256, + 16383, 16384, + 2**32 - 1, 2**32, + 2**64 - 1, +] + + +@pytest.mark.parametrize("value", ROUND_TRIP_CASES) +def test_round_trip(value): + encoded = encode_varint(value) + decoded, offset = decode_varint(encoded) + assert decoded == value + assert offset == len(encoded) + + +def test_single_byte_range(): + """Values 0–127 should encode to exactly 1 byte.""" + for v in range(128): + assert len(encode_varint(v)) == 1 + + +def test_two_byte_range(): + """Values 128–16383 should encode to exactly 2 bytes.""" + for v in (128, 129, 16383): + assert len(encode_varint(v)) == 2 + + +def test_negative_raises(): + with pytest.raises(ValueError): + encode_varint(-1) + + +def test_decode_offset(): + """decode_varint respects a non-zero starting offset.""" + buf = bytes([0x00]) + encode_varint(300) + bytes([0xFF]) + value, new_off = decode_varint(buf, 1) + assert value == 300 + + +def test_encode_decode_sequence(): + values = [0, 1, 128, 16384, 2**32] + buf = encode_varints(values) + decoded, offset = decode_varints(buf, len(values)) + assert decoded == values + assert offset == len(buf) + + +def test_decode_short_buffer_raises(): + with pytest.raises(ValueError): + decode_varint(b"\x80") # continuation bit set but buffer ends