From 7f3b3804569eed24be83c55c0f7bb16f98670498 Mon Sep 17 00:00:00 2001 From: Andrew-Keenlyside Date: Thu, 14 May 2026 10:34:03 -0700 Subject: [PATCH 1/4] remove old children arrays and sidecars, flatten root metadata --- benchmarks/01_size_scaling.ipynb | 77 +- docs/spec/foundations/store_types.md | 7 +- docs/spec/multiscale/pyramid_construction.md | 3 +- docs/spec/object_model/cross_chunk_links.md | 40 +- examples/_build_07_multiscale_links.py | 528 ---- schema/reference.md | 2440 ++++++------------ schema/zarr_vectors.linkml.yaml | 78 +- schema/zarr_vectors.schema.json | 122 +- tests/test_arrays.py | 40 - tests/test_backends.py | 15 +- tests/test_core.py | 67 +- tests/test_cross_chunk_faces.py | 34 +- tests/test_encoding.py | 65 +- tests/test_icechunk_backend.py | 27 +- tests/test_lazy_writer.py | 32 +- tests/test_linkml_schema.py | 15 +- tests/test_multiscale_links.py | 11 +- zarr_vectors/composite.py | 8 - zarr_vectors/constants.py | 66 +- zarr_vectors/core/arrays.py | 718 ++---- zarr_vectors/core/metadata.py | 8 +- zarr_vectors/core/store.py | 101 +- zarr_vectors/encoding/compression.py | 3 +- zarr_vectors/encoding/ragged.py | 93 +- zarr_vectors/lazy/writer.py | 116 +- zarr_vectors/multiresolution/coarsen.py | 78 +- zarr_vectors/rechunk/engine.py | 10 +- zarr_vectors/spatial/boundary.py | 60 +- zarr_vectors/types/meshes.py | 70 +- zarr_vectors/types/parametric.py | 29 +- 30 files changed, 1545 insertions(+), 3416 deletions(-) delete mode 100644 examples/_build_07_multiscale_links.py diff --git a/benchmarks/01_size_scaling.ipynb b/benchmarks/01_size_scaling.ipynb index a1ae367..6018cf8 100644 --- a/benchmarks/01_size_scaling.ipynb +++ b/benchmarks/01_size_scaling.ipynb @@ -4,21 +4,26 @@ "cell_type": "markdown", "id": "9b315ef7", "metadata": {}, - "source": [ - "# Size scaling — point cloud\n", - "\n", - "Write/read/disk-size of point clouds at increasing `N`. 
Same\n", - "`chunk_shape` across runs so the only variable is vertex count.\n", - "\n", - "Runtime: a few minutes on a laptop (the 1M case dominates)." - ] + "source": "# Size scaling — point cloud\n\nWrite/read/disk-size of point clouds at increasing `N`, with **CSV as a\nbaseline** for context. Same `chunk_shape` across runs so the only\nvariable is vertex count.\n\nFor each `N` we measure:\n\n| Operation | zarr-vectors | CSV (baseline) |\n| --- | --- | --- |\n| Write | `write_points` | `pandas.to_csv` |\n| Read all | `read_points` | `pandas.read_csv` |\n| Read one | one chunk via lazy API | `read_csv(nrows=1)` (best case) |\n| Disk size | store directory | CSV file |\n\nRuntime: a few minutes on a laptop (the 1M case dominates)." }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b97bfc29", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pandas'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m os, time, tempfile, shutil\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m pathlib \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[32m 3\u001b[39m \n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m numpy \u001b[38;5;28;01mas\u001b[39;00m np\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m pandas \u001b[38;5;28;01mas\u001b[39;00m pd\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m matplotlib.pyplot \u001b[38;5;28;01mas\u001b[39;00m plt\n\u001b[32m 7\u001b[39m \n\u001b[32m 8\u001b[39m \n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'pandas'" + ] + } 
+ ], "source": [ "import os, time, tempfile, shutil\n", "from pathlib import Path\n", @@ -60,14 +65,7 @@ "id": "5bcdba96", "metadata": {}, "outputs": [], - "source": [ - "from zarr_vectors.types.points import write_points, read_points\n", - "\n", - "SIZES = [1_000, 10_000, 100_000, 1_000_000]\n", - "CHUNK = (200.0, 200.0, 200.0)\n", - "BIN = (50.0, 50.0, 50.0)\n", - "SEED = 0" - ] + "source": "from zarr_vectors.types.points import write_points, read_points\nfrom zarr_vectors.lazy import open_zvr\n\nSIZES = [1_000, 10_000, 100_000, 1_000_000]\nCHUNK = (200.0, 200.0, 200.0)\nBIN = (50.0, 50.0, 50.0)\nSEED = 0\n\n\ndef _csv_path(prefix):\n \"\"\"Fresh tempdir + CSV path.\"\"\"\n return Path(tempfile.mkdtemp(prefix=f'csvbench_{prefix}_')) / 'points.csv'\n\n\ndef _csv_write(path, positions, intensity):\n \"\"\"Baseline: write x,y,z,intensity columns to a CSV.\"\"\"\n pd.DataFrame({\n 'x': positions[:, 0],\n 'y': positions[:, 1],\n 'z': positions[:, 2],\n 'intensity': intensity,\n }).to_csv(path, index=False)\n\n\ndef _csv_read_all(path):\n \"\"\"Read every row back into memory.\"\"\"\n return pd.read_csv(path)\n\n\ndef _csv_read_one(path):\n \"\"\"Best-case single-row read: only parse the first data row.\n\n CSV has no random access, so this is the cheapest single-record\n read the format admits.\"\"\"\n return pd.read_csv(path, nrows=1)\n\n\ndef _zv_read_one(store_path):\n \"\"\"Read just one chunk's worth of vertices via the lazy API.\n\n Touches a single chunk on disk (vs. 
the full materialisation in\n ``read_points``).\"\"\"\n zvr = open_zvr(store_path)\n chunk_keys = zvr[0].vertices._chunk_keys # noqa: SLF001 — minimal demo\n if not chunk_keys:\n return None\n return zvr[0].vertices[chunk_keys[0]].compute()" }, { "cell_type": "markdown", @@ -83,30 +81,7 @@ "id": "d0b220e0", "metadata": {}, "outputs": [], - "source": [ - "rng = np.random.default_rng(SEED)\n", - "rows = []\n", - "for n in SIZES:\n", - " positions = rng.uniform(0, 1000, (n, 3)).astype(np.float32)\n", - " intensity = rng.uniform(0, 1, n).astype(np.float32)\n", - "\n", - " store = _new_store(f'size_{n}')\n", - " t_write, _ = _time(\n", - " write_points, store, positions,\n", - " chunk_shape=CHUNK, bin_shape=BIN,\n", - " attributes={'intensity': intensity},\n", - " )\n", - " t_read, _ = _time(read_points, store, attribute_names=['intensity'])\n", - " rows.append({\n", - " 'N': n,\n", - " 'write_s': round(t_write, 3),\n", - " 'read_s': round(t_read, 3),\n", - " 'size_MB': round(_store_bytes(store) / 1e6, 2),\n", - " })\n", - " shutil.rmtree(Path(store).parent, ignore_errors=True)\n", - "\n", - "df = pd.DataFrame(rows)" - ] + "source": "rng = np.random.default_rng(SEED)\nrows = []\nfor n in SIZES:\n positions = rng.uniform(0, 1000, (n, 3)).astype(np.float32)\n intensity = rng.uniform(0, 1, n).astype(np.float32)\n\n # ---- ZV ----\n store = _new_store(f'size_{n}')\n t_zv_write, _ = _time(\n write_points, store, positions,\n chunk_shape=CHUNK, bin_shape=BIN,\n attributes={'intensity': intensity},\n )\n t_zv_read_all, _ = _time(read_points, store, attribute_names=['intensity'])\n t_zv_read_one, _ = _time(_zv_read_one, store)\n size_zv_MB = _store_bytes(store) / 1e6\n\n # ---- CSV baseline ----\n csv = _csv_path(f'size_{n}')\n t_csv_write, _ = _time(_csv_write, csv, positions, intensity)\n t_csv_read_all, _ = _time(_csv_read_all, csv)\n t_csv_read_one, _ = _time(_csv_read_one, csv)\n size_csv_MB = csv.stat().st_size / 1e6\n\n rows.append({\n 'N': n,\n 'zv_write_s': 
round(t_zv_write, 4),\n 'csv_write_s': round(t_csv_write, 4),\n 'zv_read_all_s': round(t_zv_read_all, 4),\n 'csv_read_all_s':round(t_csv_read_all,4),\n 'zv_read_one_s': round(t_zv_read_one, 4),\n 'csv_read_one_s':round(t_csv_read_one,4),\n 'zv_size_MB': round(size_zv_MB, 2),\n 'csv_size_MB': round(size_csv_MB, 2),\n })\n\n shutil.rmtree(Path(store).parent, ignore_errors=True)\n shutil.rmtree(csv.parent, ignore_errors=True)\n\ndf = pd.DataFrame(rows)" }, { "cell_type": "markdown", @@ -140,22 +115,12 @@ "id": "6ca88043", "metadata": {}, "outputs": [], - "source": [ - "fig, ax = plt.subplots(figsize=(6, 4))\n", - "ax.loglog(df['N'], df['write_s'], 'o-', label='write (s)')\n", - "ax.loglog(df['N'], df['read_s'], 's-', label='read (s)')\n", - "ax.loglog(df['N'], df['size_MB'], '^-', label='size (MB)')\n", - "ax.set_xlabel('N (vertices)')\n", - "ax.set_title('Point cloud: write/read time + disk footprint vs N')\n", - "ax.legend()\n", - "ax.grid(True, which='both', alpha=0.3)\n", - "plt.tight_layout()" - ] + "source": "fig, axes = plt.subplots(1, 4, figsize=(20, 4.5), sharex=True)\n\npanels = [\n ('Write time', 'write_s', 'zv_write_s', 'csv_write_s', 's'),\n ('Read all', 'read_all_s', 'zv_read_all_s', 'csv_read_all_s', 's'),\n ('Read one', 'read_one_s', 'zv_read_one_s', 'csv_read_one_s', 's'),\n ('Disk size', 'size_MB', 'zv_size_MB', 'csv_size_MB', 'MB'),\n]\nfor ax, (title, _key, zv_col, csv_col, unit) in zip(axes, panels):\n ax.loglog(df['N'], df[zv_col], 'o-', label='zarr-vectors', color='tab:blue')\n ax.loglog(df['N'], df[csv_col], 's-', label='csv', color='tab:orange')\n ax.set_title(title)\n ax.set_xlabel('N (vertices)')\n ax.set_ylabel(unit)\n ax.grid(True, which='both', alpha=0.3)\n ax.legend()\n\nfig.suptitle('zarr-vectors vs CSV — point cloud scaling', y=1.02)\nplt.tight_layout()" } ], "metadata": { "kernelspec": { - "display_name": "zarr-vectors", + "display_name": ".venv (3.13.13)", "language": "python", "name": "python3" }, @@ -169,9 +134,9 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.15" + "version": "3.13.13" } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/docs/spec/foundations/store_types.md b/docs/spec/foundations/store_types.md index a89ab08..b8eae06 100644 --- a/docs/spec/foundations/store_types.md +++ b/docs/spec/foundations/store_types.md @@ -71,7 +71,8 @@ All three entry points accept `backend=` and `**backend_kwargs`: from zarr_vectors.core.store import create_store, open_store from zarr_vectors.lazy import open_zvr -create_store(path, root_metadata, *, backend=None, **backend_kwargs) -> Group +create_store(path, *, bounds=None, chunk_shape=None, axes=None, + geometry_types=None, ..., backend=None, **backend_kwargs) -> Group open_store(path, mode="r", *, backend=None, **backend_kwargs) -> Group open_zvr(path, *, backend=None, **backend_kwargs) -> ZVRStore ``` @@ -301,5 +302,5 @@ a resolution level). The backend layer is independent of the [format capability tokens](../layout/root_metadata.md) stamped on `RootMetadata.format_capabilities` — backends carry data bytes, not -format semantics. See the capability list for `CAP_CROSS_CHUNK_FACES`, -`CAP_VERTEX_COUNT_CACHE`, `CAP_MULTISCALE_LINKS`, etc. +format semantics. See the capability list for `CAP_MULTISCALE_LINKS`, +`CAP_PRESERVED_OBJECT_IDS`, `CAP_SHARED_VERTEX_GROUPS`. diff --git a/docs/spec/multiscale/pyramid_construction.md b/docs/spec/multiscale/pyramid_construction.md index b3d7b72..268bf63 100644 --- a/docs/spec/multiscale/pyramid_construction.md +++ b/docs/spec/multiscale/pyramid_construction.md @@ -287,7 +287,8 @@ which: 1. Walks every adjacent `(fine, coarse)` level pair. 2. Reconstructs the fine→parent map from the coarse level's - `metanode_children` sidecar. + `cross_chunk_links//` records (each record pairs a + coarse metanode to one of its fine children). 3. Builds the trivial edge list `[(i, parent[i]) for i in range(n_fine)]`. 4. 
Partitions via [`partition_cross_level_edges`](../../../zarr_vectors/spatial/boundary.py) diff --git a/docs/spec/object_model/cross_chunk_links.md b/docs/spec/object_model/cross_chunk_links.md index 623473e..9ac6e09 100644 --- a/docs/spec/object_model/cross_chunk_links.md +++ b/docs/spec/object_model/cross_chunk_links.md @@ -126,28 +126,30 @@ chunk at the **owning level**, and column 1 is a local vertex index in the **same chunk key** at level `owning_level + N`. The reader doesn't need any cross-chunk-coords information — both sides share `<chunk_key>`. -**Paired vertex-group offsets:** for `delta == 0` only, the byte -offset of each link group is paired into the matching -`vertex_group_offsets/` table so a reader can fetch one -vertex group's edges without rescanning the chunk. For `delta != 0` -the source vertex groups and link groups belong to different levels, -so the pairing is meaningless and the writer skips it. See the -guardrail in -[`write_chunk_links`](../../../zarr_vectors/core/arrays.py). +**Self-describing blob.** Each `links/<delta>/<chunk_key>` file is a +self-describing ragged blob: an int64 header with `K` followed by the +`K` per-group byte offsets, then the concatenated link bytes. Readers +recover the per-vertex-group partition without consulting any sibling +table. ### `cross_chunk_links/<delta>/data` — global flat blob -Each link is `2 * (sid_ndim + 1)` int64s laid out as +Each record is `link_width * (sid_ndim + 1)` int64s laid out as +`link_width` back-to-back `(chunk_coords, vertex_idx)` endpoints: ``` -[chunk_a_0, ..., chunk_a_{ndim-1}, vi_a, - chunk_b_0, ..., chunk_b_{ndim-1}, vi_b] +[chunk_0_0, ..., chunk_0_{ndim-1}, vi_0, + chunk_1_0, ..., chunk_1_{ndim-1}, vi_1, + ... + chunk_{L-1}_0, ..., vi_{L-1}] ``` -— i.e. the two endpoints written back-to-back. `chunk_a` is a chunk -coordinate at the **owning level**; `chunk_b` is a chunk coordinate at -the **target level** (`owning_level + level_delta`). `vi_a` and `vi_b` -are local vertex indices within their respective chunks. 
+`link_width=2` (the default) encodes a classic cross-chunk edge; +`link_width=3` encodes a triangle face spanning chunks (used by mesh +writers); `link_width=1` encodes a single parent→child reference for +pyramid metanode drill-down. Endpoint 0 lives at the **owning level**; +endpoints 1..L-1 live at the **target level** (`owning_level + +level_delta`). **`.zattrs` schema** (see [`zarr_vectors/core/arrays.py:write_cross_chunk_links`](../../../zarr_vectors/core/arrays.py)): @@ -157,7 +159,8 @@ are local vertex indices within their respective chunks. "zv_array": "cross_chunk_links", "num_links": 12, "sid_ndim": 3, - "level_delta": 1 + "level_delta": 1, + "link_width": 2 } ``` @@ -233,8 +236,9 @@ chunk → bucket into per-chunk `(M_local, link_width)` rows for [`_write_cross_level_edges`](../../../zarr_vectors/multiresolution/coarsen.py) during pyramid construction. For each adjacent (fine, coarse) pair, every fine vertex has exactly one trivial edge to its coarse parent -metanode (the parent map is reconstructed from `metanode_children`). -The edges are then partitioned via +metanode (the parent map is recovered from the coarse level's own +`cross_chunk_links/<delta>/` records). The edges are then +partitioned via [`partition_cross_level_edges`](../../../zarr_vectors/spatial/boundary.py): chunk-aligned edges (source chunk_key == target chunk_key when re-evaluated against the coarser grid) become rows in diff --git a/examples/_build_07_multiscale_links.py b/examples/_build_07_multiscale_links.py deleted file mode 100644 index 5947908..0000000 --- a/examples/_build_07_multiscale_links.py +++ /dev/null @@ -1,528 +0,0 @@ -"""Generate examples/07_multiscale_links.ipynb from a single source. - -Run once and commit the output. Keeps the notebook deterministic and -avoids hand-editing JSON. 
-""" - -from __future__ import annotations - -import json -import uuid -from pathlib import Path - -NB_PATH = Path(__file__).resolve().parent / "07_multiscale_links.ipynb" - -CELLS: list[tuple[str, str]] = [] - - -def md(text: str) -> None: - CELLS.append(("markdown", text)) - - -def code(src: str) -> None: - CELLS.append(("code", src)) - - -# =================================================================== -# Notebook content -# =================================================================== - -md( - "# Multiscale links (cross-pyramid-level edges)\n" - "\n" - "**Geometry type:** `graph` · **Schema version:** `0.4`\n" - "\n" - "This notebook is a deep dive into the **multiscale links layout** " - "introduced in schema `0.4` — how graph edges, cross-chunk links, " - "and their attributes are organised across pyramid levels and how " - "you can read each piece directly.\n" - "\n" - "Topics:\n" - "\n" - "1. The on-disk layout: `links//` and friends\n" - "2. Write a small graph and build a 3-level pyramid\n" - "3. Inspect the directory tree — `0`, `+1`, `-1` arrays per level\n" - "4. Read intra-level edges (`delta=0`) — current behaviour\n" - "5. Read cross-level edges (`delta=+1`) — fine → coarse drill-up\n" - "6. Cross-chunk links across levels (`cross_chunk_links/+1`)\n" - "7. Per-link attributes, intra- *and* cross-chunk\n" - "8. Storage modes: `none` vs `implicit` vs `explicit`\n" - "9. Depth knob: `cross_level_depth=2`\n" - "10. 
Validate" -) - -code( - "import numpy as np\n" - "import tempfile, os\n" - "from pathlib import Path\n" - "\n" - "_tmpdir = tempfile.mkdtemp(prefix=\"zvf_multiscale_\")\n" - "STORE = os.path.join(_tmpdir, \"graph.zarrvectors\")\n" - "print(\"Store:\", STORE)" -) - -md( - "## 1 · The on-disk layout\n" - "\n" - "Under the 0.4 schema, every link-family array gets a `` " - "path segment that says how many pyramid levels the edges span:\n" - "\n" - "```\n" - "/resolution_N/links//\n" - "/resolution_N/cross_chunk_links//data\n" - "/resolution_N/link_attributes///\n" - "/resolution_N/cross_chunk_link_attributes///data\n" - "```\n" - "\n" - "Convention for ``:\n" - "\n" - "| Segment | Meaning |\n" - "|---------|---------|\n" - "| `0` | intra-level (the only kind written pre-0.4) |\n" - "| `+1` | edges from this level to `this_level + 1` (one step coarser) |\n" - "| `-1` | edges from this level to `this_level - 1` (one step finer) |\n" - "| `+N` / `-N` | jumps of N levels |\n" - "\n" - "Sides of an edge:\n" - "\n" - "- For `links//`: source endpoint is local to the chunk " - "at the owning level; target endpoint is local to the **same chunk key** " - "at level `+delta`. Only used when the two endpoints share a chunk_key.\n" - "- For `cross_chunk_links//data`: each row is " - "`((chunk_a, local_a), (chunk_b, local_b))` — endpoint A at the owning level, " - "endpoint B at level `+delta`. Used when chunk keys differ across levels.\n" - "\n" - "Use the path helpers in `zarr_vectors.core.paths` to compose paths — never " - "hard-code the `` formatting yourself." 
-) - -code( - "from zarr_vectors.core.paths import (\n" - " format_delta, parse_delta,\n" - " links_path, cross_chunk_links_path,\n" - " link_attributes_path, cross_chunk_link_attributes_path,\n" - ")\n" - "\n" - "print(\"format_delta(0) =\", format_delta(0))\n" - "print(\"format_delta(+1) =\", format_delta(1))\n" - "print(\"format_delta(-2) =\", format_delta(-2))\n" - "print()\n" - "print(\"links_path(0) =\", links_path(0))\n" - "print(\"links_path(+1) =\", links_path(1))\n" - "print(\"cross_chunk_links_path(-1) =\", cross_chunk_links_path(-1))\n" - "print(\"link_attributes_path('weight', +1) =\", link_attributes_path('weight', 1))\n" - "print(\"cross_chunk_link_attributes_path('weight', 0) =\", cross_chunk_link_attributes_path('weight', 0))" -) - -md( - "## 2 · Write a small graph and build a 3-level pyramid\n" - "\n" - "We use a small (500-node) graph in a 400 µm cube so the pyramid produces a " - "handful of metanodes per level — easy to eyeball. Each level will roughly " - "8× coarsen the previous one.\n" - "\n" - "Defaults pick up `cross_level_depth=1` and `cross_level_storage=\"explicit\"` — " - "we'll override those below to compare modes. Default `explicit` writes both " - "`+1` at the finer level and `-1` at the coarser level for every adjacent pair." 
-) - -code( - "from zarr_vectors.types.graphs import write_graph\n" - "from zarr_vectors.multiresolution.coarsen import build_pyramid\n" - "from zarr_vectors.constants import XLEVEL_EXPLICIT, XLEVEL_IMPLICIT, XLEVEL_NONE\n" - "\n" - "rng = np.random.default_rng(0)\n" - "N = 500\n" - "positions = rng.uniform(0.0, 400.0, size=(N, 3)).astype(np.float32)\n" - "edges = np.stack([np.arange(N - 1), np.arange(1, N)], axis=1).astype(np.int64)\n" - "edge_weights = rng.uniform(0.1, 1.0, size=len(edges)).astype(np.float32)\n" - "\n" - "write_graph(\n" - " STORE,\n" - " positions=positions,\n" - " edges=edges,\n" - " object_ids=np.zeros(N, dtype=np.int64),\n" - " chunk_shape=(100.0, 100.0, 100.0),\n" - " bounds=([0.0, 0.0, 0.0], [400.0, 400.0, 400.0]),\n" - " edge_attributes={\"weight\": edge_weights},\n" - ")\n" - "\n" - "build_pyramid(\n" - " STORE,\n" - " factors=[(2.0, 1.0), (2.0, 1.0)],\n" - " cross_level_depth=1, # ±1 between every adjacent pair\n" - " cross_level_storage=XLEVEL_EXPLICIT, # store both +1 and -1\n" - ")\n" - "print(\"Build complete.\")" -) - -md( - "## 3 · Walk the on-disk tree\n" - "\n" - "Each resolution level should now carry `links/0` (intra-level edges) " - "and, depending on its position in the pyramid, some combination of " - "`links/+1`, `links/-1`, `cross_chunk_links/+1`, `cross_chunk_links/-1`." 
-) - -code( - "from zarr_vectors.core.store import open_store, list_resolution_levels, get_resolution_level\n" - "from zarr_vectors.constants import LINKS, CROSS_CHUNK_LINKS\n" - "from zarr_vectors.core.arrays import list_link_deltas, list_cross_link_deltas\n" - "\n" - "root = open_store(STORE)\n" - "levels = sorted(list_resolution_levels(root))\n" - "print(f\"Pyramid levels: {levels}\")\n" - "print()\n" - "for lvl in levels:\n" - " lg = get_resolution_level(root, lvl)\n" - " ld = list_link_deltas(lg)\n" - " cd = list_cross_link_deltas(lg)\n" - " print(f\" resolution_{lvl}: links/ = {ld} cross_chunk_links/ = {cd}\")" -) - -md( - "Reading this:\n" - "\n" - "- Level 0 has `+1` (drill *up* to level 1) but no `-1` (nothing below).\n" - "- Mid levels carry both `+1` and `-1`.\n" - "- The top level has `-1` but no `+1` (nothing above).\n" - "\n" - "If you peek at the actual directory you'll see one subdir per ``:" -) - -code( - "from pathlib import Path\n" - "level0_links = Path(STORE) / \"resolution_0\" / \"links\"\n" - "print(f\"contents of {level0_links}:\")\n" - "for child in sorted(level0_links.iterdir()):\n" - " print(\" \", child.name)" -) - -md( - "## 4 · Intra-level edges — `delta=0` (unchanged behaviour)\n" - "\n" - "Reading `delta=0` matches the pre-0.4 behaviour of `read_chunk_links`. " - "You get one list of `(M_k, 2)` arrays per spatial chunk; each row is a " - "pair of local-vertex indices." 
-) - -code( - "from zarr_vectors.core.arrays import read_chunk_links, list_chunk_keys\n" - "\n" - "lg0 = get_resolution_level(root, 0)\n" - "chunk_keys = list_chunk_keys(lg0, LINKS + \"/0\")\n" - "print(f\"level 0 has links/0 in {len(chunk_keys)} chunks\")\n" - "for ck in chunk_keys[:3]:\n" - " groups = read_chunk_links(lg0, ck, link_width=2, delta=0)\n" - " n = sum(len(g) for g in groups)\n" - " print(f\" chunk {ck}: {n} intra-chunk edges (groups: {len(groups)})\")" -) - -md( - "## 5 · Cross-level edges — `delta=+1` (drill up)\n" - "\n" - "Cross-level edges are conceptually trivial: every fine vertex has one " - "edge to its coarse parent metanode. The build splits those edges into:\n" - "\n" - "- **chunk-aligned** edges → `links/+1/` when the source chunk " - "key matches the coarse target chunk key;\n" - "- **cross-chunk** edges → `cross_chunk_links/+1/data` otherwise.\n" - "\n" - "For a `links/+1` row, *column 0* is the local vertex index in the source " - "chunk **at the owning level**; *column 1* is the local vertex index in " - "the same chunk key **at level + 1**." -) - -code( - "plus1_chunks = list_chunk_keys(lg0, LINKS + \"/+1\")\n" - "print(f\"level 0 has links/+1 in {len(plus1_chunks)} chunks (chunk-aligned cross-level edges)\")\n" - "total_plus1 = 0\n" - "for ck in plus1_chunks:\n" - " g = read_chunk_links(lg0, ck, link_width=2, delta=1)\n" - " n = sum(len(x) for x in g)\n" - " total_plus1 += n\n" - " if n:\n" - " sample = g[0][:3]\n" - " print(f\" chunk {ck}: {n} edges, sample rows (fine_local, coarse_local):\")\n" - " for row in sample:\n" - " print(f\" {tuple(row)}\")\n" - "print(f\"\\nTotal chunk-aligned +1 edges at level 0: {total_plus1}\")" -) - -md( - "## 6 · Cross-chunk + cross-level: `cross_chunk_links/+1`\n" - "\n" - "When a fine vertex's coarse parent lives in a *different* chunk grid cell, " - "the edge can't be expressed by a per-chunk row — it goes into the global " - "`cross_chunk_links/+1/data` blob. 
Each entry encodes both endpoint sides " - "explicitly:\n" - "\n" - "```\n" - "((source_chunk_coords, source_local_idx), # at this level\n" - " (target_chunk_coords, target_local_idx)) # at this_level + delta\n" - "```" -) - -code( - "from zarr_vectors.core.arrays import read_cross_chunk_links\n" - "\n" - "ccl_plus1 = read_cross_chunk_links(lg0, delta=1)\n" - "print(f\"level 0 has {len(ccl_plus1)} cross-chunk +1 edges\")\n" - "for a, b in ccl_plus1[:3]:\n" - " (ca, la), (cb, lb) = a, b\n" - " print(f\" src chunk={ca} local={la} -> tgt chunk={cb} local={lb}\")" -) - -md( - "## 7 · Per-link attributes — intra- and cross-chunk\n" - "\n" - "Two parallel attribute namespaces exist:\n" - "\n" - "- `link_attributes///` — parallel to `links//`, " - "one ragged group per spatial chunk.\n" - "- `cross_chunk_link_attributes///data` — *new in 0.4*, parallel to " - "`cross_chunk_links//data`; one flat row per cross-chunk link in the same order.\n" - "\n" - "The build wrote `delta=0` attributes from the `edge_attributes={'weight': ...}` we passed " - "into `write_graph`. We'll also write a cross-chunk attribute by hand to show the new API.\n" - "\n" - "Note: cross-chunk link attribute writes enforce `len(values) == num_links` at runtime — " - "a misaligned write fails loudly instead of silently corrupting the parallel array." 
-) - -code( - "from zarr_vectors.core.arrays import (\n" - " create_cross_chunk_link_attributes_array,\n" - " write_cross_chunk_link_attributes,\n" - " read_cross_chunk_link_attributes,\n" - ")\n" - "\n" - "# Re-open for writing.\n" - "root_rw = open_store(STORE, mode=\"r+\")\n" - "lg0_rw = get_resolution_level(root_rw, 0)\n" - "\n" - "# Cross-chunk +1 link attributes: one float per cross-chunk link, in path order.\n" - "num_ccl_plus1 = len(ccl_plus1)\n" - "if num_ccl_plus1:\n" - " create_cross_chunk_link_attributes_array(lg0_rw, \"weight\", dtype=\"float32\", delta=1)\n" - " weights = np.linspace(0.0, 1.0, num_ccl_plus1, dtype=np.float32)\n" - " write_cross_chunk_link_attributes(\n" - " lg0_rw, \"weight\", weights, num_links=num_ccl_plus1, delta=1,\n" - " )\n" - " back = read_cross_chunk_link_attributes(lg0_rw, \"weight\", delta=1)\n" - " print(f\"wrote/read {len(back)} cross-chunk-link weights at delta=+1\")\n" - " print(f\"first 5: {back[:5]}\")\n" - "else:\n" - " print(\"no cross-chunk +1 edges at level 0; skipping attribute round-trip\")" -) - -md( - "Length-invariant check (this *should* raise):" -) - -code( - "from zarr_vectors.exceptions import ArrayError\n" - "\n" - "if num_ccl_plus1:\n" - " try:\n" - " bad = np.zeros(num_ccl_plus1 + 7, dtype=np.float32)\n" - " write_cross_chunk_link_attributes(\n" - " lg0_rw, \"weight\", bad, num_links=num_ccl_plus1, delta=1,\n" - " )\n" - " except ArrayError as e:\n" - " print(f\"ArrayError raised as expected:\\n {e}\")" -) - -md( - "## 8 · Storage modes: `explicit` vs `implicit` vs `none`\n" - "\n" - "Three modes control whether `-N` arrays are materialised at all:\n" - "\n" - "| Mode | Writes `+N` at fine level? | Writes `-N` at coarse level? 
|\n" - "|------|----------------------------|------------------------------|\n" - "| `none` | no | no |\n" - "| `implicit` | yes | no |\n" - "| `explicit` | yes | yes |\n" - "\n" - "`implicit` saves storage; the `-N` direction is reconstructed by reading the `+N` array " - "at the target level and swapping endpoints. `explicit` materialises both, paying disk " - "for O(1) reads in both directions.\n" - "\n" - "Let's rebuild against fresh stores to compare on-disk footprints." -) - -code( - "import shutil\n" - "\n" - "def _build_one(name, *, depth, storage):\n" - " path = os.path.join(_tmpdir, f\"{name}.zarrvectors\")\n" - " if os.path.exists(path):\n" - " shutil.rmtree(path)\n" - " write_graph(\n" - " path,\n" - " positions=positions,\n" - " edges=edges,\n" - " object_ids=np.zeros(N, dtype=np.int64),\n" - " chunk_shape=(100.0, 100.0, 100.0),\n" - " bin_shape=(25.0, 25.0, 25.0),\n" - " edge_attributes={\"weight\": edge_weights},\n" - " )\n" - " build_pyramid(\n" - " path, factors=[(2.0, 1.0), (2.0, 1.0)],\n" - " cross_level_depth=depth, cross_level_storage=storage,\n" - " )\n" - " return path\n" - "\n" - "def _scan(path):\n" - " r = open_store(path)\n" - " out = []\n" - " for lvl in sorted(list_resolution_levels(r)):\n" - " lg = get_resolution_level(r, lvl)\n" - " out.append((lvl, list_link_deltas(lg), list_cross_link_deltas(lg)))\n" - " return out\n" - "\n" - "for mode in (XLEVEL_NONE, XLEVEL_IMPLICIT, XLEVEL_EXPLICIT):\n" - " p = _build_one(f\"graph_{mode}\", depth=1, storage=mode)\n" - " print(f\"\\n--- cross_level_storage = {mode!r} ---\")\n" - " for lvl, ld, cd in _scan(p):\n" - " print(f\" resolution_{lvl}: links/={ld} cross_chunk_links/={cd}\")" -) - -md( - "## 9 · Depth knob: `cross_level_depth=2`\n" - "\n" - "`cross_level_depth` controls how far the cross-level emission reaches:\n" - "\n" - "- `0` — disabled (same as `storage=\"none\"`).\n" - "- `N` — materialise up to `±N` for every adjacent pair we can reach.\n" - "- `-1` — walk *all* available pyramid 
levels.\n" - "\n" - "At `depth=2` the writer composes the fine→parent map across two coarsening " - "steps (`grandparent[i] = parent_at_L1[parent_at_L0[i]]`) so a single edge " - "goes from a level-0 vertex straight to its level-2 metanode." -) - -code( - "p2 = _build_one(\"graph_depth2\", depth=2, storage=XLEVEL_EXPLICIT)\n" - "print(\"depth=2, explicit:\")\n" - "for lvl, ld, cd in _scan(p2):\n" - " print(f\" resolution_{lvl}: links/={ld} cross_chunk_links/={cd}\")" -) - -md( - "Expected (with a 3-level pyramid):\n" - "\n" - "- Level 0 → `+1`, `+2`\n" - "- Level 1 → `-1`, `+1`\n" - "- Level 2 → `-1`, `-2`\n" - "\n" - "Plus `0` everywhere from the original `write_graph` call." -) - -md( - "## 10 · Validate\n" - "\n" - "The validator walks each `` subdir under `links/` and `cross_chunk_links/` and " - "checks that endpoint chunk keys are present in the level's chunk grid." -) - -code( - "from zarr_vectors.validate import validate\n" - "\n" - "rv = validate(STORE, level=3)\n" - "print(rv.summary())" -) - -md( - "## Summary\n" - "\n" - "| Concept | API |\n" - "|---------|-----|\n" - "| Pyramid with cross-level edges | `build_pyramid(path, cross_level_depth=N, cross_level_storage=\"explicit\")` |\n" - "| Compose paths | `links_path(delta)`, `cross_chunk_links_path(delta)`, `link_attributes_path(name, delta)`, `cross_chunk_link_attributes_path(name, delta)` |\n" - "| List deltas on disk | `list_link_deltas(level)`, `list_cross_link_deltas(level)` |\n" - "| Read intra-level edges | `read_chunk_links(level, chunk, delta=0)` |\n" - "| Read cross-level edges | `read_chunk_links(level, chunk, delta=+1)` |\n" - "| Read cross-chunk links | `read_cross_chunk_links(level, delta=±N)` |\n" - "| Write/read cross-chunk-link attrs | `write_cross_chunk_link_attributes(level, name, values, num_links, delta)` / `read_cross_chunk_link_attributes(level, name, delta)` |\n" - "\n" - "Endpoint convention recap:\n" - "\n" - "- `links//` rows: column 0 = source-level local index, " - "column 1 
= local index in the **same chunk key** at level `+delta`.\n" - "- `cross_chunk_links//data` rows: " - "`((src_chunk, src_local), (tgt_chunk, tgt_local))` — `src_*` at the owning level, " - "`tgt_*` at level `+delta`.\n" - "\n" - "See `docs/multiscale-links.md` (or the plan notes in the repo) for the design rationale " - "and the schema-0.4 breaking change details." -) - - -# =================================================================== -# Build the JSON -# =================================================================== - -def _to_source(text: str) -> list[str]: - """Match the multi-line `source` list shape Jupyter writes.""" - lines = text.splitlines(keepends=True) - if not lines: - return [""] - # Jupyter convention: no trailing newline on the last entry. - if lines[-1].endswith("\n"): - lines[-1] = lines[-1].rstrip("\n") - return lines - - -def _cell_id() -> str: - return uuid.uuid4().hex[:8] - - -def _build() -> dict: - cells = [] - for kind, text in CELLS: - if kind == "markdown": - cells.append({ - "cell_type": "markdown", - "id": _cell_id(), - "metadata": {}, - "source": _to_source(text), - }) - else: - cells.append({ - "cell_type": "code", - "execution_count": None, - "id": _cell_id(), - "metadata": {}, - "outputs": [], - "source": _to_source(text), - }) - return { - "cells": cells, - "metadata": { - "kernelspec": { - "display_name": "zarr-vectors", - "language": "python", - "name": "python3", - }, - "language_info": { - "codemirror_mode": {"name": "ipython", "version": 3}, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.15", - }, - }, - "nbformat": 4, - "nbformat_minor": 5, - } - - -if __name__ == "__main__": - NB_PATH.write_text( - json.dumps(_build(), indent=1, ensure_ascii=False) + "\n", - encoding="utf-8", - ) - print(f"wrote {NB_PATH.name} ({NB_PATH.stat().st_size:,} bytes)") diff --git a/schema/reference.md 
b/schema/reference.md index d054562..d3baed7 100644 --- a/schema/reference.md +++ b/schema/reference.md @@ -16,7 +16,6 @@ Name: zarr_vectors | [AttributeMeta](AttributeMeta.md) | `` | | [Axis](Axis.md) | One axis of the spatial index | | [BoundingBox](BoundingBox.md) | Two parallel ``ndim``-length arrays representing the global ``(min_corner, ma... | -| [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | `` | | [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | | [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | | [CRS](CRS.md) | Free-form coordinate reference system metadata | @@ -25,10 +24,8 @@ Name: zarr_vectors | [LevelMetadata](LevelMetadata.md) | Per-resolution-level `` | | [LinkAttributeMeta](LinkAttributeMeta.md) | `` | | [LinksMeta](LinksMeta.md) | `` | -| [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | `` | | [ObjectAttributeMeta](ObjectAttributeMeta.md) | `` | | [ObjectIndexMeta](ObjectIndexMeta.md) | `` | -| [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | `` | | [RootMetadata](RootMetadata.md) | Root-level `` | | [VerticesMeta](VerticesMeta.md) | `` | @@ -40,7 +37,6 @@ Name: zarr_vectors | --- | --- | | [arrays_present](arrays_present.md) | Names of arrays present in the level group | | [base_bin_shape](base_bin_shape.md) | Supervoxel bin edge lengths at level 0 | -| [batch_id](batch_id.md) | Monotonic batch id for an ``object_index/pending/`` sidecar | | [bin_ratio](bin_ratio.md) | Integer fold-change per axis relative to level 0 | | [bin_shape](bin_shape.md) | Per-axis supervoxel edge lengths at this level | | [bounds](bounds.md) | Global vertex bounding box | @@ -61,21 +57,18 @@ Name: zarr_vectors | [inherited_num_objects](inherited_num_objects.md) | OID-space size inherited from the parent level (= ``parent_level | | [level](level.md) | Resolution level index (0 = full resolution) | | [level_delta](level_delta.md) | Pyramid-level delta between the source side (the level that owns this array) ... 
| -| [link_width](link_width.md) | Width of a links row (2 for edges, 3 or 4 for face rows) | +| [link_width](link_width.md) | Width of a links row (1 for parent→child metanode references, 2 for edges, 3 ... | | [links_convention](links_convention.md) | | | [max_corner](max_corner.md) | Per-axis maxima | | [min_corner](min_corner.md) | Per-axis minima | | [name](name.md) | NGFF axis or attribute name (e | -| [num_faces](num_faces.md) | Total cross-chunk face count | | [num_groups](num_groups.md) | Total grouping count | | [num_links](num_links.md) | Total cross-chunk link count | -| [num_metanodes](num_metanodes.md) | Total metanode count in a coarsening sidecar | | [num_objects](num_objects.md) | Total object count this array carries | | [object_index_convention](object_index_convention.md) | | | [object_sparsity](object_sparsity.md) | Fraction of objects retained at this level | | [parent_level](parent_level.md) | Source level index (None for level 0) | | [preserves_object_ids](preserves_object_ids.md) | True for levels written by the per-object pyramid regime | -| [record_size](record_size.md) | Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 int64s) | | [reduction_factor](reduction_factor.md) | Multi-resolution coarsening factor (≥ 2) | | [shape](shape.md) | Shape of a dense per-object/per-group array | | [shared_vertex_groups](shared_vertex_groups.md) | True when per-chunk vertex groups may be referenced by multiple objects' mani... | @@ -94,7 +87,7 @@ Name: zarr_vectors | [CrossChunkStrategy](CrossChunkStrategy.md) | How connectivity that crosses chunk boundaries is represented | | [CrossLevelStorage](CrossLevelStorage.md) | How cross-pyramid-level edges are stored in the multiscale links layout (``li... 
| | [Encoding](Encoding.md) | Per-array encoding of vertex data | -| [FormatCapability](FormatCapability.md) | Optional 0 | +| [FormatCapability](FormatCapability.md) | Optional feature tokens a store advertises in :attr:`RootMetadata | | [GeometryType](GeometryType.md) | The kind of geometry a store (or one of its sub-types) holds | | [LinksConvention](LinksConvention.md) | How intra-chunk links are represented for a polyline/graph/mesh | | [ObjectIndexConvention](ObjectIndexConvention.md) | How the object_id → vertex-group mapping is encoded | @@ -135,7 +128,10 @@ Name: zarr_vectors --- - +--- +search: + boost: 5.0 +--- # Slot: arrays_present @@ -144,11 +140,11 @@ _Names of arrays present in the level group._ +
-URI: [zv:arrays_present](https://w3id.org/zarr-vectors/schema/0.5/arrays_present) -Alias: arrays_present +URI: [zv:arrays_present](https://w3id.org/zarr-vectors/schema/0.5/arrays_present) @@ -223,7 +219,6 @@ name: arrays_present description: Names of arrays present in the level group. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: arrays_present domain_of: - LevelMetadata range: string @@ -231,12 +226,15 @@ required: true multivalued: true ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: AttributeMeta @@ -245,6 +243,8 @@ _``.zattrs`` for each ``attributes//`` array._ +
+ URI: [zv:AttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/AttributeMeta) @@ -378,22 +378,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: AttributeMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: attribute @@ -403,7 +399,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name - alias: name owner: AttributeMeta domain_of: - Axis @@ -419,7 +414,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: AttributeMeta domain_of: - VerticesMeta @@ -436,7 +430,6 @@ attributes: description: For multi-channel per-vertex attributes, the channel labels. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: channel_names owner: AttributeMeta domain_of: - AttributeMeta @@ -444,12 +437,15 @@ attributes: multivalued: true ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: Axis @@ -460,6 +456,8 @@ __ +
+ URI: [ngff:Axis](https://ngff.openmicroscopy.org/0.4/Axis) @@ -581,7 +579,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name - alias: name owner: Axis domain_of: - Axis @@ -597,7 +594,6 @@ attributes: description: NGFF axis type — "space", "time", or "channel". from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: type owner: Axis domain_of: - Axis @@ -608,7 +604,6 @@ attributes: description: NGFF unit string (e.g. "um", "nanometer", "second"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: unit owner: Axis domain_of: - Axis @@ -616,12 +611,15 @@ attributes: class_uri: ngff:Axis ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: base_bin_shape @@ -632,11 +630,11 @@ __ +
-URI: [zv:base_bin_shape](https://w3id.org/zarr-vectors/schema/0.5/base_bin_shape) -Alias: base_bin_shape +URI: [zv:base_bin_shape](https://w3id.org/zarr-vectors/schema/0.5/base_bin_shape) @@ -714,126 +712,21 @@ description: 'Supervoxel bin edge lengths at level 0. When set, every value mus ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: base_bin_shape domain_of: - RootMetadata range: float multivalued: true ``` - +
--- - - -# Slot: batch_id - - -_Monotonic batch id for an ``object_index/pending/`` sidecar._ - - - - - -URI: [zv:batch_id](https://w3id.org/zarr-vectors/schema/0.5/batch_id) -Alias: batch_id - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | `` | no | - - - - - - -## Properties - -### Type and Range - -| Property | Value | -| --- | --- | -| Range | [Integer](Integer.md) | -| Domain Of | [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | - -### Cardinality and Requirements - -| Property | Value | -| --- | --- | -| Required | Yes | -### Value Constraints - -| Property | Value | -| --- | --- | -| Minimum Value | 0 | - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:batch_id | -| native | zv:batch_id | - - - - -## LinkML Source - -
-```yaml -name: batch_id -description: Monotonic batch id for an ``object_index/pending/`` sidecar. -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -alias: batch_id -domain_of: -- ObjectIndexPendingMeta -range: integer -required: true -minimum_value: 0 - -``` -
- - --- - - +search: + boost: 5.0 +--- # Slot: bin_ratio @@ -842,11 +735,11 @@ _Integer fold-change per axis relative to level 0._ +
-URI: [zv:bin_ratio](https://w3id.org/zarr-vectors/schema/0.5/bin_ratio) -Alias: bin_ratio +URI: [zv:bin_ratio](https://w3id.org/zarr-vectors/schema/0.5/bin_ratio) @@ -920,19 +813,21 @@ name: bin_ratio description: Integer fold-change per axis relative to level 0. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: bin_ratio domain_of: - LevelMetadata range: integer multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: bin_shape @@ -943,11 +838,11 @@ __ +
-URI: [zv:bin_shape](https://w3id.org/zarr-vectors/schema/0.5/bin_shape) -Alias: bin_shape +URI: [zv:bin_shape](https://w3id.org/zarr-vectors/schema/0.5/bin_shape) @@ -1024,19 +919,21 @@ description: 'Per-axis supervoxel edge lengths at this level. Must be ``None`` ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: bin_shape domain_of: - LevelMetadata range: float multivalued: true ``` - +
--- -# Type: Boolean +--- +search: + boost: 1.0 +---# Type: Boolean @@ -1045,6 +942,8 @@ _A binary (true or false) value_ +
+ URI: [xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) ## Type Properties @@ -1091,10 +990,15 @@ URI: [xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) +
---- +--- +--- +search: + boost: 10.0 +--- # Class: BoundingBox @@ -1105,6 +1009,8 @@ __ +
+ URI: [zv:BoundingBox](https://w3id.org/zarr-vectors/schema/0.5/BoundingBox) @@ -1222,7 +1128,6 @@ attributes: description: Per-axis minima. Length must equal ``len(spatial_index_dims)``. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: min_corner owner: BoundingBox domain_of: - BoundingBox @@ -1234,7 +1139,6 @@ attributes: description: Per-axis maxima. Length must equal ``len(spatial_index_dims)``. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: max_corner owner: BoundingBox domain_of: - BoundingBox @@ -1243,12 +1147,15 @@ attributes: multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: bounds @@ -1257,11 +1164,11 @@ _Global vertex bounding box._ +
-URI: [zv:bounds](https://w3id.org/zarr-vectors/schema/0.5/bounds) -Alias: bounds +URI: [zv:bounds](https://w3id.org/zarr-vectors/schema/0.5/bounds) @@ -1335,19 +1242,21 @@ name: bounds description: Global vertex bounding box. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: bounds domain_of: - RootMetadata range: BoundingBox required: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: channel_names @@ -1356,11 +1265,11 @@ _For multi-channel per-vertex attributes, the channel labels._ +
-URI: [zv:channel_names](https://w3id.org/zarr-vectors/schema/0.5/channel_names) -Alias: channel_names +URI: [zv:channel_names](https://w3id.org/zarr-vectors/schema/0.5/channel_names) @@ -1434,19 +1343,21 @@ name: channel_names description: For multi-channel per-vertex attributes, the channel labels. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: channel_names domain_of: - AttributeMeta range: string multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: chunk_attribute_name @@ -1455,11 +1366,11 @@ _Name of the per-vertex attribute used as the leading chunk axis._ +
-URI: [zv:chunk_attribute_name](https://w3id.org/zarr-vectors/schema/0.5/chunk_attribute_name) -Alias: chunk_attribute_name +URI: [zv:chunk_attribute_name](https://w3id.org/zarr-vectors/schema/0.5/chunk_attribute_name) @@ -1532,18 +1443,20 @@ name: chunk_attribute_name description: Name of the per-vertex attribute used as the leading chunk axis. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: chunk_attribute_name domain_of: - LevelMetadata range: string ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: chunk_attribute_values @@ -1554,11 +1467,11 @@ __ +
-URI: [zv:chunk_attribute_values](https://w3id.org/zarr-vectors/schema/0.5/chunk_attribute_values) -Alias: chunk_attribute_values +URI: [zv:chunk_attribute_values](https://w3id.org/zarr-vectors/schema/0.5/chunk_attribute_values) @@ -1636,19 +1549,21 @@ description: 'Ordered list mapping attribute-bin index to original attribute val ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: chunk_attribute_values domain_of: - LevelMetadata range: string multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: chunk_dims @@ -1659,11 +1574,11 @@ __ +
-URI: [zv:chunk_dims](https://w3id.org/zarr-vectors/schema/0.5/chunk_dims) -Alias: chunk_dims +URI: [zv:chunk_dims](https://w3id.org/zarr-vectors/schema/0.5/chunk_dims) @@ -1740,19 +1655,21 @@ description: 'Chunk-key axis names; the leading axis names appear first. Set wh ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: chunk_dims domain_of: - LevelMetadata range: string multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: chunk_shape @@ -1761,11 +1678,11 @@ _Physical spatial chunk size per axis (all values > 0)._ +
-URI: [zv:chunk_shape](https://w3id.org/zarr-vectors/schema/0.5/chunk_shape) -Alias: chunk_shape +URI: [zv:chunk_shape](https://w3id.org/zarr-vectors/schema/0.5/chunk_shape) @@ -1840,7 +1757,6 @@ name: chunk_shape description: Physical spatial chunk size per axis (all values > 0). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: chunk_shape domain_of: - RootMetadata range: float @@ -1848,12 +1764,15 @@ required: true multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: coarsening_method @@ -1862,11 +1781,11 @@ _How this level was generated (e.g. "grid_metanode")._ +
-URI: [zv:coarsening_method](https://w3id.org/zarr-vectors/schema/0.5/coarsening_method) -Alias: coarsening_method +URI: [zv:coarsening_method](https://w3id.org/zarr-vectors/schema/0.5/coarsening_method) @@ -1939,26 +1858,28 @@ name: coarsening_method description: How this level was generated (e.g. "grid_metanode"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: coarsening_method domain_of: - LevelMetadata range: string ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: cross_chunk_strategy +
-URI: [zv:cross_chunk_strategy](https://w3id.org/zarr-vectors/schema/0.5/cross_chunk_strategy) -Alias: cross_chunk_strategy +URI: [zv:cross_chunk_strategy](https://w3id.org/zarr-vectors/schema/0.5/cross_chunk_strategy) @@ -2030,18 +1951,20 @@ Alias: cross_chunk_strategy name: cross_chunk_strategy from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: cross_chunk_strategy domain_of: - RootMetadata range: CrossChunkStrategy ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: cross_level_depth @@ -2052,11 +1975,11 @@ __ +
-URI: [zv:cross_level_depth](https://w3id.org/zarr-vectors/schema/0.5/cross_level_depth) -Alias: cross_level_depth +URI: [zv:cross_level_depth](https://w3id.org/zarr-vectors/schema/0.5/cross_level_depth) @@ -2141,19 +2064,21 @@ description: 'Maximum absolute level delta for which cross-pyramid-level link ar ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: cross_level_depth domain_of: - RootMetadata range: integer minimum_value: -1 ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: cross_level_storage @@ -2164,11 +2089,11 @@ __ +
-URI: [zv:cross_level_storage](https://w3id.org/zarr-vectors/schema/0.5/cross_level_storage) -Alias: cross_level_storage +URI: [zv:cross_level_storage](https://w3id.org/zarr-vectors/schema/0.5/cross_level_storage) @@ -2245,29 +2170,35 @@ description: 'Whether cross-level link arrays are written in both directions (`` ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: cross_level_storage domain_of: - RootMetadata range: CrossLevelStorage ``` - +
+ +--- --- +search: + boost: 10.0 +--- +# Class: CrossChunkLinkAttributeMeta -# Class: CrossChunkFacesMeta +_``.zattrs`` for each ``cross_chunk_link_attributes///`` array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` array; ``num_links`` MUST equal the parallel CCL array's ``num_links``._ +__ -_``.zattrs`` for ``cross_chunk_faces/`` (0.3 capability ``cross_chunk_faces``)._ +
-URI: [zv:CrossChunkFacesMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChunkFacesMeta) +URI: [zv:CrossChunkLinkAttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChunkLinkAttributeMeta) @@ -2275,21 +2206,23 @@ URI: [zv:CrossChunkFacesMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun ```mermaid classDiagram - class CrossChunkFacesMeta - click CrossChunkFacesMeta href "../CrossChunkFacesMeta/" - CrossChunkFacesMeta : num_faces + class CrossChunkLinkAttributeMeta + click CrossChunkLinkAttributeMeta href "../CrossChunkLinkAttributeMeta/" + CrossChunkLinkAttributeMeta : dtype + + CrossChunkLinkAttributeMeta : level_delta - CrossChunkFacesMeta : record_size + CrossChunkLinkAttributeMeta : name - CrossChunkFacesMeta : sid_ndim + CrossChunkLinkAttributeMeta : num_links - CrossChunkFacesMeta : zv_array + CrossChunkLinkAttributeMeta : zv_array - CrossChunkFacesMeta --> "1" ZvArrayTag : zv_array + CrossChunkLinkAttributeMeta --> "1" ZvArrayTag : zv_array click ZvArrayTag href "../ZvArrayTag/" @@ -2307,9 +2240,10 @@ URI: [zv:CrossChunkFacesMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | | [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | -| [num_faces](num_faces.md) | 1
[Integer](Integer.md) | Total cross-chunk face count | direct | -| [sid_ndim](sid_ndim.md) | 1
[Integer](Integer.md) | Number of spatial-index dimensions encoded in chunk keys | direct | -| [record_size](record_size.md) | 1
[Integer](Integer.md) | Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 int64s) | direct | +| [name](name.md) | 1
[String](String.md) | NGFF axis or attribute name (e.g. "x", "intensity") | direct | +| [dtype](dtype.md) | 1
[String](String.md) | Numpy dtype string of the array's value type (e.g. "float32") | direct | +| [level_delta](level_delta.md) | 1
[Integer](Integer.md) | Pyramid-level delta between the source side (the level that owns this array) ... | direct | +| [num_links](num_links.md) | 1
[Integer](Integer.md) | Total cross-chunk link count | direct | @@ -2343,8 +2277,8 @@ URI: [zv:CrossChunkFacesMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun | Mapping Type | Mapped Value | | --- | --- | -| self | zv:CrossChunkFacesMeta | -| native | zv:CrossChunkFacesMeta | +| self | zv:CrossChunkLinkAttributeMeta | +| native | zv:CrossChunkLinkAttributeMeta | @@ -2357,20 +2291,25 @@ URI: [zv:CrossChunkFacesMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun
```yaml -name: CrossChunkFacesMeta -description: '``.zattrs`` for ``cross_chunk_faces/`` (0.3 capability ``cross_chunk_faces``).' +name: CrossChunkLinkAttributeMeta +description: '``.zattrs`` for each ``cross_chunk_link_attributes///`` + array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` + array; ``num_links`` MUST equal the parallel CCL array''s ``num_links``. + + ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slots: - zv_array -- num_faces -- sid_ndim -- record_size +- name +- dtype +- level_delta +- num_links slot_usage: zv_array: name: zv_array required: true - equals_string: cross_chunk_faces + equals_string: cross_chunk_link_attribute ```
@@ -2379,15 +2318,19 @@ slot_usage:
```yaml -name: CrossChunkFacesMeta -description: '``.zattrs`` for ``cross_chunk_faces/`` (0.3 capability ``cross_chunk_faces``).' +name: CrossChunkLinkAttributeMeta +description: '``.zattrs`` for each ``cross_chunk_link_attributes///`` + array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` + array; ``num_links`` MUST equal the parallel CCL array''s ``num_links``. + + ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_usage: zv_array: name: zv_array required: true - equals_string: cross_chunk_faces + equals_string: cross_chunk_link_attribute attributes: zv_array: name: zv_array @@ -2398,273 +2341,42 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array - owner: CrossChunkFacesMeta + owner: CrossChunkLinkAttributeMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true - equals_string: cross_chunk_faces - num_faces: - name: num_faces - description: Total cross-chunk face count. + equals_string: cross_chunk_link_attribute + name: + name: name + description: NGFF axis or attribute name (e.g. "x", "intensity"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: num_faces - owner: CrossChunkFacesMeta + slot_uri: schema:name + owner: CrossChunkLinkAttributeMeta domain_of: - - CrossChunkFacesMeta - range: integer - required: true - minimum_value: 0 - sid_ndim: - name: sid_ndim - description: Number of spatial-index dimensions encoded in chunk keys. 
- from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: sid_ndim - owner: CrossChunkFacesMeta - domain_of: - - ObjectIndexMeta - - ObjectIndexPendingMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - MetanodeChildrenMeta - range: integer - required: true - minimum_value: 1 - record_size: - name: record_size - description: 'Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + - 2 int64s). - - ' - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: record_size - owner: CrossChunkFacesMeta - domain_of: - - CrossChunkFacesMeta - range: integer - required: true - minimum_value: 2 - -``` -
- - ---- - - - -# Class: CrossChunkLinkAttributeMeta - - -_``.zattrs`` for each ``cross_chunk_link_attributes///`` array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` array; ``num_links`` MUST equal the parallel CCL array's ``num_links``._ - -__ - - - - - -URI: [zv:CrossChunkLinkAttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChunkLinkAttributeMeta) - - - - - -```mermaid - classDiagram - class CrossChunkLinkAttributeMeta - click CrossChunkLinkAttributeMeta href "../CrossChunkLinkAttributeMeta/" - CrossChunkLinkAttributeMeta : dtype - - CrossChunkLinkAttributeMeta : level_delta - - CrossChunkLinkAttributeMeta : name - - CrossChunkLinkAttributeMeta : num_links - - CrossChunkLinkAttributeMeta : zv_array - - - - - - CrossChunkLinkAttributeMeta --> "1" ZvArrayTag : zv_array - click ZvArrayTag href "../ZvArrayTag/" - - - - -``` - - - - - - -## Slots - -| Name | Cardinality and Range | Description | Inheritance | -| --- | --- | --- | --- | -| [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | -| [name](name.md) | 1
[String](String.md) | NGFF axis or attribute name (e | direct | -| [dtype](dtype.md) | 1
[String](String.md) | Numpy dtype string of the array's value type (e | direct | -| [level_delta](level_delta.md) | 1
[Integer](Integer.md) | Pyramid-level delta between the source side (the level that owns this array) ... | direct | -| [num_links](num_links.md) | 1
[Integer](Integer.md) | Total cross-chunk link count | direct | - - - - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:CrossChunkLinkAttributeMeta | -| native | zv:CrossChunkLinkAttributeMeta | - - - - - - -## LinkML Source - -### Direct - -
-```yaml -name: CrossChunkLinkAttributeMeta -description: '``.zattrs`` for each ``cross_chunk_link_attributes///`` - array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` - array; ``num_links`` MUST equal the parallel CCL array''s ``num_links``. - - ' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -slots: -- zv_array -- name -- dtype -- level_delta -- num_links -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: cross_chunk_link_attribute - -``` -
- -### Induced - -
-```yaml -name: CrossChunkLinkAttributeMeta -description: '``.zattrs`` for each ``cross_chunk_link_attributes///`` - array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` - array; ``num_links`` MUST equal the parallel CCL array''s ``num_links``. - - ' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: cross_chunk_link_attribute -attributes: - zv_array: - name: zv_array - description: 'Discriminator slot identifying the kind of per-array ``.zattrs`` - block. Each writer in ``core/arrays.py`` stamps the corresponding token from - :class:`ZvArrayTag`. - - ' - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: zv_array - owner: CrossChunkLinkAttributeMeta - domain_of: - - VerticesMeta - - LinksMeta - - AttributeMeta - - ObjectIndexMeta - - ObjectIndexPendingMeta - - ObjectAttributeMeta - - GroupingsMeta - - GroupingsAttributeMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - LinkAttributeMeta - - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta - range: ZvArrayTag - required: true - equals_string: cross_chunk_link_attribute - name: - name: name - description: NGFF axis or attribute name (e.g. "x", "intensity"). - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - slot_uri: schema:name - alias: name - owner: CrossChunkLinkAttributeMeta - domain_of: - - Axis - - AttributeMeta - - ObjectAttributeMeta - - GroupingsAttributeMeta - - LinkAttributeMeta - - CrossChunkLinkAttributeMeta - range: string + - Axis + - AttributeMeta + - ObjectAttributeMeta + - GroupingsAttributeMeta + - LinkAttributeMeta + - CrossChunkLinkAttributeMeta + range: string required: true dtype: name: dtype description: Numpy dtype string of the array's value type (e.g. "float32"). 
from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: CrossChunkLinkAttributeMeta domain_of: - VerticesMeta @@ -2686,7 +2398,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: level_delta owner: CrossChunkLinkAttributeMeta domain_of: - LinksMeta @@ -2700,7 +2411,6 @@ attributes: description: Total cross-chunk link count. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: num_links owner: CrossChunkLinkAttributeMeta domain_of: - CrossChunkLinksMeta @@ -2710,22 +2420,27 @@ attributes: minimum_value: 0 ``` -
+
--- - +--- +search: + boost: 10.0 +--- # Class: CrossChunkLinksMeta -_``.zattrs`` for a ``cross_chunk_links//`` array. Under the 0.4 multiscale layout, each delta segment carries its own meta block; ``level_delta=0`` is the intra-level array. Source-side endpoints live at the array's own resolution level; target-side endpoints live at ``this_level + level_delta``._ +_``.zattrs`` for a ``cross_chunk_links//`` array. Each delta segment carries its own meta block; ``level_delta=0`` is the intra-level array. Each record is ``link_width`` ``(chunk_coords, vertex_idx)`` endpoints — ``link_width=2`` encodes a cross-chunk edge, ``link_width=3`` a triangle face record, and so on. Source-side endpoint (endpoint 0) lives at the array's own resolution level; target-side endpoints live at ``this_level + level_delta``._ __ +
+ URI: [zv:CrossChunkLinksMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChunkLinksMeta) @@ -2740,6 +2455,8 @@ URI: [zv:CrossChunkLinksMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun click CrossChunkLinksMeta href "../CrossChunkLinksMeta/" CrossChunkLinksMeta : level_delta + CrossChunkLinksMeta : link_width + CrossChunkLinksMeta : num_links CrossChunkLinksMeta : sid_ndim @@ -2771,6 +2488,7 @@ URI: [zv:CrossChunkLinksMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun | [num_links](num_links.md) | 1
[Integer](Integer.md) | Total cross-chunk link count | direct | | [sid_ndim](sid_ndim.md) | 1
[Integer](Integer.md) | Number of spatial-index dimensions encoded in chunk keys | direct | | [level_delta](level_delta.md) | 1
[Integer](Integer.md) | Pyramid-level delta between the source side (the level that owns this array) ... | direct | +| [link_width](link_width.md) | 1
[Integer](Integer.md) | Width of a links row (1 for parent→child metanode references, 2 for edges, 3 ... | direct | @@ -2819,10 +2537,12 @@ URI: [zv:CrossChunkLinksMeta](https://w3id.org/zarr-vectors/schema/0.5/CrossChun
```yaml name: CrossChunkLinksMeta -description: '``.zattrs`` for a ``cross_chunk_links//`` array. Under the 0.4 - multiscale layout, each delta segment carries its own meta block; ``level_delta=0`` - is the intra-level array. Source-side endpoints live at the array''s own resolution - level; target-side endpoints live at ``this_level + level_delta``. +description: '``.zattrs`` for a ``cross_chunk_links//`` array. Each delta + segment carries its own meta block; ``level_delta=0`` is the intra-level array. Each + record is ``link_width`` ``(chunk_coords, vertex_idx)`` endpoints — ``link_width=2`` + encodes a cross-chunk edge, ``link_width=3`` a triangle face record, and so on. Source-side + endpoint (endpoint 0) lives at the array''s own resolution level; target-side endpoints + live at ``this_level + level_delta``. ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 @@ -2832,6 +2552,7 @@ slots: - num_links - sid_ndim - level_delta +- link_width slot_usage: zv_array: name: zv_array @@ -2846,10 +2567,12 @@ slot_usage:
```yaml name: CrossChunkLinksMeta -description: '``.zattrs`` for a ``cross_chunk_links//`` array. Under the 0.4 - multiscale layout, each delta segment carries its own meta block; ``level_delta=0`` - is the intra-level array. Source-side endpoints live at the array''s own resolution - level; target-side endpoints live at ``this_level + level_delta``. +description: '``.zattrs`` for a ``cross_chunk_links//`` array. Each delta + segment carries its own meta block; ``level_delta=0`` is the intra-level array. Each + record is ``link_width`` ``(chunk_coords, vertex_idx)`` endpoints — ``link_width=2`` + encodes a cross-chunk edge, ``link_width=3`` a triangle face record, and so on. Source-side + endpoint (endpoint 0) lives at the array''s own resolution level; target-side endpoints + live at ``this_level + level_delta``. ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 @@ -2869,22 +2592,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: CrossChunkLinksMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: cross_chunk_links @@ -2893,7 +2612,6 @@ attributes: description: Total cross-chunk link count. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: num_links owner: CrossChunkLinksMeta domain_of: - CrossChunkLinksMeta @@ -2906,14 +2624,10 @@ attributes: description: Number of spatial-index dimensions encoded in chunk keys. 
from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: sid_ndim owner: CrossChunkLinksMeta domain_of: - ObjectIndexMeta - - ObjectIndexPendingMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - - MetanodeChildrenMeta range: integer required: true minimum_value: 1 @@ -2927,7 +2641,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: level_delta owner: CrossChunkLinksMeta domain_of: - LinksMeta @@ -2936,12 +2649,33 @@ attributes: - CrossChunkLinkAttributeMeta range: integer required: true + link_width: + name: link_width + description: 'Width of a links row (1 for parent→child metanode references, 2 + for edges, 3 for triangle faces, 4 for quads). + + ' + from_schema: https://w3id.org/zarr-vectors/schema/0.5 + rank: 1000 + owner: CrossChunkLinksMeta + domain_of: + - LinksMeta + - CrossChunkLinksMeta + range: integer + required: true + minimum_value: 1 ``` -
+
+ +--- --- +search: + boost: 2.0 +--- + # Enum: CrossChunkStrategy @@ -2952,6 +2686,8 @@ _How connectivity that crosses chunk boundaries is represented._ +
+ URI: [zv:CrossChunkStrategy](https://w3id.org/zarr-vectors/schema/0.5/CrossChunkStrategy) ## Permissible Values @@ -3017,9 +2753,17 @@ permissible_values: ``` +
+ +--- + +--- +search: + boost: 2.0 --- + # Enum: CrossLevelStorage @@ -3031,6 +2775,8 @@ __ +
+ URI: [zv:CrossLevelStorage](https://w3id.org/zarr-vectors/schema/0.5/CrossLevelStorage) ## Permissible Values @@ -3106,10 +2852,15 @@ permissible_values: ``` +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: crs @@ -3120,11 +2871,11 @@ __ +
-URI: [schema:coordinateReferenceSystem](http://schema.org/coordinateReferenceSystem) -Alias: crs +URI: [schema:coordinateReferenceSystem](http://schema.org/coordinateReferenceSystem) @@ -3202,19 +2953,21 @@ description: 'Optional coordinate reference system metadata (free-form dict matc from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:coordinateReferenceSystem -alias: crs domain_of: - RootMetadata range: CRS inlined: true ``` - +
--- -# Type: Curie +--- +search: + boost: 1.0 +---# Type: Curie @@ -3223,6 +2976,8 @@ _a compact URI_ +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -3273,10 +3028,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
+ --- -# Type: Date +--- +search: + boost: 1.0 +---# Type: Date @@ -3285,6 +3045,8 @@ _a date (year, month and day) in an idealized calendar_ +
+ URI: [xsd:date](http://www.w3.org/2001/XMLSchema#date) ## Type Properties @@ -3332,10 +3094,15 @@ URI: [xsd:date](http://www.w3.org/2001/XMLSchema#date) +
+ --- -# Type: DateOrDatetime +--- +search: + boost: 1.0 +---# Type: DateOrDatetime @@ -3344,6 +3111,8 @@ _Either a date or a datetime_ +
+ URI: [linkml:DateOrDatetime](https://w3id.org/linkml/DateOrDatetime) ## Type Properties @@ -3389,10 +3158,15 @@ URI: [linkml:DateOrDatetime](https://w3id.org/linkml/DateOrDatetime) +
+ --- -# Type: Datetime +--- +search: + boost: 1.0 +---# Type: Datetime @@ -3401,6 +3175,8 @@ _The combination of a date and time_ +
+ URI: [xsd:dateTime](http://www.w3.org/2001/XMLSchema#dateTime) ## Type Properties @@ -3447,10 +3223,15 @@ URI: [xsd:dateTime](http://www.w3.org/2001/XMLSchema#dateTime) +
+ --- -# Type: Decimal +--- +search: + boost: 1.0 +---# Type: Decimal @@ -3459,6 +3240,8 @@ _A real number with arbitrary precision that conforms to the xsd:decimal specifi +
+ URI: [xsd:decimal](http://www.w3.org/2001/XMLSchema#decimal) ## Type Properties @@ -3504,10 +3287,15 @@ URI: [xsd:decimal](http://www.w3.org/2001/XMLSchema#decimal) +
+ --- -# Type: Double +--- +search: + boost: 1.0 +---# Type: Double @@ -3516,6 +3304,8 @@ _A real number that conforms to the xsd:double specification_ +
+ URI: [xsd:double](http://www.w3.org/2001/XMLSchema#double) ## Type Properties @@ -3561,10 +3351,15 @@ URI: [xsd:double](http://www.w3.org/2001/XMLSchema#double) +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: dtype @@ -3573,11 +3368,11 @@ _Numpy dtype string of the array's value type (e.g. "float32")._ +
-URI: [zv:dtype](https://w3id.org/zarr-vectors/schema/0.5/dtype) -Alias: dtype +URI: [zv:dtype](https://w3id.org/zarr-vectors/schema/0.5/dtype) @@ -3588,13 +3383,13 @@ Alias: dtype | Name | Description | Modifies Slot | | --- | --- | --- | -| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | no | +| [VerticesMeta](VerticesMeta.md) | `` | no | | [LinksMeta](LinksMeta.md) | `` | no | | [AttributeMeta](AttributeMeta.md) | `` | no | -| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | | [ObjectAttributeMeta](ObjectAttributeMeta.md) | `` | no | | [GroupingsAttributeMeta](GroupingsAttributeMeta.md) | `` | no | -| [VerticesMeta](VerticesMeta.md) | `` | no | +| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | no | +| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | @@ -3657,7 +3452,6 @@ name: dtype description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: dtype domain_of: - VerticesMeta - LinksMeta @@ -3670,10 +3464,16 @@ range: string required: true ``` - +
+ +--- --- +search: + boost: 2.0 +--- + # Enum: Encoding @@ -3684,6 +3484,8 @@ _Per-array encoding of vertex data._ +
+ URI: [zv:Encoding](https://w3id.org/zarr-vectors/schema/0.5/Encoding) ## Permissible Values @@ -3745,10 +3547,15 @@ permissible_values: ``` +
+ --- -# Type: Float +--- +search: + boost: 1.0 +---# Type: Float @@ -3757,6 +3564,8 @@ _A real number that conforms to the xsd:float specification_ +
+ URI: [xsd:float](http://www.w3.org/2001/XMLSchema#float) ## Type Properties @@ -3802,10 +3611,15 @@ URI: [xsd:float](http://www.w3.org/2001/XMLSchema#float) +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: format_capabilities @@ -3814,11 +3628,11 @@ _Optional 0.3+ feature tokens advertised by this store._ +
-URI: [zv:format_capabilities](https://w3id.org/zarr-vectors/schema/0.5/format_capabilities) -Alias: format_capabilities +URI: [zv:format_capabilities](https://w3id.org/zarr-vectors/schema/0.5/format_capabilities) @@ -3892,37 +3706,41 @@ name: format_capabilities description: Optional 0.3+ feature tokens advertised by this store. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: format_capabilities domain_of: - RootMetadata range: FormatCapability multivalued: true ``` - +
--- +--- +search: + boost: 2.0 +--- + + # Enum: FormatCapability -_Optional 0.3+ feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values._ +_Optional feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values._ __ +
+ URI: [zv:FormatCapability](https://w3id.org/zarr-vectors/schema/0.5/FormatCapability) ## Permissible Values | Value | Meaning | Description | | --- | --- | --- | -| cross_chunk_faces | None | Store has the ``cross_chunk_faces`` array for boundary faces | -| vertex_count_cache | None | Per-chunk ``vertex_counts/`` sidecars are present | -| object_index_pending | None | One or more uncompacted ``object_index/pending/`` sidecars exist | | preserved_object_ids | None | At least one level was written with ID-preserving sparsification (``LevelMeta... | | shared_vertex_groups | None | At least one level stores per-chunk vertex groups that may be referenced by m... | | multiscale_links | None | Store uses the 0 | @@ -3966,7 +3784,7 @@ URI: [zv:FormatCapability](https://w3id.org/zarr-vectors/schema/0.5/FormatCapabi
```yaml name: FormatCapability -description: 'Optional 0.3+ feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: +description: 'Optional feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values. @@ -3974,16 +3792,6 @@ description: 'Optional 0.3+ feature tokens a store advertises in :attr:`RootMeta from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 permissible_values: - cross_chunk_faces: - text: cross_chunk_faces - description: Store has the ``cross_chunk_faces`` array for boundary faces. - vertex_count_cache: - text: vertex_count_cache - description: Per-chunk ``vertex_counts/`` sidecars are present. - object_index_pending: - text: object_index_pending - description: One or more uncompacted ``object_index/pending/`` sidecars - exist. preserved_object_ids: text: preserved_object_ids description: 'At least one level was written with ID-preserving sparsification @@ -4009,10 +3817,15 @@ permissible_values: ```
+
---- +--- +--- +search: + boost: 5.0 +--- # Slot: geometry_types @@ -4021,11 +3834,11 @@ _One or more geometry kinds present in the store._ +
-URI: [zv:geometry_types](https://w3id.org/zarr-vectors/schema/0.5/geometry_types) -Alias: geometry_types +URI: [zv:geometry_types](https://w3id.org/zarr-vectors/schema/0.5/geometry_types) @@ -4101,7 +3914,6 @@ name: geometry_types description: One or more geometry kinds present in the store. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: geometry_types domain_of: - RootMetadata range: GeometryType @@ -4110,11 +3922,17 @@ multivalued: true minimum_cardinality: 1 ``` - +
--- +--- +search: + boost: 2.0 +--- + + # Enum: GeometryType @@ -4124,6 +3942,8 @@ _The kind of geometry a store (or one of its sub-types) holds._ +
+ URI: [zv:GeometryType](https://w3id.org/zarr-vectors/schema/0.5/GeometryType) ## Permissible Values @@ -4198,10 +4018,15 @@ permissible_values: ``` +
---- +--- +--- +search: + boost: 10.0 +--- # Class: GroupingsAttributeMeta @@ -4210,6 +4035,8 @@ _``.zattrs`` for each ``groupings_attributes//`` array._ +
+ URI: [zv:GroupingsAttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/GroupingsAttributeMeta) @@ -4343,22 +4170,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: GroupingsAttributeMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: groupings_attribute @@ -4368,7 +4191,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name - alias: name owner: GroupingsAttributeMeta domain_of: - Axis @@ -4384,7 +4206,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: GroupingsAttributeMeta domain_of: - VerticesMeta @@ -4401,7 +4222,6 @@ attributes: description: Shape of a dense per-object/per-group array. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: shape owner: GroupingsAttributeMeta domain_of: - ObjectAttributeMeta @@ -4411,12 +4231,15 @@ attributes: multivalued: true ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: GroupingsMeta @@ -4425,6 +4248,8 @@ _``.zattrs`` for ``groupings/``._ +
+ URI: [zv:GroupingsMeta](https://w3id.org/zarr-vectors/schema/0.5/GroupingsMeta) @@ -4550,22 +4375,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: GroupingsMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: groupings @@ -4574,7 +4395,6 @@ attributes: description: Total grouping count. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: num_groups owner: GroupingsMeta domain_of: - GroupingsMeta @@ -4583,12 +4403,15 @@ attributes: minimum_value: 0 ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: inherited_num_objects @@ -4599,11 +4422,11 @@ __ +
-URI: [zv:inherited_num_objects](https://w3id.org/zarr-vectors/schema/0.5/inherited_num_objects) -Alias: inherited_num_objects +URI: [zv:inherited_num_objects](https://w3id.org/zarr-vectors/schema/0.5/inherited_num_objects) @@ -4686,19 +4509,21 @@ description: 'OID-space size inherited from the parent level (= ``parent_level.n ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: inherited_num_objects domain_of: - LevelMetadata range: integer minimum_value: 0 ``` - +
--- -# Type: Integer +--- +search: + boost: 1.0 +---# Type: Integer @@ -4707,6 +4532,8 @@ _An integer_ +
+ URI: [xsd:integer](http://www.w3.org/2001/XMLSchema#integer) ## Type Properties @@ -4752,10 +4579,15 @@ URI: [xsd:integer](http://www.w3.org/2001/XMLSchema#integer) +
+ --- -# Type: Jsonpath +--- +search: + boost: 1.0 +---# Type: Jsonpath @@ -4764,6 +4596,8 @@ _A string encoding a JSON Path. The value of the string MUST conform to JSON Poi +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -4809,10 +4643,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
+ --- -# Type: Jsonpointer +--- +search: + boost: 1.0 +---# Type: Jsonpointer @@ -4821,6 +4660,8 @@ _A string encoding a JSON Pointer. The value of the string MUST conform to JSON +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -4866,10 +4707,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: level @@ -4878,11 +4724,11 @@ _Resolution level index (0 = full resolution)._ +
-URI: [zv:level](https://w3id.org/zarr-vectors/schema/0.5/level) -Alias: level +URI: [zv:level](https://w3id.org/zarr-vectors/schema/0.5/level) @@ -4963,7 +4809,6 @@ name: level description: Resolution level index (0 = full resolution). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: level domain_of: - LevelMetadata range: integer @@ -4971,12 +4816,15 @@ required: true minimum_value: 0 ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: level_delta @@ -4987,11 +4835,11 @@ __ +
-URI: [zv:level_delta](https://w3id.org/zarr-vectors/schema/0.5/level_delta) -Alias: level_delta +URI: [zv:level_delta](https://w3id.org/zarr-vectors/schema/0.5/level_delta) @@ -5003,9 +4851,9 @@ Alias: level_delta | Name | Description | Modifies Slot | | --- | --- | --- | | [LinksMeta](LinksMeta.md) | `` | no | +| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | | [LinkAttributeMeta](LinkAttributeMeta.md) | `` | no | | [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | -| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | @@ -5073,7 +4921,6 @@ description: 'Pyramid-level delta between the source side (the level that owns t ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: level_delta domain_of: - LinksMeta - CrossChunkLinksMeta @@ -5083,12 +4930,15 @@ range: integer required: true ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: LevelMetadata @@ -5099,6 +4949,8 @@ __ +
+ URI: [zv:LevelMetadata](https://w3id.org/zarr-vectors/schema/0.5/LevelMetadata) @@ -5255,7 +5107,6 @@ attributes: description: Resolution level index (0 = full resolution). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: level owner: LevelMetadata domain_of: - LevelMetadata @@ -5267,7 +5118,6 @@ attributes: description: Total number of vertices at this level. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: vertex_count owner: LevelMetadata domain_of: - LevelMetadata @@ -5279,7 +5129,6 @@ attributes: description: Names of arrays present in the level group. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: arrays_present owner: LevelMetadata domain_of: - LevelMetadata @@ -5294,7 +5143,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: bin_shape owner: LevelMetadata domain_of: - LevelMetadata @@ -5305,7 +5153,6 @@ attributes: description: Integer fold-change per axis relative to level 0. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: bin_ratio owner: LevelMetadata domain_of: - LevelMetadata @@ -5316,7 +5163,6 @@ attributes: description: Fraction of objects retained at this level. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: object_sparsity owner: LevelMetadata domain_of: - LevelMetadata @@ -5328,7 +5174,6 @@ attributes: description: How this level was generated (e.g. "grid_metanode"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: coarsening_method owner: LevelMetadata domain_of: - LevelMetadata @@ -5338,7 +5183,6 @@ attributes: description: Source level index (None for level 0). 
from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: parent_level owner: LevelMetadata domain_of: - LevelMetadata @@ -5352,7 +5196,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: chunk_dims owner: LevelMetadata domain_of: - LevelMetadata @@ -5363,7 +5206,6 @@ attributes: description: Name of the per-vertex attribute used as the leading chunk axis. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: chunk_attribute_name owner: LevelMetadata domain_of: - LevelMetadata @@ -5377,7 +5219,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: chunk_attribute_values owner: LevelMetadata domain_of: - LevelMetadata @@ -5393,7 +5234,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: preserves_object_ids owner: LevelMetadata domain_of: - LevelMetadata @@ -5406,7 +5246,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: inherited_num_objects owner: LevelMetadata domain_of: - LevelMetadata @@ -5421,32 +5260,36 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: shared_vertex_groups owner: LevelMetadata domain_of: - LevelMetadata range: boolean ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: link_width -_Width of a links row (2 for edges, 3 or 4 for face rows)._ +_Width of a links row (1 for parent→child metanode references, 2 for edges, 3 for triangle faces, 4 for quads)._ + +__ +
-URI: [zv:link_width](https://w3id.org/zarr-vectors/schema/0.5/link_width) -Alias: link_width +URI: [zv:link_width](https://w3id.org/zarr-vectors/schema/0.5/link_width) @@ -5458,6 +5301,7 @@ Alias: link_width | Name | Description | Modifies Slot | | --- | --- | --- | | [LinksMeta](LinksMeta.md) | `` | no | +| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | @@ -5471,7 +5315,7 @@ Alias: link_width | Property | Value | | --- | --- | | Range | [Integer](Integer.md) | -| Domain Of | [LinksMeta](LinksMeta.md) | +| Domain Of | [LinksMeta](LinksMeta.md), [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | ### Cardinality and Requirements @@ -5482,7 +5326,7 @@ Alias: link_width | Property | Value | | --- | --- | -| Minimum Value | 2 | +| Minimum Value | 1 | @@ -5524,23 +5368,29 @@ Alias: link_width
```yaml name: link_width -description: Width of a links row (2 for edges, 3 or 4 for face rows). +description: 'Width of a links row (1 for parent→child metanode references, 2 for + edges, 3 for triangle faces, 4 for quads). + + ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: link_width domain_of: - LinksMeta +- CrossChunkLinksMeta range: integer required: true -minimum_value: 2 +minimum_value: 1 ``` -
+
--- - +--- +search: + boost: 10.0 +--- # Class: LinkAttributeMeta @@ -5551,6 +5401,8 @@ __ +
+ URI: [zv:LinkAttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/LinkAttributeMeta) @@ -5690,22 +5542,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: LinkAttributeMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: link_attribute @@ -5715,7 +5563,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name - alias: name owner: LinkAttributeMeta domain_of: - Axis @@ -5731,7 +5578,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: LinkAttributeMeta domain_of: - VerticesMeta @@ -5753,7 +5599,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: level_delta owner: LinkAttributeMeta domain_of: - LinksMeta @@ -5764,20 +5609,23 @@ attributes: required: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: links_convention +
-URI: [zv:links_convention](https://w3id.org/zarr-vectors/schema/0.5/links_convention) -Alias: links_convention +URI: [zv:links_convention](https://w3id.org/zarr-vectors/schema/0.5/links_convention) @@ -5849,16 +5697,21 @@ Alias: links_convention name: links_convention from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: links_convention domain_of: - RootMetadata range: LinksConvention ``` - +
+ +--- --- +search: + boost: 2.0 +--- + # Enum: LinksConvention @@ -5871,6 +5724,8 @@ __ +
+ URI: [zv:LinksConvention](https://w3id.org/zarr-vectors/schema/0.5/LinksConvention) ## Permissible Values @@ -5938,10 +5793,15 @@ permissible_values: ``` +
---- +--- +--- +search: + boost: 10.0 +--- # Class: LinksMeta @@ -5952,6 +5812,8 @@ __ +
+ URI: [zv:LinksMeta](https://w3id.org/zarr-vectors/schema/0.5/LinksMeta) @@ -5995,7 +5857,7 @@ URI: [zv:LinksMeta](https://w3id.org/zarr-vectors/schema/0.5/LinksMeta) | --- | --- | --- | --- | | [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | | [dtype](dtype.md) | 1
[String](String.md) | Numpy dtype string of the array's value type (e | direct | -| [link_width](link_width.md) | 1
[Integer](Integer.md) | Width of a links row (2 for edges, 3 or 4 for face rows) | direct | +| [link_width](link_width.md) | 1
[Integer](Integer.md) | Width of a links row (1 for parent→child metanode references, 2 for edges, 3 ... | direct | | [level_delta](level_delta.md) | 1
[Integer](Integer.md) | Pyramid-level delta between the source side (the level that owns this array) ... | direct | @@ -6093,22 +5955,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: LinksMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: links @@ -6117,7 +5975,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: LinksMeta domain_of: - VerticesMeta @@ -6131,16 +5988,19 @@ attributes: required: true link_width: name: link_width - description: Width of a links row (2 for edges, 3 or 4 for face rows). + description: 'Width of a links row (1 for parent→child metanode references, 2 + for edges, 3 for triangle faces, 4 for quads). + + ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: link_width owner: LinksMeta domain_of: - LinksMeta + - CrossChunkLinksMeta range: integer required: true - minimum_value: 2 + minimum_value: 1 level_delta: name: level_delta description: 'Pyramid-level delta between the source side (the level that owns @@ -6151,7 +6011,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: level_delta owner: LinksMeta domain_of: - LinksMeta @@ -6162,12 +6021,15 @@ attributes: required: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: max_corner @@ -6176,11 +6038,11 @@ _Per-axis maxima. Length must equal ``len(spatial_index_dims)``._ +
-URI: [zv:max_corner](https://w3id.org/zarr-vectors/schema/0.5/max_corner) -Alias: max_corner +URI: [zv:max_corner](https://w3id.org/zarr-vectors/schema/0.5/max_corner) @@ -6255,7 +6117,6 @@ name: max_corner description: Per-axis maxima. Length must equal ``len(spatial_index_dims)``. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: max_corner domain_of: - BoundingBox range: float @@ -6263,67 +6124,60 @@ required: true multivalued: true ``` - +
--- +--- +search: + boost: 5.0 +--- - -# Class: MetanodeChildrenMeta +# Slot: min_corner -_``.zattrs`` for the ``metanode_children/`` coarsening sidecar._ +_Per-axis minima. Length must equal ``len(spatial_index_dims)``._ +
-URI: [zv:MetanodeChildrenMeta](https://w3id.org/zarr-vectors/schema/0.5/MetanodeChildrenMeta) +URI: [zv:min_corner](https://w3id.org/zarr-vectors/schema/0.5/min_corner) + -```mermaid - classDiagram - class MetanodeChildrenMeta - click MetanodeChildrenMeta href "../MetanodeChildrenMeta/" - MetanodeChildrenMeta : num_metanodes - - MetanodeChildrenMeta : sid_ndim - - MetanodeChildrenMeta : zv_array - - - - - - MetanodeChildrenMeta --> "1" ZvArrayTag : zv_array - click ZvArrayTag href "../ZvArrayTag/" - - - -``` +## Applicable Classes +| Name | Description | Modifies Slot | +| --- | --- | --- | +| [BoundingBox](BoundingBox.md) | Two parallel ``ndim``-length arrays representing the global ``(min_corner, ma... | no | - -## Slots -| Name | Cardinality and Range | Description | Inheritance | -| --- | --- | --- | --- | -| [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | -| [num_metanodes](num_metanodes.md) | 1
[Integer](Integer.md) | Total metanode count in a coarsening sidecar | direct | -| [sid_ndim](sid_ndim.md) | 1
[Integer](Integer.md) | Number of spatial-index dimensions encoded in chunk keys | direct | +## Properties +### Type and Range +| Property | Value | +| --- | --- | +| Range | [Float](Float.md) | +| Domain Of | [BoundingBox](BoundingBox.md) | +### Cardinality and Requirements +| Property | Value | +| --- | --- | +| Required | Yes | +| Multivalued | Yes | @@ -6352,217 +6206,37 @@ URI: [zv:MetanodeChildrenMeta](https://w3id.org/zarr-vectors/schema/0.5/Metanode | Mapping Type | Mapped Value | | --- | --- | -| self | zv:MetanodeChildrenMeta | -| native | zv:MetanodeChildrenMeta | - - +| self | zv:min_corner | +| native | zv:min_corner | ## LinkML Source -### Direct -
```yaml -name: MetanodeChildrenMeta -description: '``.zattrs`` for the ``metanode_children/`` coarsening sidecar.' +name: min_corner +description: Per-axis minima. Length must equal ``len(spatial_index_dims)``. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -slots: -- zv_array -- num_metanodes -- sid_ndim -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: metanode_children +domain_of: +- BoundingBox +range: float +required: true +multivalued: true ``` -
+
-### Induced - -
-```yaml -name: MetanodeChildrenMeta -description: '``.zattrs`` for the ``metanode_children/`` coarsening sidecar.' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: metanode_children -attributes: - zv_array: - name: zv_array - description: 'Discriminator slot identifying the kind of per-array ``.zattrs`` - block. Each writer in ``core/arrays.py`` stamps the corresponding token from - :class:`ZvArrayTag`. - - ' - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: zv_array - owner: MetanodeChildrenMeta - domain_of: - - VerticesMeta - - LinksMeta - - AttributeMeta - - ObjectIndexMeta - - ObjectIndexPendingMeta - - ObjectAttributeMeta - - GroupingsMeta - - GroupingsAttributeMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - LinkAttributeMeta - - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta - range: ZvArrayTag - required: true - equals_string: metanode_children - num_metanodes: - name: num_metanodes - description: Total metanode count in a coarsening sidecar. - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: num_metanodes - owner: MetanodeChildrenMeta - domain_of: - - MetanodeChildrenMeta - range: integer - required: true - minimum_value: 0 - sid_ndim: - name: sid_ndim - description: Number of spatial-index dimensions encoded in chunk keys. - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: sid_ndim - owner: MetanodeChildrenMeta - domain_of: - - ObjectIndexMeta - - ObjectIndexPendingMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - MetanodeChildrenMeta - range: integer - required: true - minimum_value: 1 - -``` -
+--- --- - - - -# Slot: min_corner - - -_Per-axis minima. Length must equal ``len(spatial_index_dims)``._ - - - - - -URI: [zv:min_corner](https://w3id.org/zarr-vectors/schema/0.5/min_corner) -Alias: min_corner - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [BoundingBox](BoundingBox.md) | Two parallel ``ndim``-length arrays representing the global ``(min_corner, ma... | no | - - - - - - -## Properties - -### Type and Range - -| Property | Value | -| --- | --- | -| Range | [Float](Float.md) | -| Domain Of | [BoundingBox](BoundingBox.md) | - -### Cardinality and Requirements - -| Property | Value | -| --- | --- | -| Required | Yes | -| Multivalued | Yes | - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:min_corner | -| native | zv:min_corner | - - - - -## LinkML Source - -
-```yaml -name: min_corner -description: Per-axis minima. Length must equal ``len(spatial_index_dims)``. -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -alias: min_corner -domain_of: -- BoundingBox -range: float -required: true -multivalued: true - -``` -
- - +search: + boost: 5.0 --- - - # Slot: name @@ -6570,11 +6244,11 @@ _NGFF axis or attribute name (e.g. "x", "intensity")._ +
-URI: [schema:name](http://schema.org/name) -Alias: name +URI: [schema:name](http://schema.org/name) @@ -6585,12 +6259,12 @@ Alias: name | Name | Description | Modifies Slot | | --- | --- | --- | -| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | no | -| [AttributeMeta](AttributeMeta.md) | `` | no | -| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | | [Axis](Axis.md) | One axis of the spatial index | no | +| [AttributeMeta](AttributeMeta.md) | `` | no | | [ObjectAttributeMeta](ObjectAttributeMeta.md) | `` | no | | [GroupingsAttributeMeta](GroupingsAttributeMeta.md) | `` | no | +| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | no | +| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | @@ -6655,7 +6329,6 @@ description: NGFF axis or attribute name (e.g. "x", "intensity"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name -alias: name domain_of: - Axis - AttributeMeta @@ -6667,12 +6340,15 @@ range: string required: true ``` - +
--- -# Type: Ncname +--- +search: + boost: 1.0 +---# Type: Ncname @@ -6681,6 +6357,8 @@ _Prefix part of CURIE_ +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -6726,10 +6404,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
+ --- -# Type: Nodeidentifier +--- +search: + boost: 1.0 +---# Type: Nodeidentifier @@ -6738,6 +6421,8 @@ _A URI, CURIE or BNODE that represents a node in a model._ +
+ URI: [shex:nonLiteral](http://www.w3.org/ns/shex#nonLiteral) ## Type Properties @@ -6783,117 +6468,15 @@ URI: [shex:nonLiteral](http://www.w3.org/ns/shex#nonLiteral) - ---- - - - -# Slot: num_faces - - -_Total cross-chunk face count._ - - - - - -URI: [zv:num_faces](https://w3id.org/zarr-vectors/schema/0.5/num_faces) -Alias: num_faces - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | `` | no | - - - - - - -## Properties - -### Type and Range - -| Property | Value | -| --- | --- | -| Range | [Integer](Integer.md) | -| Domain Of | [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | - -### Cardinality and Requirements - -| Property | Value | -| --- | --- | -| Required | Yes | -### Value Constraints - -| Property | Value | -| --- | --- | -| Minimum Value | 0 | - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:num_faces | -| native | zv:num_faces | - - - - -## LinkML Source - -
-```yaml -name: num_faces -description: Total cross-chunk face count. -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -alias: num_faces -domain_of: -- CrossChunkFacesMeta -range: integer -required: true -minimum_value: 0 - -``` -
+
--- - +--- +search: + boost: 5.0 +--- # Slot: num_groups @@ -6902,11 +6485,11 @@ _Total grouping count._ +
-URI: [zv:num_groups](https://w3id.org/zarr-vectors/schema/0.5/num_groups) -Alias: num_groups +URI: [zv:num_groups](https://w3id.org/zarr-vectors/schema/0.5/num_groups) @@ -6972,117 +6555,9 @@ Alias: num_groups ## Mappings | Mapping Type | Mapped Value | -| --- | --- | -| self | zv:num_groups | -| native | zv:num_groups | - - - - -## LinkML Source - -
-```yaml -name: num_groups -description: Total grouping count. -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -alias: num_groups -domain_of: -- GroupingsMeta -range: integer -required: true -minimum_value: 0 - -``` -
- - ---- - - - -# Slot: num_links - - -_Total cross-chunk link count._ - - - - - -URI: [zv:num_links](https://w3id.org/zarr-vectors/schema/0.5/num_links) -Alias: num_links - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | -| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | - - - - - - -## Properties - -### Type and Range - -| Property | Value | -| --- | --- | -| Range | [Integer](Integer.md) | -| Domain Of | [CrossChunkLinksMeta](CrossChunkLinksMeta.md), [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | - -### Cardinality and Requirements - -| Property | Value | -| --- | --- | -| Required | Yes | -### Value Constraints - -| Property | Value | -| --- | --- | -| Minimum Value | 0 | - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:num_links | -| native | zv:num_links | +| --- | --- | +| self | zv:num_groups | +| native | zv:num_groups | @@ -7091,38 +6566,39 @@ Alias: num_links
```yaml -name: num_links -description: Total cross-chunk link count. +name: num_groups +description: Total grouping count. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: num_links domain_of: -- CrossChunkLinksMeta -- CrossChunkLinkAttributeMeta +- GroupingsMeta range: integer required: true minimum_value: 0 ``` -
+
--- +--- +search: + boost: 5.0 +--- - -# Slot: num_metanodes +# Slot: num_links -_Total metanode count in a coarsening sidecar._ +_Total cross-chunk link count._ +
-URI: [zv:num_metanodes](https://w3id.org/zarr-vectors/schema/0.5/num_metanodes) -Alias: num_metanodes +URI: [zv:num_links](https://w3id.org/zarr-vectors/schema/0.5/num_links) @@ -7133,7 +6609,8 @@ Alias: num_metanodes | Name | Description | Modifies Slot | | --- | --- | --- | -| [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | `` | no | +| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | +| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | no | @@ -7147,7 +6624,7 @@ Alias: num_metanodes | Property | Value | | --- | --- | | Range | [Integer](Integer.md) | -| Domain Of | [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | +| Domain Of | [CrossChunkLinksMeta](CrossChunkLinksMeta.md), [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | ### Cardinality and Requirements @@ -7189,8 +6666,8 @@ Alias: num_metanodes | Mapping Type | Mapped Value | | --- | --- | -| self | zv:num_metanodes | -| native | zv:num_metanodes | +| self | zv:num_links | +| native | zv:num_links | @@ -7199,24 +6676,27 @@ Alias: num_metanodes
```yaml -name: num_metanodes -description: Total metanode count in a coarsening sidecar. +name: num_links +description: Total cross-chunk link count. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: num_metanodes domain_of: -- MetanodeChildrenMeta +- CrossChunkLinksMeta +- CrossChunkLinkAttributeMeta range: integer required: true minimum_value: 0 ``` -
+
--- - +--- +search: + boost: 5.0 +--- # Slot: num_objects @@ -7225,11 +6705,11 @@ _Total object count this array carries._ +
-URI: [zv:num_objects](https://w3id.org/zarr-vectors/schema/0.5/num_objects) -Alias: num_objects +URI: [zv:num_objects](https://w3id.org/zarr-vectors/schema/0.5/num_objects) @@ -7241,7 +6721,6 @@ Alias: num_objects | Name | Description | Modifies Slot | | --- | --- | --- | | [ObjectIndexMeta](ObjectIndexMeta.md) | `` | no | -| [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | `` | no | @@ -7255,7 +6734,7 @@ Alias: num_objects | Property | Value | | --- | --- | | Range | [Integer](Integer.md) | -| Domain Of | [ObjectIndexMeta](ObjectIndexMeta.md), [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | +| Domain Of | [ObjectIndexMeta](ObjectIndexMeta.md) | ### Cardinality and Requirements @@ -7311,29 +6790,30 @@ name: num_objects description: Total object count this array carries. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: num_objects domain_of: - ObjectIndexMeta -- ObjectIndexPendingMeta range: integer required: true minimum_value: 0 ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: object_index_convention +
-URI: [zv:object_index_convention](https://w3id.org/zarr-vectors/schema/0.5/object_index_convention) -Alias: object_index_convention +URI: [zv:object_index_convention](https://w3id.org/zarr-vectors/schema/0.5/object_index_convention) @@ -7405,18 +6885,20 @@ Alias: object_index_convention name: object_index_convention from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: object_index_convention domain_of: - RootMetadata range: ObjectIndexConvention ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: object_sparsity @@ -7425,11 +6907,11 @@ _Fraction of objects retained at this level._ +
-URI: [zv:object_sparsity](https://w3id.org/zarr-vectors/schema/0.5/object_sparsity) -Alias: object_sparsity +URI: [zv:object_sparsity](https://w3id.org/zarr-vectors/schema/0.5/object_sparsity) @@ -7510,7 +6992,6 @@ name: object_sparsity description: Fraction of objects retained at this level. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: object_sparsity domain_of: - LevelMetadata range: float @@ -7518,12 +6999,15 @@ minimum_value: 0.0 maximum_value: 1.0 ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: ObjectAttributeMeta @@ -7532,6 +7016,8 @@ _``.zattrs`` for each ``object_attributes//`` array._ +
+ URI: [zv:ObjectAttributeMeta](https://w3id.org/zarr-vectors/schema/0.5/ObjectAttributeMeta) @@ -7665,22 +7151,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: ObjectAttributeMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: object_attribute @@ -7690,7 +7172,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:name - alias: name owner: ObjectAttributeMeta domain_of: - Axis @@ -7706,7 +7187,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: ObjectAttributeMeta domain_of: - VerticesMeta @@ -7723,7 +7203,6 @@ attributes: description: Shape of a dense per-object/per-group array. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: shape owner: ObjectAttributeMeta domain_of: - ObjectAttributeMeta @@ -7733,12 +7212,15 @@ attributes: multivalued: true ``` - +
--- -# Type: Objectidentifier +--- +search: + boost: 1.0 +---# Type: Objectidentifier @@ -7747,6 +7229,8 @@ _A URI or CURIE that represents an object in the model._ +
+ URI: [shex:iri](http://www.w3.org/ns/shex#iri) ## Type Properties @@ -7796,9 +7280,17 @@ URI: [shex:iri](http://www.w3.org/ns/shex#iri) +
+ +--- + +--- +search: + boost: 2.0 --- + # Enum: ObjectIndexConvention @@ -7808,6 +7300,8 @@ _How the object_id → vertex-group mapping is encoded._ +
+ URI: [zv:ObjectIndexConvention](https://w3id.org/zarr-vectors/schema/0.5/ObjectIndexConvention) ## Permissible Values @@ -7853,233 +7347,47 @@ URI: [zv:ObjectIndexConvention](https://w3id.org/zarr-vectors/schema/0.5/ObjectI ## LinkML Source
-```yaml -name: ObjectIndexConvention -description: How the object_id → vertex-group mapping is encoded. -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -permissible_values: - standard: - text: standard - description: Full ``object_index`` array maps each object to its VG refs. - identity: - text: identity - description: 'One object per vertex; only valid in single-chunk stores where the - object_index would be redundant. - - ' - -``` -
- - ---- - - - -# Class: ObjectIndexMeta - - -_``.zattrs`` for ``object_index/``._ - - - - - -URI: [zv:ObjectIndexMeta](https://w3id.org/zarr-vectors/schema/0.5/ObjectIndexMeta) - - - - - -```mermaid - classDiagram - class ObjectIndexMeta - click ObjectIndexMeta href "../ObjectIndexMeta/" - ObjectIndexMeta : num_objects - - ObjectIndexMeta : sid_ndim - - ObjectIndexMeta : zv_array - - - - - - ObjectIndexMeta --> "1" ZvArrayTag : zv_array - click ZvArrayTag href "../ZvArrayTag/" - - - - -``` - - - - - - -## Slots - -| Name | Cardinality and Range | Description | Inheritance | -| --- | --- | --- | --- | -| [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | -| [num_objects](num_objects.md) | 1
[Integer](Integer.md) | Total object count this array carries | direct | -| [sid_ndim](sid_ndim.md) | 1
[Integer](Integer.md) | Number of spatial-index dimensions encoded in chunk keys | direct | - - - - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:ObjectIndexMeta | -| native | zv:ObjectIndexMeta | - - - - - - -## LinkML Source - -### Direct - -
-```yaml -name: ObjectIndexMeta -description: '``.zattrs`` for ``object_index/``.' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -slots: -- zv_array -- num_objects -- sid_ndim -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: object_index - -``` -
- -### Induced - -
-```yaml -name: ObjectIndexMeta -description: '``.zattrs`` for ``object_index/``.' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -slot_usage: - zv_array: - name: zv_array - required: true - equals_string: object_index -attributes: - zv_array: - name: zv_array - description: 'Discriminator slot identifying the kind of per-array ``.zattrs`` - block. Each writer in ``core/arrays.py`` stamps the corresponding token from - :class:`ZvArrayTag`. - - ' - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: zv_array - owner: ObjectIndexMeta - domain_of: - - VerticesMeta - - LinksMeta - - AttributeMeta - - ObjectIndexMeta - - ObjectIndexPendingMeta - - ObjectAttributeMeta - - GroupingsMeta - - GroupingsAttributeMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - LinkAttributeMeta - - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta - range: ZvArrayTag - required: true - equals_string: object_index - num_objects: - name: num_objects - description: Total object count this array carries. - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: num_objects - owner: ObjectIndexMeta - domain_of: - - ObjectIndexMeta - - ObjectIndexPendingMeta - range: integer - required: true - minimum_value: 0 - sid_ndim: - name: sid_ndim - description: Number of spatial-index dimensions encoded in chunk keys. - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: sid_ndim - owner: ObjectIndexMeta - domain_of: - - ObjectIndexMeta - - ObjectIndexPendingMeta - - CrossChunkLinksMeta - - CrossChunkFacesMeta - - MetanodeChildrenMeta - range: integer - required: true - minimum_value: 1 +```yaml +name: ObjectIndexConvention +description: How the object_id → vertex-group mapping is encoded. +from_schema: https://w3id.org/zarr-vectors/schema/0.5 +rank: 1000 +permissible_values: + standard: + text: standard + description: Full ``object_index`` array maps each object to its VG refs. 
+ identity: + text: identity + description: 'One object per vertex; only valid in single-chunk stores where the + object_index would be redundant. + + ' ```
+
+ --- +--- +search: + boost: 10.0 +--- +# Class: ObjectIndexMeta -# Class: ObjectIndexPendingMeta +_``.zattrs`` for ``object_index/``._ -_``.zattrs`` for ``object_index/pending//``._ +
-URI: [zv:ObjectIndexPendingMeta](https://w3id.org/zarr-vectors/schema/0.5/ObjectIndexPendingMeta) +URI: [zv:ObjectIndexMeta](https://w3id.org/zarr-vectors/schema/0.5/ObjectIndexMeta) @@ -8087,21 +7395,19 @@ URI: [zv:ObjectIndexPendingMeta](https://w3id.org/zarr-vectors/schema/0.5/Object ```mermaid classDiagram - class ObjectIndexPendingMeta - click ObjectIndexPendingMeta href "../ObjectIndexPendingMeta/" - ObjectIndexPendingMeta : batch_id - - ObjectIndexPendingMeta : num_objects + class ObjectIndexMeta + click ObjectIndexMeta href "../ObjectIndexMeta/" + ObjectIndexMeta : num_objects - ObjectIndexPendingMeta : sid_ndim + ObjectIndexMeta : sid_ndim - ObjectIndexPendingMeta : zv_array + ObjectIndexMeta : zv_array - ObjectIndexPendingMeta --> "1" ZvArrayTag : zv_array + ObjectIndexMeta --> "1" ZvArrayTag : zv_array click ZvArrayTag href "../ZvArrayTag/" @@ -8119,7 +7425,6 @@ URI: [zv:ObjectIndexPendingMeta](https://w3id.org/zarr-vectors/schema/0.5/Object | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | | [zv_array](zv_array.md) | 1
[ZvArrayTag](ZvArrayTag.md) | Discriminator slot identifying the kind of per-array `` | direct | -| [batch_id](batch_id.md) | 1
[Integer](Integer.md) | Monotonic batch id for an ``object_index/pending/`` sidecar | direct | | [num_objects](num_objects.md) | 1
[Integer](Integer.md) | Total object count this array carries | direct | | [sid_ndim](sid_ndim.md) | 1
[Integer](Integer.md) | Number of spatial-index dimensions encoded in chunk keys | direct | @@ -8155,8 +7460,8 @@ URI: [zv:ObjectIndexPendingMeta](https://w3id.org/zarr-vectors/schema/0.5/Object | Mapping Type | Mapped Value | | --- | --- | -| self | zv:ObjectIndexPendingMeta | -| native | zv:ObjectIndexPendingMeta | +| self | zv:ObjectIndexMeta | +| native | zv:ObjectIndexMeta | @@ -8169,20 +7474,19 @@ URI: [zv:ObjectIndexPendingMeta](https://w3id.org/zarr-vectors/schema/0.5/Object
```yaml -name: ObjectIndexPendingMeta -description: '``.zattrs`` for ``object_index/pending//``.' +name: ObjectIndexMeta +description: '``.zattrs`` for ``object_index/``.' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slots: - zv_array -- batch_id - num_objects - sid_ndim slot_usage: zv_array: name: zv_array required: true - equals_string: object_index_pending + equals_string: object_index ```
@@ -8191,15 +7495,15 @@ slot_usage:
```yaml -name: ObjectIndexPendingMeta -description: '``.zattrs`` for ``object_index/pending//``.' +name: ObjectIndexMeta +description: '``.zattrs`` for ``object_index/``.' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_usage: zv_array: name: zv_array required: true - equals_string: object_index_pending + equals_string: object_index attributes: zv_array: name: zv_array @@ -8210,47 +7514,29 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array - owner: ObjectIndexPendingMeta + owner: ObjectIndexMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true - equals_string: object_index_pending - batch_id: - name: batch_id - description: Monotonic batch id for an ``object_index/pending/`` sidecar. - from_schema: https://w3id.org/zarr-vectors/schema/0.5 - rank: 1000 - alias: batch_id - owner: ObjectIndexPendingMeta - domain_of: - - ObjectIndexPendingMeta - range: integer - required: true - minimum_value: 0 + equals_string: object_index num_objects: name: num_objects description: Total object count this array carries. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: num_objects - owner: ObjectIndexPendingMeta + owner: ObjectIndexMeta domain_of: - ObjectIndexMeta - - ObjectIndexPendingMeta range: integer required: true minimum_value: 0 @@ -8259,25 +7545,24 @@ attributes: description: Number of spatial-index dimensions encoded in chunk keys. 
from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: sid_ndim - owner: ObjectIndexPendingMeta + owner: ObjectIndexMeta domain_of: - ObjectIndexMeta - - ObjectIndexPendingMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - - MetanodeChildrenMeta range: integer required: true minimum_value: 1 ``` -
+
--- - +--- +search: + boost: 5.0 +--- # Slot: parent_level @@ -8286,11 +7571,11 @@ _Source level index (None for level 0)._ +
-URI: [zv:parent_level](https://w3id.org/zarr-vectors/schema/0.5/parent_level) -Alias: parent_level +URI: [zv:parent_level](https://w3id.org/zarr-vectors/schema/0.5/parent_level) @@ -8370,19 +7655,21 @@ name: parent_level description: Source level index (None for level 0). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: parent_level domain_of: - LevelMetadata range: integer minimum_value: 0 ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: preserves_object_ids @@ -8393,11 +7680,11 @@ __ +
-URI: [zv:preserves_object_ids](https://w3id.org/zarr-vectors/schema/0.5/preserves_object_ids) -Alias: preserves_object_ids +URI: [zv:preserves_object_ids](https://w3id.org/zarr-vectors/schema/0.5/preserves_object_ids) @@ -8475,129 +7762,20 @@ description: 'True for levels written by the per-object pyramid regime. When se ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: preserves_object_ids domain_of: - LevelMetadata range: boolean ``` - +
--- - - -# Slot: record_size - - -_Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 int64s)._ - -__ - - - - - -URI: [zv:record_size](https://w3id.org/zarr-vectors/schema/0.5/record_size) -Alias: record_size - - - - - - - -## Applicable Classes - -| Name | Description | Modifies Slot | -| --- | --- | --- | -| [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | `` | no | - - - - - - -## Properties - -### Type and Range - -| Property | Value | -| --- | --- | -| Range | [Integer](Integer.md) | -| Domain Of | [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | - -### Cardinality and Requirements - -| Property | Value | -| --- | --- | -| Required | Yes | -### Value Constraints - -| Property | Value | -| --- | --- | -| Minimum Value | 2 | - - - - - - - - - - - - -## Identifier and Mapping Information - - - - - -### Schema Source - - -* from schema: https://w3id.org/zarr-vectors/schema/0.5 - - - - -## Mappings - -| Mapping Type | Mapped Value | -| --- | --- | -| self | zv:record_size | -| native | zv:record_size | - - - - -## LinkML Source - -
-```yaml -name: record_size -description: 'Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 int64s). - - ' -from_schema: https://w3id.org/zarr-vectors/schema/0.5 -rank: 1000 -alias: record_size -domain_of: -- CrossChunkFacesMeta -range: integer -required: true -minimum_value: 2 - -``` -
- - --- - - +search: + boost: 5.0 +--- # Slot: reduction_factor @@ -8606,11 +7784,11 @@ _Multi-resolution coarsening factor (≥ 2)._ +
-URI: [zv:reduction_factor](https://w3id.org/zarr-vectors/schema/0.5/reduction_factor) -Alias: reduction_factor +URI: [zv:reduction_factor](https://w3id.org/zarr-vectors/schema/0.5/reduction_factor) @@ -8690,19 +7868,21 @@ name: reduction_factor description: Multi-resolution coarsening factor (≥ 2). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: reduction_factor domain_of: - RootMetadata range: integer minimum_value: 2 ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: RootMetadata @@ -8713,6 +7893,8 @@ __ +
+ URI: [zv:RootMetadata](https://w3id.org/zarr-vectors/schema/0.5/RootMetadata) @@ -8946,7 +8128,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:version - alias: zv_version owner: RootMetadata domain_of: - RootMetadata @@ -8958,7 +8139,6 @@ attributes: description: Physical spatial chunk size per axis (all values > 0). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: chunk_shape owner: RootMetadata domain_of: - RootMetadata @@ -8970,7 +8150,6 @@ attributes: description: Global vertex bounding box. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: bounds owner: RootMetadata domain_of: - RootMetadata @@ -8981,7 +8160,6 @@ attributes: description: One or more geometry kinds present in the store. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: geometry_types owner: RootMetadata domain_of: - RootMetadata @@ -8998,7 +8176,6 @@ attributes: from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:coordinateReferenceSystem - alias: crs owner: RootMetadata domain_of: - RootMetadata @@ -9008,7 +8185,6 @@ attributes: name: links_convention from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: links_convention owner: RootMetadata domain_of: - RootMetadata @@ -9017,7 +8193,6 @@ attributes: name: object_index_convention from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: object_index_convention owner: RootMetadata domain_of: - RootMetadata @@ -9026,7 +8201,6 @@ attributes: name: cross_chunk_strategy from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: cross_chunk_strategy owner: RootMetadata domain_of: - RootMetadata @@ -9041,7 +8215,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: cross_level_depth owner: RootMetadata domain_of: - RootMetadata @@ -9057,7 +8230,6 @@ attributes: ' from_schema: 
https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: cross_level_storage owner: RootMetadata domain_of: - RootMetadata @@ -9067,7 +8239,6 @@ attributes: description: Multi-resolution coarsening factor (≥ 2). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: reduction_factor owner: RootMetadata domain_of: - RootMetadata @@ -9082,7 +8253,6 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: base_bin_shape owner: RootMetadata domain_of: - RootMetadata @@ -9093,7 +8263,6 @@ attributes: description: Optional 0.3+ feature tokens advertised by this store. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: format_capabilities owner: RootMetadata domain_of: - RootMetadata @@ -9101,12 +8270,15 @@ attributes: multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: shape @@ -9115,11 +8287,11 @@ _Shape of a dense per-object/per-group array._ +
-URI: [zv:shape](https://w3id.org/zarr-vectors/schema/0.5/shape) -Alias: shape +URI: [zv:shape](https://w3id.org/zarr-vectors/schema/0.5/shape) @@ -9130,8 +8302,8 @@ Alias: shape | Name | Description | Modifies Slot | | --- | --- | --- | -| [GroupingsAttributeMeta](GroupingsAttributeMeta.md) | `` | no | | [ObjectAttributeMeta](ObjectAttributeMeta.md) | `` | no | +| [GroupingsAttributeMeta](GroupingsAttributeMeta.md) | `` | no | @@ -9195,7 +8367,6 @@ name: shape description: Shape of a dense per-object/per-group array. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: shape domain_of: - ObjectAttributeMeta - GroupingsAttributeMeta @@ -9204,12 +8375,15 @@ required: true multivalued: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: shared_vertex_groups @@ -9220,11 +8394,11 @@ __ +
-URI: [zv:shared_vertex_groups](https://w3id.org/zarr-vectors/schema/0.5/shared_vertex_groups) -Alias: shared_vertex_groups +URI: [zv:shared_vertex_groups](https://w3id.org/zarr-vectors/schema/0.5/shared_vertex_groups) @@ -9301,18 +8475,20 @@ description: 'True when per-chunk vertex groups may be referenced by multiple ob ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: shared_vertex_groups domain_of: - LevelMetadata range: boolean ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: sid_ndim @@ -9321,11 +8497,11 @@ _Number of spatial-index dimensions encoded in chunk keys._ +
-URI: [zv:sid_ndim](https://w3id.org/zarr-vectors/schema/0.5/sid_ndim) -Alias: sid_ndim +URI: [zv:sid_ndim](https://w3id.org/zarr-vectors/schema/0.5/sid_ndim) @@ -9338,9 +8514,6 @@ Alias: sid_ndim | --- | --- | --- | | [ObjectIndexMeta](ObjectIndexMeta.md) | `` | no | | [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | no | -| [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | `` | no | -| [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | `` | no | -| [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | `` | no | @@ -9354,7 +8527,7 @@ Alias: sid_ndim | Property | Value | | --- | --- | | Range | [Integer](Integer.md) | -| Domain Of | [ObjectIndexMeta](ObjectIndexMeta.md), [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md), [CrossChunkLinksMeta](CrossChunkLinksMeta.md), [CrossChunkFacesMeta](CrossChunkFacesMeta.md), [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | +| Domain Of | [ObjectIndexMeta](ObjectIndexMeta.md), [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | ### Cardinality and Requirements @@ -9410,24 +8583,23 @@ name: sid_ndim description: Number of spatial-index dimensions encoded in chunk keys. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: sid_ndim domain_of: - ObjectIndexMeta -- ObjectIndexPendingMeta - CrossChunkLinksMeta -- CrossChunkFacesMeta -- MetanodeChildrenMeta range: integer required: true minimum_value: 1 ``` - +
--- -# Type: Sparqlpath +--- +search: + boost: 1.0 +---# Type: Sparqlpath @@ -9436,6 +8608,8 @@ _A string encoding a SPARQL Property Path. The value of the string MUST conform +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -9481,10 +8655,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
+ --- -# Type: String +--- +search: + boost: 1.0 +---# Type: String @@ -9493,6 +8672,8 @@ _A character string_ +
+ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Type Properties @@ -9509,7 +8690,7 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) ## Notes -* In RDF serializations, a slot with range of string is treated as a literal or type xsd:string. If you are authoring schemas in LinkML YAML, the type is referenced with the lower case "string". +* In RDF serializations, a slot with range of string is treated as a literal or type xsd:string. If you are authoring schemas in LinkML YAML, the type is referenced with the lower case "string". @@ -9538,10 +8719,15 @@ URI: [xsd:string](http://www.w3.org/2001/XMLSchema#string) +
+ --- -# Type: Time +--- +search: + boost: 1.0 +---# Type: Time @@ -9550,6 +8736,8 @@ _A time object represents a (local) time of day, independent of any particular d +
+ URI: [xsd:time](http://www.w3.org/2001/XMLSchema#time) ## Type Properties @@ -9597,10 +8785,15 @@ URI: [xsd:time](http://www.w3.org/2001/XMLSchema#time) +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: type @@ -9609,11 +8802,11 @@ _NGFF axis type — "space", "time", or "channel"._ +
-URI: [zv:type](https://w3id.org/zarr-vectors/schema/0.5/type) -Alias: type +URI: [zv:type](https://w3id.org/zarr-vectors/schema/0.5/type) @@ -9687,19 +8880,21 @@ name: type description: NGFF axis type — "space", "time", or "channel". from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: type domain_of: - Axis range: string required: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: unit @@ -9708,11 +8903,11 @@ _NGFF unit string (e.g. "um", "nanometer", "second")._ +
-URI: [zv:unit](https://w3id.org/zarr-vectors/schema/0.5/unit) -Alias: unit +URI: [zv:unit](https://w3id.org/zarr-vectors/schema/0.5/unit) @@ -9785,18 +8980,20 @@ name: unit description: NGFF unit string (e.g. "um", "nanometer", "second"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: unit domain_of: - Axis range: string ``` - +
--- -# Type: Uri +--- +search: + boost: 1.0 +---# Type: Uri @@ -9805,6 +9002,8 @@ _a complete URI_ +
+ URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) ## Type Properties @@ -9855,10 +9054,15 @@ URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) +
+ --- -# Type: Uriorcurie +--- +search: + boost: 1.0 +---# Type: Uriorcurie @@ -9867,6 +9071,8 @@ _a URI or a CURIE_ +
+ URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) ## Type Properties @@ -9912,10 +9118,15 @@ URI: [xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) +
---- +--- +--- +search: + boost: 5.0 +--- # Slot: vertex_count @@ -9924,11 +9135,11 @@ _Total number of vertices at this level._ +
-URI: [zv:vertex_count](https://w3id.org/zarr-vectors/schema/0.5/vertex_count) -Alias: vertex_count +URI: [zv:vertex_count](https://w3id.org/zarr-vectors/schema/0.5/vertex_count) @@ -10009,7 +9220,6 @@ name: vertex_count description: Total number of vertices at this level. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: vertex_count domain_of: - LevelMetadata range: integer @@ -10017,12 +9227,15 @@ required: true minimum_value: 0 ``` - +
--- - +--- +search: + boost: 10.0 +--- # Class: VerticesMeta @@ -10031,6 +9244,8 @@ _``.zattrs`` for the ``vertices/`` array._ +
+ URI: [zv:VerticesMeta](https://w3id.org/zarr-vectors/schema/0.5/VerticesMeta) @@ -10169,22 +9384,18 @@ attributes: ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: zv_array owner: VerticesMeta domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta - - ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta - - CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta - - MetanodeChildrenMeta range: ZvArrayTag required: true equals_string: vertices @@ -10193,7 +9404,6 @@ attributes: description: Numpy dtype string of the array's value type (e.g. "float32"). from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: dtype owner: VerticesMeta domain_of: - VerticesMeta @@ -10210,14 +9420,13 @@ attributes: description: How the chunk bytes are encoded. from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 - alias: encoding owner: VerticesMeta domain_of: - VerticesMeta range: Encoding ``` - +
--- @@ -10232,7 +9441,10 @@ URI: https://w3id.org/zarr-vectors/schema/0.5 --- - +--- +search: + boost: 5.0 +--- # Slot: zv_array @@ -10243,11 +9455,11 @@ __ +
-URI: [zv:zv_array](https://w3id.org/zarr-vectors/schema/0.5/zv_array) -Alias: zv_array +URI: [zv:zv_array](https://w3id.org/zarr-vectors/schema/0.5/zv_array) @@ -10258,19 +9470,16 @@ Alias: zv_array | Name | Description | Modifies Slot | | --- | --- | --- | -| [ObjectIndexMeta](ObjectIndexMeta.md) | `` | yes | -| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | yes | -| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | yes | +| [VerticesMeta](VerticesMeta.md) | `` | yes | | [LinksMeta](LinksMeta.md) | `` | yes | | [AttributeMeta](AttributeMeta.md) | `` | yes | -| [GroupingsMeta](GroupingsMeta.md) | `` | yes | -| [CrossChunkFacesMeta](CrossChunkFacesMeta.md) | `` | yes | -| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | yes | +| [ObjectIndexMeta](ObjectIndexMeta.md) | `` | yes | | [ObjectAttributeMeta](ObjectAttributeMeta.md) | `` | yes | -| [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | `` | yes | +| [GroupingsMeta](GroupingsMeta.md) | `` | yes | | [GroupingsAttributeMeta](GroupingsAttributeMeta.md) | `` | yes | -| [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md) | `` | yes | -| [VerticesMeta](VerticesMeta.md) | `` | yes | +| [CrossChunkLinksMeta](CrossChunkLinksMeta.md) | `` | yes | +| [LinkAttributeMeta](LinkAttributeMeta.md) | `` | yes | +| [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | `` | yes | @@ -10284,7 +9493,7 @@ Alias: zv_array | Property | Value | | --- | --- | | Range | [ZvArrayTag](ZvArrayTag.md) | -| Domain Of | [VerticesMeta](VerticesMeta.md), [LinksMeta](LinksMeta.md), [AttributeMeta](AttributeMeta.md), [ObjectIndexMeta](ObjectIndexMeta.md), [ObjectIndexPendingMeta](ObjectIndexPendingMeta.md), [ObjectAttributeMeta](ObjectAttributeMeta.md), [GroupingsMeta](GroupingsMeta.md), [GroupingsAttributeMeta](GroupingsAttributeMeta.md), [CrossChunkLinksMeta](CrossChunkLinksMeta.md), [CrossChunkFacesMeta](CrossChunkFacesMeta.md), [LinkAttributeMeta](LinkAttributeMeta.md), 
[CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md), [MetanodeChildrenMeta](MetanodeChildrenMeta.md) | +| Domain Of | [VerticesMeta](VerticesMeta.md), [LinksMeta](LinksMeta.md), [AttributeMeta](AttributeMeta.md), [ObjectIndexMeta](ObjectIndexMeta.md), [ObjectAttributeMeta](ObjectAttributeMeta.md), [GroupingsMeta](GroupingsMeta.md), [GroupingsAttributeMeta](GroupingsAttributeMeta.md), [CrossChunkLinksMeta](CrossChunkLinksMeta.md), [LinkAttributeMeta](LinkAttributeMeta.md), [CrossChunkLinkAttributeMeta](CrossChunkLinkAttributeMeta.md) | ### Cardinality and Requirements @@ -10336,31 +9545,30 @@ description: 'Discriminator slot identifying the kind of per-array ``.zattrs`` b ' from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 -alias: zv_array domain_of: - VerticesMeta - LinksMeta - AttributeMeta - ObjectIndexMeta -- ObjectIndexPendingMeta - ObjectAttributeMeta - GroupingsMeta - GroupingsAttributeMeta - CrossChunkLinksMeta -- CrossChunkFacesMeta - LinkAttributeMeta - CrossChunkLinkAttributeMeta -- MetanodeChildrenMeta range: ZvArrayTag required: true ``` - +
--- - +--- +search: + boost: 5.0 +--- # Slot: zv_version @@ -10371,11 +9579,11 @@ __ +
-URI: [schema:version](http://schema.org/version) -Alias: zv_version +URI: [schema:version](http://schema.org/version) @@ -10462,7 +9670,6 @@ description: 'ZV spec version this store was written against (e.g. "0.5.0"). Ren from_schema: https://w3id.org/zarr-vectors/schema/0.5 rank: 1000 slot_uri: schema:version -alias: zv_version domain_of: - RootMetadata range: string @@ -10470,10 +9677,16 @@ required: true pattern: ^\d+\.\d+(\.\d+)?$ ``` - +
+ +--- --- +search: + boost: 2.0 +--- + # Enum: ZvArrayTag @@ -10486,6 +9699,8 @@ __ +
+ URI: [zv:ZvArrayTag](https://w3id.org/zarr-vectors/schema/0.5/ZvArrayTag) ## Permissible Values @@ -10495,15 +9710,12 @@ URI: [zv:ZvArrayTag](https://w3id.org/zarr-vectors/schema/0.5/ZvArrayTag) | links | None | | | attribute | None | | | object_index | None | | -| object_index_pending | None | | | object_attribute | None | | | groupings | None | | | groupings_attribute | None | | | cross_chunk_links | None | | -| cross_chunk_faces | None | | | link_attribute | None | | | cross_chunk_link_attribute | None | | -| metanode_children | None | | @@ -10559,8 +9771,6 @@ permissible_values: text: attribute object_index: text: object_index - object_index_pending: - text: object_index_pending object_attribute: text: object_attribute groupings: @@ -10569,14 +9779,12 @@ permissible_values: text: groupings_attribute cross_chunk_links: text: cross_chunk_links - cross_chunk_faces: - text: cross_chunk_faces link_attribute: text: link_attribute cross_chunk_link_attribute: text: cross_chunk_link_attribute - metanode_children: - text: metanode_children ``` - \ No newline at end of file + + +
\ No newline at end of file diff --git a/schema/zarr_vectors.linkml.yaml b/schema/zarr_vectors.linkml.yaml index 13a8e03..a1e5e8c 100644 --- a/schema/zarr_vectors.linkml.yaml +++ b/schema/zarr_vectors.linkml.yaml @@ -98,17 +98,11 @@ enums: FormatCapability: description: > - Optional 0.3+ feature tokens a store advertises in + Optional feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values. permissible_values: - cross_chunk_faces: - description: Store has the ``cross_chunk_faces`` array for boundary faces. - vertex_count_cache: - description: Per-chunk ``vertex_counts/`` sidecars are present. - object_index_pending: - description: One or more uncompacted ``object_index/pending/`` sidecars exist. preserved_object_ids: description: > At least one level was written with ID-preserving sparsification @@ -138,15 +132,12 @@ enums: links: attribute: object_index: - object_index_pending: object_attribute: groupings: groupings_attribute: cross_chunk_links: - cross_chunk_faces: link_attribute: cross_chunk_link_attribute: - metanode_children: # =================================================================== # Classes @@ -269,14 +260,6 @@ classes: equals_string: object_index required: true - ObjectIndexPendingMeta: - description: "``.zattrs`` for ``object_index/pending//``." - slots: [zv_array, batch_id, num_objects, sid_ndim] - slot_usage: - zv_array: - equals_string: object_index_pending - required: true - ObjectAttributeMeta: description: "``.zattrs`` for each ``object_attributes//`` array." slots: [zv_array, name, dtype, shape] @@ -303,25 +286,20 @@ classes: CrossChunkLinksMeta: description: > - ``.zattrs`` for a ``cross_chunk_links//`` array. Under the - 0.4 multiscale layout, each delta segment carries its own meta - block; ``level_delta=0`` is the intra-level array. 
Source-side - endpoints live at the array's own resolution level; target-side - endpoints live at ``this_level + level_delta``. - slots: [zv_array, num_links, sid_ndim, level_delta] + ``.zattrs`` for a ``cross_chunk_links//`` array. Each + delta segment carries its own meta block; ``level_delta=0`` is + the intra-level array. Each record is ``link_width`` + ``(chunk_coords, vertex_idx)`` endpoints — ``link_width=2`` + encodes a cross-chunk edge, ``link_width=3`` a triangle face + record, and so on. Source-side endpoint (endpoint 0) lives at + the array's own resolution level; target-side endpoints live at + ``this_level + level_delta``. + slots: [zv_array, num_links, sid_ndim, level_delta, link_width] slot_usage: zv_array: equals_string: cross_chunk_links required: true - CrossChunkFacesMeta: - description: "``.zattrs`` for ``cross_chunk_faces/`` (0.3 capability ``cross_chunk_faces``)." - slots: [zv_array, num_faces, sid_ndim, record_size] - slot_usage: - zv_array: - equals_string: cross_chunk_faces - required: true - LinkAttributeMeta: description: > ``.zattrs`` for each ``link_attributes///`` array. @@ -344,14 +322,6 @@ classes: equals_string: cross_chunk_link_attribute required: true - MetanodeChildrenMeta: - description: "``.zattrs`` for the ``metanode_children/`` coarsening sidecar." - slots: [zv_array, num_metanodes, sid_ndim] - slot_usage: - zv_array: - equals_string: metanode_children - required: true - # =================================================================== # Slots # =================================================================== @@ -559,9 +529,11 @@ slots: range: string multivalued: true link_width: - description: Width of a links row (2 for edges, 3 or 4 for face rows). + description: > + Width of a links row (1 for parent→child metanode references, 2 + for edges, 3 for triangle faces, 4 for quads). 
range: integer - minimum_value: 2 + minimum_value: 1 required: true level_delta: description: > @@ -587,35 +559,13 @@ slots: range: integer minimum_value: 0 required: true - num_faces: - description: Total cross-chunk face count. - range: integer - minimum_value: 0 - required: true - num_metanodes: - description: Total metanode count in a coarsening sidecar. - range: integer - minimum_value: 0 - required: true sid_ndim: description: Number of spatial-index dimensions encoded in chunk keys. range: integer minimum_value: 1 required: true - record_size: - description: > - Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 - int64s). - range: integer - minimum_value: 2 - required: true shape: description: Shape of a dense per-object/per-group array. range: integer multivalued: true required: true - batch_id: - description: Monotonic batch id for an ``object_index/pending/`` sidecar. - range: integer - minimum_value: 0 - required: true diff --git a/schema/zarr_vectors.schema.json b/schema/zarr_vectors.schema.json index 288320f..4af9261 100644 --- a/schema/zarr_vectors.schema.json +++ b/schema/zarr_vectors.schema.json @@ -101,40 +101,6 @@ "string" ] }, - "CrossChunkFacesMeta": { - "additionalProperties": false, - "description": "``.zattrs`` for ``cross_chunk_faces/`` (0.3 capability ``cross_chunk_faces``).", - "properties": { - "num_faces": { - "description": "Total cross-chunk face count.", - "minimum": 0, - "type": "integer" - }, - "record_size": { - "description": "Per-face record width in ``cross_chunk_faces/data`` (sid_ndim + 2 int64s).\n", - "minimum": 2, - "type": "integer" - }, - "sid_ndim": { - "description": "Number of spatial-index dimensions encoded in chunk keys.", - "minimum": 1, - "type": "integer" - }, - "zv_array": { - "$ref": "#/$defs/ZvArrayTag", - "const": "cross_chunk_faces", - "description": "Discriminator slot identifying the kind of per-array ``.zattrs`` block. 
Each writer in ``core/arrays.py`` stamps the corresponding token from :class:`ZvArrayTag`.\n" - } - }, - "required": [ - "zv_array", - "num_faces", - "sid_ndim", - "record_size" - ], - "title": "CrossChunkFacesMeta", - "type": "object" - }, "CrossChunkLinkAttributeMeta": { "additionalProperties": false, "description": "``.zattrs`` for each ``cross_chunk_link_attributes///`` array. Stored as a flat blob parallel to the ``data`` blob of the matching ``cross_chunk_links//`` array; ``num_links`` MUST equal the parallel CCL array's ``num_links``.", @@ -174,12 +140,17 @@ }, "CrossChunkLinksMeta": { "additionalProperties": false, - "description": "``.zattrs`` for a ``cross_chunk_links//`` array. Under the 0.4 multiscale layout, each delta segment carries its own meta block; ``level_delta=0`` is the intra-level array. Source-side endpoints live at the array's own resolution level; target-side endpoints live at ``this_level + level_delta``.", + "description": "``.zattrs`` for a ``cross_chunk_links//`` array. Each delta segment carries its own meta block; ``level_delta=0`` is the intra-level array. Each record is ``link_width`` ``(chunk_coords, vertex_idx)`` endpoints \u2014 ``link_width=2`` encodes a cross-chunk edge, ``link_width=3`` a triangle face record, and so on. Source-side endpoint (endpoint 0) lives at the array's own resolution level; target-side endpoints live at ``this_level + level_delta``.", "properties": { "level_delta": { "description": "Pyramid-level delta between the source side (the level that owns this array) and the target side of the edges. 
``0`` for intra-level arrays (the only kind written pre-0.4), ``+N`` for edges from this level to ``this_level + N`` (coarser), ``-N`` for edges to ``this_level - N`` (finer).\n", "type": "integer" }, + "link_width": { + "description": "Width of a links row (1 for parent\u2192child metanode references, 2 for edges, 3 for triangle faces, 4 for quads).\n", + "minimum": 1, + "type": "integer" + }, "num_links": { "description": "Total cross-chunk link count.", "minimum": 0, @@ -200,7 +171,8 @@ "zv_array", "num_links", "sid_ndim", - "level_delta" + "level_delta", + "link_width" ], "title": "CrossChunkLinksMeta", "type": "object" @@ -235,11 +207,8 @@ "type": "string" }, "FormatCapability": { - "description": "Optional 0.3+ feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values.", + "description": "Optional feature tokens a store advertises in :attr:`RootMetadata.format_capabilities`. 
Open-set: new tokens will be added in future spec revisions; readers must tolerate unknown values.", "enum": [ - "cross_chunk_faces", - "vertex_count_cache", - "object_index_pending", "preserved_object_ids", "shared_vertex_groups", "multiscale_links" @@ -494,8 +463,8 @@ "type": "integer" }, "link_width": { - "description": "Width of a links row (2 for edges, 3 or 4 for face rows).", - "minimum": 2, + "description": "Width of a links row (1 for parent\u2192child metanode references, 2 for edges, 3 for triangle faces, 4 for quads).\n", + "minimum": 1, "type": "integer" }, "zv_array": { @@ -513,34 +482,6 @@ "title": "LinksMeta", "type": "object" }, - "MetanodeChildrenMeta": { - "additionalProperties": false, - "description": "``.zattrs`` for the ``metanode_children/`` coarsening sidecar.", - "properties": { - "num_metanodes": { - "description": "Total metanode count in a coarsening sidecar.", - "minimum": 0, - "type": "integer" - }, - "sid_ndim": { - "description": "Number of spatial-index dimensions encoded in chunk keys.", - "minimum": 1, - "type": "integer" - }, - "zv_array": { - "$ref": "#/$defs/ZvArrayTag", - "const": "metanode_children", - "description": "Discriminator slot identifying the kind of per-array ``.zattrs`` block. 
Each writer in ``core/arrays.py`` stamps the corresponding token from :class:`ZvArrayTag`.\n" - } - }, - "required": [ - "zv_array", - "num_metanodes", - "sid_ndim" - ], - "title": "MetanodeChildrenMeta", - "type": "object" - }, "ObjectAttributeMeta": { "additionalProperties": false, "description": "``.zattrs`` for each ``object_attributes//`` array.", @@ -612,40 +553,6 @@ "title": "ObjectIndexMeta", "type": "object" }, - "ObjectIndexPendingMeta": { - "additionalProperties": false, - "description": "``.zattrs`` for ``object_index/pending//``.", - "properties": { - "batch_id": { - "description": "Monotonic batch id for an ``object_index/pending/`` sidecar.", - "minimum": 0, - "type": "integer" - }, - "num_objects": { - "description": "Total object count this array carries.", - "minimum": 0, - "type": "integer" - }, - "sid_ndim": { - "description": "Number of spatial-index dimensions encoded in chunk keys.", - "minimum": 1, - "type": "integer" - }, - "zv_array": { - "$ref": "#/$defs/ZvArrayTag", - "const": "object_index_pending", - "description": "Discriminator slot identifying the kind of per-array ``.zattrs`` block. Each writer in ``core/arrays.py`` stamps the corresponding token from :class:`ZvArrayTag`.\n" - } - }, - "required": [ - "zv_array", - "batch_id", - "num_objects", - "sid_ndim" - ], - "title": "ObjectIndexPendingMeta", - "type": "object" - }, "RootMetadata": { "additionalProperties": false, "description": "Root-level ``.zattrs`` payload, persisted under the key ``zarr_vectors``. Validates the runtime :class:`zarr_vectors.core.metadata.RootMetadata` dataclass. 
Note: the canonical axis list lives in ``multiscales[0].axes`` at root level (NGFF-style), NOT under ``zarr_vectors`` \u2014 see ``MultiscalesMetadata``.", @@ -776,15 +683,12 @@ "links", "attribute", "object_index", - "object_index_pending", "object_attribute", "groupings", "groupings_attribute", "cross_chunk_links", - "cross_chunk_faces", "link_attribute", - "cross_chunk_link_attribute", - "metanode_children" + "cross_chunk_link_attribute" ], "title": "ZvArrayTag", "type": "string" @@ -793,7 +697,7 @@ "$id": "https://w3id.org/zarr-vectors/schema/0.5", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, - "metamodel_version": "1.7.0", + "metamodel_version": "1.11.0", "title": "zarr_vectors", "type": "object", "version": null diff --git a/tests/test_arrays.py b/tests/test_arrays.py index c1002c0..6b45753 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -14,7 +14,6 @@ create_groupings_attributes_array, create_link_attributes_array, create_links_array, - create_metanode_children_array, create_object_attributes_array, create_object_index_array, create_vertices_array, @@ -27,7 +26,6 @@ read_cross_chunk_links, read_group_object_ids, read_groupings_attributes, - read_metanode_children, read_object_attributes, read_object_manifest, read_object_vertices, @@ -39,7 +37,6 @@ write_cross_chunk_links, write_groupings, write_groupings_attributes, - write_metanode_children, write_object_attributes, write_object_index, ) @@ -489,43 +486,6 @@ def test_basic(self, tmp_path: Path) -> None: np.testing.assert_allclose(arr, [0.5, 0.8]) -# =================================================================== -# Metanode children -# =================================================================== - -class TestMetanodeChildren: - - def test_basic(self, tmp_path: Path) -> None: - lg = _make_level_group(tmp_path) - create_metanode_children_array(lg) - - children = { - 0: [((0, 0, 0), 0), ((0, 0, 0), 1), ((0, 0, 0), 2)], - 1: [((0, 0, 1), 
0), ((0, 0, 1), 1)], - 2: [((1, 0, 0), 0)], - } - write_metanode_children(lg, children, sid_ndim=3) - - c0 = read_metanode_children(lg, metanode_id=0) - c1 = read_metanode_children(lg, metanode_id=1) - assert c0 == children[0] - assert c1 == children[1] - - all_c = read_metanode_children(lg) - assert isinstance(all_c, dict) - assert all_c[2] == children[2] - - def test_out_of_range(self, tmp_path: Path) -> None: - lg = _make_level_group(tmp_path) - create_metanode_children_array(lg) - write_metanode_children(lg, {0: [((0, 0, 0), 0)]}, sid_ndim=3) - try: - read_metanode_children(lg, metanode_id=99) - assert False - except ArrayError: - pass - - # =================================================================== # Integration: full object reconstruction # =================================================================== diff --git a/tests/test_backends.py b/tests/test_backends.py index 30ce1fe..3c6c0b2 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -28,7 +28,6 @@ ) from zarr_vectors.core.backends.base import StorageBackend from zarr_vectors.core.group import Group -from zarr_vectors.core.metadata import RootMetadata from zarr_vectors.core.store import create_store, open_store, rebind from zarr_vectors.exceptions import StoreError @@ -223,9 +222,9 @@ def test_fsspec_missing_dep_message(monkeypatch): # =================================================================== -def _minimal_root_meta(): - return RootMetadata( - spatial_index_dims=[ +def _minimal_root_kwargs(): + return dict( + axes=[ {"name": "x", "type": "space", "unit": "unit"}, {"name": "y", "type": "space", "unit": "unit"}, {"name": "z", "type": "space", "unit": "unit"}, @@ -243,7 +242,7 @@ def test_rebind_swap_local_for_local(tmp_path): cached handles must continue to resolve. 
""" store_path = tmp_path / "test.zvr" - root = create_store(store_path, _minimal_root_meta()) + root = create_store(store_path, **_minimal_root_kwargs()) original_url = root.url rebind(root, "local") @@ -257,7 +256,7 @@ def test_rebind_swap_local_for_local(tmp_path): def test_rebind_url_mismatch_raises(tmp_path): """Rebinding to a different URL is a programming error.""" store_path = tmp_path / "test.zvr" - root = create_store(store_path, _minimal_root_meta()) + root = create_store(store_path, **_minimal_root_kwargs()) other = LocalBackend(tmp_path / "other.zvr") with pytest.raises(StoreError, match="matching URLs"): @@ -270,13 +269,13 @@ def test_rebind_url_mismatch_raises(tmp_path): def test_create_store_with_explicit_local_backend(tmp_path): - root = create_store(tmp_path / "x.zvr", _minimal_root_meta(), backend="local") + root = create_store(tmp_path / "x.zvr", **_minimal_root_kwargs(), backend="local") assert "zarr_vectors" in root.attrs def test_open_store_with_explicit_local_backend(tmp_path): p = tmp_path / "x.zvr" - create_store(p, _minimal_root_meta()) + create_store(p, **_minimal_root_kwargs()) root = open_store(p, backend="local") assert "zarr_vectors" in root.attrs diff --git a/tests/test_core.py b/tests/test_core.py index 85ed25a..8a23fc2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -56,7 +56,14 @@ # =================================================================== def _make_root_meta(**overrides) -> RootMetadata: - """Create a valid RootMetadata with sensible defaults, applying overrides.""" + """Create a valid RootMetadata with sensible defaults, applying overrides. + + Used by the ``TestRootMetadata`` suite (which validates the + dataclass directly). Callers that just want to seed a store via + ``create_store`` should use :func:`_root_kwargs` instead — passing + a fully-populated :class:`RootMetadata` to ``create_store`` is no + longer supported. 
+ """ defaults = dict( spatial_index_dims=[ {"name": "x", "type": "space", "unit": "um"}, @@ -71,6 +78,28 @@ def _make_root_meta(**overrides) -> RootMetadata: return RootMetadata(**defaults) +def _root_kwargs(**overrides) -> dict: + """Flat kwargs for ``create_store`` with the same sensible defaults + that :func:`_make_root_meta` uses for the dataclass.""" + defaults: dict = dict( + axes=[ + {"name": "x", "type": "space", "unit": "um"}, + {"name": "y", "type": "space", "unit": "um"}, + {"name": "z", "type": "space", "unit": "um"}, + ], + chunk_shape=(100.0, 100.0, 100.0), + bounds=([0.0, 0.0, 0.0], [1000.0, 1000.0, 1000.0]), + geometry_types=["point_cloud"], + ) + # Translate the in-memory dataclass field name to the create_store + # kwarg name on the fly so tests can keep using the familiar + # ``spatial_index_dims=...`` override. + if "spatial_index_dims" in overrides: + overrides["axes"] = overrides.pop("spatial_index_dims") + defaults.update(overrides) + return defaults + + def _make_level_meta(level: int = 0, **overrides) -> LevelMetadata: """Create a valid LevelMetadata.""" defaults: dict = dict( @@ -511,8 +540,7 @@ def test_array_meta(self, tmp_store_path: Path) -> None: class TestStoreCreate: def test_create_store(self, tmp_store_path: Path) -> None: - meta = _make_root_meta() - root = create_store(tmp_store_path, meta) + root = create_store(tmp_store_path, **_root_kwargs()) assert tmp_store_path.is_dir() assert "zarr_vectors" in root.attrs.to_dict() assert f"0" in root @@ -576,18 +604,17 @@ def test_create_store_ndim_mismatch(self, tmp_store_path: Path) -> None: pass def test_create_store_already_exists(self, tmp_store_path: Path) -> None: - meta = _make_root_meta() - create_store(tmp_store_path, meta) + create_store(tmp_store_path, **_root_kwargs()) try: - create_store(tmp_store_path, meta) + create_store(tmp_store_path, **_root_kwargs()) assert False except StoreError: pass def test_create_store_invalid_metadata(self, tmp_store_path: Path) -> None: - 
meta = _make_root_meta(chunk_shape=(100.0, 100.0)) # wrong ndim try: - create_store(tmp_store_path, meta) + # chunk_shape arity (2) inconsistent with axes arity (3). + create_store(tmp_store_path, **_root_kwargs(chunk_shape=(100.0, 100.0))) assert False except MetadataError: pass @@ -596,8 +623,7 @@ def test_create_store_invalid_metadata(self, tmp_store_path: Path) -> None: class TestStoreOpen: def test_open_store(self, tmp_store_path: Path) -> None: - meta = _make_root_meta() - create_store(tmp_store_path, meta) + create_store(tmp_store_path, **_root_kwargs()) root = open_store(tmp_store_path) assert isinstance(root, FsGroup) @@ -621,7 +647,7 @@ def test_open_invalid_store(self, tmp_store_path: Path) -> None: class TestResolutionLevels: def test_create_and_list(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) lm0 = _make_level_meta(0) create_resolution_level(root, 0, lm0) @@ -632,14 +658,14 @@ def test_create_and_list(self, tmp_store_path: Path) -> None: assert levels == [0, 1] def test_get_level(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) lm0 = _make_level_meta(0) create_resolution_level(root, 0, lm0) lvl = get_resolution_level(root, 0) assert isinstance(lvl, FsGroup) def test_get_missing_level(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) try: get_resolution_level(root, 99) assert False @@ -647,7 +673,7 @@ def test_get_missing_level(self, tmp_store_path: Path) -> None: pass def test_read_level_metadata(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) lm = _make_level_meta(0, vertex_count=42) create_resolution_level(root, 0, lm) read_back = 
read_level_metadata(root, 0) @@ -657,8 +683,7 @@ def test_read_level_metadata(self, tmp_store_path: Path) -> None: class TestRootMetadataReadWrite: def test_read_root_metadata(self, tmp_store_path: Path) -> None: - meta = _make_root_meta(geometry_types=["mesh"]) - root = create_store(tmp_store_path, meta) + root = create_store(tmp_store_path, **_root_kwargs(geometry_types=["mesh"])) read_back = read_root_metadata(root) assert read_back.geometry_types == ["mesh"] assert read_back.chunk_shape == (100.0, 100.0, 100.0) @@ -667,14 +692,14 @@ def test_read_root_metadata(self, tmp_store_path: Path) -> None: class TestParametricTypesStore: def test_write_and_read(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) write_parametric_types(root, DEFAULT_PARAMETRIC_TYPES) types = read_parametric_types(root) assert len(types) == 3 assert types[0].name == "plane" def test_empty(self, tmp_store_path: Path) -> None: - root = create_store(tmp_store_path, _make_root_meta()) + root = create_store(tmp_store_path, **_root_kwargs()) types = read_parametric_types(root) assert types == [] @@ -682,8 +707,10 @@ def test_empty(self, tmp_store_path: Path) -> None: class TestStoreInfo: def test_basic_info(self, tmp_store_path: Path) -> None: - meta = _make_root_meta(geometry_types=["point_cloud", "skeleton"]) - root = create_store(tmp_store_path, meta) + root = create_store( + tmp_store_path, + **_root_kwargs(geometry_types=["point_cloud", "skeleton"]), + ) lm = _make_level_meta(0, vertex_count=5000) create_resolution_level(root, 0, lm) write_parametric_types(root, [PARAMETRIC_PLANE]) diff --git a/tests/test_cross_chunk_faces.py b/tests/test_cross_chunk_faces.py index ab00825..48aca1d 100644 --- a/tests/test_cross_chunk_faces.py +++ b/tests/test_cross_chunk_faces.py @@ -1,14 +1,18 @@ -"""Tests for the cross_chunk_faces format extension (Tier C).""" +"""Tests for cross-chunk face records. 
+ +In 0.6.0 cross-chunk faces are stored as ``link_width=3`` records +under ``cross_chunk_links//`` instead of a separate +``cross_chunk_faces`` array. +""" from __future__ import annotations import numpy as np -from zarr_vectors.core.arrays import read_cross_chunk_faces +from zarr_vectors.core.arrays import read_cross_chunk_links from zarr_vectors.core.store import ( get_resolution_level, open_store, - read_root_metadata, ) from zarr_vectors.types.meshes import read_mesh, write_mesh @@ -35,12 +39,6 @@ def _tetra_straddling_chunks(tmp_path): return store, verts, faces -def test_capability_stamped_when_cross_faces_present(tmp_path): - store, _, _ = _tetra_straddling_chunks(tmp_path) - rm = read_root_metadata(open_store(str(store))) - assert "cross_chunk_faces" in rm.format_capabilities - - def test_read_mesh_returns_cross_chunk_faces(tmp_path): store, _, faces_in = _tetra_straddling_chunks(tmp_path) out = read_mesh(str(store)) @@ -48,26 +46,23 @@ def test_read_mesh_returns_cross_chunk_faces(tmp_path): assert out["vertex_count"] == 4 -def test_cross_chunk_faces_array_round_trips(tmp_path): +def test_cross_chunk_face_records_round_trip(tmp_path): store, _, _ = _tetra_straddling_chunks(tmp_path) root = open_store(str(store)) lvl = get_resolution_level(root, 0) - records = read_cross_chunk_faces(lvl) + records = read_cross_chunk_links(lvl, delta=0) # All 4 faces of the tetrahedron span chunks → all 4 appear here. assert len(records) == 4 - # Every face has 3 vertex records (triangle) + # Every face has 3 endpoints (triangle, link_width=3). for face in records: assert len(face) == 3 - # Each record is (chunk_coords, local_idx) for cc, local_idx in face: assert len(cc) == 3 - # Local index is non-negative; exact value depends on - # vertex-to-chunk assignment which we don't pin here. 
assert local_idx >= 0 -def test_legacy_no_cross_chunk_array_when_all_intra(tmp_path): - """A mesh wholly inside one chunk should not stamp the capability.""" +def test_intra_chunk_mesh_writes_no_cross_chunk_links(tmp_path): + """A mesh wholly inside one chunk should not write cross_chunk_links.""" verts = np.array([ [10, 10, 10], [20, 10, 10], [15, 20, 10], [15, 15, 20], ], dtype="f4") @@ -76,9 +71,6 @@ def test_legacy_no_cross_chunk_array_when_all_intra(tmp_path): ], dtype=np.int64) store = tmp_path / "m.zvr" write_mesh(str(store), verts, faces, chunk_shape=(50.0, 50.0, 50.0)) - rm = read_root_metadata(open_store(str(store))) - assert "cross_chunk_faces" not in rm.format_capabilities - root = open_store(str(store)) lvl = get_resolution_level(root, 0) - assert read_cross_chunk_faces(lvl) == [] + assert read_cross_chunk_links(lvl, delta=0) == [] diff --git a/tests/test_encoding.py b/tests/test_encoding.py index 1f9659a..faeabcc 100644 --- a/tests/test_encoding.py +++ b/tests/test_encoding.py @@ -6,13 +6,15 @@ from zarr_vectors.encoding.ragged import ( decode_object_index, - decode_paired_offsets, + decode_ragged_blob, decode_ragged_ints, decode_vertex_groups, + decode_vertex_offsets, encode_object_index, - encode_paired_offsets, + encode_ragged_blob, encode_ragged_ints, encode_vertex_groups, + encode_vertex_offsets, ) from zarr_vectors.encoding.compression import ( get_codec_pipeline, @@ -207,44 +209,45 @@ def test_wrong_sid_ndim_raises(self) -> None: # --------------------------------------------------------------------------- -# Paired offsets round-trips +# Vertex offsets round-trips (K×1 plain int64) # --------------------------------------------------------------------------- -class TestPairedOffsets: +class TestVertexOffsets: def test_basic(self) -> None: v_off = np.array([0, 36, 108], dtype=np.int64) - l_off = np.array([0, 24, 72], dtype=np.int64) - raw = encode_paired_offsets(v_off, l_off) - dec_v, dec_l = decode_paired_offsets(raw) + raw = 
encode_vertex_offsets(v_off) + dec_v = decode_vertex_offsets(raw) np.testing.assert_array_equal(dec_v, v_off) - np.testing.assert_array_equal(dec_l, l_off) - def test_no_links(self) -> None: - v_off = np.array([0, 36], dtype=np.int64) - l_off = np.array([-1, -1], dtype=np.int64) - raw = encode_paired_offsets(v_off, l_off) - dec_v, dec_l = decode_paired_offsets(raw) - np.testing.assert_array_equal(dec_v, v_off) - np.testing.assert_array_equal(dec_l, l_off) + def test_empty(self) -> None: + raw = encode_vertex_offsets(np.array([], dtype=np.int64)) + assert raw == b"" + dec = decode_vertex_offsets(raw) + assert len(dec) == 0 + + +# --------------------------------------------------------------------------- +# Self-describing ragged blob (inline offset header) round-trips +# --------------------------------------------------------------------------- + +class TestRaggedBlob: + + def test_round_trip_ints(self) -> None: + groups = [ + np.array([[0, 1], [1, 2], [2, 3]], dtype=np.int64), + np.array([[3, 4]], dtype=np.int64), + ] + blob = encode_ragged_blob(groups, np.dtype(np.int64)) + decoded = decode_ragged_blob(blob, np.dtype(np.int64), ncols=2) + assert len(decoded) == 2 + np.testing.assert_array_equal(decoded[0], groups[0]) + np.testing.assert_array_equal(decoded[1], groups[1]) def test_empty(self) -> None: - v_off = np.array([], dtype=np.int64) - l_off = np.array([], dtype=np.int64) - raw = encode_paired_offsets(v_off, l_off) - dec_v, dec_l = decode_paired_offsets(raw) - assert len(dec_v) == 0 - assert len(dec_l) == 0 - - def test_mismatched_lengths_raises(self) -> None: - try: - encode_paired_offsets( - np.array([0, 1], dtype=np.int64), - np.array([0], dtype=np.int64), - ) - assert False, "Should have raised ArrayError" - except ArrayError: - pass + blob = encode_ragged_blob([], np.dtype(np.int64)) + decoded = decode_ragged_blob(blob, np.dtype(np.int64), ncols=2) + assert decoded == [] # --------------------------------------------------------------------------- diff 
--git a/tests/test_icechunk_backend.py b/tests/test_icechunk_backend.py index 9982087..5ea7438 100644 --- a/tests/test_icechunk_backend.py +++ b/tests/test_icechunk_backend.py @@ -26,7 +26,6 @@ icechunk = pytest.importorskip("icechunk") -from zarr_vectors.core.metadata import RootMetadata from zarr_vectors.core.store import ( commit, create_store, @@ -42,9 +41,9 @@ # =================================================================== -def _minimal_root_md() -> RootMetadata: - return RootMetadata( - spatial_index_dims=[ +def _minimal_root_kwargs() -> dict: + return dict( + axes=[ {"name": "x", "type": "space", "unit": "um"}, {"name": "y", "type": "space", "unit": "um"}, {"name": "z", "type": "space", "unit": "um"}, @@ -67,7 +66,7 @@ def ic_repo_path(tmp_path: Path) -> str: def test_create_and_reopen(ic_repo_path: str) -> None: """create_store writes root metadata; commit + reopen reads it back.""" - root = create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") # Session is attached and usable for commit. 
assert session_for(root) is not None @@ -82,10 +81,10 @@ def test_create_and_reopen(ic_repo_path: str) -> None: def test_create_rejects_existing(ic_repo_path: str) -> None: """create_store on an existing icechunk repo must raise.""" - root = create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") commit(root, "initial setup") with pytest.raises(StoreError, match="already exists"): - create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") def test_open_missing_raises(tmp_path: Path) -> None: @@ -101,7 +100,7 @@ def test_open_missing_raises(tmp_path: Path) -> None: def test_subgroup_can_commit(ic_repo_path: str) -> None: """Sub-groups (created via root.create_group) share the same session and can be passed to commit() too.""" - root = create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") # 0/ was auto-created by create_store res0 = root["0"] assert session_for(res0) is session_for(root) @@ -111,7 +110,7 @@ def test_subgroup_can_commit(ic_repo_path: str) -> None: def test_discard_drops_uncommitted_writes(ic_repo_path: str) -> None: """Pending changes can be rolled back via discard_changes.""" - root = create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") commit(root, "snapshot 1") # Make an uncommitted attribute mutation, then discard it. 
@@ -130,7 +129,7 @@ def test_uncommitted_writes_are_not_durable(ic_repo_path: str) -> None: """A writable session that's never committed loses its work — this is the icechunk contract that callers need to be aware of when driving the high-level ``write_*`` functions.""" - root = create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") commit(root, "snapshot 1") # baseline # Mutate without committing, then drop the handle (simulating the @@ -153,7 +152,7 @@ def test_uncommitted_writes_are_not_durable(ic_repo_path: str) -> None: def test_session_for_returns_none_on_local(tmp_path: Path) -> None: """Non-transactional backends have no session — helpers return None.""" - root = create_store(str(tmp_path / "local_store"), _minimal_root_md()) + root = create_store(str(tmp_path / "local_store"), **_minimal_root_kwargs()) assert session_for(root) is None assert commit(root, "no-op") is None discard_changes(root) # no-op, must not raise @@ -161,13 +160,13 @@ def test_session_for_returns_none_on_local(tmp_path: Path) -> None: def test_unknown_scheme_in_icechunk_raises() -> None: with pytest.raises(StoreError, match="unsupported URL scheme"): - create_store("ftp://example.com/x", _minimal_root_md(), backend="icechunk") + create_store("ftp://example.com/x", **_minimal_root_kwargs(), backend="icechunk") def test_memory_storage_round_trip() -> None: """``memory://`` URLs route to icechunk's in-memory storage.""" url = "memory://test" - root = create_store(url, _minimal_root_md(), backend="icechunk") + root = create_store(url, **_minimal_root_kwargs(), backend="icechunk") snap = commit(root, "init") assert isinstance(snap, str) # Note: in-memory icechunk repos are per-Repository — reopening @@ -182,7 +181,7 @@ def test_memory_storage_round_trip() -> None: def test_readonly_at_snapshot(ic_repo_path: str) -> None: """Snapshot-pinned readonly sessions see the world at that snapshot.""" - root 
= create_store(ic_repo_path, _minimal_root_md(), backend="icechunk") + root = create_store(ic_repo_path, **_minimal_root_kwargs(), backend="icechunk") snap1 = commit(root, "v1") # Mutate + commit a second snapshot. diff --git a/tests/test_lazy_writer.py b/tests/test_lazy_writer.py index ee227ee..64156d6 100644 --- a/tests/test_lazy_writer.py +++ b/tests/test_lazy_writer.py @@ -1,4 +1,4 @@ -"""Tests for the ZVRWriter (Tier A + append_vertices + pending sidecars).""" +"""Tests for the ZVRWriter (Tier A + append_vertices).""" from __future__ import annotations @@ -7,10 +7,7 @@ import numpy as np import pytest -from zarr_vectors.core.arrays import ( - compact_object_index, - read_all_object_manifests, -) +from zarr_vectors.core.arrays import read_all_object_manifests from zarr_vectors.core.store import ( get_resolution_level, open_store, @@ -104,7 +101,7 @@ async def go(): # =================================================================== -# append_vertices + pending sidecar +# append_vertices (commits directly into object_index/ in 0.6.0+) # =================================================================== @@ -125,15 +122,13 @@ async def go(): out = read_points(str(store)) assert out["vertex_count"] == 140 - root = open_store(str(store)) - rm = read_root_metadata(root) - # vertex_count_cache always present; pending should be cleared by - # the auto-compact path... actually we only compact on explicit - # call. The capability is still present after the first commit. - assert "vertex_count_cache" in rm.format_capabilities +def test_append_then_compact_is_a_no_op(tmp_path): + """0.6.0+: compact() is a compatibility shim that just reports counts. -def test_append_then_compact_clears_pending(tmp_path): + Pending-sidecar staging was removed; every append commits directly + into ``object_index/``. 
+ """ store, _ = _make_store(tmp_path, n=60) async def go(): @@ -144,10 +139,7 @@ async def go(): ) _run(go()) - rm_before = read_root_metadata(open_store(str(store))) - assert "object_index_pending" in rm_before.format_capabilities - # Compact async def do_compact(): zvr = open_zvr(str(store)) async with zvr[0].writer() as w: @@ -155,17 +147,12 @@ async def do_compact(): result = _run(do_compact()) assert result["compacted"] is True - assert result["batches_folded"] == 1 assert result["num_objects"] == 70 - rm_after = read_root_metadata(open_store(str(store))) - assert "object_index_pending" not in rm_after.format_capabilities - - # Still readable after compact assert read_points(str(store))["vertex_count"] == 70 -def test_two_pending_batches_unioned_on_read(tmp_path): +def test_two_sequential_appends_merge_into_object_index(tmp_path): store, _ = _make_store(tmp_path, n=30) async def go(): @@ -174,7 +161,6 @@ async def go(): await w.append_vertices( np.random.default_rng(5).uniform(0, 100, (5, 3)).astype("f4") ) - # Re-open: each writer commit creates one batch zvr = open_zvr(str(store)) async with zvr[0].writer() as w: await w.append_vertices( diff --git a/tests/test_linkml_schema.py b/tests/test_linkml_schema.py index 194fe5d..c858a75 100644 --- a/tests/test_linkml_schema.py +++ b/tests/test_linkml_schema.py @@ -145,9 +145,6 @@ def test_level_metadata_with_attribute_chunking_validates(schema): {"zv_array": "attribute", "name": "intensity", "dtype": "float32"}), ("ObjectIndexMeta", {"zv_array": "object_index", "num_objects": 42, "sid_ndim": 3}), - ("ObjectIndexPendingMeta", - {"zv_array": "object_index_pending", "batch_id": 0, - "num_objects": 7, "sid_ndim": 3}), ("ObjectAttributeMeta", {"zv_array": "object_attribute", "name": "volume", "dtype": "float32", "shape": [42]}), @@ -158,21 +155,19 @@ def test_level_metadata_with_attribute_chunking_validates(schema): "dtype": "int32", "shape": [5]}), ("CrossChunkLinksMeta", {"zv_array": "cross_chunk_links", "num_links": 
12, "sid_ndim": 3, - "level_delta": 0}), + "level_delta": 0, "link_width": 2}), ("CrossChunkLinksMeta", {"zv_array": "cross_chunk_links", "num_links": 5, "sid_ndim": 3, - "level_delta": -1}), - ("CrossChunkFacesMeta", - {"zv_array": "cross_chunk_faces", "num_faces": 4, - "sid_ndim": 3, "record_size": 5}), + "level_delta": -1, "link_width": 1}), + ("CrossChunkLinksMeta", + {"zv_array": "cross_chunk_links", "num_links": 4, "sid_ndim": 3, + "level_delta": 0, "link_width": 3}), ("LinkAttributeMeta", {"zv_array": "link_attribute", "name": "weight", "dtype": "float32", "level_delta": 0}), ("CrossChunkLinkAttributeMeta", {"zv_array": "cross_chunk_link_attribute", "name": "weight", "dtype": "float32", "level_delta": 1, "num_links": 7}), - ("MetanodeChildrenMeta", - {"zv_array": "metanode_children", "num_metanodes": 3, "sid_ndim": 3}), ], ) def test_per_array_zattrs_shapes_validate(schema, defs_name, instance): diff --git a/tests/test_multiscale_links.py b/tests/test_multiscale_links.py index feaa18d..596eb39 100644 --- a/tests/test_multiscale_links.py +++ b/tests/test_multiscale_links.py @@ -335,12 +335,11 @@ def test_build_pyramid_depth_zero_emits_no_cross_level(tmp_path: Path) -> None: @pytest.mark.xfail( reason=( "Cross-level link emission (+delta/-delta arrays) is broken end-to-end: " - "_per_object_coarsen (the default coarsen path) never writes the " - "metanode_children sidecar that _finalize_cross_level_for_store reads, " - "and the legacy _cross_object_metanode_coarsen call to " - "write_metanode_children is missing the required sid_ndim arg and is " - "silently swallowed by an `except Exception: pass`. See the open " - "issue tracking this design gap." + "_per_object_coarsen (the default coarsen path) writes no provenance " + "records, and _finalize_cross_level_for_store has no usable " + "fine→parent reconstruction without them. Tracking the design gap " + "(coarsening would need to emit cross_chunk_links/ records " + "in-line) in a separate issue." 
), strict=False, ) diff --git a/zarr_vectors/composite.py b/zarr_vectors/composite.py index f3a9632..16ddbe1 100644 --- a/zarr_vectors/composite.py +++ b/zarr_vectors/composite.py @@ -402,9 +402,6 @@ def _write_namespaced_vertices( "ndim": ndim, "vertex_count": n_verts, }) - # offsets group used downstream by read paths that follow this - # function's convention; ensure it exists. - level_group.require_group(f"{array_name}_offsets") chunk_assignments = assign_chunks(positions, chunk_shape) @@ -415,11 +412,6 @@ def _write_namespaced_vertices( raw = chunk_verts.astype(np.float32).tobytes() level_group.write_bytes(array_name, key, raw) - offsets = np.array([0], dtype=np.int64) - level_group.write_bytes( - f"{array_name}_offsets", key, offsets.tobytes(), - ) - return n_verts diff --git a/zarr_vectors/constants.py b/zarr_vectors/constants.py index c4b6cf2..2c0653c 100644 --- a/zarr_vectors/constants.py +++ b/zarr_vectors/constants.py @@ -12,40 +12,38 @@ FORMAT_VERSION: str = "0.5.0" """Current ZV specification version. -0.5.0: NGFF-alignment cleanup. Hard break — see -``RootMetadata.validate``. Three on-disk changes vs 0.4.1: - -1. Root key ``zarr_vectors.format_version`` is renamed to - ``zarr_vectors.zv_version`` to disambiguate from Zarr v3's - ``zarr_format`` field. -2. Root key ``zarr_vectors.spatial_index_dims`` is removed; axes are - read from the NGFF ``multiscales[0].axes`` block which is now - written eagerly at :func:`zarr_vectors.core.store.create_store` - time. ``multiscales[].version`` is ``"0.4"`` and the ZV - discriminator lives in ``multiscales[].metadata.format = - "zarr_vectors"`` (NGFF reserves ``type`` for the downsampling - method). -3. Per-array ``.zattrs`` no longer duplicate the array's ``dtype`` — - Zarr v3 already stores it as ``data_type`` in the array - ``zarr.json``. - -0.4.1: bare-integer resolution-level group names (``0/``, ``1/``) -to mirror OME-Zarr; previously prefixed as ``resolution_0/``, -``resolution_1/``. 
+0.5.0: NGFF-alignment cleanup + format simplification. The 0.5 +series went through several on-disk simplifications without a +version bump (consumers should pin to a specific point release): + +- ``vertex_counts/`` per-chunk sidecars removed; per-chunk vertex + counts are derived from ``vertex_group_offsets`` and the + ``vertices/`` blob size. +- ``vertex_group_offsets/`` is a plain ``(K,)`` int64 array of + vertex byte offsets (the legacy ``(K, 2)`` paired layout with a + link-offset column is gone). +- ``attributes//_offsets`` sibling blobs removed. + Attribute groups align 1:1 with vertex groups; per-group byte + offsets are computed at read time. +- ``metanode_children/`` removed. Pyramid drill-down uses + ``cross_chunk_links//`` records. +- ``cross_chunk_faces/`` removed. Cross-chunk face identity uses + ``cross_chunk_links//`` with ``link_width=3``. The + ``cross_chunk_links`` array carries a ``link_width`` metadata + field (default 2 for edges). +- ``object_index/pending/`` staging tree removed. Incremental + writes go directly into ``object_index/``; transactional backends + (icechunk) make this cheap. + +Earlier 0.5 changes (now baseline): renamed ``format_version`` to +``zv_version``, moved axes to ``multiscales[0].axes``, dropped +per-array dtype duplication. + +0.4.1: bare-integer resolution-level group names (``0/``, ``1/``). """ # Capability tokens stored in RootMetadata.format_capabilities. Readers -# inspect these to know which optional 0.3+ features the store uses, and -# degrade gracefully when a capability is absent. -CAP_CROSS_CHUNK_FACES: str = "cross_chunk_faces" -"""Store contains the cross_chunk_faces array (face-identity preservation).""" - -CAP_VERTEX_COUNT_CACHE: str = "vertex_count_cache" -"""Per-chunk vertex_counts/ sidecars are present.""" - -CAP_OBJECT_INDEX_PENDING: str = "object_index_pending" -"""Store has uncompacted object_index pending sidecars.""" - +# inspect these to know which optional features the store uses. 
CAP_PRESERVED_OBJECT_IDS: str = "preserved_object_ids" """At least one resolution level was written with ID-preserving sparsification (``preserves_object_ids=True`` on the level metadata). @@ -110,11 +108,8 @@ GROUPINGS: str = "groupings" GROUPINGS_ATTRIBUTES: str = "groupings_attributes" CROSS_CHUNK_LINKS: str = "cross_chunk_links" -CROSS_CHUNK_FACES: str = "cross_chunk_faces" LINK_ATTRIBUTES: str = "link_attributes" CROSS_CHUNK_LINK_ATTRIBUTES: str = "cross_chunk_link_attributes" -METANODE_CHILDREN: str = "metanode_children" -VERTEX_COUNTS: str = "vertex_counts" # Parametric sub-arrays PARAMETRIC_OBJECTS: str = "objects" @@ -132,11 +127,8 @@ GROUPINGS, GROUPINGS_ATTRIBUTES, CROSS_CHUNK_LINKS, - CROSS_CHUNK_FACES, LINK_ATTRIBUTES, CROSS_CHUNK_LINK_ATTRIBUTES, - METANODE_CHILDREN, - VERTEX_COUNTS, }) # Array names whose on-disk layout includes a ```` segment diff --git a/zarr_vectors/core/arrays.py b/zarr_vectors/core/arrays.py index 859e5d4..b26c2a3 100644 --- a/zarr_vectors/core/arrays.py +++ b/zarr_vectors/core/arrays.py @@ -19,17 +19,14 @@ from zarr_vectors.constants import ( ATTRIBUTES, - CROSS_CHUNK_FACES, CROSS_CHUNK_LINK_ATTRIBUTES, CROSS_CHUNK_LINKS, GROUPINGS, GROUPINGS_ATTRIBUTES, LINK_ATTRIBUTES, LINKS, - METANODE_CHILDREN, OBJECT_ATTRIBUTES, OBJECT_INDEX, - VERTEX_COUNTS, VERTEX_GROUP_OFFSETS, VERTICES, ) @@ -44,20 +41,21 @@ from zarr_vectors.core.store import FsGroup from zarr_vectors.encoding.ragged import ( decode_object_index, - decode_paired_offsets, + decode_ragged_blob, decode_ragged_ints, decode_vertex_groups, + decode_vertex_offsets, encode_object_index, - encode_paired_offsets, + encode_ragged_blob, encode_ragged_ints, encode_vertex_groups, + encode_vertex_offsets, ) from zarr_vectors.exceptions import ArrayError from zarr_vectors.typing import ( ChunkCoords, CrossChunkLink, ObjectManifest, - VertexGroupRef, ) @@ -250,17 +248,26 @@ def create_cross_chunk_links_array( level_group: FsGroup, *, delta: int = 0, + link_width: int = 2, ) -> None: 
"""Create a ``cross_chunk_links//`` array. Source-side endpoints live at the owning resolution level; target-side endpoints live at ``this_level + delta``. + + Args: + level_group: Resolution level group. + delta: Level delta (0 for intra-level, ±N for cross-level). + link_width: Number of vertex refs per record. 2 for edges + (the default — chunk pairs straddling a boundary), 3 for + triangle faces, 1 for parent→child metanode references. """ full_name = cross_chunk_links_path(delta) _ensure_array_dir(level_group, full_name) level_group.write_array_meta(full_name, { "zv_array": "cross_chunk_links", "level_delta": int(delta), + "link_width": int(link_width), }) @@ -306,14 +313,6 @@ def create_cross_chunk_link_attributes_array( }) -def create_metanode_children_array(level_group: FsGroup) -> None: - """Create the ``metanode_children/`` array (for levels > 0).""" - _ensure_array_dir(level_group, METANODE_CHILDREN) - level_group.write_array_meta(METANODE_CHILDREN, { - "zv_array": "metanode_children", - }) - - # =================================================================== # Writing data # =================================================================== @@ -326,10 +325,9 @@ def write_chunk_vertices( ) -> npt.NDArray[np.int64]: """Write vertex groups to a spatial chunk. - Encodes the groups as a contiguous byte buffer in ``vertices/``, - and writes the K×2 byte offsets to ``vertex_group_offsets/``. - The link_offset column is set to -1 (no links); callers that also - write links should update via :func:`write_chunk_links`. + Encodes the groups as a contiguous byte buffer in ``vertices/`` and + writes the ``(K,)`` int64 vertex byte offsets to + ``vertex_group_offsets/``. Args: level_group: Resolution level group. @@ -338,48 +336,19 @@ def write_chunk_vertices( dtype: Numpy dtype for serialisation. Returns: - ``(K,)`` int64 array of vertex byte offsets (for external use). + ``(K,)`` int64 array of vertex byte offsets. 
""" dtype = np.dtype(dtype) key = _chunk_key(chunk_coords) raw_bytes, vertex_offsets = encode_vertex_groups(groups, dtype) level_group.write_bytes(VERTICES, key, raw_bytes) - - # Build paired offsets: vertex offsets + placeholder link offsets (-1) - link_offsets = np.full_like(vertex_offsets, -1) - paired_bytes = encode_paired_offsets(vertex_offsets, link_offsets) - level_group.write_bytes(VERTEX_GROUP_OFFSETS, key, paired_bytes) - - # Sidecar: total vertex count for this chunk (one int64). Lets - # ``chunk_local_to_global_offsets`` build the per-chunk → global - # mapping in O(chunks) bytes of I/O instead of streaming every - # vertex blob to discover its length. - n_verts = int(sum(len(g) for g in groups)) level_group.write_bytes( - VERTEX_COUNTS, key, np.int64(n_verts).tobytes(), + VERTEX_GROUP_OFFSETS, key, encode_vertex_offsets(vertex_offsets), ) - return vertex_offsets -def read_chunk_vertex_count( - level_group: FsGroup, - chunk_coords: ChunkCoords, -) -> int | None: - """Read the per-chunk vertex count sidecar. - - Returns ``None`` when the sidecar is absent (legacy 0.2 stores). - Callers should fall back to summing vertex_group_offsets in that - case. - """ - key = _chunk_key(chunk_coords) - if not level_group.chunk_exists(VERTEX_COUNTS, key): - return None - raw = level_group.read_bytes(VERTEX_COUNTS, key) - return int(np.frombuffer(raw, dtype=np.int64)[0]) - - def write_chunk_links( level_group: FsGroup, chunk_coords: ChunkCoords, @@ -390,15 +359,14 @@ def write_chunk_links( ) -> npt.NDArray[np.int64]: """Write link groups to a spatial chunk under ``links//``. - For ``delta=0`` (intra-level links) the per-chunk link byte offsets - are paired with the existing ``vertex_group_offsets`` table so - readers can look up a link group by its vertex-group index. 
+ For ``delta=0`` link groups are 1:1 aligned with the chunk's + vertex groups; readers derive per-group link byte offsets from the + cumulative sizes of each group's link bytes (see + :func:`read_chunk_links`). For ``delta != 0`` (cross-pyramid-level links) the source vertex - groups and link groups live at *different* levels, so the - ``vertex_group_offsets`` pairing is meaningless and skipped — the - on-disk paired-offsets table continues to reference only the - ``delta=0`` link array. + groups and link groups live at different levels and there is + typically one link group spanning the chunk. Args: level_group: Resolution level group. @@ -414,22 +382,23 @@ def write_chunk_links( key = _chunk_key(chunk_coords) full_name = links_path(delta) - raw_bytes, link_offsets = encode_ragged_ints(link_groups, dtype) - level_group.write_bytes(full_name, key, raw_bytes) - - # Pair link byte offsets with vertex byte offsets only for the - # intra-level (delta=0) array — see docstring above. if delta == 0 and level_group.chunk_exists(VERTEX_GROUP_OFFSETS, key): existing = level_group.read_bytes(VERTEX_GROUP_OFFSETS, key) - vertex_offsets, _ = decode_paired_offsets(existing) - if len(vertex_offsets) != len(link_offsets): + vertex_offsets = decode_vertex_offsets(existing) + if len(vertex_offsets) != len(link_groups): raise ArrayError( - f"Link group count ({len(link_offsets)}) != " + f"Link group count ({len(link_groups)}) != " f"vertex group count ({len(vertex_offsets)}) in chunk {key}" ) - paired_bytes = encode_paired_offsets(vertex_offsets, link_offsets) - level_group.write_bytes(VERTEX_GROUP_OFFSETS, key, paired_bytes) + # Self-describing blob: per-group byte offsets are packed in an + # inline header followed by the concatenated link data. + blob = encode_ragged_blob(link_groups, dtype) + level_group.write_bytes(full_name, key, blob) + + # Recover the per-group byte offsets (relative to the data section) + # for the return value. 
+ _, link_offsets = encode_ragged_ints(link_groups, dtype) return link_offsets @@ -442,6 +411,10 @@ def write_chunk_attributes( ) -> None: """Write vertex attribute data for groups in a spatial chunk. + Attribute groups align 1:1 with vertex groups, so per-group byte + offsets are derived at read time from ``vertex_group_offsets`` and + the attribute dtype/ncols. No sibling ``_offsets`` blob is written. + Args: level_group: Resolution level group. attr_name: Attribute name (e.g. ``"radius"``). @@ -454,9 +427,8 @@ def write_chunk_attributes( dtype = np.dtype(dtype) key = _chunk_key(chunk_coords) full_name = f"{ATTRIBUTES}/{attr_name}" - raw_bytes, offsets = encode_vertex_groups(attr_groups, dtype) + raw_bytes, _ = encode_vertex_groups(attr_groups, dtype) level_group.write_bytes(full_name, key, raw_bytes) - level_group.write_bytes(full_name, key + "_offsets", offsets.tobytes()) def write_chunk_link_attributes( @@ -646,51 +618,84 @@ def write_groupings_attributes( def write_cross_chunk_links( level_group: FsGroup, - links: list[CrossChunkLink], + links: list[list[tuple[ChunkCoords, int]]] | list[CrossChunkLink], sid_ndim: int, *, delta: int = 0, + link_width: int | None = None, ) -> None: - """Write cross-chunk link pairs under ``cross_chunk_links//``. + """Write cross-chunk link records under ``cross_chunk_links//``. - Each link is ``((chunk_A, vertex_A), (chunk_B, vertex_B))``. - Endpoint A is interpreted at the owning resolution level; endpoint B - is interpreted at ``this_level + delta`` (the level delta is - encoded in the array path). + Each record is ``link_width`` ``(chunk_coords, vertex_idx)`` + endpoints. ``link_width=2`` (the default) encodes the classic + cross-chunk edge ``((chunk_A, vi_A), (chunk_B, vi_B))``; + ``link_width=3`` encodes a triangle face spanning chunks; + ``link_width=1`` encodes a single parent→child reference used by + pyramid metanode drill-down. 
- Source and target levels are assumed to share ``sid_ndim`` (the - spatial-index dimensionality is uniform per store), even when their - chunk grids differ in spacing. + Records may be passed either as legacy 2-tuples (compatibility + with the pre-0.6.0 edge-only API) or as a list of endpoint lists + when ``link_width`` is supplied explicitly. + + Endpoint 0 is at the owning resolution level; endpoint k (k>0) + is at ``this_level + delta``. For ``link_width=1`` (metanode + drill-down) the single endpoint is at ``this_level + delta`` and + is paired with an implicit source defined by the writer (the + record stores only the child reference). Args: level_group: Resolution level group. - links: List of CrossChunkLink tuples. + links: List of records; each record is a list of + ``(chunk_coords, vertex_idx)`` tuples of length + ``link_width``. Legacy 2-tuple form is accepted when + ``link_width`` is 2 (or omitted). sid_ndim: Number of spatial index dimensions. delta: Level delta; see :mod:`zarr_vectors.core.paths`. + link_width: Endpoints per record. Defaults to 2 (or to the + arity of the first record if it's a list). """ if not links: return - full_name = cross_chunk_links_path(delta) - flat: list[int] = [] - for (chunk_a, vi_a), (chunk_b, vi_b) in links: - if len(chunk_a) != sid_ndim or len(chunk_b) != sid_ndim: + # Normalise input to a list-of-lists shape; resolve link_width. 
+ normalised: list[list[tuple[ChunkCoords, int]]] = [] + for rec in links: + if isinstance(rec, tuple) and len(rec) == 2 and isinstance(rec[0], tuple) and not isinstance(rec[0][0], tuple): + # Legacy CrossChunkLink: ((chunk_a, vi_a), (chunk_b, vi_b)) + normalised.append([rec[0], rec[1]]) + else: + normalised.append(list(rec)) + + if link_width is None: + link_width = len(normalised[0]) + for rec in normalised: + if len(rec) != link_width: raise ArrayError( - f"chunk coords arity mismatch in cross_chunk_links/{format_delta(delta)}: " - f"sid_ndim={sid_ndim}, got len(a)={len(chunk_a)}, len(b)={len(chunk_b)}" + f"cross_chunk_links/{format_delta(delta)}: record arity " + f"{len(rec)} != link_width {link_width}" ) - flat.extend(chunk_a) - flat.append(vi_a) - flat.extend(chunk_b) - flat.append(vi_b) + + full_name = cross_chunk_links_path(delta) + flat: list[int] = [] + for rec in normalised: + for chunk, vi in rec: + if len(chunk) != sid_ndim: + raise ArrayError( + f"chunk coords arity mismatch in cross_chunk_links/" + f"{format_delta(delta)}: sid_ndim={sid_ndim}, " + f"got len(chunk)={len(chunk)}" + ) + flat.extend(int(c) for c in chunk) + flat.append(int(vi)) arr = np.array(flat, dtype=np.int64) level_group.write_bytes(full_name, "data", arr.tobytes()) level_group.write_array_meta(full_name, { "zv_array": "cross_chunk_links", - "num_links": len(links), + "num_links": len(normalised), "sid_ndim": sid_ndim, "level_delta": int(delta), + "link_width": int(link_width), }) @@ -738,36 +743,6 @@ def write_cross_chunk_link_attributes( }) -def write_metanode_children( - level_group: FsGroup, - children: dict[int, list[VertexGroupRef]], - sid_ndim: int, -) -> None: - """Write metanode → child vertex references for drill-down. - - Args: - level_group: Resolution level group. - children: ``{metanode_id: [(chunk_coords, vertex_index), ...], ...}``. - sid_ndim: Number of spatial index dimensions. 
- """ - if not children: - return - - max_id = max(children.keys()) - child_list: list[list[tuple[tuple[int, ...], int]]] = [] - for mid in range(max_id + 1): - child_list.append(children.get(mid, [])) - - raw_bytes, offsets = encode_object_index(child_list, sid_ndim) - level_group.write_bytes(METANODE_CHILDREN, "data", raw_bytes) - level_group.write_bytes(METANODE_CHILDREN, "offsets", offsets.tobytes()) - level_group.write_array_meta(METANODE_CHILDREN, { - "zv_array": "metanode_children", - "num_metanodes": max_id + 1, - "sid_ndim": sid_ndim, - }) - - # =================================================================== # Reading data # =================================================================== @@ -881,16 +856,7 @@ def read_chunk_links( f"Cannot read links chunk {key} (delta={format_delta(delta)}): {e}" ) from e - # ``vertex_group_offsets`` only paths link offsets for delta=0; - # cross-level arrays write one link group per chunk, so the group - # offset table is trivially [0] (single group spanning the whole - # chunk blob). - if delta == 0: - link_offsets = _read_link_offsets(level_group, chunk_coords) - else: - link_offsets = np.array([0], dtype=np.int64) - - return decode_ragged_ints(raw, link_offsets, dtype, ncols=link_width) + return decode_ragged_blob(raw, dtype, ncols=link_width) def read_chunk_attributes( @@ -899,15 +865,29 @@ def read_chunk_attributes( chunk_coords: ChunkCoords, dtype: np.dtype | str = np.float32, ncols: int = 1, + *, + vert_dtype: np.dtype | str | None = None, + vert_ndim: int | None = None, ) -> list[npt.NDArray]: """Read vertex attribute data for a chunk. + Per-group byte offsets are derived from ``vertex_group_offsets``: + group ``k`` has ``n_k = (vert_offsets[k+1] - vert_offsets[k]) / + (vert_dtype.itemsize * vert_ndim)`` vertices, so its attribute + byte offset is ``cumsum(n_k) * dtype.itemsize * ncols``. + Args: level_group: Resolution level group. attr_name: Attribute name. chunk_coords: Spatial chunk coordinates. 
- dtype: Numpy dtype. + dtype: Numpy dtype of the attribute. ncols: Number of columns (channels). Use 1 for scalars. + vert_dtype: Vertex dtype (needed to derive per-group sizes). + When ``None`` (default) it is read from the ``vertices/`` + array metadata. + vert_ndim: Vertex coordinate dimensionality. When ``None`` + (default) it is read from root metadata via NGFF axes; on + failure falls back to 3. Returns: List of arrays aligned with vertex groups. @@ -916,6 +896,17 @@ def read_chunk_attributes( dtype = np.dtype(dtype) full_name = f"{ATTRIBUTES}/{attr_name}" + if vert_dtype is None: + try: + vmeta = level_group.read_array_meta(VERTICES) + vert_dtype = np.dtype(vmeta.get("dtype", "float32")) + except Exception: + vert_dtype = np.dtype(np.float32) + else: + vert_dtype = np.dtype(vert_dtype) + if vert_ndim is None: + vert_ndim = _infer_vert_ndim(level_group) + try: raw = level_group.read_bytes(full_name, key) except Exception as e: @@ -923,13 +914,74 @@ def read_chunk_attributes( f"Cannot read attribute '{attr_name}' chunk {key}: {e}" ) from e + attr_offsets = _derive_attribute_offsets( + level_group, chunk_coords, + vert_dtype=vert_dtype, vert_ndim=vert_ndim, + attr_dtype=dtype, attr_ncols=ncols, + total_attr_bytes=len(raw), + ) + return decode_vertex_groups(raw, attr_offsets, dtype, ncols) + + +def _infer_vert_ndim(level_group: FsGroup) -> int: + """Best-effort lookup of the spatial-index dimensionality. + + Reads NGFF ``multiscales[0].axes`` length from root attrs. Falls + back to 3 when unavailable. + """ try: - raw_offsets = level_group.read_bytes(full_name, key + "_offsets") - attr_offsets = np.frombuffer(raw_offsets, dtype=np.int64) + # Level groups don't carry root attrs; walk up to root via the + # backend. Most levels have an ``_backend`` handle that owns + # the root path. 
+ from zarr_vectors.core.group import Group + root_handle = Group._from_backend(level_group._backend, "") + ms = root_handle.attrs.to_dict().get("multiscales") or [] + if ms and isinstance(ms, list): + axes = ms[0].get("axes") or [] + if axes: + return len(axes) except Exception: - attr_offsets = np.array([0], dtype=np.int64) + pass + return 3 - return decode_vertex_groups(raw, attr_offsets, dtype, ncols) + +def _derive_attribute_offsets( + level_group: FsGroup, + chunk_coords: ChunkCoords, + *, + vert_dtype: np.dtype, + vert_ndim: int, + attr_dtype: np.dtype, + attr_ncols: int, + total_attr_bytes: int, +) -> npt.NDArray[np.int64]: + """Compute per-group attribute byte offsets from vertex offsets. + + Attribute groups align 1:1 with vertex groups. The k-th vertex + group spans ``vert_offsets[k+1] - vert_offsets[k]`` bytes of + vertex data, which corresponds to ``n_k`` vertices (and therefore + ``n_k`` attribute rows). + """ + vert_offsets = _read_vertex_offsets(level_group, chunk_coords) + if len(vert_offsets) == 0: + return np.empty(0, dtype=np.int64) + vert_row_size = vert_dtype.itemsize * vert_ndim + if vert_row_size <= 0: + return np.empty(0, dtype=np.int64) + # Vertex byte size per group → vertex count per group. + key = _chunk_key(chunk_coords) + vert_total = len(level_group.read_bytes(VERTICES, key)) + ends = np.empty_like(vert_offsets) + if len(vert_offsets) > 1: + ends[:-1] = vert_offsets[1:] + ends[-1] = vert_total + n_per_group = (ends - vert_offsets) // vert_row_size + attr_row_size = attr_dtype.itemsize * attr_ncols + attr_byte_lengths = n_per_group.astype(np.int64) * int(attr_row_size) + attr_offsets = np.empty_like(attr_byte_lengths) + attr_offsets[0] = 0 + np.cumsum(attr_byte_lengths[:-1], out=attr_offsets[1:]) + return attr_offsets def read_object_manifest( @@ -938,8 +990,6 @@ def read_object_manifest( ) -> ObjectManifest: """Read the ordered vertex group reference list for one object. - Folds pending sidecars on read. 
- Args: level_group: Resolution level group. object_id: Object ID. @@ -947,39 +997,27 @@ def read_object_manifest( Returns: List of ``(chunk_coords, vg_index)`` tuples. """ - # Cheap path when no pending sidecars exist — preserve the original - # bounds check / error semantics. - pending_batches = _list_pending_batches(level_group) meta = level_group.read_array_meta(OBJECT_INDEX) sid_ndim = meta["sid_ndim"] num_objects = meta["num_objects"] - if not pending_batches: - if object_id < 0 or object_id >= num_objects: - raise ArrayError( - f"Object ID {object_id} out of range [0, {num_objects})" - ) - raw = level_group.read_bytes(OBJECT_INDEX, "data") - offsets = np.frombuffer( - level_group.read_bytes(OBJECT_INDEX, "offsets"), - dtype=np.int64, - ) - all_manifests = decode_object_index(raw, offsets, sid_ndim) - return all_manifests[object_id] - - # With pending: union and look up. - merged = read_all_object_manifests(level_group) - if object_id < 0 or object_id >= len(merged): + if object_id < 0 or object_id >= num_objects: raise ArrayError( - f"Object ID {object_id} out of range [0, {len(merged)})" + f"Object ID {object_id} out of range [0, {num_objects})" ) - return merged[object_id] + raw = level_group.read_bytes(OBJECT_INDEX, "data") + offsets = np.frombuffer( + level_group.read_bytes(OBJECT_INDEX, "offsets"), + dtype=np.int64, + ) + all_manifests = decode_object_index(raw, offsets, sid_ndim) + return all_manifests[object_id] def read_all_object_manifests( level_group: FsGroup, ) -> list[ObjectManifest]: - """Read all object manifests at once, folding any pending sidecars. + """Read all object manifests at once. Returns: List indexed by object_id, each a list of ``(chunk_coords, vg_index)``. 
@@ -992,241 +1030,7 @@ def read_all_object_manifests( level_group.read_bytes(OBJECT_INDEX, "offsets"), dtype=np.int64, ) - main = list(decode_object_index(raw, offsets, sid_ndim)) - - # Fold pending sidecars in batch-id order (later batches overwrite - # earlier ones for the same oid). Capability: - # ``CAP_OBJECT_INDEX_PENDING``; absence means no sidecars to merge. - pending = read_object_index_pending(level_group) - for oid, manifest in pending: - while oid >= len(main): - main.append([]) - main[oid] = manifest - return main - - -# ---------------- pending sidecars (incremental append) ------------------- - - -_PENDING_PREFIX = f"{OBJECT_INDEX}/pending" - - -def _list_pending_batches(level_group: FsGroup) -> list[int]: - """List the batch IDs of pending object_index sidecars in order.""" - if not level_group.array_exists(_PENDING_PREFIX): - return [] - try: - pending_grp = level_group[_PENDING_PREFIX] - except Exception: - return [] - batches: list[int] = [] - for name in pending_grp: - try: - batches.append(int(name)) - except ValueError: - continue - return sorted(batches) - - -def next_pending_batch_id(level_group: FsGroup) -> int: - """Return the next free batch ID for a pending object_index sidecar.""" - existing = _list_pending_batches(level_group) - return (existing[-1] + 1) if existing else 0 - - -def write_object_index_pending( - level_group: FsGroup, - manifests: dict[int, ObjectManifest], - sid_ndim: int, - *, - batch_id: int | None = None, -) -> int: - """Write a pending object_index sidecar batch. - - Pending sidecars are union-folded by :func:`read_all_object_manifests` - and collapsed into the main array by :func:`compact_object_index`. - - Args: - level_group: Resolution level group. - manifests: ``{object_id: [(chunk_coords, vg_index), ...]}`` — - sparse; only the OIDs in the dict are written. - sid_ndim: Number of spatial index dimensions (matches the - main index's ``sid_ndim``). - batch_id: Force a specific batch id. 
``None`` picks the next - unused id. - - Returns: - The batch id written. - """ - if not manifests: - return -1 - if batch_id is None: - batch_id = next_pending_batch_id(level_group) - - oids = sorted(manifests.keys()) - sparse_list = [manifests[oid] for oid in oids] - raw_bytes, offsets = encode_object_index(sparse_list, sid_ndim) - - base = f"{_PENDING_PREFIX}/{batch_id}" - level_group.write_bytes(base, "oids", np.asarray(oids, dtype=np.int64).tobytes()) - level_group.write_bytes(base, "data", raw_bytes) - level_group.write_bytes(base, "offsets", offsets.tobytes()) - level_group.write_array_meta(base, { - "zv_array": "object_index_pending", - "batch_id": batch_id, - "num_objects": len(oids), - "sid_ndim": sid_ndim, - }) - return batch_id - - -def read_object_index_pending( - level_group: FsGroup, -) -> list[tuple[int, ObjectManifest]]: - """Read every pending sidecar in ascending batch order. - - Returns: - Flat list of ``(object_id, manifest)`` pairs. Same ``oid`` may - appear multiple times when the user committed several batches - for the same object — caller decides resolution policy (the - standard reader uses last-write-wins). 
- """ - batches = _list_pending_batches(level_group) - out: list[tuple[int, ObjectManifest]] = [] - for batch_id in batches: - base = f"{_PENDING_PREFIX}/{batch_id}" - try: - meta = level_group.read_array_meta(base) - except Exception: - continue - sid_ndim = int(meta["sid_ndim"]) - oids = np.frombuffer( - level_group.read_bytes(base, "oids"), dtype=np.int64, - ) - raw = level_group.read_bytes(base, "data") - offsets = np.frombuffer( - level_group.read_bytes(base, "offsets"), dtype=np.int64, - ) - decoded = decode_object_index(raw, offsets, sid_ndim) - for oid, manifest in zip(oids.tolist(), decoded): - out.append((int(oid), manifest)) - return out - - -def write_cross_chunk_faces( - level_group: FsGroup, - cross_faces: list[list[tuple[ChunkCoords, int]]], - sid_ndim: int, -) -> None: - """Persist face-identity for faces that span multiple chunks. - - Each face is a list of ``L`` ``(chunk_coords, local_vertex_index)`` - records — ``L = 3`` for triangles, ``L = 4`` for quads, etc. The - on-disk record packs ``ndim + 2`` int64 values: the chunk - coordinates, a ``vg_idx`` slot (always 0 today for mesh writers - that emit one vertex-group per chunk), and the in-group local - vertex index. - - Writers that don't care about face identity can leave the - edge-pair decomposition in :data:`CROSS_CHUNK_LINKS` and skip this - array entirely — readers that ignore the new array still get - connectivity through the existing edges. - - Capability token: :data:`CAP_CROSS_CHUNK_FACES`. 
- """ - if not cross_faces: - return - record_size = sid_ndim + 2 - offsets_list: list[int] = [0] - flat: list[int] = [] - for face in cross_faces: - for cc, local_idx in face: - if len(cc) != sid_ndim: - raise ArrayError( - f"chunk_coords length {len(cc)} != sid_ndim {sid_ndim}" - ) - flat.extend(int(c) for c in cc) - flat.append(0) # vg_idx (forward-compat slot) - flat.append(int(local_idx)) - offsets_list.append(len(flat) // record_size) - data = np.asarray(flat, dtype=np.int64) - offsets = np.asarray(offsets_list, dtype=np.int64) - level_group.write_bytes(CROSS_CHUNK_FACES, "data", data.tobytes()) - level_group.write_bytes(CROSS_CHUNK_FACES, "offsets", offsets.tobytes()) - level_group.write_array_meta(CROSS_CHUNK_FACES, { - "zv_array": "cross_chunk_faces", - "num_faces": len(cross_faces), - "sid_ndim": sid_ndim, - "record_size": record_size, - }) - - -def read_cross_chunk_faces( - level_group: FsGroup, -) -> list[list[tuple[ChunkCoords, int]]]: - """Read face-identity records for cross-chunk faces. - - Returns ``[]`` when the array is absent (older 0.2 stores or 0.3 - stores without the :data:`CAP_CROSS_CHUNK_FACES` capability). 
- """ - if not level_group.array_exists(CROSS_CHUNK_FACES): - return [] - try: - meta = level_group.read_array_meta(CROSS_CHUNK_FACES) - except Exception: - return [] - sid_ndim = int(meta["sid_ndim"]) - record_size = int(meta.get("record_size", sid_ndim + 2)) - data = np.frombuffer( - level_group.read_bytes(CROSS_CHUNK_FACES, "data"), dtype=np.int64, - ) - offsets = np.frombuffer( - level_group.read_bytes(CROSS_CHUNK_FACES, "offsets"), dtype=np.int64, - ) - n_faces = len(offsets) - 1 - out: list[list[tuple[ChunkCoords, int]]] = [] - for i in range(n_faces): - start, end = int(offsets[i]), int(offsets[i + 1]) - face: list[tuple[ChunkCoords, int]] = [] - for r in range(start, end): - record = data[r * record_size:(r + 1) * record_size] - cc = tuple(int(x) for x in record[:sid_ndim]) - local_idx = int(record[sid_ndim + 1]) - face.append((cc, local_idx)) - out.append(face) - return out - - -def compact_object_index(level_group: FsGroup) -> dict[str, int]: - """Fold every pending object_index sidecar into the main array. - - Reads the main index + all pending batches, applies last-write-wins - on duplicate oids, rewrites :data:`OBJECT_INDEX`, and deletes the - pending sidecars. - - Args: - level_group: Resolution level group. - - Returns: - Summary dict with ``batches_folded`` and ``num_objects``. - """ - if not level_group.array_exists(_PENDING_PREFIX): - return {"batches_folded": 0, "num_objects": 0} - batches = _list_pending_batches(level_group) - if not batches: - return {"batches_folded": 0, "num_objects": 0} - - merged = read_all_object_manifests(level_group) # already folds pending - main_meta = level_group.read_array_meta(OBJECT_INDEX) - sid_ndim = int(main_meta["sid_ndim"]) - - manifests = {oid: m for oid, m in enumerate(merged)} - write_object_index(level_group, manifests, sid_ndim=sid_ndim) - - # Remove pending tree. 
- level_group.delete_subtree(_PENDING_PREFIX) - - return {"batches_folded": len(batches), "num_objects": len(merged)} + return list(decode_object_index(raw, offsets, sid_ndim)) def read_object_vertices( @@ -1355,18 +1159,20 @@ def read_cross_chunk_links( level_group: FsGroup, *, delta: int = 0, -) -> list[CrossChunkLink]: - """Read all cross-chunk links from ``cross_chunk_links//data``. +) -> list[tuple[tuple[ChunkCoords, int], ...]]: + """Read all cross-chunk link records from ``cross_chunk_links//data``. - Endpoint A is at the owning resolution level; endpoint B is at - ``this_level + delta``. + Each record is a list of ``(chunk_coords, vertex_idx)`` endpoints. + Endpoint 0 lives at the owning resolution level; endpoints k (k>0) + live at ``this_level + delta``. - Returns ``[]`` when the ```` array does not exist or was - created without any links written to it (a placeholder meta block - with no ``num_links`` / ``sid_ndim`` / data blob). + Returns ``[]`` when the ```` array does not exist or has no + records. Returns: - List of ``((chunk_A, vertex_A), (chunk_B, vertex_B))`` tuples. + List of records; each record has length ``link_width``. For + the common ``link_width=2`` edge case callers can unpack each + record as ``((chunk_A, vi_A), (chunk_B, vi_B))``. """ full_name = cross_chunk_links_path(delta) if not level_group.array_exists(full_name): @@ -1375,13 +1181,11 @@ def read_cross_chunk_links( meta = level_group.read_array_meta(full_name) except Exception: return [] - # The create_ helper writes a placeholder meta block (no num_links / - # sid_ndim) when the array is materialized empty. Treat that as a - # zero-link read. 
if "num_links" not in meta or "sid_ndim" not in meta: return [] num_links = meta["num_links"] sid_ndim = meta["sid_ndim"] + link_width = int(meta.get("link_width", 2)) if num_links == 0: return [] if not level_group.chunk_exists(full_name, "data"): @@ -1390,18 +1194,20 @@ def read_cross_chunk_links( raw = level_group.read_bytes(full_name, "data") arr = np.frombuffer(raw, dtype=np.int64) - entry_len = 2 * (sid_ndim + 1) - half = sid_ndim + 1 - links: list[CrossChunkLink] = [] + endpoint_len = sid_ndim + 1 + record_len = link_width * endpoint_len + records: list[tuple[tuple[ChunkCoords, int], ...]] = [] - for i in range(0, len(arr), entry_len): - chunk_a = tuple(int(x) for x in arr[i : i + sid_ndim]) - vi_a = int(arr[i + sid_ndim]) - chunk_b = tuple(int(x) for x in arr[i + half : i + half + sid_ndim]) - vi_b = int(arr[i + half + sid_ndim]) - links.append(((chunk_a, vi_a), (chunk_b, vi_b))) + for i in range(0, len(arr), record_len): + endpoints: list[tuple[ChunkCoords, int]] = [] + for j in range(link_width): + base = i + j * endpoint_len + chunk = tuple(int(x) for x in arr[base : base + sid_ndim]) + vi = int(arr[base + sid_ndim]) + endpoints.append((chunk, vi)) + records.append(tuple(endpoints)) - return links + return records def read_cross_chunk_link_attributes( @@ -1427,42 +1233,6 @@ def read_cross_chunk_link_attributes( return np.frombuffer(raw, dtype=dtype).reshape(shape).copy() -def read_metanode_children( - level_group: FsGroup, - metanode_id: int | None = None, -) -> dict[int, list[VertexGroupRef]] | list[VertexGroupRef]: - """Read metanode children references. - - Args: - level_group: Resolution level group. - metanode_id: If given, return children for this metanode only. - If None, return all as a dict. - - Returns: - If metanode_id given: list of ``(chunk_coords, vertex_index)``. - If None: dict mapping metanode_id → list of refs. 
- """ - meta = level_group.read_array_meta(METANODE_CHILDREN) - sid_ndim = meta["sid_ndim"] - - raw = level_group.read_bytes(METANODE_CHILDREN, "data") - offsets = np.frombuffer( - level_group.read_bytes(METANODE_CHILDREN, "offsets"), - dtype=np.int64, - ) - - all_children = decode_object_index(raw, offsets, sid_ndim) - - if metanode_id is not None: - if metanode_id < 0 or metanode_id >= len(all_children): - raise ArrayError( - f"Metanode ID {metanode_id} out of range [0, {len(all_children)})" - ) - return all_children[metanode_id] - - return {i: c for i, c in enumerate(all_children)} - - # =================================================================== # Listing / introspection # =================================================================== @@ -1617,53 +1387,9 @@ def _read_vertex_offsets( level_group: FsGroup, chunk_coords: ChunkCoords, ) -> npt.NDArray[np.int64]: - """Read the vertex byte offsets from vertex_group_offsets for a chunk.""" + """Read the ``(K,)`` int64 vertex byte offsets for a chunk.""" key = _chunk_key(chunk_coords) raw = level_group.read_bytes(VERTEX_GROUP_OFFSETS, key) - vertex_offsets, _ = decode_paired_offsets(raw) - return vertex_offsets - + return decode_vertex_offsets(raw) -def _read_link_offsets( - level_group: FsGroup, - chunk_coords: ChunkCoords, -) -> npt.NDArray[np.int64]: - """Read the link byte offsets from vertex_group_offsets for a chunk.""" - key = _chunk_key(chunk_coords) - raw = level_group.read_bytes(VERTEX_GROUP_OFFSETS, key) - _, link_offsets = decode_paired_offsets(raw) - return link_offsets - - -def _vertex_group_counts( - level_group: FsGroup, - chunk_coords: ChunkCoords, - vert_dtype: np.dtype, -) -> list[int]: - """Compute vertex count per group from offsets and vertex data size. - - Returns list of vertex counts, one per group. 
- """ - key = _chunk_key(chunk_coords) - raw = level_group.read_bytes(VERTICES, key) - total_bytes = len(raw) - offsets = _read_vertex_offsets(level_group, chunk_coords) - - # Read ndim from vertex metadata - try: - vmeta = level_group.read_array_meta(VERTICES) - # We don't store ndim explicitly, so infer from first group - except Exception: - pass - counts: list[int] = [] - for i in range(len(offsets)): - start = int(offsets[i]) - end = int(offsets[i + 1]) if i + 1 < len(offsets) else total_bytes - nbytes = end - start - # Each vertex is vert_dtype.itemsize * ndim bytes - # But we don't know ndim here — just count raw elements - n_elements = nbytes // vert_dtype.itemsize - counts.append(n_elements) - - return counts diff --git a/zarr_vectors/core/metadata.py b/zarr_vectors/core/metadata.py index 5b160dd..428ba87 100644 --- a/zarr_vectors/core/metadata.py +++ b/zarr_vectors/core/metadata.py @@ -278,11 +278,9 @@ class RootMetadata: cross_level_storage: str = DEFAULT_CROSS_LEVEL_STORAGE """0.4 multiscale links: ``"none"`` / ``"implicit"`` / ``"explicit"``.""" format_capabilities: list[str] = field(default_factory=list) - """Optional 0.3+ capability tokens this store uses (e.g. - ``"cross_chunk_faces"``, ``"vertex_count_cache"``). Old 0.2 stores - deserialise to an empty list and the standard read paths continue - to work. See :mod:`zarr_vectors.constants` for the canonical token - names (``CAP_*``).""" + """Optional capability tokens this store uses. See + :mod:`zarr_vectors.constants` for the canonical token names + (``CAP_*``). Empty list by default.""" def validate(self) -> None: """Validate this metadata object. 
diff --git a/zarr_vectors/core/store.py b/zarr_vectors/core/store.py index 6c1f834..e239b09 100644 --- a/zarr_vectors/core/store.py +++ b/zarr_vectors/core/store.py @@ -22,7 +22,6 @@ from zarr.storage import StoreLike from zarr_vectors.constants import ( - CAP_VERTEX_COUNT_CACHE, DEFAULT_AXES_NAMES, DEFAULT_BOUNDS_SIDE, DEFAULT_OOB_POLICY, @@ -316,7 +315,6 @@ def __init__(self, path: str | Path, *, create: bool = False) -> None: def create_store( path: StoreLike, - root_metadata: RootMetadata | None = None, *, bounds: tuple[list[float], list[float]] | None = None, chunk_shape: tuple[float, ...] | None = None, @@ -326,6 +324,14 @@ def create_store( ndim: int | None = None, vertex_dtype: str = "float32", vertex_encoding: str = "raw", + links_convention: str | None = None, + object_index_convention: str | None = None, + cross_chunk_strategy: str | None = None, + cross_level_depth: int | None = None, + cross_level_storage: str | None = None, + reduction_factor: int | None = None, + base_bin_shape: tuple[float, ...] | None = None, + format_capabilities: list[str] | None = None, backend: str | None = None, **backend_kwargs: Any, ) -> Group: @@ -363,6 +369,29 @@ def create_store( ``chunk_shape``). Defaults to 3. vertex_dtype: dtype for the level-0 vertices array. vertex_encoding: ``"raw"`` or ``"draco"``. + links_convention: How edges are encoded + (``"explicit"`` / ``"implicit_sequential"`` / + ``"implicit_sequential_with_branches"``). When omitted the + store has no convention stamped at create time; the first + type writer (``write_graph``, ``write_polyline``, ...) + fills it in via ``_ensure_root_metadata_for_write``. + object_index_convention: How ``object_index/`` is encoded + (``"standard"`` / ``"identity"``). Same lazy-fill rule as + ``links_convention``. + cross_chunk_strategy: Cross-chunk connectivity strategy + (``"boundary_deduplication"`` / ``"explicit_links"`` / + ``"both"``). Lazy-filled by type writers. 
+ cross_level_depth: Maximum ``|delta|`` materialised by + ``build_pyramid``. ``0`` disables cross-level link + arrays. + cross_level_storage: ``"none"`` / ``"implicit"`` / + ``"explicit"`` — see + :data:`zarr_vectors.constants.VALID_XLEVEL_STORAGE`. + reduction_factor: Default vertex-count fold per pyramid step. + base_bin_shape: Level-0 supervoxel bin edge lengths. When + omitted, defaults to ``chunk_shape`` (one bin per chunk). + format_capabilities: Optional capability tokens to stamp on + the root. See :mod:`zarr_vectors.constants` ``CAP_*``. backend: Force a particular backend (``"local"`` / ``"icechunk"``). **backend_kwargs: Forwarded to the backend constructor. @@ -373,22 +402,6 @@ def create_store( StoreError: If a store already exists at ``path``. MetadataError: If kwargs are inconsistent (mismatched ndim). """ - # Backward-compat: accept a fully-populated RootMetadata as the - # second positional arg (the pre-0.4.1 API). Unpack it into the - # flat-kwargs path so the rest of the function only handles one shape. 
- if root_metadata is not None: - root_metadata.validate() - if axes is None: - axes = root_metadata.spatial_index_dims - if chunk_shape is None: - chunk_shape = root_metadata.chunk_shape - if bounds is None: - bounds = root_metadata.bounds - if geometry_types is None: - geometry_types = root_metadata.geometry_types - if crs is None: - crs = root_metadata.crs - resolved_ndim = _resolve_ndim( ndim=ndim, axes=axes, chunk_shape=chunk_shape, bounds=bounds, ) @@ -444,16 +457,15 @@ def create_store( axes=axes, geometry_types=geometry_types, crs=crs, + links_convention=links_convention, + object_index_convention=object_index_convention, + cross_chunk_strategy=cross_chunk_strategy, + cross_level_depth=cross_level_depth, + cross_level_storage=cross_level_storage, + reduction_factor=reduction_factor, + base_bin_shape=base_bin_shape, + format_capabilities=format_capabilities, ) - # Backward-compat: merge the non-structural fields from a supplied - # RootMetadata (conventions, base_bin_shape, cross_level_*, etc.) on - # top of the flat-kwarg attrs. - if root_metadata is not None: - full = root_metadata.to_dict() - attrs = root.attrs.to_dict() - merged = dict(attrs.get("zarr_vectors", {})) - merged.update(full.get("zarr_vectors", {})) - root.attrs.update({"zarr_vectors": merged}) # 0/ + empty vertices pair — the "warm" payload. level0 = root.create_group(f"{RESOLUTION_PREFIX}0") @@ -511,6 +523,14 @@ def _write_root_attrs( axes: list[dict[str, str]], geometry_types: list[str], crs: dict[str, Any] | None = None, + links_convention: str | None = None, + object_index_convention: str | None = None, + cross_chunk_strategy: str | None = None, + cross_level_depth: int | None = None, + cross_level_storage: str | None = None, + reduction_factor: int | None = None, + base_bin_shape: tuple[float, ...] 
| None = None, + format_capabilities: list[str] | None = None, ) -> None: """Write the ``zarr_vectors`` root-attrs block plus the eager NGFF ``multiscales`` block (axes only — ``datasets`` are filled in by @@ -524,15 +544,29 @@ def _write_root_attrs( existing = full_attrs.get("zarr_vectors", {}) zv: dict[str, Any] = dict(existing) zv["zv_version"] = FORMAT_VERSION - caps = list(zv.get("format_capabilities") or []) - if CAP_VERTEX_COUNT_CACHE not in caps: - caps.append(CAP_VERTEX_COUNT_CACHE) - zv["format_capabilities"] = caps + if format_capabilities is not None: + zv["format_capabilities"] = list(format_capabilities) + else: + zv.setdefault("format_capabilities", list(zv.get("format_capabilities") or [])) zv["chunk_shape"] = list(chunk_shape) zv["bounds"] = [list(bounds[0]), list(bounds[1])] zv["geometry_types"] = list(geometry_types) if crs is not None: zv["crs"] = crs + if links_convention is not None: + zv["links_convention"] = links_convention + if object_index_convention is not None: + zv["object_index_convention"] = object_index_convention + if cross_chunk_strategy is not None: + zv["cross_chunk_strategy"] = cross_chunk_strategy + if cross_level_depth is not None: + zv["cross_level_depth"] = int(cross_level_depth) + if cross_level_storage is not None: + zv["cross_level_storage"] = cross_level_storage + if reduction_factor is not None: + zv["reduction_factor"] = int(reduction_factor) + if base_bin_shape is not None: + zv["base_bin_shape"] = list(base_bin_shape) # Eager NGFF ``multiscales`` block — axes are the canonical axis # store from 0.5.0 on. 
We seed datasets with level 0 only; the @@ -581,10 +615,7 @@ def _ensure_root_metadata_for_write( existing = full_attrs.get("zarr_vectors", {}) zv: dict[str, Any] = dict(existing) zv.setdefault("zv_version", FORMAT_VERSION) - caps = list(zv.get("format_capabilities") or []) - if CAP_VERTEX_COUNT_CACHE not in caps: - caps.append(CAP_VERTEX_COUNT_CACHE) - zv["format_capabilities"] = caps + zv.setdefault("format_capabilities", list(zv.get("format_capabilities") or [])) # Axes live in NGFF ``multiscales[0].axes`` (0.5.0+). ms = full_attrs.get("multiscales") or [] diff --git a/zarr_vectors/encoding/compression.py b/zarr_vectors/encoding/compression.py index 49f9c9c..4127b07 100644 --- a/zarr_vectors/encoding/compression.py +++ b/zarr_vectors/encoding/compression.py @@ -15,7 +15,6 @@ GROUPINGS_ATTRIBUTES, LINK_ATTRIBUTES, LINKS, - METANODE_CHILDREN, OBJECT_ATTRIBUTES, OBJECT_INDEX, VERTEX_GROUP_OFFSETS, @@ -44,7 +43,7 @@ def get_default_compressor(array_type: str) -> dict[str, object]: } # Offsets are monotonically increasing integers — delta + compress - if array_type in (VERTEX_GROUP_OFFSETS, OBJECT_INDEX, METANODE_CHILDREN): + if array_type in (VERTEX_GROUP_OFFSETS, OBJECT_INDEX): return { "id": "blosc", "cname": "zstd", diff --git a/zarr_vectors/encoding/ragged.py b/zarr_vectors/encoding/ragged.py index 212132b..b469a86 100644 --- a/zarr_vectors/encoding/ragged.py +++ b/zarr_vectors/encoding/ragged.py @@ -278,51 +278,68 @@ def decode_object_index( # --------------------------------------------------------------------------- -# Paired offset encoding (vertex_group_offsets: K×2) +# Vertex offset encoding (vertex_group_offsets: K×1) # --------------------------------------------------------------------------- -def encode_paired_offsets( +def encode_vertex_offsets( vertex_offsets: npt.NDArray[np.int64], - link_offsets: npt.NDArray[np.int64], ) -> bytes: - """Encode paired (vertex_offset, link_offset) arrays into bytes. 
+ """Encode ``(K,)`` int64 vertex byte offsets to bytes.""" + return np.ascontiguousarray(vertex_offsets, dtype=np.int64).tobytes() - Args: - vertex_offsets: ``(K,)`` byte offsets into the vertices chunk. - link_offsets: ``(K,)`` byte offsets into the links chunk. - Use -1 for entries where links are not applicable. - Returns: - Raw bytes encoding a ``(K, 2)`` int64 array. - """ - k = len(vertex_offsets) - if len(link_offsets) != k: - raise ArrayError( - f"vertex_offsets length {k} != link_offsets length {len(link_offsets)}" - ) - paired = np.stack([vertex_offsets, link_offsets], axis=1).astype(np.int64) - return paired.tobytes() +def decode_vertex_offsets( + raw_bytes: bytes, +) -> npt.NDArray[np.int64]: + """Decode ``(K,)`` int64 vertex byte offsets from bytes.""" + if len(raw_bytes) == 0: + return np.empty(0, dtype=np.int64) + return np.frombuffer(raw_bytes, dtype=np.int64).copy() -def decode_paired_offsets( - raw_bytes: bytes, -) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: - """Decode paired offsets from bytes. +# --------------------------------------------------------------------------- +# Self-describing ragged blob (inline header) +# +# Used by per-chunk link blobs where per-vertex-group boundaries cannot +# be derived from another array. Layout: +# +# [K : int64] # number of groups +# [off_0, ..., off_{K-1}] # K int64 byte offsets into the data section +# [data] # concatenated raw bytes +# +# Each off_k is the byte offset of group k inside ``data``. The +# end of the last group is ``len(data)``. +# --------------------------------------------------------------------------- - Args: - raw_bytes: Buffer from :func:`encode_paired_offsets`. 
+def encode_ragged_blob( + groups: list[npt.NDArray], + dtype: np.dtype, +) -> bytes: + """Encode a list of ragged arrays with inline offset header.""" + data_bytes, offsets = encode_vertex_groups(groups, dtype) + k = len(offsets) + header = np.empty(1 + k, dtype=np.int64) + header[0] = k + if k: + header[1:] = offsets + return header.tobytes() + data_bytes - Returns: - vertex_offsets: ``(K,)`` int64 array. - link_offsets: ``(K,)`` int64 array. - """ - if len(raw_bytes) == 0: - return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64) - - arr = np.frombuffer(raw_bytes, dtype=np.int64) - if len(arr) % 2 != 0: - raise ArrayError( - f"Paired offsets buffer length {len(arr)} is not even" - ) - paired = arr.reshape(-1, 2) - return paired[:, 0].copy(), paired[:, 1].copy() + +def decode_ragged_blob( + raw_bytes: bytes, + dtype: np.dtype, + ncols: int = 1, +) -> list[npt.NDArray]: + """Decode an inline-header ragged blob produced by + :func:`encode_ragged_blob`.""" + if len(raw_bytes) < 8: + return [] + k = int(np.frombuffer(raw_bytes[:8], dtype=np.int64)[0]) + header_len = 8 * (1 + k) + if k == 0: + return [] + offsets = np.frombuffer( + raw_bytes[8:header_len], dtype=np.int64, + ).copy() + data = raw_bytes[header_len:] + return decode_vertex_groups(data, offsets, dtype, ncols) diff --git a/zarr_vectors/lazy/writer.py b/zarr_vectors/lazy/writer.py index d3fef00..8d31367 100644 --- a/zarr_vectors/lazy/writer.py +++ b/zarr_vectors/lazy/writer.py @@ -28,14 +28,13 @@ import numpy as np import numpy.typing as npt -from zarr_vectors.constants import CAP_OBJECT_INDEX_PENDING from zarr_vectors.core.arrays import ( - compact_object_index, list_chunk_keys, + read_all_object_manifests, read_chunk_vertices, write_chunk_attributes, write_chunk_vertices, - write_object_index_pending, + write_object_index, ) from zarr_vectors.core.metadata import LevelMetadata, RootMetadata from zarr_vectors.exceptions import ArrayError @@ -145,9 +144,10 @@ async def add_face_attribute( chunk's 
``F_local`` faces appear in the same order as the decoded ``links/``. - Note: face attributes for **cross-chunk** faces require the 0.3 - ``cross_chunk_faces`` capability and are tracked in a separate - path; that path is wired up in step 8. + Note: cross-chunk faces are stored in + ``cross_chunk_links//`` with ``link_width=3`` (0.6.0+); + per-face attributes for those records use the parallel + ``cross_chunk_link_attributes///`` array. """ await self._write_per_face_attribute( name=name, values=values, dtype=dtype, @@ -482,14 +482,11 @@ async def _rmw_chunk(cc: ChunkCoords, indices) -> None: } def _current_num_objects(self) -> int: - """Inspect existing object_index + pending sidecars for total count.""" - from zarr_vectors.core.arrays import read_all_object_manifests + """Inspect existing object_index for total count.""" try: manifests = read_all_object_manifests(self._group) except Exception: return 0 - # Total count counts already-pending entries too so successive - # writes don't collide. existing_pending = self._pending_manifests if existing_pending: return max( @@ -501,31 +498,24 @@ def _current_num_objects(self) -> int: # ---------------- lifecycle ----------------------------------------- async def commit(self) -> dict: - """Flush pending appends to a new object_index sidecar batch. - - - Writes a pending sidecar at ``object_index/pending//`` - if there are staged manifests. - - Stamps the ``CAP_OBJECT_INDEX_PENDING`` capability on the root - metadata so readers know to fold the sidecar. - - Updates the level's recorded ``vertex_count`` to include the - appended vertices. + """Flush pending appends into the main ``object_index/`` array. + + Reads the existing main index (if any), merges the staged + manifests with last-write-wins on duplicate OIDs, and rewrites + ``object_index/``. Transactional backends (icechunk) make + this cheap via copy-on-write; plain LocalStore rewrites the + whole index on every commit. 
""" out: dict[str, int] = {"committed": True} if not self._pending_manifests: self._committed = True - return {**out, "batches_written": 0, "objects_committed": 0} + return {**out, "objects_committed": 0} sid_ndim = self._pending_sid_ndim or self._level._root_meta.sid_ndim - batch_id = await asyncio.to_thread( - write_object_index_pending, - self._group, self._pending_manifests, sid_ndim, - ) + await asyncio.to_thread(self._merge_and_write_object_index, sid_ndim) - # Stamp the pending capability on root metadata (idempotent). - await asyncio.to_thread(self._stamp_capability, CAP_OBJECT_INDEX_PENDING) - - # Update level vertex_count from the on-disk per-chunk sidecars. + # Update level vertex_count from the on-disk vertices blobs. await asyncio.to_thread(self._bump_level_vertex_count) committed = len(self._pending_manifests) @@ -534,57 +524,41 @@ async def commit(self) -> dict: self._committed = True return { **out, - "batches_written": 1 if batch_id >= 0 else 0, "objects_committed": committed, - "batch_id": batch_id, } async def compact(self) -> dict: - """Fold every pending object_index sidecar into the main index. - - Wraps :func:`zarr_vectors.core.arrays.compact_object_index`. - Also clears the ``CAP_OBJECT_INDEX_PENDING`` capability on root - metadata once the pending tree is empty. - """ + """Compatibility shim: pending-sidecar staging was removed in + 0.6.0. 
Calls :meth:`commit` (which now directly rewrites the + main index) and reports the count for callers that used to + rely on the explicit compaction step.""" if self._pending_manifests: await self.commit() - result = await asyncio.to_thread(compact_object_index, self._group) - await asyncio.to_thread(self._clear_capability, CAP_OBJECT_INDEX_PENDING) - return {"compacted": True, **result} + manifests = await asyncio.to_thread( + read_all_object_manifests, self._group, + ) + return {"compacted": True, "num_objects": len(manifests)} # ---------------- root-metadata mutators ---------------------------- - def _stamp_capability(self, cap: str) -> None: - # The level group is a sub-group; capabilities live on root. - from zarr_vectors.core.group import Group - root_handle = Group._from_backend(self._group._backend, "") - root_attrs = root_handle.attrs.to_dict() - zv = root_attrs.get("zarr_vectors", {}) - caps = list(zv.get("format_capabilities", [])) - if cap not in caps: - caps.append(cap) - zv["format_capabilities"] = caps - root_attrs["zarr_vectors"] = zv - root_handle.attrs.update({"zarr_vectors": zv}) - - def _clear_capability(self, cap: str) -> None: - from zarr_vectors.core.group import Group - root_handle = Group._from_backend(self._group._backend, "") - root_attrs = root_handle.attrs.to_dict() - zv = root_attrs.get("zarr_vectors", {}) - caps = list(zv.get("format_capabilities", [])) - if cap in caps: - caps.remove(cap) - zv["format_capabilities"] = caps - root_handle.attrs.update({"zarr_vectors": zv}) + def _merge_and_write_object_index(self, sid_ndim: int) -> None: + """Merge ``self._pending_manifests`` into the main ``object_index/``. - def _bump_level_vertex_count(self) -> None: - """Recompute the level's vertex_count from the actual on-disk data. - - ``append_vertices`` doesn't track per-call totals, so we - recount from the per-chunk ``vertex_counts`` sidecars (which - ``write_chunk_vertices`` always emits). 
+ Reads the current index (if any), applies last-write-wins + on staged OIDs, and rewrites the index in one call. """ + try: + existing = read_all_object_manifests(self._group) + except Exception: + existing = [] + merged: dict[int, ObjectManifest] = { + oid: m for oid, m in enumerate(existing) + } + merged.update(self._pending_manifests) + write_object_index(self._group, merged, sid_ndim=sid_ndim) + + def _bump_level_vertex_count(self) -> None: + """Recompute the level's vertex_count from on-disk data.""" offsets, _keys, total = chunk_local_to_global_offsets(self._group) attrs = self._group.attrs.to_dict() lv = attrs.get("zarr_vectors_level", {}) @@ -674,7 +648,10 @@ def _write_custom_subpath( """Write attribute bytes to ``//``. Mirrors :func:`write_chunk_attributes` but with a configurable - top-level subpath (e.g. ``"face_attributes"``). + top-level subpath (e.g. ``"face_attributes"``). Per-group byte + offsets are derived at read time from the parallel + ``vertex_group_offsets`` table; no ``_offsets`` sibling is + written. 
""" from zarr_vectors.core.arrays import _chunk_key from zarr_vectors.encoding.ragged import encode_vertex_groups @@ -682,6 +659,5 @@ def _write_custom_subpath( dtype = np.dtype(dtype) key = _chunk_key(chunk_coords) full_name = f"{subpath}/{name}" - raw_bytes, offsets = encode_vertex_groups(attr_groups, dtype) + raw_bytes, _ = encode_vertex_groups(attr_groups, dtype) level_group.write_bytes(full_name, key, raw_bytes) - level_group.write_bytes(full_name, key + "_offsets", offsets.tobytes()) diff --git a/zarr_vectors/multiresolution/coarsen.py b/zarr_vectors/multiresolution/coarsen.py index 65297c2..064d8ee 100644 --- a/zarr_vectors/multiresolution/coarsen.py +++ b/zarr_vectors/multiresolution/coarsen.py @@ -38,19 +38,18 @@ from zarr_vectors.core.arrays import ( create_cross_chunk_links_array, create_links_array, - create_metanode_children_array, create_object_attributes_array, create_object_index_array, create_vertices_array, list_chunk_keys, read_all_object_manifests, read_chunk_vertices, + read_cross_chunk_links, read_object_attributes, read_vertex_group, write_chunk_links, write_chunk_vertices, write_cross_chunk_links, - write_metanode_children, write_object_attributes, write_object_index, ) @@ -280,13 +279,6 @@ def _cross_object_metanode_coarsen( dtype=np.float32, ) - # Write metanode_children - try: - create_metanode_children_array(level_group) - write_metanode_children(level_group, children) - except Exception: - pass - return { "vertex_count": n_objects, "source_count": n_source, @@ -621,45 +613,31 @@ def _reconstruct_chunk_assignments( def _reconstruct_parent_from_metanode_children( coarse_level_group, n_fine: int, ) -> npt.NDArray[np.int64] | None: - """Build a fine→coarse ``parent`` array from the ``metanode_children`` sidecar. - - For each metanode ``m`` at the coarse level, the sidecar records - the list of source-level ``(chunk_coords, vertex_index)`` refs - that became part of ``m``. We invert that to a per-fine-vertex - parent array. 
- - Returns ``None`` when the sidecar is missing. Fine vertices not - referenced by any metanode (e.g. dropped by sparsification) are - marked with ``-1``. + """Build a fine→coarse ``parent`` array from + ``cross_chunk_links//`` on the coarse level. + + Each record in that array is a 2-endpoint link + ``((coarse_chunk, coarse_vi), (fine_chunk, fine_vi))`` recording + that fine vertex ``fine_vi`` (at chunk ``fine_chunk``) was + aggregated into coarse metanode ``coarse_vi``. We invert the + records into a flat ``parent`` array of length ``n_fine``. + + Returns ``None`` when no such array exists or when the fine→global + index mapping cannot be reconstructed. This is currently a + best-effort hook: pyramid coarsening writes provenance inline + rather than going through this post-hoc reconstruction path. """ - from zarr_vectors.core.arrays import read_metanode_children try: - children = read_metanode_children(coarse_level_group) + records = read_cross_chunk_links(coarse_level_group, delta=-1) except Exception: return None - parent = np.full(n_fine, -1, dtype=np.int64) - if isinstance(children, dict): - items = children.items() - else: - items = enumerate(children) - # The sidecar's "vertex index" is the per-chunk vg_idx (see - # write_metanode_children); for pyramids written by the legacy - # cross-object path each metanode's children are *flat* source - # vertex indices, not (chunk, vg_idx) tuples. Try both. - for m_id, refs in items: - for ref in refs: - if isinstance(ref, tuple) and len(ref) == 2 and isinstance(ref[0], tuple): - # (chunk_coords, local_idx) form — reader returns this - # shape for object-index-style sidecars. We don't have - # the source chunk_assignments here to resolve it back - # to a global index, so this branch is skipped: callers - # that need cross-level edges in per-object mode must - # call the in-line helper instead of post-hoc finalize. 
- continue - fi = int(ref) - if 0 <= fi < n_fine: - parent[fi] = int(m_id) - return parent if (parent != -1).any() else None + if not records: + return None + # We do not have a fine-level chunk_offsets map here; the post-hoc + # finalize path is unable to translate (fine_chunk, fine_vi) into + # a flat fine index. Return None to signal "no usable provenance" + # so the caller skips cross-level emission for this level pair. + return None def _finalize_cross_level_for_store( @@ -672,7 +650,7 @@ def _finalize_cross_level_for_store( Driven post-hoc from on-disk state: enumerates every adjacent (fine, coarse) level pair, reconstructs the fine→parent map from - the coarse level's ``metanode_children`` sidecar, and writes + the coarse level's ``cross_chunk_links//`` array, and writes ``±delta`` link arrays up to ``cross_level_depth``. ``cross_level_depth=-1`` means "walk all available level pairs". @@ -727,7 +705,7 @@ def _finalize_cross_level_for_store( ) else: # Compose: parent_at_step = parent_at_(step-1)_from_(coarse-1) - # → grandparent via that coarser level's metanode_children. + # → grandparent via that coarser level's cross_chunk_links/. inter_lg = get_resolution_level(root, coarse_level - 1) inter_n = per_level[coarse_level - 1][1] inter_parent = _reconstruct_parent_from_metanode_children( @@ -741,7 +719,7 @@ def _finalize_cross_level_for_store( composed[valid] = inter_parent[parent[valid]] parent = composed if parent is None: - # No metanode_children info — skip this and all larger + # No provenance info — skip this and all larger # deltas for this fine level. 
break @@ -1078,12 +1056,6 @@ def build_pyramid( dtype=np.float32, ) - try: - create_metanode_children_array(level_group) - write_metanode_children(level_group, children) - except Exception: - pass - levels_created += 1 current_positions = meta_positions diff --git a/zarr_vectors/rechunk/engine.py b/zarr_vectors/rechunk/engine.py index 3f6f1fe..a70fa98 100644 --- a/zarr_vectors/rechunk/engine.py +++ b/zarr_vectors/rechunk/engine.py @@ -26,7 +26,7 @@ write_chunk_vertices, write_object_index, ) -from zarr_vectors.core.metadata import LevelMetadata, RootMetadata +from zarr_vectors.core.metadata import LevelMetadata from zarr_vectors.core.store import ( FsGroup, create_resolution_level, @@ -145,20 +145,18 @@ def rechunk( ] rechunk_dims = [spec.dimension_name, *spatial_dim_names] - out_meta = RootMetadata( - spatial_index_dims=src_meta.spatial_index_dims, + out_root = create_store( + str(output_path), + axes=src_meta.spatial_index_dims, chunk_shape=chunk_shape, bounds=src_meta.bounds, geometry_types=src_meta.geometry_types, - zv_version=src_meta.zv_version, links_convention=src_meta.links_convention, object_index_convention=src_meta.object_index_convention, cross_chunk_strategy=src_meta.cross_chunk_strategy, base_bin_shape=src_meta.base_bin_shape, ) - out_root = create_store(str(output_path), out_meta) - # Compute the bin → original-value list for attribute-based rechunking. # Only meaningful when ``by="attribute:..."`` and we have the source # values; non-attribute rechunks leave chunk_attribute_* unset. diff --git a/zarr_vectors/spatial/boundary.py b/zarr_vectors/spatial/boundary.py index 8a26213..709ab75 100644 --- a/zarr_vectors/spatial/boundary.py +++ b/zarr_vectors/spatial/boundary.py @@ -381,11 +381,8 @@ def chunk_local_to_global_offsets( algorithms that need to map ``(chunk_key, local_idx)`` to a global vertex ID without rebuilding the mapping themselves. 
- Backed by the ``vertex_counts/`` sidecar when it exists - (capability ``"vertex_count_cache"``), which makes the lookup O(K) - chunks of single-int8-byte reads rather than O(N) total vertices - of bytes. Falls back to summing decoded vertex_group_offsets on - legacy 0.2 stores — same answer, slower. + Per-chunk vertex counts are derived from the size of each + ``vertices/`` blob divided by ``ndim * dtype.itemsize``. Args: level_group: An open :class:`FsGroup` for one resolution level. @@ -400,45 +397,30 @@ def chunk_local_to_global_offsets( """ # Imported lazily to avoid circular import with core.arrays which # depends on this module's other helpers. - from zarr_vectors.core.arrays import ( - list_chunk_keys, - read_chunk_vertex_count, - _read_vertex_offsets, - ) + from zarr_vectors.core.arrays import list_chunk_keys chunk_keys = list_chunk_keys(level_group) offsets: dict[ChunkCoords, int] = {} running = 0 + + try: + vmeta = level_group.read_array_meta("vertices") + dtype_str = vmeta.get("dtype", "float32") + itemsize = np.dtype(dtype_str).itemsize + except Exception: + itemsize = 4 # float32 default + ndim_meta = 3 # ndim is not stored; default to 3 + row_size = ndim_meta * itemsize + for cc in chunk_keys: - count = read_chunk_vertex_count(level_group, cc) - if count is None: - # Legacy fallback: sum the per-vg vertex byte spans / itemsize. - # We don't know the dtype here without reading array metadata, - # so instead we decode the paired offsets to count vertex groups - # and sum group lengths via byte-spans / row size. 
- try: - vmeta = level_group.read_array_meta("vertices") - ndim_meta = vmeta.get("ndim") - dtype_str = vmeta.get("dtype", "float32") - itemsize = np.dtype(dtype_str).itemsize - except Exception: - ndim_meta = None - itemsize = 4 # float32 default - try: - v_offsets, _ = _read_vertex_offsets(level_group, cc) - except Exception: - v_offsets = np.empty(0, dtype=np.int64) - if v_offsets.size <= 1: - count = 0 - else: - total_bytes = int(v_offsets[-1] - v_offsets[0]) - # row size = ndim * itemsize. If ndim isn't recorded, - # infer from the level group's stored bytes / itemsize - # divided by something — best effort. - if ndim_meta is None: - ndim_meta = 3 - row = int(ndim_meta) * itemsize - count = total_bytes // row if row else 0 + # Derive total vertex count from the vertices/ blob size. + from zarr_vectors.core.arrays import _chunk_key # local: tight loop + from zarr_vectors.constants import VERTICES + try: + raw = level_group.read_bytes(VERTICES, _chunk_key(cc)) + count = len(raw) // row_size if row_size else 0 + except Exception: + count = 0 offsets[cc] = running running += int(count) return offsets, chunk_keys, running diff --git a/zarr_vectors/types/meshes.py b/zarr_vectors/types/meshes.py index 836ee3c..266a1f9 100644 --- a/zarr_vectors/types/meshes.py +++ b/zarr_vectors/types/meshes.py @@ -37,13 +37,11 @@ resolve_chunk_keys, read_chunk_links, read_chunk_vertices, - read_cross_chunk_faces, read_cross_chunk_links, read_object_vertices, write_chunk_attributes, write_chunk_links, write_chunk_vertices, - write_cross_chunk_faces, write_cross_chunk_links, write_object_attributes, write_object_index, @@ -86,17 +84,6 @@ ) -def _stamp_root_capability(root_group, cap: str) -> None: - """Add ``cap`` to root metadata's ``format_capabilities`` (idempotent).""" - attrs = root_group.attrs.to_dict() - zv = attrs.get("zarr_vectors", {}) - caps = list(zv.get("format_capabilities", [])) - if cap not in caps: - caps.append(cap) - zv["format_capabilities"] = caps - 
root_group.attrs.update({"zarr_vectors": zv}) - - def write_mesh( store_path: str, vertices: npt.NDArray[np.floating], @@ -306,32 +293,27 @@ def write_mesh( level_group, chunk_coords, [intra_faces[chunk_coords]], delta=0, ) - # Write cross-chunk faces - # Convert cross-face refs to cross_chunk_links format - # Each cross face is a list of (chunk, local_idx) tuples - cross_links: list[Any] = [] - for face_ref in cross_faces: - # Store as pairs: each consecutive pair of face vertices - for i in range(len(face_ref) - 1): - cross_links.append((face_ref[i], face_ref[i + 1])) - # Close the face: last vertex to first - if len(face_ref) >= 3: - cross_links.append((face_ref[-1], face_ref[0])) - + # Write cross-chunk faces as variable-width records under + # ``cross_chunk_links//``. Each record is a list of L + # ``(chunk_coords, local_vertex_idx)`` endpoints where L is the + # face arity (3 for triangles). Faces of different arity are + # rejected; meshes are uniform-arity by construction. idx_ndim = ndim + 1 if vertex_attr_bins is not None else ndim - if cross_links: + if cross_faces: + face_arities = {len(f) for f in cross_faces} + if len(face_arities) != 1: + raise ArrayError( + f"cross-chunk faces have inconsistent arities {face_arities}; " + "meshes must be uniform-arity" + ) write_cross_chunk_links( - level_group, cross_links, sid_ndim=idx_ndim, delta=0, + level_group, + [list(face) for face in cross_faces], + sid_ndim=idx_ndim, + delta=0, + link_width=face_arities.pop(), ) - # Tier C: persist cross-chunk face identity alongside the edge-pair - # fallback. Old readers that ignore the new array still see - # connectivity via the existing cross_chunk_links; new readers can - # reconstruct boundary faces exactly. 
- if cross_faces: - write_cross_chunk_faces(level_group, cross_faces, sid_ndim=idx_ndim) - _stamp_root_capability(root, "cross_chunk_faces") - # Write object index write_object_index(level_group, object_manifests, sid_ndim=idx_ndim) @@ -471,13 +453,14 @@ def read_mesh( except ArrayError: pass - # Tier C: emit cross-chunk faces using preserved identity records. - # Map each (chunk, local_idx) record into the global vertex index - # via ``chunk_offsets`` (built from the chunks we just read). When - # the array is absent (0.2 stores or no boundary faces), reads are - # untouched. - cross_face_records = read_cross_chunk_faces(level_group) + # Cross-chunk faces are stored as variable-width records under + # ``cross_chunk_links//`` (link_width = face arity). Map + # each (chunk, local_idx) endpoint into the global vertex index + # via ``chunk_offsets`` built above. + cross_face_records = read_cross_chunk_links(level_group, delta=0) for face in cross_face_records: + if len(face) != link_width: + continue # not a face record (e.g. 
edge-arity, ignore) vertex_ids: list[int] = [] for cc, local_idx in face: if cc not in chunk_offsets: @@ -552,15 +535,14 @@ def _write_draco_chunk( # Store as raw bytes in the vertices chunk from zarr_vectors.core.arrays import _chunk_key - from zarr_vectors.encoding.ragged import encode_paired_offsets + from zarr_vectors.encoding.ragged import encode_vertex_offsets key = _chunk_key(chunk_coords) level_group.write_bytes("vertices", key, blob) # Single vertex group spanning whole chunk v_off = np.array([0], dtype=np.int64) - l_off = np.array([-1], dtype=np.int64) level_group.write_bytes( "vertex_group_offsets", key, - encode_paired_offsets(v_off, l_off), + encode_vertex_offsets(v_off), ) diff --git a/zarr_vectors/types/parametric.py b/zarr_vectors/types/parametric.py index 1fdc7aa..ad9b0c2 100644 --- a/zarr_vectors/types/parametric.py +++ b/zarr_vectors/types/parametric.py @@ -100,21 +100,20 @@ def write_parametric_objects( # Open or create store if create_new_store: - from zarr_vectors.core.metadata import RootMetadata - kw = store_kwargs or {} - if "spatial_index_dims" not in kw: - kw["spatial_index_dims"] = [ - {"name": "x", "type": "space"}, - {"name": "y", "type": "space"}, - {"name": "z", "type": "space"}, - ] - if "chunk_shape" not in kw: - kw["chunk_shape"] = (1000.0, 1000.0, 1000.0) - if "bounds" not in kw: - kw["bounds"] = ([0, 0, 0], [1000, 1000, 1000]) - if "geometry_types" not in kw: - kw["geometry_types"] = ["point_cloud"] - root = create_store(store_path, RootMetadata(**kw), backend=backend) + kw = dict(store_kwargs or {}) + # Allow callers that still pass ``spatial_index_dims`` (the + # in-memory RootMetadata field name) — translate to ``axes``. 
+ if "spatial_index_dims" in kw and "axes" not in kw: + kw["axes"] = kw.pop("spatial_index_dims") + kw.setdefault("axes", [ + {"name": "x", "type": "space"}, + {"name": "y", "type": "space"}, + {"name": "z", "type": "space"}, + ]) + kw.setdefault("chunk_shape", (1000.0, 1000.0, 1000.0)) + kw.setdefault("bounds", ([0, 0, 0], [1000, 1000, 1000])) + kw.setdefault("geometry_types", ["point_cloud"]) + root = create_store(store_path, backend=backend, **kw) else: root = open_store(store_path, mode="r+", backend=backend) From 1c3d479163a7a3fa8ab38f86ad19159c3bba4cc1 Mon Sep 17 00:00:00 2001 From: Andrew-Keenlyside Date: Thu, 14 May 2026 11:59:39 -0700 Subject: [PATCH 2/4] fix to multires link formation --- tests/integration/test_binning_sparsity.py | 22 +- .../integration/test_lazy_sharding_rechunk.py | 4 +- tests/test_core.py | 2 +- tests/test_multiscale_links.py | 11 - tests/test_per_object_pyramid.py | 2 - zarr_vectors/constants.py | 19 +- zarr_vectors/multiresolution/coarsen.py | 857 +++++++----------- zarr_vectors/multiresolution/layers.py | 304 +------ 8 files changed, 332 insertions(+), 889 deletions(-) diff --git a/tests/integration/test_binning_sparsity.py b/tests/integration/test_binning_sparsity.py index 33910d1..905f460 100644 --- a/tests/integration/test_binning_sparsity.py +++ b/tests/integration/test_binning_sparsity.py @@ -81,12 +81,11 @@ def test_streamline_sparsity(self, tmp_path: Path) -> None: bin_shape=(50., 50., 50.), ) - summary = build_pyramid(store, level_configs=[ - {"bin_ratio": (2, 2, 2), "object_sparsity": 0.5}, - ]) + summary = build_pyramid(store, factors=[(2.0, 2.0)]) assert summary["levels_created"] == 1 - assert summary["level_specs"][0]["object_sparsity"] == 0.5 - assert summary["level_specs"][0]["expected_volume_reduction"] == 16.0 + # sparsity_factor=2.0 → keep_frac=0.5 ≈ object_sparsity=0.5. + # Approximate check: at least one object was dropped. 
+ assert summary["level_specs"][0]["objects_kept"] < summary["level_specs"][0]["source_objects"] assert validate(store, level=5).ok @@ -111,7 +110,7 @@ def test_mesh_coarsening(self, tmp_path: Path) -> None: bin_shape=(50., 50., 50.), ) - summary = coarsen_level(store, 0, 1, (2, 2, 2)) + summary = coarsen_level(store, 0, 1, coarsen_factor=2.0) assert summary["vertex_count"] > 0 assert summary["vertex_count"] < n_verts @@ -145,8 +144,8 @@ def test_multiple_manual_ratios(self, tmp_path: Path) -> None: ) # Add levels at ratios (2,2,2) and (6,6,6) — 6 divides 12 per axis - coarsen_level(store, 0, 1, (2, 2, 2)) - coarsen_level(store, 0, 2, (6, 6, 6)) + coarsen_level(store, 0, 1, coarsen_factor=2.0) + coarsen_level(store, 0, 2, coarsen_factor=6.0) ratios = list_available_ratios(open_store(store)) assert (1, 1, 1) in ratios @@ -184,7 +183,7 @@ def test_pyramid_backward_compat(self, tmp_path: Path) -> None: positions = rng.uniform(0, 1000, size=(10000, 3)).astype(np.float32) write_points(store, positions, chunk_shape=(100., 100., 100.)) - summary = build_pyramid(store) + summary = build_pyramid(store, factors=[(2.0, 1.0)]) assert summary["levels_created"] >= 1 assert validate(store, level=5).ok @@ -225,10 +224,7 @@ def test_multiscale_roundtrip(self, tmp_path: Path) -> None: chunk_shape=(200., 200., 200.), bin_shape=(50., 50., 50.), ) - build_pyramid(store, level_configs=[ - {"bin_ratio": (2, 2, 2), "object_sparsity": 1.0}, - {"bin_ratio": (4, 4, 4), "object_sparsity": 1.0}, - ]) + build_pyramid(store, factors=[(2.0, 1.0), (4.0, 1.0)]) root = open_store(store, mode="r+") ms = write_multiscale_metadata(root) diff --git a/tests/integration/test_lazy_sharding_rechunk.py b/tests/integration/test_lazy_sharding_rechunk.py index 694e574..c59ea35 100644 --- a/tests/integration/test_lazy_sharding_rechunk.py +++ b/tests/integration/test_lazy_sharding_rechunk.py @@ -148,7 +148,7 @@ def test_pyramid_then_shard(self, tmp_path: Path) -> None: rng.uniform(0, 1000, size=(10000, 
3)).astype(np.float32), chunk_shape=(100., 100., 100.), ) - build_pyramid(store) + build_pyramid(store, factors=[(2.0, 1.0), (2.0, 1.0)]) levels_before = list_resolution_levels(open_store(store)) # Shard @@ -355,5 +355,5 @@ def test_all_types(self, tmp_path: Path) -> None: s = str(tmp_path / "pyr.zv") write_points(s, rng.uniform(0, 1000, size=(10000, 3)).astype(np.float32), chunk_shape=(100., 100., 100.)) - build_pyramid(s) + build_pyramid(s, factors=[(2.0, 1.0)]) assert validate(s, level=5).ok diff --git a/tests/test_core.py b/tests/test_core.py index 8a23fc2..34bce7b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -109,7 +109,7 @@ def _make_level_meta(level: int = 0, **overrides) -> LevelMetadata: ) if level > 0: defaults["bin_shape"] = (200.0, 200.0, 200.0) - defaults["coarsening_method"] = "grid_metanode" + defaults["coarsening_method"] = "per_object" defaults["parent_level"] = level - 1 defaults.update(overrides) return LevelMetadata(**defaults) diff --git a/tests/test_multiscale_links.py b/tests/test_multiscale_links.py index 596eb39..7764cb1 100644 --- a/tests/test_multiscale_links.py +++ b/tests/test_multiscale_links.py @@ -332,17 +332,6 @@ def test_build_pyramid_depth_zero_emits_no_cross_level(tmp_path: Path) -> None: assert _delta_dirs(root, lvl, CROSS_CHUNK_LINKS) <= {"0"} -@pytest.mark.xfail( - reason=( - "Cross-level link emission (+delta/-delta arrays) is broken end-to-end: " - "_per_object_coarsen (the default coarsen path) writes no provenance " - "records, and _finalize_cross_level_for_store has no usable " - "fine→parent reconstruction without them. Tracking the design gap " - "(coarsening would need to emit cross_chunk_links/ records " - "in-line) in a separate issue." 
- ), - strict=False, -) def test_build_pyramid_explicit_depth_one(tmp_path: Path) -> None: store_path = _seed_simple_graph(tmp_path) build_pyramid( diff --git a/tests/test_per_object_pyramid.py b/tests/test_per_object_pyramid.py index 6d3f044..5fd206d 100644 --- a/tests/test_per_object_pyramid.py +++ b/tests/test_per_object_pyramid.py @@ -118,7 +118,6 @@ def test_monotone_oid_drop_across_levels(tmp_path): build_pyramid( str(store), factors=[(1.5, 2.0), (1.5, 2.0)], - method=COARSEN_PER_OBJECT, sparsity_seed=42, ) @@ -255,7 +254,6 @@ def test_factors_via_build_pyramid(tmp_path): result = build_pyramid( str(store), factors=[(2.0, 2.0), (2.0, 2.0)], - method=COARSEN_PER_OBJECT, sparsity_seed=42, ) assert result["levels_created"] == 2 diff --git a/zarr_vectors/constants.py b/zarr_vectors/constants.py index 2c0653c..a33f931 100644 --- a/zarr_vectors/constants.py +++ b/zarr_vectors/constants.py @@ -25,8 +25,10 @@ - ``attributes//_offsets`` sibling blobs removed. Attribute groups align 1:1 with vertex groups; per-group byte offsets are computed at read time. -- ``metanode_children/`` removed. Pyramid drill-down uses - ``cross_chunk_links//`` records. +- ``metanode_children/`` removed. Pyramid drill-down uses the + ``links/<+1>/`` + ``cross_chunk_links/<+1>/`` arrays emitted inline + during coarsening (mirrored as ``-1`` on the coarse side under + ``cross_level_storage="explicit"``). - ``cross_chunk_faces/`` removed. Cross-chunk face identity uses ``cross_chunk_links//`` with ``link_width=3``. The ``cross_chunk_links`` array carries a ``link_width`` metadata @@ -254,7 +256,7 @@ DEFAULT_BIN_RATIO: tuple[int, ...] = (1, 1, 1) """Bin ratio at level 0 (no downsampling).""" -DEFAULT_COARSENING_METHOD: str = "grid_metanode" +DEFAULT_COARSENING_METHOD: str = "per_object" # Valid values for LevelMetadata.coarsening_method. Open-set: future # strategies (e.g. mesh edge-collapse decimation) may add tokens here. @@ -263,22 +265,11 @@ into bin centroids (metavertices). 
Metavertices may be shared between objects; OIDs are preserved across levels.""" -COARSEN_CROSS_OBJECT_METANODE: str = "cross_object_metanode" -"""Legacy aggregation that merges vertices across objects, producing a -fresh OID space at each level. No provenance back to the source -objects.""" - -COARSEN_GRID_METANODE: str = "grid_metanode" -"""Alias for the legacy cross-object metanode aggregation; kept for -historical level metadata read-back.""" - COARSEN_MANUAL: str = "manual" COARSEN_NONE: str = "none" VALID_COARSENING_METHODS: frozenset[str] = frozenset({ COARSEN_PER_OBJECT, - COARSEN_CROSS_OBJECT_METANODE, - COARSEN_GRID_METANODE, COARSEN_MANUAL, COARSEN_NONE, }) diff --git a/zarr_vectors/multiresolution/coarsen.py b/zarr_vectors/multiresolution/coarsen.py index 064d8ee..c77dc7a 100644 --- a/zarr_vectors/multiresolution/coarsen.py +++ b/zarr_vectors/multiresolution/coarsen.py @@ -1,14 +1,16 @@ """Multi-resolution pyramid construction orchestrator. -Supports two modes: +Two entry points (use one): -1. **Automatic**: ``build_pyramid(store)`` auto-plans levels using - target volume reduction and sparsity weight. -2. **Manual**: ``coarsen_level(store, source, target, bin_ratio, sparsity)`` - creates a single coarsened level with explicit control. +* ``build_pyramid(store, factors=[(cf_1, sf_1), ...])`` builds every + coarser level in sequence, optionally emitting cross-level link + arrays (``cross_level_storage="implicit"`` or ``"explicit"``). +* ``coarsen_level(store, source, target, coarsen_factor=..., sparsity_factor=...)`` + writes a single coarser level for callers that want manual control. -Both modes handle vertex coarsening (via metanodes) and object -sparsity (via object selection strategies). +Both use the per-object pyramid: each surviving object's vertices are +aggregated into bin centroids (metavertices) that may be shared +between objects, and per-object OIDs are preserved across levels. 
""" from __future__ import annotations @@ -23,15 +25,13 @@ CAP_MULTISCALE_LINKS, CAP_PRESERVED_OBJECT_IDS, CAP_SHARED_VERTEX_GROUPS, - COARSEN_CROSS_OBJECT_METANODE, - COARSEN_GRID_METANODE, COARSEN_PER_OBJECT, DEFAULT_CROSS_LEVEL_DEPTH, DEFAULT_CROSS_LEVEL_STORAGE, + LINKS, OBJECT_ATTRIBUTES, VERTICES, XLEVEL_EXPLICIT, - XLEVEL_IMPLICIT, XLEVEL_NONE, VALID_XLEVEL_STORAGE, ) @@ -43,46 +43,32 @@ create_vertices_array, list_chunk_keys, read_all_object_manifests, + read_chunk_links, read_chunk_vertices, read_cross_chunk_links, read_object_attributes, - read_vertex_group, write_chunk_links, write_chunk_vertices, write_cross_chunk_links, write_object_attributes, write_object_index, ) -from zarr_vectors.core.metadata import ( - LevelMetadata, - compute_bin_shape, - validate_bin_shape_divides_chunk, -) +from zarr_vectors.core.metadata import LevelMetadata from zarr_vectors.core.store import ( - add_resolution_level, create_resolution_level, get_resolution_level, list_resolution_levels, open_store, read_root_metadata, ) -from zarr_vectors.multiresolution.layers import ( - LevelReductionSpec, - compute_level_specs, - plan_pyramid_with_sparsity, -) -from zarr_vectors.multiresolution.metanodes import generate_metanodes -from zarr_vectors.multiresolution.object_selection import ( - apply_sparsity, - compute_polyline_lengths, - compute_representative_points, -) +from zarr_vectors.exceptions import ArrayError +from zarr_vectors.multiresolution.object_selection import apply_sparsity from zarr_vectors.spatial.boundary import ( build_vertex_chunk_mapping, partition_cross_level_edges, ) -from zarr_vectors.spatial.chunking import assign_bins, assign_chunks -from zarr_vectors.typing import ChunkCoords, CrossChunkLink +from zarr_vectors.spatial.chunking import assign_chunks +from zarr_vectors.typing import ChunkCoords # =================================================================== @@ -93,201 +79,51 @@ def coarsen_level( store_path: str | Path, source_level: int, target_level: 
int, - bin_ratio: tuple[int, ...] | None = None, *, - coarsen_factor: float | None = None, - sparsity_factor: float | None = None, - method: str = COARSEN_PER_OBJECT, - object_sparsity: float = 1.0, + coarsen_factor: float = 1.0, + sparsity_factor: float = 1.0, sparsity_strategy: str = "random", sparsity_seed: int | None = None, - agg_mode: str = "mean", + cross_level_storage: str = XLEVEL_NONE, ) -> dict[str, Any]: """Coarsen a single level and write it to the store. - Two interfaces are supported (use one): - - * **Factor-based** (preferred): pass ``coarsen_factor`` and/or - ``sparsity_factor``. Defaults to ``method="per_object"`` — - per-object vertex aggregation with stable OIDs across levels. - A metavertex's source vertices may come from multiple source - objects; the resulting metavertex appears in each of those - objects' manifests at the coarser level. - * **Legacy `bin_ratio`**: passing ``bin_ratio`` (with optional - ``object_sparsity``) routes through the original cross-object - ``grid_metanode`` path which produces a fresh OID space at the - coarser level. Kept for back-compat. + Per-object vertex aggregation with stable OIDs across levels. A + metavertex's source vertices may come from multiple source objects; + the resulting metavertex appears in each of those objects' manifests + at the coarser level. Args: store_path: Path to the zarr vectors store. source_level: Level to read from. target_level: Level to write to (must not exist). - bin_ratio: Legacy interface — per-axis fold change. Implies - ``method="cross_object_metanode"`` unless ``method`` is - given explicitly. coarsen_factor: Per-object vertex aggregation factor (≥ 1). ``1.0`` is the identity (no aggregation). sparsity_factor: Object-dropping factor (≥ 1). Survivors keep their OIDs; dropped objects leave empty manifest slots. ``1.0`` is the identity (no drop). 
- method: ``"per_object"`` (default for the factor interface) or - ``"cross_object_metanode"`` / ``"grid_metanode"`` for the - legacy aggregation. - object_sparsity: Legacy keep-fraction. Mapped to - ``sparsity_factor = 1.0 / object_sparsity`` when present. sparsity_strategy: Object selection strategy. sparsity_seed: Random seed. - agg_mode: Metanode attribute aggregation. + cross_level_storage: When called via ``build_pyramid`` this is + threaded through to enable inline ``±1`` cross-level link + emission. Standalone callers should leave it at the + ``"none"`` default. Returns: Summary dict. Always includes ``method``, ``preserves_object_ids``, ``vertex_count``. """ - # Reconcile the two interfaces. ``bin_ratio`` is the legacy entry - # and implies the cross-object metanode path unless the caller - # explicitly opted into per-object via ``method``. - legacy_used = bin_ratio is not None - factor_used = coarsen_factor is not None or sparsity_factor is not None - if legacy_used and not factor_used and method == COARSEN_PER_OBJECT: - method = COARSEN_CROSS_OBJECT_METANODE - if coarsen_factor is None: - coarsen_factor = 1.0 - if sparsity_factor is None: - # Map legacy object_sparsity → sparsity_factor. 
- sparsity_factor = ( - 1.0 / object_sparsity if 0.0 < object_sparsity < 1.0 else 1.0 - ) - - if method in (COARSEN_CROSS_OBJECT_METANODE, COARSEN_GRID_METANODE): - return _cross_object_metanode_coarsen( - store_path=store_path, - source_level=source_level, - target_level=target_level, - bin_ratio=bin_ratio, - coarsen_factor=coarsen_factor, - object_sparsity=(1.0 / sparsity_factor), - sparsity_strategy=sparsity_strategy, - sparsity_seed=sparsity_seed, - agg_mode=agg_mode, - ) - if method == COARSEN_PER_OBJECT: - return _per_object_coarsen( - store_path=store_path, - source_level=source_level, - target_level=target_level, - coarsen_factor=coarsen_factor, - sparsity_factor=sparsity_factor, - sparsity_strategy=sparsity_strategy, - sparsity_seed=sparsity_seed, - ) - raise ValueError(f"Unknown coarsen method: {method!r}") - - -def _cross_object_metanode_coarsen( - *, - store_path: str | Path, - source_level: int, - target_level: int, - bin_ratio: tuple[int, ...] | None, - coarsen_factor: float, - object_sparsity: float, - sparsity_strategy: str, - sparsity_seed: int | None, - agg_mode: str, -) -> dict[str, Any]: - """Legacy cross-object aggregation (produces fresh OIDs).""" - root = open_store(str(store_path), mode="r+") - meta = read_root_metadata(root) - ndim = meta.sid_ndim - chunk_shape = meta.chunk_shape - base_bin = meta.effective_bin_shape - - if bin_ratio is None: - # Derive isotropic ratio from coarsen_factor. 
- bin_ratio = tuple(max(1, int(round(coarsen_factor))) for _ in range(ndim)) - - # Compute target bin shape - bin_shape = compute_bin_shape(base_bin, bin_ratio) - validate_bin_shape_divides_chunk(chunk_shape, bin_shape) - - # Read source level vertices - source_group = get_resolution_level(root, source_level) - positions = _read_all_vertices(source_group, ndim) - - if len(positions) == 0: - return { - "vertex_count": 0, - "object_count": 0, - "reduction_ratio": 0, - "method": COARSEN_CROSS_OBJECT_METANODE, - "preserves_object_ids": False, - } - - n_source = len(positions) - - # Generate metanodes - meta_result = generate_metanodes(positions, bin_shape, agg_mode=agg_mode) - meta_positions = meta_result["metanode_positions"] - children = meta_result["children"] - n_metanodes = len(meta_positions) - - # Apply object sparsity (on metanodes) - n_objects = n_metanodes - if object_sparsity < 1.0 and n_metanodes > 1: - kept = apply_sparsity( - n_metanodes, object_sparsity, sparsity_strategy, - seed=sparsity_seed, - representative_points=meta_positions, - bin_shape=bin_shape, - ) - meta_positions = meta_positions[kept] - children = [children[i] for i in kept] - n_objects = len(meta_positions) - - if n_objects == 0: - return { - "vertex_count": 0, - "object_count": 0, - "reduction_ratio": 0, - "method": COARSEN_CROSS_OBJECT_METANODE, - "preserves_object_ids": False, - } - - # Create the level - level_group = add_resolution_level( - root, target_level, bin_ratio, - object_sparsity=object_sparsity, - coarsening_method=COARSEN_GRID_METANODE, - parent_level=source_level, + return _per_object_coarsen( + store_path=store_path, + source_level=source_level, + target_level=target_level, + coarsen_factor=coarsen_factor, + sparsity_factor=sparsity_factor, + sparsity_strategy=sparsity_strategy, + sparsity_seed=sparsity_seed, + cross_level_storage=cross_level_storage, ) - # Update vertex count in metadata - level_group.attrs.update({ - "zarr_vectors_level": { - 
**level_group.attrs.to_dict().get("zarr_vectors_level", {}), - "vertex_count": n_objects, - } - }) - - create_vertices_array(level_group, dtype="float32") - - # Assign to chunks and write - chunk_assignments = assign_chunks(meta_positions, chunk_shape) - for chunk_coords, global_indices in sorted(chunk_assignments.items()): - write_chunk_vertices( - level_group, chunk_coords, [meta_positions[global_indices]], - dtype=np.float32, - ) - - return { - "vertex_count": n_objects, - "source_count": n_source, - "reduction_ratio": n_source / max(n_objects, 1), - "object_sparsity": object_sparsity, - "method": COARSEN_CROSS_OBJECT_METANODE, - "preserves_object_ids": False, - } - def _per_object_coarsen( *, @@ -298,6 +134,7 @@ def _per_object_coarsen( sparsity_factor: float, sparsity_strategy: str, sparsity_seed: int | None, + cross_level_storage: str = XLEVEL_NONE, ) -> dict[str, Any]: """Per-object pyramid: aggregate within-bin source vertices into shared metavertices, preserving each surviving object's OID and @@ -317,7 +154,29 @@ def _per_object_coarsen( src_group = get_resolution_level(root, source_level) # --- Step 0: read source manifests + vertex positions ---------------- - src_manifests = read_all_object_manifests(src_group) + # Read source vertex positions, indexed by (chunk_coords, vg_idx). + src_vg_positions: dict[tuple[ChunkCoords, int], npt.NDArray] = {} + for cc in list_chunk_keys(src_group, VERTICES): + try: + vgs = read_chunk_vertices(src_group, cc, dtype=np.float32, ndim=ndim) + except ArrayError: + continue + for vg_idx, vg in enumerate(vgs): + src_vg_positions[(cc, vg_idx)] = vg + + src_has_objects = "object_index" in src_group + if src_has_objects: + src_manifests = read_all_object_manifests(src_group) + else: + # No object_index — treat the level as one implicit object whose + # manifest enumerates every vg in chunk-major order. 
+ implicit: list[tuple[ChunkCoords, int]] = [] + for cc in list_chunk_keys(src_group, VERTICES): + vg_idx = 0 + while (cc, vg_idx) in src_vg_positions: + implicit.append((cc, vg_idx)) + vg_idx += 1 + src_manifests = [implicit] if implicit else [] n_src_objects = len(src_manifests) if n_src_objects == 0: return { @@ -328,16 +187,6 @@ def _per_object_coarsen( "preserves_object_ids": True, } - # Read source vertex positions, indexed by (chunk_coords, vg_idx). - src_vg_positions: dict[tuple[ChunkCoords, int], npt.NDArray] = {} - for cc in list_chunk_keys(src_group, VERTICES): - try: - vgs = read_chunk_vertices(src_group, cc, dtype=np.float32, ndim=ndim) - except Exception: - continue - for vg_idx, vg in enumerate(vgs): - src_vg_positions[(cc, vg_idx)] = vg - # --- Step 1: drop a fraction of source objects ---------------------- keep_oids: list[int] if sparsity_factor > 1.0 and n_src_objects > 1: @@ -433,22 +282,24 @@ def _per_object_coarsen( ) # --- Step 6: write per-chunk vertex groups -------------------------- + arrays_present = [VERTICES, "object_index"] if src_has_objects else [VERTICES] level_meta_initial = LevelMetadata( level=target_level, vertex_count=int(n_metavertices), - arrays_present=[VERTICES, "object_index"], + arrays_present=arrays_present, bin_shape=target_bin_shape, bin_ratio=tuple(max(1, int(round(coarsen_factor))) for _ in range(ndim)), object_sparsity=(1.0 / sparsity_factor), coarsening_method=COARSEN_PER_OBJECT, parent_level=source_level, - preserves_object_ids=True, - inherited_num_objects=n_src_objects, + preserves_object_ids=src_has_objects, + inherited_num_objects=n_src_objects if src_has_objects else 0, shared_vertex_groups=True, ) level_group = create_resolution_level(root, target_level, level_meta_initial) create_vertices_array(level_group, dtype="float32") - create_object_index_array(level_group) + if src_has_objects: + create_object_index_array(level_group) for cc, groups in sorted(per_chunk_groups.items()): 
write_chunk_vertices(level_group, cc, groups, dtype=np.float32) @@ -477,22 +328,23 @@ def _per_object_coarsen( new_manifests[oid] = manifest # --- Step 9: emit object_index (gap-fill for dropped OIDs) ---------- - write_object_index( - level_group, new_manifests, sid_ndim=ndim, - total_objects=n_src_objects, - ) + if src_has_objects: + write_object_index( + level_group, new_manifests, sid_ndim=ndim, + total_objects=n_src_objects, + ) # --- Step 10: per-object attributes with present_mask --------------- src_obj_attr_group_name = f"{OBJECT_ATTRIBUTES}" - try: + if src_obj_attr_group_name in src_group: src_obj_attr_group = src_group[src_obj_attr_group_name] attr_names = [n for n in src_obj_attr_group] - except Exception: + else: attr_names = [] for attr_name in attr_names: try: src_data = read_object_attributes(src_group, attr_name) - except Exception: + except ArrayError: continue # Dense (O, C) or (O,) padded to the inherited OID space, with # rows for survivors copied over. Layout matches the source's @@ -508,9 +360,24 @@ def _per_object_coarsen( write_object_attributes(level_group, attr_name, out_data, present_mask=mask) # --- Step 12: stamp root capability tokens -------------------------- - _stamp_root_capability(root, CAP_PRESERVED_OBJECT_IDS) + if src_has_objects: + _stamp_root_capability(root, CAP_PRESERVED_OBJECT_IDS) _stamp_root_capability(root, CAP_SHARED_VERTEX_GROUPS) + # --- Step 13: emit inline ±1 cross-level link arrays ---------------- + if cross_level_storage != XLEVEL_NONE and n_metavertices > 0: + _emit_inline_cross_level_links( + root, + src_group=src_group, + level_group=level_group, + source_level=source_level, + ndim=ndim, + bin_shape_arr=bin_shape_arr, + bin_keys=bin_keys, + coarse_chunk_assignments_mv=chunk_assignments, + storage=cross_level_storage, + ) + return { "vertex_count": int(n_metavertices), "object_count": len(keep_oids), @@ -522,6 +389,89 @@ def _per_object_coarsen( } +def _emit_inline_cross_level_links( + root, + *, + 
src_group, + level_group, + source_level: int, + ndim: int, + bin_shape_arr: npt.NDArray[np.float64], + bin_keys: npt.NDArray, + coarse_chunk_assignments_mv: dict[ChunkCoords, npt.NDArray[np.int64]], + storage: str, +) -> None: + """Emit ``±1`` link/cross_chunk_link arrays for one coarsen step. + + Re-walks the source level in chunk-major order, re-bins each + vertex against ``bin_shape_arr``, and looks up the matching + metavertex via the ``bin_key`` ↔ ``mv_idx`` map implicit in + ``np.unique(bin_keys, return_inverse=inverse)``. Translates + metavertex IDs to chunk-major-flat coarse indices via the + just-written coarse-level chunks, then dispatches to + :func:`_write_cross_level_edges`. + """ + # bin_key_bytes → mv_idx (bin-key-ordered, matches np.unique output). + unique_keys = np.unique(bin_keys) + bin_key_to_mv: dict[bytes, int] = { + bytes(k): i for i, k in enumerate(unique_keys) + } + + # mv_idx → chunk-major-flat coarse index. + coarse_chunk_assignments, n_coarse = _reconstruct_chunk_assignments( + level_group, ndim, + ) + mv_to_coarse_global: dict[int, int] = {} + for cc, mv_indices_for_chunk in sorted(coarse_chunk_assignments_mv.items()): + for local_vg, mv_idx in enumerate(mv_indices_for_chunk.tolist()): + mv_to_coarse_global[int(mv_idx)] = int( + coarse_chunk_assignments[cc][local_vg] + ) + + # Build fine→coarse parent[] by re-walking source in chunk-major order. 
+ fine_chunk_assignments, n_fine = _reconstruct_chunk_assignments( + src_group, ndim, + ) + parent = np.full(n_fine, -1, dtype=np.int64) + cursor = 0 + key_dtype = np.dtype(( + np.void, int(bin_shape_arr.shape[0]) * np.dtype(np.int64).itemsize, + )) + for cc in list_chunk_keys(src_group, VERTICES): + try: + vgs = read_chunk_vertices( + src_group, cc, dtype=np.float32, ndim=ndim, + ) + except ArrayError: + continue + for vg in vgs: + n_local = int(vg.shape[0]) + if n_local == 0: + continue + local_bins = np.floor( + np.asarray(vg, dtype=np.float32) / bin_shape_arr, + ).astype(np.int64) + local_keys = np.ascontiguousarray(local_bins).view(key_dtype).ravel() + for j in range(n_local): + mv = bin_key_to_mv.get(bytes(local_keys[j])) + if mv is not None: + parent[cursor + j] = mv_to_coarse_global[int(mv)] + cursor += n_local + + _write_cross_level_edges( + root, + fine_level=source_level, + delta=1, + fine_chunk_assignments=fine_chunk_assignments, + coarse_chunk_assignments=coarse_chunk_assignments, + n_fine=n_fine, + n_coarse=n_coarse, + parent=parent, + sid_ndim=ndim, + storage=storage, + ) + + def _write_empty_preserve_level( root, source_level: int, @@ -600,7 +550,7 @@ def _reconstruct_chunk_assignments( for cc in chunk_keys: try: vgs = read_chunk_vertices(level_group, cc, dtype=np.float32, ndim=ndim) - except Exception: + except ArrayError: continue n = sum(int(vg.shape[0]) for vg in vgs) if n == 0: @@ -610,34 +560,54 @@ def _reconstruct_chunk_assignments( return assignments, cursor -def _reconstruct_parent_from_metanode_children( - coarse_level_group, n_fine: int, +def _decode_parent_from_plus_one( + fine_lg, + *, + fine_assn: dict[ChunkCoords, npt.NDArray[np.int64]], + coarse_assn: dict[ChunkCoords, npt.NDArray[np.int64]], + n_fine: int, ) -> npt.NDArray[np.int64] | None: - """Build a fine→coarse ``parent`` array from - ``cross_chunk_links//`` on the coarse level. 
- - Each record in that array is a 2-endpoint link - ``((coarse_chunk, coarse_vi), (fine_chunk, fine_vi))`` recording - that fine vertex ``fine_vi`` (at chunk ``fine_chunk``) was - aggregated into coarse metanode ``coarse_vi``. We invert the - records into a flat ``parent`` array of length ``n_fine``. - - Returns ``None`` when no such array exists or when the fine→global - index mapping cannot be reconstructed. This is currently a - best-effort hook: pyramid coarsening writes provenance inline - rather than going through this post-hoc reconstruction path. + """Decode a fine→coarse ``parent`` array from already-written ``+1`` arrays. + + Reads ``links/<+1>/`` (intra-chunk edges) and + ``cross_chunk_links/<+1>/`` (cross-chunk edges) at the fine level + and converts each ``(chunk, local_idx)`` pair to global flat indices + via the supplied chunk-assignment dicts. Returns ``None`` when + neither array exists. """ + parent = np.full(n_fine, -1, dtype=np.int64) + found_any = False + + # Aligned (intra-chunk) edges: read each chunk in links/+1/. + try: + chunk_keys = list_chunk_keys(fine_lg, f"{LINKS}/+1") + except (ArrayError, KeyError): + chunk_keys = [] + for cc in chunk_keys: + try: + link_groups = read_chunk_links(fine_lg, cc, delta=1) + except ArrayError: + continue + for rows in link_groups: + if rows is None or len(rows) == 0: + continue + local_src = rows[:, 0].astype(np.int64) + local_tgt = rows[:, 1].astype(np.int64) + fine_global = fine_assn[cc][local_src] + coarse_global = coarse_assn[cc][local_tgt] + parent[fine_global] = coarse_global + found_any = True + + # Cross-chunk edges. try: - records = read_cross_chunk_links(coarse_level_group, delta=-1) - except Exception: - return None - if not records: - return None - # We do not have a fine-level chunk_offsets map here; the post-hoc - # finalize path is unable to translate (fine_chunk, fine_vi) into - # a flat fine index. 
Return None to signal "no usable provenance" - # so the caller skips cross-level emission for this level pair. - return None + records = read_cross_chunk_links(fine_lg, delta=1) + except (ArrayError, KeyError): + records = [] + for (cc_s, vi_s), (cc_t, vi_t) in records: + parent[int(fine_assn[cc_s][vi_s])] = int(coarse_assn[cc_t][vi_t]) + found_any = True + + return parent if found_any else None def _finalize_cross_level_for_store( @@ -646,12 +616,14 @@ def _finalize_cross_level_for_store( cross_level_depth: int, cross_level_storage: str, ) -> None: - """Persist root cross-level metadata and emit cross-level link arrays. + """Persist root cross-level metadata and emit ``±N`` (N ≥ 2) link arrays. - Driven post-hoc from on-disk state: enumerates every adjacent - (fine, coarse) level pair, reconstructs the fine→parent map from - the coarse level's ``cross_chunk_links//`` array, and writes - ``±delta`` link arrays up to ``cross_level_depth``. + Adjacent ``±1`` arrays are emitted inline during coarsening (see + :func:`_emit_inline_cross_level_links`). This finalize pass walks + every adjacent (fine, coarse) level pair, decodes the on-disk + ``+1`` parent map back into a flat fine→coarse array, then composes + step-by-step to produce ``+N``/``-N`` link arrays for N ≥ 2 up to + ``cross_level_depth``. ``cross_level_depth=-1`` means "walk all available level pairs". """ @@ -681,46 +653,52 @@ def _finalize_cross_level_for_store( if cross_level_depth == -1 else int(cross_level_depth) ) - - for fine_idx, fine_level in enumerate(levels[:-1]): + if max_delta < 2: + return # +1/-1 was already emitted inline + + # Cache each adjacent (fine_level, fine_level+1) parent array. 
+ adjacent_parent: dict[int, npt.NDArray[np.int64]] = {} + for fine_level in levels[:-1]: + coarse_level = fine_level + 1 + if coarse_level not in per_level: + continue fine_assn, n_fine = per_level[fine_level] + coarse_assn, _ = per_level[coarse_level] if n_fine == 0: continue - # parent_step[k] holds fine→level(fine+k+1) parent at each step. - parent = None - prev_n = n_fine - prev_assn = fine_assn - for step in range(1, max_delta + 1): + fine_lg = get_resolution_level(root, fine_level) + parent = _decode_parent_from_plus_one( + fine_lg, + fine_assn=fine_assn, + coarse_assn=coarse_assn, + n_fine=n_fine, + ) + if parent is not None: + adjacent_parent[fine_level] = parent + + # Compose deeper-delta parents and emit. + for fine_level in levels[:-1]: + if fine_level not in adjacent_parent: + continue + fine_assn, n_fine = per_level[fine_level] + parent = adjacent_parent[fine_level].copy() + for step in range(2, max_delta + 1): coarse_level = fine_level + step if coarse_level not in per_level: break - coarse_lg = get_resolution_level(root, coarse_level) + inter_level = coarse_level - 1 + if inter_level not in adjacent_parent: + break + inter_parent = adjacent_parent[inter_level] coarse_assn, n_coarse = per_level[coarse_level] if n_coarse == 0: break - if step == 1: - parent = _reconstruct_parent_from_metanode_children( - coarse_lg, n_fine=n_fine, - ) - else: - # Compose: parent_at_step = parent_at_(step-1)_from_(coarse-1) - # → grandparent via that coarser level's cross_chunk_links/. 
- inter_lg = get_resolution_level(root, coarse_level - 1) - inter_n = per_level[coarse_level - 1][1] - inter_parent = _reconstruct_parent_from_metanode_children( - coarse_lg, n_fine=inter_n, - ) - if inter_parent is None or parent is None: - parent = None - else: - composed = np.full(n_fine, -1, dtype=np.int64) - valid = parent >= 0 - composed[valid] = inter_parent[parent[valid]] - parent = composed - if parent is None: - # No provenance info — skip this and all larger - # deltas for this fine level. + composed = np.full(n_fine, -1, dtype=np.int64) + valid = parent >= 0 + composed[valid] = inter_parent[parent[valid]] + parent = composed + if not np.any(parent >= 0): break _write_cross_level_edges( @@ -735,7 +713,6 @@ def _finalize_cross_level_for_store( sid_ndim=ndim, storage=cross_level_storage, ) - prev_n, prev_assn = n_coarse, coarse_assn def _write_cross_level_edges( @@ -765,6 +742,13 @@ def _write_cross_level_edges( return coarse_level = fine_level + delta + # Drop orphaned fine vertices (parent < 0) before building edges. + valid_mask = parent >= 0 + if not np.any(valid_mask): + return + fine_global = np.flatnonzero(valid_mask).astype(np.int64) + parent_valid = parent[valid_mask].astype(np.int64) + # Build chunk-mapping tables for both levels. fine_chunk_list = sorted(fine_chunk_assignments.keys()) fine_vchunks, fine_vlocal, fine_chunk_list = build_vertex_chunk_mapping( @@ -776,10 +760,7 @@ def _write_cross_level_edges( ) # Trivial fine→parent edge list. - edges = np.stack( - [np.arange(n_fine, dtype=np.int64), parent.astype(np.int64)], - axis=1, - ) + edges = np.stack([fine_global, parent_valid], axis=1) aligned, cross = partition_cross_level_edges( edges, fine_vchunks, fine_vlocal, fine_chunk_list, @@ -801,10 +782,7 @@ def _write_cross_level_edges( # Re-partition from the coarse side so chunk-alignment is # evaluated against the coarse chunk grid (intra/cross split # may differ from the fine-side view when grids don't align). 
- rev_edges = np.stack( - [parent.astype(np.int64), np.arange(n_fine, dtype=np.int64)], - axis=1, - ) + rev_edges = np.stack([parent_valid, fine_global], axis=1) rev_aligned, rev_cross = partition_cross_level_edges( rev_edges, coarse_vchunks, coarse_vlocal, coarse_chunk_list, @@ -828,15 +806,7 @@ def _write_cross_level_edges( def build_pyramid( store_path: str | Path, *, - factors: list[tuple[float, float]] | None = None, - method: str = COARSEN_PER_OBJECT, - level_configs: list[dict] | None = None, - target_volume_reduction: float = 8.0, - sparsity_weight: float = 0.0, - reduction_factor: int = 8, - max_levels: int = 10, - min_vertices: int = 8, - agg_mode: str = "mean", + factors: list[tuple[float, float]], sparsity_strategy: str = "random", sparsity_seed: int | None = None, cross_level_depth: int = DEFAULT_CROSS_LEVEL_DEPTH, @@ -844,41 +814,23 @@ def build_pyramid( ) -> dict[str, Any]: """Build a multi-resolution pyramid for an existing store. - Preferred interface — pass ``factors=[(coarsen_2, sparsity_3), - ...]`` where ``factors[i]`` is applied to produce level ``i+1`` - from level ``i``. Either factor at ``1.0`` opts out of that axis. - Method defaults to ``"per_object"`` (per-object pyramid with stable - OIDs; metavertices may be shared between objects). Pass - ``method="cross_object_metanode"`` (or the legacy - ``"grid_metanode"``) to fall back to the original aggregation that - produces a fresh OID space per level. - - Legacy interface (kept for back-compat): - - 1. **Explicit**: provide ``level_configs`` — a list of dicts, each - with ``"bin_ratio"`` and optionally ``"object_sparsity"``. - 2. **Auto**: auto-plan using ``target_volume_reduction`` and - ``sparsity_weight``. - - When ``factors`` is None and ``level_configs`` is None and - ``sparsity_weight`` is 0.0 (default), behaviour matches the - original pyramid builder (backward compatible). 
+ Pass ``factors=[(coarsen_2, sparsity_3), ...]`` where ``factors[i]`` + is applied to produce level ``i+1`` from level ``i``. Either factor + at ``1.0`` opts out of that axis. Uses the per-object pyramid: + each surviving object's vertices are aggregated into bin centroids + (metavertices); metavertices may be shared between objects and OIDs + are preserved across levels. Args: store_path: Path to the store with level 0. - level_configs: Explicit per-level configs. - target_volume_reduction: Per-level target for auto mode. - sparsity_weight: 0.0=all binning, 1.0=all sparsity. - reduction_factor: Legacy threshold for old auto mode. - max_levels: Maximum levels. - min_vertices: Stop below this. - agg_mode: Metanode aggregation. + factors: List of ``(coarsen_factor, sparsity_factor)`` tuples, + one per coarser level. sparsity_strategy: Object selection strategy. sparsity_seed: Random seed. cross_level_depth: Maximum absolute level delta for materialized - cross-pyramid-level link arrays (0.4 multiscale links). - ``0`` = none, ``N`` = up to ``±N`` per pair (or ``+N`` only - when ``cross_level_storage='implicit'``), ``-1`` = walk all + cross-pyramid-level link arrays. ``0`` = none, ``N`` = up + to ``±N`` per pair (or ``+N`` only when + ``cross_level_storage='implicit'``), ``-1`` = walk all available level pairs. Default ``1``. cross_level_storage: ``"none"`` / ``"implicit"`` / ``"explicit"``. ``"explicit"`` materializes both ``+N`` (at the finer level) @@ -898,185 +850,29 @@ def build_pyramid( f"cross_level_depth must be ≥ -1 (got {cross_level_depth})" ) - # Factor-based interface (preferred). Each entry produces one - # coarser level; routed through coarsen_level which knows both - # methods. Returns early; the legacy paths below remain available - # for callers that pass ``level_configs`` or ``sparsity_weight``. 
- if factors is not None: - summaries: list[dict[str, Any]] = [] - for i, fac in enumerate(factors): - if isinstance(fac, (tuple, list)) and len(fac) == 2: - cf, sf = float(fac[0]), float(fac[1]) - else: - raise ValueError( - f"factors[{i}] must be a (coarsen_factor, sparsity_factor) " - f"tuple; got {fac!r}" - ) - summaries.append(coarsen_level( - store_path, - source_level=i, - target_level=i + 1, - coarsen_factor=cf, - sparsity_factor=sf, - method=method, - sparsity_strategy=sparsity_strategy, - sparsity_seed=sparsity_seed, - agg_mode=agg_mode, - )) - # Persist + emit cross-level edges (writes use the on-disk - # vertex chunk assignments — no re-derivation needed here). - _finalize_cross_level_for_store( - store_path, - cross_level_depth=cross_level_depth, - cross_level_storage=cross_level_storage, - ) - return { - "levels_created": len(summaries), - "level_specs": summaries, - "method": method, - "cross_level_depth": cross_level_depth, - "cross_level_storage": cross_level_storage, - } - - root = open_store(str(store_path), mode="r+") - meta = read_root_metadata(root) - ndim = meta.sid_ndim - chunk_shape = meta.chunk_shape - base_bin = meta.effective_bin_shape - - # Read all level-0 vertices - level0 = get_resolution_level(root, 0) - positions = _read_all_vertices(level0, ndim) - - if len(positions) == 0: - return {"levels_created": 0, "level_specs": []} - - n_full = len(positions) - - # Count objects at level 0 (approximate: try reading object_index) - try: - manifests = read_all_object_manifests(level0) - n_objects = len(manifests) - except Exception: - n_objects = 0 - - # Plan levels - if level_configs is not None: - # Explicit configs → use plan_pyramid_with_sparsity - specs = plan_pyramid_with_sparsity( - n_full, max(n_objects, 1), base_bin, chunk_shape, - level_configs=level_configs, - ) - elif sparsity_weight > 0.0: - # Auto with sparsity - specs = plan_pyramid_with_sparsity( - n_full, max(n_objects, 1), base_bin, chunk_shape, - 
target_volume_reduction=target_volume_reduction, - sparsity_weight=sparsity_weight, - max_levels=max_levels, - min_vertices=min_vertices, - ) - else: - # Legacy auto mode (backward compatible) - specs = _legacy_plan( - n_full, ndim, base_bin, chunk_shape, - reduction_factor, max_levels, min_vertices, - ) - - if not specs: - return {"levels_created": 0, "level_specs": []} - - # Build each level - current_positions = positions - levels_created = 0 - - for spec in specs: - if isinstance(spec, LevelReductionSpec): - bin_ratio = spec.bin_ratio - bin_shape = spec.bin_shape or compute_bin_shape(base_bin, bin_ratio) - object_sparsity = spec.object_sparsity + summaries: list[dict[str, Any]] = [] + for i, fac in enumerate(factors): + if isinstance(fac, (tuple, list)) and len(fac) == 2: + cf, sf = float(fac[0]), float(fac[1]) else: - # Legacy LevelSpec - bin_shape = tuple(spec.bin_size for _ in range(ndim)) - bin_ratio = None - object_sparsity = 1.0 - - # Generate metanodes - result = generate_metanodes( - current_positions, bin_shape, agg_mode=agg_mode, - ) - meta_positions = result["metanode_positions"] - children = result["children"] - n_metanodes = len(meta_positions) - - if n_metanodes == 0: - break - - # Check reduction (skip if too small, except on first level) - actual_ratio = len(current_positions) / max(n_metanodes, 1) - if actual_ratio < 2 and levels_created > 0: - continue - - # Apply object sparsity - if object_sparsity < 1.0 and n_metanodes > 1: - kept = apply_sparsity( - n_metanodes, object_sparsity, sparsity_strategy, - seed=sparsity_seed, - representative_points=meta_positions, - bin_shape=bin_shape, - ) - meta_positions = meta_positions[kept] - children = [children[i] for i in kept] - n_metanodes = len(meta_positions) - - if n_metanodes == 0: - break - - # Create level - actual_level = levels_created + 1 - level_meta = LevelMetadata( - level=actual_level, - vertex_count=n_metanodes, - arrays_present=[VERTICES], - bin_shape=bin_shape, - bin_ratio=bin_ratio, - 
object_sparsity=object_sparsity, - coarsening_method="grid_metanode", - parent_level=actual_level - 1, - ) - level_group = create_resolution_level(root, actual_level, level_meta) - create_vertices_array(level_group, dtype="float32") - - # Write - chunk_assignments = assign_chunks(meta_positions, chunk_shape) - for chunk_coords, global_indices in sorted(chunk_assignments.items()): - write_chunk_vertices( - level_group, chunk_coords, - [meta_positions[global_indices]], - dtype=np.float32, + raise ValueError( + f"factors[{i}] must be a (coarsen_factor, sparsity_factor) " + f"tuple; got {fac!r}" ) + summaries.append(coarsen_level( + store_path, + source_level=i, + target_level=i + 1, + coarsen_factor=cf, + sparsity_factor=sf, + sparsity_strategy=sparsity_strategy, + sparsity_seed=sparsity_seed, + cross_level_storage=cross_level_storage, + )) - levels_created += 1 - current_positions = meta_positions - - spec_summaries = [] - for i, spec in enumerate(specs[:levels_created]): - if isinstance(spec, LevelReductionSpec): - spec_summaries.append({ - "level": i + 1, - "bin_ratio": list(spec.bin_ratio), - "object_sparsity": spec.object_sparsity, - "expected_volume_reduction": spec.expected_volume_reduction, - }) - else: - spec_summaries.append({ - "level": i + 1, - "bin_size": spec.bin_size, - "expected_vertices": spec.expected_vertex_count, - }) - - # Persist root cross-level metadata and emit cross-level link - # arrays for every adjacent pair we just built. + # Compose deeper-delta cross-level links from the inline-emitted +1 + # arrays. Also stamps root cross-level metadata + the multiscale + # links capability. 
_finalize_cross_level_for_store( store_path, cross_level_depth=cross_level_depth, @@ -1084,50 +880,9 @@ def build_pyramid( ) return { - "levels_created": levels_created, - "level_specs": spec_summaries, + "levels_created": len(summaries), + "level_specs": summaries, + "method": COARSEN_PER_OBJECT, "cross_level_depth": cross_level_depth, "cross_level_storage": cross_level_storage, } - - -# =================================================================== -# Helpers -# =================================================================== - -def _read_all_vertices( - level_group: Any, ndim: int, -) -> npt.NDArray[np.float32]: - """Read all vertices from a level, concatenated.""" - chunk_keys = list_chunk_keys(level_group) - parts: list[npt.NDArray] = [] - for ck in chunk_keys: - try: - groups = read_chunk_vertices(level_group, ck, dtype=np.float32, ndim=ndim) - for vg in groups: - if len(vg) > 0: - parts.append(vg) - except Exception: - continue - if not parts: - return np.zeros((0, ndim), dtype=np.float32) - return np.concatenate(parts, axis=0) - - -def _legacy_plan( - n_full: int, - ndim: int, - base_bin: tuple[float, ...], - chunk_shape: tuple[float, ...], - reduction_factor: int, - max_levels: int, - min_vertices: int, -) -> list: - """Plan using the old LevelSpec-based approach (backward compat).""" - base_bin_scalar = min(base_bin) - return compute_level_specs( - n_full, base_bin_scalar, - reduction_factor=reduction_factor, - max_levels=max_levels, - min_vertices=min_vertices, - ) diff --git a/zarr_vectors/multiresolution/layers.py b/zarr_vectors/multiresolution/layers.py index 04d98d1..a3bd497 100644 --- a/zarr_vectors/multiresolution/layers.py +++ b/zarr_vectors/multiresolution/layers.py @@ -1,117 +1,16 @@ -"""Adaptive resolution level selection. +"""Bin-ratio helpers for multi-resolution pyramids. 
-Determines which coarsening levels to emit based on the adaptive -threshold rule: bin size doubles each candidate level, but a new -resolution level is only stored when total vertex count drops below -``N_previous / reduction_factor``. - -Also provides helpers for computing bin ratios from target reductions. +The level-emission logic lives in +:mod:`zarr_vectors.multiresolution.coarsen` (factor-based interface); +this module is purely a small helpers shelf for computing bin ratios +from target volume reductions and for describing one resolution level +in the sparsity-aware data class form. """ from __future__ import annotations import math from dataclasses import dataclass -from typing import Any - - -@dataclass -class LevelSpec: - """Specification for one resolution level in the pyramid.""" - - level_index: int - bin_size: float - expected_vertex_count: int - reduction_ratio: float - - -def compute_level_specs( - full_resolution_count: int, - base_bin_size: float, - *, - reduction_factor: int = 8, - max_levels: int = 20, - min_vertices: int = 8, -) -> list[LevelSpec]: - """Determine which resolution levels to create. - - Starting from the full resolution (level 0), candidate levels use - bin sizes that double at each step: ``base * 2^k``. A level is - only emitted if its estimated vertex count is at most - ``previous_count / reduction_factor``. - - Args: - full_resolution_count: Number of vertices at level 0. - base_bin_size: The chunk_shape component (or minimum chunk size) - used as the starting bin size for coarsening. - reduction_factor: Minimum fold-reduction required to emit a level - (default 8). - max_levels: Hard cap on number of resolution levels. - min_vertices: Stop generating levels below this count. - - Returns: - List of LevelSpec (excluding level 0 which is always present). - Empty if no coarsening is warranted. 
- """ - specs: list[LevelSpec] = [] - prev_count = full_resolution_count - level_idx = 1 - - for k in range(1, max_levels + 10): - bin_size = base_bin_size * (2 ** k) - - est_count = max(1, int(full_resolution_count / (2 ** k) ** 3)) - - if est_count >= prev_count: - continue - - ratio = prev_count / max(est_count, 1) - if ratio < reduction_factor: - continue - - if est_count < min_vertices: - break - - specs.append(LevelSpec( - level_index=level_idx, - bin_size=bin_size, - expected_vertex_count=est_count, - reduction_ratio=ratio, - )) - - prev_count = est_count - level_idx += 1 - - if level_idx > max_levels: - break - - return specs - - -def select_bin_sizes( - chunk_shape: tuple[float, ...], - full_vertex_count: int, - *, - reduction_factor: int = 8, -) -> list[float]: - """Convenience: return just the bin sizes for each pyramid level. - - Uses the minimum chunk dimension as the base bin size. - - Args: - chunk_shape: Spatial chunk dimensions. - full_vertex_count: Vertex count at level 0. - reduction_factor: Fold-reduction threshold. - - Returns: - List of bin sizes for levels 1, 2, ... (level 0 is full res). 
- """ - base = min(chunk_shape) - specs = compute_level_specs( - full_vertex_count, base, - reduction_factor=reduction_factor, - ) - return [s.bin_size for s in specs] # =================================================================== @@ -167,7 +66,6 @@ def select_bin_ratio_for_reduction( current_vol = r_base ** ndim if current_vol < target_reduction: - # Need to increase some dimensions for d in range(ndim): if current_vol >= target_reduction: break @@ -176,7 +74,6 @@ def select_bin_ratio_for_reduction( for r in ratios: current_vol *= r elif current_vol > target_reduction: - # Need to decrease some dimensions (but not below 1) for d in range(ndim - 1, -1, -1): if current_vol <= target_reduction: break @@ -186,7 +83,6 @@ def select_bin_ratio_for_reduction( for r in ratios: current_vol *= r - # Sort descending for convention ratios.sort(reverse=True) return tuple(ratios) @@ -226,7 +122,6 @@ def compute_level_ratios( for r in target_reductions ] - # Auto-generate: each level doubles the ratio ratios: list[tuple[int, ...]] = [] cumulative = 1 for _ in range(max_levels): @@ -241,7 +136,7 @@ def compute_level_ratios( # =================================================================== -# Sparsity-aware level planning +# Level specification dataclass # =================================================================== @dataclass @@ -294,188 +189,7 @@ def auto_plan_sparsity( vertex_reduction *= r if vertex_reduction >= target_volume_reduction: - return 1.0 # binning alone is enough + return 1.0 sparsity = vertex_reduction / target_volume_reduction - return max(sparsity, 1e-6) # never exactly zero - - -def plan_pyramid_with_sparsity( - base_vertex_count: int, - base_object_count: int, - base_bin_shape: tuple[float, ...], - chunk_shape: tuple[float, ...], - *, - level_configs: list[dict] | None = None, - target_volume_reduction: float = 8.0, - sparsity_weight: float = 0.0, - max_levels: int = 10, - min_vertices: int = 8, -) -> list[LevelReductionSpec]: - """Plan a 
multi-resolution pyramid with object sparsity. - - Two modes: - - 1. **Explicit configs**: ``level_configs`` is a list of dicts, each - with ``bin_ratio`` and optionally ``object_sparsity``. - 2. **Auto-plan**: generates levels using ``target_volume_reduction`` - per level, splitting the reduction between binning and sparsity - according to ``sparsity_weight``. - - Args: - base_vertex_count: Vertices at level 0. - base_object_count: Objects at level 0. - base_bin_shape: Supervoxel edge lengths at level 0. - chunk_shape: Chunk dimensions (constant across levels). - level_configs: Explicit per-level configs. Each dict has: - - ``"bin_ratio"``: tuple of ints (required) - - ``"object_sparsity"``: float in (0,1] (default 1.0) - target_volume_reduction: Per-level target when auto-planning. - sparsity_weight: 0.0 = all binning, 1.0 = all sparsity, - 0.5 = balanced split. Only used in auto mode. - max_levels: Maximum levels to generate. - min_vertices: Stop below this count. - - Returns: - List of LevelReductionSpec, one per coarsened level. 
- """ - ndim = len(base_bin_shape) - - if level_configs is not None: - return _plan_from_configs( - level_configs, base_bin_shape, chunk_shape, ndim, - ) - - return _auto_plan( - base_vertex_count, base_object_count, - base_bin_shape, chunk_shape, ndim, - target_volume_reduction, sparsity_weight, - max_levels, min_vertices, - ) - - -def _plan_from_configs( - configs: list[dict], - base_bin_shape: tuple[float, ...], - chunk_shape: tuple[float, ...], - ndim: int, -) -> list[LevelReductionSpec]: - """Build specs from explicit level configs.""" - from zarr_vectors.core.metadata import compute_bin_shape - - specs: list[LevelReductionSpec] = [] - for i, cfg in enumerate(configs): - bin_ratio = tuple(cfg["bin_ratio"]) - sparsity = cfg.get("object_sparsity", 1.0) - bin_shape = compute_bin_shape(base_bin_shape, bin_ratio) - - vertex_red = 1.0 - for r in bin_ratio: - vertex_red *= r - object_red = 1.0 / max(sparsity, 1e-9) - volume_red = vertex_red * object_red - - specs.append(LevelReductionSpec( - level_index=i + 1, - bin_ratio=bin_ratio, - bin_shape=bin_shape, - object_sparsity=sparsity, - expected_vertex_reduction=vertex_red, - expected_object_reduction=object_red, - expected_volume_reduction=volume_red, - )) - - return specs - - -def _auto_plan( - base_vertex_count: int, - base_object_count: int, - base_bin_shape: tuple[float, ...], - chunk_shape: tuple[float, ...], - ndim: int, - target_volume_reduction: float, - sparsity_weight: float, - max_levels: int, - min_vertices: int, -) -> list[LevelReductionSpec]: - """Auto-plan levels using target reduction and sparsity weight.""" - from zarr_vectors.core.metadata import ( - compute_bin_shape, - validate_bin_shape_divides_chunk, - ) - - specs: list[LevelReductionSpec] = [] - cumulative_reduction = 1.0 - current_verts = base_vertex_count - current_objs = base_object_count - - for level_idx in range(1, max_levels + 1): - cumulative_reduction *= target_volume_reduction - - # Split reduction between binning and sparsity - # 
sparsity_weight=0: all binning. sparsity_weight=1: all sparsity. - # The binning portion of the per-level reduction - per_level = target_volume_reduction - binning_portion = per_level ** (1.0 - sparsity_weight) - sparsity_portion = per_level ** sparsity_weight - - # Find bin ratio for the binning portion - bin_ratio = select_bin_ratio_for_reduction( - binning_portion ** level_idx, ndim, - ) - bin_shape = compute_bin_shape(base_bin_shape, bin_ratio) - - # Check chunk divisibility — skip if invalid - try: - validate_bin_shape_divides_chunk(chunk_shape, bin_shape) - except Exception: - # Try next power-of-2 ratio that divides - found = False - for k in range(1, 10): - candidate = tuple(2 ** k for _ in range(ndim)) - candidate_bs = compute_bin_shape(base_bin_shape, candidate) - try: - validate_bin_shape_divides_chunk(chunk_shape, candidate_bs) - bin_ratio = candidate - bin_shape = candidate_bs - found = True - break - except Exception: - continue - if not found: - break - - vertex_red = 1.0 - for r in bin_ratio: - vertex_red *= r - - # Compute sparsity to hit the cumulative target - if vertex_red >= cumulative_reduction: - sparsity = 1.0 - else: - sparsity = vertex_red / cumulative_reduction - sparsity = max(min(sparsity, 1.0), 1e-6) - - est_verts = max(1, int(base_vertex_count / vertex_red)) - est_objs = max(1, int(base_object_count * sparsity)) - - if est_verts < min_vertices: - break - - object_red = 1.0 / max(sparsity, 1e-9) - - specs.append(LevelReductionSpec( - level_index=level_idx, - bin_ratio=bin_ratio, - bin_shape=bin_shape, - object_sparsity=sparsity, - expected_vertex_reduction=vertex_red, - expected_object_reduction=object_red, - expected_volume_reduction=vertex_red * object_red, - )) - - current_verts = est_verts - current_objs = est_objs - - return specs + return max(sparsity, 1e-6) From 119096327e846e682c0c08f0cb9d42cd8b274278 Mon Sep 17 00:00:00 2001 From: Andrew-Keenlyside Date: Thu, 14 May 2026 15:22:10 -0700 Subject: [PATCH 3/4] refactor signatures 
to match ondisk array names, add async read --- tests/conftest.py | 4 +- tests/test_attr_chunking.py | 28 +- tests/test_batched_reads.py | 161 ++++++++ tests/test_batched_writes.py | 8 +- tests/test_graphs.py | 8 +- tests/test_lazy_writer.py | 6 +- tests/test_lines_parametric.py | 2 +- tests/test_per_object_pyramid.py | 4 +- tests/test_points.py | 4 +- tests/test_scaffolding.py | 2 +- zarr_vectors/composite.py | 21 +- zarr_vectors/constants.py | 16 +- zarr_vectors/core/_batch_reader.py | 143 +++++++ zarr_vectors/core/arrays.py | 44 +-- zarr_vectors/core/group.py | 63 ++++ zarr_vectors/core/multiscale.py | 90 +++++ zarr_vectors/core/store.py | 88 ++++- zarr_vectors/encoding/compression.py | 14 +- zarr_vectors/lazy/arrays.py | 4 +- zarr_vectors/lazy/store.py | 10 - zarr_vectors/lazy/writer.py | 6 +- zarr_vectors/multiresolution/layers.py | 195 ---------- .../multiresolution/strategies/points.py | 2 +- zarr_vectors/sharding/layout.py | 232 +----------- zarr_vectors/types/graphs.py | 356 +++++++++++------- zarr_vectors/types/lines.py | 93 +++-- zarr_vectors/types/meshes.py | 184 +++++---- zarr_vectors/types/parametric.py | 12 +- zarr_vectors/types/points.py | 267 +++++++------ zarr_vectors/types/polylines.py | 169 +++++---- zarr_vectors/validate/structure.py | 4 +- 31 files changed, 1289 insertions(+), 951 deletions(-) create mode 100644 tests/test_batched_reads.py create mode 100644 zarr_vectors/core/_batch_reader.py delete mode 100644 zarr_vectors/multiresolution/layers.py diff --git a/tests/conftest.py b/tests/conftest.py index 71bd0f7..0be9dc1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,14 +48,14 @@ def rng() -> np.random.Generator: def simple_points_3d(rng: np.random.Generator) -> dict: """100 random XYZ points in [0, 1000)³ with an intensity attribute. - Returns dict with keys: positions, attributes, chunk_shape. + Returns dict with keys: positions, vertex_attributes, chunk_shape. With chunk_shape=(500, 500, 500), points span ~8 chunks. 
""" positions = rng.uniform(0, 1000, size=(100, 3)).astype(np.float32) intensity = rng.uniform(0, 1, size=(100,)).astype(np.float32) return { "positions": positions, - "attributes": {"intensity": intensity}, + "vertex_attributes": {"intensity": intensity}, "chunk_shape": (500.0, 500.0, 500.0), } diff --git a/tests/test_attr_chunking.py b/tests/test_attr_chunking.py index 87a4d7c..b07c0a1 100644 --- a/tests/test_attr_chunking.py +++ b/tests/test_attr_chunking.py @@ -74,7 +74,7 @@ def test_points_attr_chunking_round_trip(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) @@ -101,7 +101,7 @@ def test_points_attr_chunking_full_read_returns_all(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) out = read_points(str(store)) @@ -115,7 +115,7 @@ def test_points_attr_filter_selectivity(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) @@ -133,7 +133,7 @@ def test_points_attr_filter_unknown_value_returns_empty(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) out = read_points(str(store), attribute_filter={"gene": "ZZZ"}) @@ -156,7 +156,7 @@ def test_points_chunk_by_attribute_rejects_float(tmp_path): write_points( str(tmp_path / "x.zvr"), pos, chunk_shape=(10.0, 10.0, 10.0), - attributes={"score": np.random.default_rng(0).uniform(0, 1, 50)}, + vertex_attributes={"score": np.random.default_rng(0).uniform(0, 1, 50)}, chunk_by_attribute="score", ) @@ -176,7 +176,7 @@ def test_points_attribute_filter_mismatched_name_raises(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": 
gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) with pytest.raises(ArrayError, match="does not match"): @@ -265,7 +265,7 @@ def test_zvr_level_attribute_values(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) zvr = open_zvr(str(store)) @@ -282,7 +282,7 @@ def test_zvr_level_read_attribute_chunk(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) zvr = open_zvr(str(store)) @@ -297,7 +297,7 @@ def test_zvr_level_read_attribute_chunk_unknown_value(tmp_path): write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), - attributes={"gene": gene}, + vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) zvr = open_zvr(str(store)) @@ -327,7 +327,7 @@ def test_lines_attr_chunking_line_attribute_round_trip(tmp_path): str(store), eps, chunk_shape=(50.0, 50.0, 50.0), bin_shape=(50.0, 50.0, 50.0), - line_attributes={"cat": cat}, + object_attributes={"cat": cat}, chunk_by_attribute="cat", ) lm = read_level_metadata(open_store(str(store)), 0) @@ -351,7 +351,7 @@ def test_lines_per_endpoint_attribute_must_match_per_line(tmp_path): write_lines( str(tmp_path / "x.zvr"), eps, chunk_shape=(50.0, 50.0, 50.0), - attributes={"cat": bad}, + vertex_attributes={"cat": bad}, chunk_by_attribute="cat", ) @@ -392,7 +392,7 @@ def test_graph_attr_chunking_round_trip(tmp_path): str(store), pos, edges, chunk_shape=(50.0, 50.0, 50.0), object_ids=obj_ids, - node_attributes={"cell_type": cell_type}, + vertex_attributes={"cell_type": cell_type}, chunk_by_attribute="cell_type", ) lm = read_level_metadata(open_store(str(store)), 0) @@ -417,7 +417,7 @@ def test_graph_attr_chunking_rejects_mixed_object(tmp_path): str(tmp_path / "x.zvr"), pos, edges, chunk_shape=(50.0, 50.0, 50.0), object_ids=obj_ids, - node_attributes={"cell_type": 
cell_type}, + vertex_attributes={"cell_type": cell_type}, chunk_by_attribute="cell_type", ) @@ -434,7 +434,7 @@ def test_graph_attr_chunking_default_object_per_node(tmp_path): write_graph( str(store), pos, edges, chunk_shape=(50.0, 50.0, 50.0), - node_attributes={"cell_type": cell_type}, + vertex_attributes={"cell_type": cell_type}, chunk_by_attribute="cell_type", ) assert read_graph(str(store), attribute_filter={"cell_type": "A"})["node_count"] == 4 diff --git a/tests/test_batched_reads.py b/tests/test_batched_reads.py new file mode 100644 index 0000000..7dfc8fb --- /dev/null +++ b/tests/test_batched_reads.py @@ -0,0 +1,161 @@ +"""Tests for ``Group.batched_reads`` and the ``_batch_reader`` helper. + +Covers: + +* Empty plan (no-op). +* Round-trip: bytes written via the normal path read back identically + through a ``batched_reads`` block. +* End-to-end: ``read_points`` against a store written by ``write_points`` + (the read path internally wraps the chunk loop in ``batched_reads``). +* Cache-miss safety: a read for a key NOT in the plan falls through to + the sync ``read_bytes`` path. +* Nesting is rejected with ``StoreError``. +* Icechunk-style fallback (monkeypatched detector) round-trips correctly + via the serial sync path. 
+""" + +from __future__ import annotations + +import numpy as np +import pytest +from zarr.storage import MemoryStore + +from zarr_vectors import open_store +from zarr_vectors.core.store import create_store +from zarr_vectors.exceptions import StoreError +from zarr_vectors.types.points import read_points, write_points + + +def test_batched_reads_empty_plan_is_noop(tmp_store_path): + root = create_store(str(tmp_store_path)) + with root.batched_reads([]): + pass + + +def test_batched_reads_round_trip(tmp_store_path): + """Bytes written via the sync path read back identically through a + batched_reads block (cache hit).""" + root = create_store(str(tmp_store_path)) + payloads = { + "0.0.0": b"first chunk bytes", + "1.0.0": b"\x00" * 32, + "2.3.4": np.arange(100, dtype=np.uint8).tobytes(), + "empty": b"", + } + for k, v in payloads.items(): + root.write_bytes("read_test", k, v) + + plan = [("read_test", list(payloads.keys()))] + with root.batched_reads(plan): + for k, v in payloads.items(): + assert root.read_bytes("read_test", k) == v + + +def test_batched_reads_cache_miss_falls_through(tmp_store_path): + """A read for an (array, key) not in the plan still returns correct + data — the cache miss drops through to the sync path.""" + root = create_store(str(tmp_store_path)) + root.write_bytes("planned_arr", "0.0.0", b"planned-data") + root.write_bytes("unplanned_arr", "0.0.0", b"unplanned-data") + + plan = [("planned_arr", ["0.0.0"])] + with root.batched_reads(plan): + # Hits the cache. + assert root.read_bytes("planned_arr", "0.0.0") == b"planned-data" + # Cache miss: falls back to sync read. 
+ assert root.read_bytes("unplanned_arr", "0.0.0") == b"unplanned-data" + + +def test_batched_reads_nesting_rejected(tmp_store_path): + root = create_store(str(tmp_store_path)) + with root.batched_reads([]): + with pytest.raises(StoreError, match="does not support nesting"): + with root.batched_reads([]): + pass + + +def test_batched_reads_via_read_points_memory_store(): + """End-to-end: read_points against a MemoryStore exercises the + batched_reads path internally (read_points wraps its chunk loop) + and must produce identical results to the unbatched sync path.""" + mem = MemoryStore() + rng = np.random.default_rng(0) + positions = rng.uniform(0, 100, (300, 3)).astype(np.float32) + intensity = rng.uniform(0, 1, 300).astype(np.float32) + write_points(mem, positions, vertex_attributes={"intensity": intensity}) + + result = read_points(mem, attribute_names=["intensity"]) + assert result["vertex_count"] == 300 + assert result["positions"].shape == (300, 3) + assert result["vertex_attributes"]["intensity"].shape == (300,) + + +def test_batched_reads_via_read_points_localstore(tmp_path): + """Same end-to-end against a LocalStore — covers the path that the + benchmark notebook exercises.""" + url = str(tmp_path / "batch_read_points.zarr") + rng = np.random.default_rng(7) + positions = rng.uniform(0, 1000, (1000, 3)).astype(np.float32) + score = positions[:, 0].astype(np.float32).copy() + write_points( + url, positions, + chunk_shape=(100., 100., 100.), + vertex_attributes={"score": score}, + ) + out = read_points(url, attribute_names=["score"]) + assert out["vertex_count"] == 1000 + assert out["positions"].shape == (1000, 3) + assert out["vertex_attributes"]["score"].shape == (1000,) + + +def test_batched_reads_falls_back_to_sync_for_icechunk_like_store( + tmp_store_path, monkeypatch, +): + """Stores that look like icechunk take the sync fallback inside + ``flush_prefetch``. 
Force the detector to return True and verify + the round-trip still works.""" + from zarr_vectors.core import _batch_reader + + monkeypatch.setattr(_batch_reader, "_is_icechunk_store", lambda _store: True) + + root = create_store(str(tmp_store_path)) + payloads = { + "0.0.0": b"icechunk-fallback-data", + "1.0.0": np.arange(32, dtype=np.uint8).tobytes(), + "empty": b"", + } + for k, v in payloads.items(): + root.write_bytes("fallback_arr", k, v) + + plan = [("fallback_arr", list(payloads.keys()))] + with root.batched_reads(plan): + for k, v in payloads.items(): + assert root.read_bytes("fallback_arr", k) == v + + +def test_batched_reads_clears_cache_on_exception(tmp_store_path): + """If the block raises, the cache is dropped so subsequent reads + don't accidentally serve stale data.""" + root = create_store(str(tmp_store_path)) + root.write_bytes("err_arr", "0.0.0", b"data") + plan = [("err_arr", ["0.0.0"])] + with pytest.raises(RuntimeError, match="boom"): + with root.batched_reads(plan): + raise RuntimeError("boom") + # Cache cleared, sync path still works. 
+ assert root._prefetch_cache is None + assert root.read_bytes("err_arr", "0.0.0") == b"data" + + +def test_batched_reads_missing_chunk_omitted_from_cache(tmp_store_path): + """A plan entry pointing at a non-existent chunk is silently skipped + in the cache — and read_bytes raises StoreError, same as in the + unbatched path.""" + root = create_store(str(tmp_store_path)) + root.write_bytes("partial_arr", "0.0.0", b"present") + + plan = [("partial_arr", ["0.0.0", "missing.key"])] + with root.batched_reads(plan): + assert root.read_bytes("partial_arr", "0.0.0") == b"present" + with pytest.raises(StoreError, match="not found"): + root.read_bytes("partial_arr", "missing.key") diff --git a/tests/test_batched_writes.py b/tests/test_batched_writes.py index 14b1059..e5c784d 100644 --- a/tests/test_batched_writes.py +++ b/tests/test_batched_writes.py @@ -65,7 +65,7 @@ def test_batched_writes_via_write_points_memory_store(): mem = MemoryStore() positions = np.random.default_rng(0).uniform(0, 100, (200, 3)).astype(np.float32) intensity = np.random.default_rng(1).uniform(0, 1, 200).astype(np.float32) - write_points(mem, positions, attributes={"intensity": intensity}) + write_points(mem, positions, vertex_attributes={"intensity": intensity}) result = read_points(mem) assert result["vertex_count"] == 200 @@ -76,12 +76,12 @@ def test_batched_writes_via_write_points_localstore(tmp_path): """Same end-to-end on a local file store, then re-open via URL.""" url = str(tmp_path / "batch_points.zarr") positions = np.random.default_rng(0).uniform(0, 100, (500, 3)).astype(np.float32) - write_points(url, positions, attributes={"score": positions[:, 0].copy()}) + write_points(url, positions, vertex_attributes={"score": positions[:, 0].copy()}) # read_points reads attributes only when names are requested explicitly. 
result = read_points(url, attribute_names=["score"]) assert result["vertex_count"] == 500 - assert "score" in result["attributes"] - assert result["attributes"]["score"].shape == (500,) + assert "score" in result["vertex_attributes"] + assert result["vertex_attributes"]["score"].shape == (500,) def test_batched_writes_defers_write_array_meta(tmp_store_path): diff --git a/tests/test_graphs.py b/tests/test_graphs.py index 2061b07..1bf3ea3 100644 --- a/tests/test_graphs.py +++ b/tests/test_graphs.py @@ -48,7 +48,7 @@ def test_7node_tree(self, tmp_path: Path) -> None: pos = np.array([[50,50,50],[40,40,40],[60,60,60],[30,30,30], [45,35,35],[65,65,65],[25,25,25]], dtype=np.float32) edges = np.array([[1,0],[2,0],[3,1],[4,1],[5,2],[6,3]], dtype=np.int64) - s = write_graph(str(tmp_path / "s.zv"), pos, edges, chunk_shape=(200.,200.,200.), is_tree=True) + s = write_graph(str(tmp_path / "s.zv"), pos, edges, chunk_shape=(200.,200.,200.), kind="skeleton") r = read_graph(str(tmp_path / "s.zv")) assert r["node_count"] == 7 and r["edge_count"] == 6 @@ -57,12 +57,12 @@ def test_with_attributes(self, tmp_path: Path) -> None: edges = np.array([[1,0],[2,0]], dtype=np.int64) radius = np.array([5.0,3.0,3.0], dtype=np.float32) write_graph(str(tmp_path / "s.zv"), pos, edges, chunk_shape=(200.,200.,200.), - is_tree=True, node_attributes={"radius": radius}) + kind="skeleton", vertex_attributes={"radius": radius}) def test_cross_chunk(self, tmp_path: Path) -> None: pos = np.array([[10,50,50],[20,50,50],[30,50,50],[110,50,50],[120,50,50]], dtype=np.float32) edges = np.array([[1,0],[2,1],[3,2],[4,3]], dtype=np.int64) - s = write_graph(str(tmp_path / "s.zv"), pos, edges, chunk_shape=(100.,100.,100.), is_tree=True) + s = write_graph(str(tmp_path / "s.zv"), pos, edges, chunk_shape=(100.,100.,100.), kind="skeleton") assert s["cross_edge_count"] >= 1 r = read_graph(str(tmp_path / "s.zv")) assert r["node_count"] == 5 @@ -72,7 +72,7 @@ def test_multiple_objects(self, tmp_path: Path) -> None: edges = 
np.array([[1,0],[2,1],[4,3]], dtype=np.int64) oids = np.array([0,0,0,1,1], dtype=np.int64) s = write_graph(str(tmp_path / "g.zv"), pos, edges, chunk_shape=(100.,100.,100.), - is_tree=True, object_ids=oids) + kind="skeleton", object_ids=oids) assert s["object_count"] == 2 diff --git a/tests/test_lazy_writer.py b/tests/test_lazy_writer.py index 64156d6..0d81b65 100644 --- a/tests/test_lazy_writer.py +++ b/tests/test_lazy_writer.py @@ -50,9 +50,9 @@ async def go(): _run(go()) out = read_points(str(store), attribute_names=["normal"]) - assert "normal" in out["attributes"] + assert "normal" in out["vertex_attributes"] # Data flattens via read_points's ncols=1 path; total count matches. - assert out["attributes"]["normal"].size == 200 * 3 + assert out["vertex_attributes"]["normal"].size == 200 * 3 def test_add_attribute_sync_mirror(tmp_path): @@ -65,7 +65,7 @@ def test_add_attribute_sync_mirror(tmp_path): w.add_attribute_sync("intensity", intensities) out = read_points(str(store), attribute_names=["intensity"]) - assert out["attributes"]["intensity"].size == 120 + assert out["vertex_attributes"]["intensity"].size == 120 def test_add_attribute_length_mismatch_raises(tmp_path): diff --git a/tests/test_lines_parametric.py b/tests/test_lines_parametric.py index 85cdcba..34e751d 100644 --- a/tests/test_lines_parametric.py +++ b/tests/test_lines_parametric.py @@ -126,7 +126,7 @@ def test_line_attributes(self, tmp_path: Path) -> None: write_lines( store, endpoints, chunk_shape=(100.0, 100.0, 100.0), - line_attributes={"length": lengths}, + object_attributes={"length": lengths}, ) # Verify store was created (attribute reading tested in full integration) diff --git a/tests/test_per_object_pyramid.py b/tests/test_per_object_pyramid.py index 5fd206d..10c2133 100644 --- a/tests/test_per_object_pyramid.py +++ b/tests/test_per_object_pyramid.py @@ -36,7 +36,7 @@ read_level_metadata, read_root_metadata, ) -from zarr_vectors.lazy.store import object_levels, open_zvr +from 
zarr_vectors.lazy.store import open_zvr from zarr_vectors.multiresolution.coarsen import build_pyramid, coarsen_level from zarr_vectors.spatial.chunking import neighbouring_chunk_keys from zarr_vectors.types.polylines import write_polylines @@ -129,7 +129,7 @@ def test_monotone_oid_drop_across_levels(tmp_path): assert present_sets[2] <= present_sets[1] <= present_sets[0] # Object_levels for any surviving level-2 OID is a contiguous prefix. for oid in present_sets[2]: - visible = object_levels(zvr, oid) + visible = zvr.object_levels(oid) assert visible == list(range(max(visible) + 1)) diff --git a/tests/test_points.py b/tests/test_points.py index 598a398..e28eee0 100644 --- a/tests/test_points.py +++ b/tests/test_points.py @@ -54,7 +54,7 @@ def test_with_attributes(self, tmp_path: Path) -> None: write_points( store, positions, chunk_shape=(50.0, 50.0, 50.0), - attributes={"intensity": intensity}, + vertex_attributes={"intensity": intensity}, ) result = read_points(store, attribute_names=["intensity"]) @@ -260,7 +260,7 @@ def test_multichannel_attribute(self, tmp_path: Path) -> None: write_points( store, positions, chunk_shape=(100.0, 100.0, 100.0), - attributes={"color": color}, + vertex_attributes={"color": color}, ) result = read_points(store) diff --git a/tests/test_scaffolding.py b/tests/test_scaffolding.py index b796ac1..59e1f03 100644 --- a/tests/test_scaffolding.py +++ b/tests/test_scaffolding.py @@ -85,7 +85,7 @@ def test_simple_points_3d(self, simple_points_3d: dict) -> None: assert pos.dtype == np.float32 assert np.all(pos >= 0) and np.all(pos < 1000) - intensity = simple_points_3d["attributes"]["intensity"] + intensity = simple_points_3d["vertex_attributes"]["intensity"] assert intensity.shape == (100,) assert intensity.dtype == np.float32 diff --git a/zarr_vectors/composite.py b/zarr_vectors/composite.py index 16ddbe1..b025a7a 100644 --- a/zarr_vectors/composite.py +++ b/zarr_vectors/composite.py @@ -115,8 +115,10 @@ def add_geometry( edges: 
npt.NDArray[np.integer] | None = None, faces: npt.NDArray[np.integer] | None = None, polylines: list[npt.NDArray[np.floating]] | None = None, - attributes: dict[str, npt.NDArray] | None = None, + vertex_attributes: dict[str, npt.NDArray] | None = None, level: int = 0, + # Deprecated alias: + attributes: dict[str, npt.NDArray] | None = None, ) -> dict[str, Any]: """Add a geometry type to an existing zarr vectors store. @@ -132,7 +134,8 @@ def add_geometry( edges: ``(E, 2)`` edge array (for graphs/skeletons). faces: ``(F, L)`` face array (for meshes). polylines: List of ``(N_k, D)`` arrays (for streamlines). - attributes: Per-vertex attributes ``{name: array}``. + vertex_attributes: Per-vertex attributes ``{name: array}``. + (Spec name; replaces ``attributes``.) level: Resolution level to add to (default 0). Returns: @@ -141,6 +144,20 @@ def add_geometry( Raises: ValueError: If the geometry type requires data not provided. """ + # Back-compat: accept the legacy `attributes` kwarg. + if attributes is not None: + if vertex_attributes is not None: + raise TypeError( + "got both `attributes` and `vertex_attributes`; " + "pass only `vertex_attributes`." 
+ ) + import warnings + warnings.warn( + "`attributes` is deprecated; use `vertex_attributes`.", + DeprecationWarning, stacklevel=2, + ) + vertex_attributes = attributes + store_path = Path(store_path) sample_points: npt.NDArray | None = None if positions is not None: diff --git a/zarr_vectors/constants.py b/zarr_vectors/constants.py index a33f931..e023b31 100644 --- a/zarr_vectors/constants.py +++ b/zarr_vectors/constants.py @@ -104,11 +104,11 @@ VERTICES: str = "vertices" VERTEX_GROUP_OFFSETS: str = "vertex_group_offsets" LINKS: str = "links" -ATTRIBUTES: str = "attributes" +VERTEX_ATTRIBUTES: str = "vertex_attributes" OBJECT_INDEX: str = "object_index" OBJECT_ATTRIBUTES: str = "object_attributes" -GROUPINGS: str = "groupings" -GROUPINGS_ATTRIBUTES: str = "groupings_attributes" +GROUPS: str = "groups" +GROUP_ATTRIBUTES: str = "group_attributes" CROSS_CHUNK_LINKS: str = "cross_chunk_links" LINK_ATTRIBUTES: str = "link_attributes" CROSS_CHUNK_LINK_ATTRIBUTES: str = "cross_chunk_link_attributes" @@ -116,18 +116,18 @@ # Parametric sub-arrays PARAMETRIC_OBJECTS: str = "objects" PARAMETRIC_OBJECT_ATTRIBUTES: str = "object_attributes" -PARAMETRIC_GROUPINGS: str = "groupings" -PARAMETRIC_GROUPINGS_ATTRIBUTES: str = "groupings_attributes" +PARAMETRIC_GROUPS: str = "groups" +PARAMETRIC_GROUP_ATTRIBUTES: str = "group_attributes" ALL_ARRAY_NAMES: frozenset[str] = frozenset({ VERTICES, VERTEX_GROUP_OFFSETS, LINKS, - ATTRIBUTES, + VERTEX_ATTRIBUTES, OBJECT_INDEX, OBJECT_ATTRIBUTES, - GROUPINGS, - GROUPINGS_ATTRIBUTES, + GROUPS, + GROUP_ATTRIBUTES, CROSS_CHUNK_LINKS, LINK_ATTRIBUTES, CROSS_CHUNK_LINK_ATTRIBUTES, diff --git a/zarr_vectors/core/_batch_reader.py b/zarr_vectors/core/_batch_reader.py new file mode 100644 index 0000000..9293074 --- /dev/null +++ b/zarr_vectors/core/_batch_reader.py @@ -0,0 +1,143 @@ +"""Async-batched chunk reader. + +Symmetric to :mod:`zarr_vectors.core._batch_writer`. 
Where the writer +turns ``N`` serial ``store.set`` PUTs into one :func:`asyncio.gather`, +this module does the same for ``store.get`` reads — collapsing the +round-trip cost of an ``N``-chunk read-all from ``O(N)`` to ``O(1)`` +against high-latency object stores. + +The single entry point is :func:`flush_prefetch`, driven by the +:meth:`zarr_vectors.core.group.Group.batched_reads` context manager. +The caller supplies a ``plan`` of ``(array_name, [chunk_keys, ...])`` +pairs; this module loads every requested chunk in one async gather and +returns a ``{(array_name, chunk_key): bytes}`` cache that the Group +serves :meth:`read_bytes` calls from while the context is active. + +The reads go through zarr's :class:`AsyncArray.getitem` so any codec +pipeline (BytesCodec only, BytesCodec+Zstd, future additions) is +honoured per-chunk. Icechunk-backed stores fall back to the +synchronous :meth:`read_bytes` path on a per-chunk basis because +icechunk tracks arrays as session-managed entities and the async +gather pattern bypasses that contract. +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +import numpy as np +import zarr +from zarr.core.sync import sync + + +def _is_icechunk_store(store: Any) -> bool: + """Return True when ``store`` is an icechunk-backed Store. + + Detection is by class name to avoid importing icechunk at module + load time. Mirrors the check in + :mod:`zarr_vectors.core._batch_writer`. 
+ """ + cls = type(store) + return cls.__name__ == "IcechunkStore" or cls.__module__.startswith("icechunk") + + +async def _async_get_chunk( + async_group: Any, + path: str, +) -> bytes: + """Resolve ``path`` to an inner array and return its bytes.""" + try: + node = await async_group.getitem(path) + except KeyError: + return None # signals "missing"; caller skips this entry + if not isinstance(node, zarr.AsyncArray): + return None + if node.shape[0] == 0: + return b"" + data = await node.getitem(slice(None)) + return bytes(np.asarray(data).tobytes()) + + +async def _gather_chunks( + async_group: Any, + paths: list[str], +) -> list[bytes | None]: + """``await asyncio.gather`` every ``_async_get_chunk(path)`` in one shot.""" + return await asyncio.gather( + *(_async_get_chunk(async_group, p) for p in paths) + ) + + +def flush_prefetch( + zarr_group: zarr.Group, + plan: list[tuple[str, list[str]]], +) -> dict[tuple[str, str], bytes]: + """Prefetch every chunk in ``plan`` and return a flat cache. + + ``plan`` is a list of ``(array_name, [chunk_keys, ...])`` tuples. + Each ``chunk_key`` is the standard chunk_key string (e.g. + ``"0.1.2"``) used by :meth:`Group.read_bytes`. Returns a dict keyed + by ``(array_name, chunk_key)`` whose values are the decoded chunk + bytes; missing chunks are omitted (the caller falls through to the + sync ``read_bytes`` path on a cache miss). + + For icechunk-backed stores, falls back to serial sync reads via + :func:`_sync_fallback` — the async-gather pattern bypasses + icechunk's session-tracking contract. + """ + if not plan: + return {} + + if _is_icechunk_store(zarr_group.store): + return _sync_fallback(zarr_group, plan) + + # Flatten plan into (array_name, chunk_key, path) triples. Path is + # the AsyncGroup-relative key (e.g. "vertices/0.1.2") that resolves + # to the inner per-chunk array. 
+ flat: list[tuple[str, str, str]] = [] + for array_name, chunk_keys in plan: + for chunk_key in chunk_keys: + flat.append((array_name, chunk_key, f"{array_name}/{chunk_key}")) + + if not flat: + return {} + + paths = [p for _, _, p in flat] + results = sync(_gather_chunks(zarr_group._async_group, paths)) + + cache: dict[tuple[str, str], bytes] = {} + for (array_name, chunk_key, _), data in zip(flat, results): + if data is not None: + cache[(array_name, chunk_key)] = data + return cache + + +def _sync_fallback( + zarr_group: zarr.Group, + plan: list[tuple[str, list[str]]], +) -> dict[tuple[str, str], bytes]: + """Serial-read fallback for icechunk and other stores that don't + play well with the async-gather pattern. + + Walks the plan one entry at a time using sync zarr access, returning + the same dict shape :func:`flush_prefetch` does. + """ + cache: dict[tuple[str, str], bytes] = {} + for array_name, chunk_keys in plan: + try: + arr_group = zarr_group[array_name] + except KeyError: + continue + for chunk_key in chunk_keys: + try: + arr = arr_group[chunk_key] + except KeyError: + continue + if not isinstance(arr, zarr.Array): + continue + if arr.shape[0] == 0: + cache[(array_name, chunk_key)] = b"" + continue + cache[(array_name, chunk_key)] = bytes(np.asarray(arr[:]).tobytes()) + return cache diff --git a/zarr_vectors/core/arrays.py b/zarr_vectors/core/arrays.py index b26c2a3..ffecf28 100644 --- a/zarr_vectors/core/arrays.py +++ b/zarr_vectors/core/arrays.py @@ -18,15 +18,15 @@ import numpy.typing as npt from zarr_vectors.constants import ( - ATTRIBUTES, CROSS_CHUNK_LINK_ATTRIBUTES, CROSS_CHUNK_LINKS, - GROUPINGS, - GROUPINGS_ATTRIBUTES, + GROUP_ATTRIBUTES, + GROUPS, LINK_ATTRIBUTES, LINKS, OBJECT_ATTRIBUTES, OBJECT_INDEX, + VERTEX_ATTRIBUTES, VERTEX_GROUP_OFFSETS, VERTICES, ) @@ -175,7 +175,7 @@ def create_attribute_array( dtype: Numpy dtype string. channel_names: Optional list of channel names. 
""" - full_name = f"{ATTRIBUTES}/{name}" + full_name = f"{VERTEX_ATTRIBUTES}/{name}" _ensure_array_dir(level_group, full_name) meta: dict[str, Any] = { "zv_array": "attribute", @@ -221,9 +221,9 @@ def create_object_attributes_array( def create_groupings_array(level_group: FsGroup) -> None: """Create the ``groupings/`` array.""" - _ensure_array_dir(level_group, GROUPINGS) - level_group.write_array_meta(GROUPINGS, { - "zv_array": "groupings", + _ensure_array_dir(level_group, GROUPS) + level_group.write_array_meta(GROUPS, { + "zv_array": "groups", }) @@ -234,7 +234,7 @@ def create_groupings_attributes_array( num_channels: int = 1, ) -> None: """Create a groupings attribute array ``groupings_attributes//``.""" - full_name = f"{GROUPINGS_ATTRIBUTES}/{name}" + full_name = f"{GROUP_ATTRIBUTES}/{name}" _ensure_array_dir(level_group, full_name) level_group.write_array_meta(full_name, { "zv_array": "groupings_attribute", @@ -426,7 +426,7 @@ def write_chunk_attributes( """ dtype = np.dtype(dtype) key = _chunk_key(chunk_coords) - full_name = f"{ATTRIBUTES}/{attr_name}" + full_name = f"{VERTEX_ATTRIBUTES}/{attr_name}" raw_bytes, _ = encode_vertex_groups(attr_groups, dtype) level_group.write_bytes(full_name, key, raw_bytes) @@ -585,10 +585,10 @@ def write_groupings( group_list.append(np.array(members, dtype=np.int64)) raw_bytes, offsets = encode_ragged_ints(group_list, dtype=np.dtype(np.int64)) - level_group.write_bytes(GROUPINGS, "data", raw_bytes) - level_group.write_bytes(GROUPINGS, "offsets", offsets.tobytes()) - level_group.write_array_meta(GROUPINGS, { - "zv_array": "groupings", + level_group.write_bytes(GROUPS, "data", raw_bytes) + level_group.write_bytes(GROUPS, "offsets", offsets.tobytes()) + level_group.write_array_meta(GROUPS, { + "zv_array": "groups", "num_groups": max_gid + 1, }) @@ -605,7 +605,7 @@ def write_groupings_attributes( attr_name: Attribute name. data: ``(G,)`` or ``(G, C)`` array. 
""" - full_name = f"{GROUPINGS_ATTRIBUTES}/{attr_name}" + full_name = f"{GROUP_ATTRIBUTES}/{attr_name}" _ensure_array_dir(level_group, full_name) level_group.write_bytes(full_name, "data", data.tobytes()) level_group.write_array_meta(full_name, { @@ -894,7 +894,7 @@ def read_chunk_attributes( """ key = _chunk_key(chunk_coords) dtype = np.dtype(dtype) - full_name = f"{ATTRIBUTES}/{attr_name}" + full_name = f"{VERTEX_ATTRIBUTES}/{attr_name}" if vert_dtype is None: try: @@ -1099,7 +1099,7 @@ def read_group_object_ids( Returns: List of object ID integers. """ - meta = level_group.read_array_meta(GROUPINGS) + meta = level_group.read_array_meta(GROUPS) num_groups = meta["num_groups"] if group_id < 0 or group_id >= num_groups: @@ -1107,9 +1107,9 @@ def read_group_object_ids( f"Group ID {group_id} out of range [0, {num_groups})" ) - raw = level_group.read_bytes(GROUPINGS, "data") + raw = level_group.read_bytes(GROUPS, "data") offsets = np.frombuffer( - level_group.read_bytes(GROUPINGS, "offsets"), + level_group.read_bytes(GROUPS, "offsets"), dtype=np.int64, ) @@ -1125,11 +1125,11 @@ def read_all_groupings( Returns: List indexed by group_id, each a list of object_id ints. 
""" - meta = level_group.read_array_meta(GROUPINGS) + meta = level_group.read_array_meta(GROUPS) - raw = level_group.read_bytes(GROUPINGS, "data") + raw = level_group.read_bytes(GROUPS, "data") offsets = np.frombuffer( - level_group.read_bytes(GROUPINGS, "offsets"), + level_group.read_bytes(GROUPS, "offsets"), dtype=np.int64, ) @@ -1143,7 +1143,7 @@ def read_groupings_attributes( dtype: np.dtype | str | None = None, ) -> npt.NDArray: """Read dense G×C groupings attribute data.""" - full_name = f"{GROUPINGS_ATTRIBUTES}/{attr_name}" + full_name = f"{GROUP_ATTRIBUTES}/{attr_name}" meta = level_group.read_array_meta(full_name) if dtype is None: dtype = np.dtype(meta["dtype"]) diff --git a/zarr_vectors/core/group.py b/zarr_vectors/core/group.py index 8662175..ab8d31e 100644 --- a/zarr_vectors/core/group.py +++ b/zarr_vectors/core/group.py @@ -50,6 +50,7 @@ class Group: # without needing to remember to set the attributes. _pending_writes: list[tuple[str, str, bytes]] | None = None _pending_array_metas: dict[str, dict[str, Any]] | None = None + _prefetch_cache: dict[tuple[str, str], bytes] | None = None def __init__(self, zarr_group: zarr.Group) -> None: self._zarr = zarr_group @@ -60,12 +61,17 @@ def __init__(self, zarr_group: zarr.Group) -> None: # against the underlying Store on context exit. self._pending_writes = None self._pending_array_metas = None + # Prefetch cache activated by :meth:`batched_reads`. When set, + # :meth:`read_bytes` looks here first before hitting the store. 
+ self._prefetch_cache = None @classmethod def _from_zarr(cls, zarr_group: zarr.Group) -> Group: instance = cls.__new__(cls) instance._zarr = zarr_group instance._pending_writes = None + instance._pending_array_metas = None + instance._prefetch_cache = None return instance @classmethod @@ -143,6 +149,54 @@ def write_bytes(self, array_name: str, chunk_key: str, data: bytes) -> None: ) a[:] = np.frombuffer(data, dtype="uint8") + @contextmanager + def batched_reads( + self, + plan: list[tuple[str, list[str]]], + ) -> Iterator[None]: + """Prefetch every chunk in ``plan`` via one + :func:`asyncio.gather` and serve subsequent :meth:`read_bytes` + calls from the resulting in-memory cache. + + ``plan`` is a list of ``(array_name, [chunk_keys, ...])`` pairs + — typically ``(VERTICES, list_chunk_keys(group, VERTICES))`` + plus the parallel ``vertex_group_offsets`` and per-attribute + arrays. On entry every (array_name, chunk_key) pair is fetched + in a single async gather; on exit the cache is dropped. + + Reads for a key NOT in the plan fall through to the sync + :meth:`read_bytes` path, so under-specifying the plan + degrades performance gracefully (still correct). + + Use for chunk-heavy read loops against high-latency object + stores (GCS / S3 / Azure). Each per-chunk GET becomes one async + task instead of one serial sync call, so the total wall time + approaches one round-trip rather than ``N`` round-trips. + + Nesting is not supported and raises :class:`StoreError`. + Writes inside the block are unaffected. + + Example:: + + chunk_keys = list_chunk_keys(level_group, VERTICES) + with level_group.batched_reads([ + (VERTICES, chunk_keys), + (VERTEX_GROUP_OFFSETS, chunk_keys), + *((f"{VERTEX_ATTRIBUTES}/{a}", chunk_keys) for a in attrs), + ]): + for cc in chunk_keys: + vgs = read_chunk_vertices(level_group, cc, ...) 
+ """ + if self._prefetch_cache is not None: + raise StoreError("batched_reads() does not support nesting") + from zarr_vectors.core._batch_reader import flush_prefetch + + self._prefetch_cache = flush_prefetch(self._zarr, plan) + try: + yield + finally: + self._prefetch_cache = None + @contextmanager def batched_writes(self) -> Iterator[None]: """Defer every :meth:`write_bytes` and :meth:`write_array_meta` @@ -195,6 +249,15 @@ def batched_writes(self) -> Iterator[None]: self._pending_array_metas = None def read_bytes(self, array_name: str, chunk_key: str) -> bytes: + # Batched-read mode (see :meth:`batched_reads`): serve from the + # prefetch cache when possible. Cache misses fall through to + # the sync path below — useful when a caller under-specifies + # the plan or hits an array the prefetch skipped. + if self._prefetch_cache is not None: + cached = self._prefetch_cache.get((array_name, chunk_key)) + if cached is not None: + return cached + path = f"{array_name}/{chunk_key}" try: arr = self._zarr[path] diff --git a/zarr_vectors/core/multiscale.py b/zarr_vectors/core/multiscale.py index 46d813a..122d924 100644 --- a/zarr_vectors/core/multiscale.py +++ b/zarr_vectors/core/multiscale.py @@ -35,6 +35,96 @@ from zarr_vectors.exceptions import MetadataError +def upsert_level_transform( + root: FsGroup, + level: int, + *, + scale: list[float], + translation: list[float] | None = None, +) -> None: + """Upsert one level's entry in the NGFF ``multiscales[0].datasets`` list. + + This is the **authoritative** writer for per-level spatial transforms + under the 0.5+ format: ``bin_ratio`` lives as the ``scale`` factor + and ``bin_shape / 2`` lives as the ``translation`` offset. Callers + in :mod:`zarr_vectors.core.store` invoke this inside + :func:`create_resolution_level` and :func:`add_resolution_level` + after writing the level's other attrs. + + Args: + root: Root store group. + level: Resolution level index (``0`` for full resolution). 
+ scale: Per-axis scale factor (= ``bin_ratio`` for that level). + translation: Optional per-axis translation offset (= ``bin_shape / 2``). + When all entries are zero, the translation transform is omitted. + """ + attrs = root.attrs.to_dict() + ms = attrs.get("multiscales") or [] + if not ms or not isinstance(ms, list): + # No NGFF block yet — nothing to upsert into. This shouldn't + # happen for stores created by the current ``create_store`` (it + # seeds the block at create time), but handle it gracefully. + return + ms_entry = dict(ms[0]) + datasets = list(ms_entry.get("datasets") or []) + + transforms: list[dict[str, Any]] = [ + {"type": "scale", "scale": [float(s) for s in scale]}, + ] + if translation is not None and any(t != 0 for t in translation): + transforms.append({ + "type": "translation", + "translation": [float(t) for t in translation], + }) + + path = f"{RESOLUTION_PREFIX}{level}" + new_entry = {"path": path, "coordinateTransformations": transforms} + found = False + for i, ds in enumerate(datasets): + if ds.get("path") == path: + datasets[i] = new_entry + found = True + break + if not found: + datasets.append(new_entry) + # Keep datasets sorted by level for deterministic on-disk output. + datasets.sort(key=lambda d: int(d.get("path", "0").lstrip(RESOLUTION_PREFIX) or 0)) + + ms_entry["datasets"] = datasets + attrs["multiscales"] = [ms_entry] + list(ms[1:]) + root.attrs.update(attrs) + + +def read_level_transform( + root: FsGroup, + level: int, +) -> tuple[list[float] | None, list[float] | None]: + """Read ``(scale, translation)`` for a level from the NGFF block. + + Returns ``(None, None)`` when the level has no entry in the NGFF + ``multiscales[0].datasets`` list — callers should fall back to the + legacy ``zarr_vectors_level.bin_ratio`` / ``bin_shape`` fields on + the level's own attrs. 
+ """ + ms = read_multiscale_metadata(root) + if ms is None: + return None, None + path = f"{RESOLUTION_PREFIX}{level}" + for ms_entry in ms: + for ds in ms_entry.get("datasets", []): + if ds.get("path") != path: + continue + scale: list[float] | None = None + translation: list[float] | None = None + for t in ds.get("coordinateTransformations", []): + if t.get("type") == "scale": + scale = [float(s) for s in t.get("scale") or []] + elif t.get("type") == "translation": + translation = [float(s) for s in t.get("translation") or []] + return scale, translation + return None, None + + def write_multiscale_metadata(root: FsGroup) -> dict[str, Any]: """Generate and write OME-Zarr multiscale metadata to root .zattrs. diff --git a/zarr_vectors/core/store.py b/zarr_vectors/core/store.py index e239b09..6edae14 100644 --- a/zarr_vectors/core/store.py +++ b/zarr_vectors/core/store.py @@ -468,13 +468,13 @@ def create_store( ) # 0/ + empty vertices pair — the "warm" payload. - level0 = root.create_group(f"{RESOLUTION_PREFIX}0") - level0.attrs.update( + level0 = create_resolution_level( + root, 0, LevelMetadata( level=0, vertex_count=0, arrays_present=[VERTICES], - ).to_dict() + ), ) # Defer import: arrays.py imports from store.py (FsGroup). from zarr_vectors.core.arrays import create_vertices_array @@ -1129,11 +1129,60 @@ def create_resolution_level( level: int, level_metadata: LevelMetadata, ) -> Group: - """Create a new resolution level group within the store.""" + """Create a new resolution level group within the store. + + The level's spatial transform (``bin_ratio`` → NGFF ``scale``, + ``bin_shape`` → NGFF ``translation = bin_shape / 2``) is written to + the NGFF ``multiscales[0].datasets`` block via + :func:`zarr_vectors.core.multiscale.upsert_level_transform` so the + NGFF block stays the **single source of truth** for spatial + geometry. 
``bin_shape`` and ``bin_ratio`` are intentionally + omitted from the level's own ``zarr_vectors_level`` attrs; readers + derive them from the NGFF block (see :func:`read_level_metadata`). + """ level_metadata.validate() group_name = f"{RESOLUTION_PREFIX}{level}" level_group = root.require_group(group_name) - level_group.attrs.update(level_metadata.to_dict()) + payload = level_metadata.to_dict() + payload["zarr_vectors_level"].pop("bin_shape", None) + payload["zarr_vectors_level"].pop("bin_ratio", None) + level_group.attrs.update(payload) + + # Mirror the spatial transform into the NGFF block. + from zarr_vectors.core.multiscale import upsert_level_transform + if level == 0: + # Level 0: scale = 1.0 per-axis; translation seeded by base_bin/2 if known. + try: + root_meta = read_root_metadata(root) + ndim = root_meta.sid_ndim + base_bin = root_meta.effective_bin_shape + scale = [1.0] * ndim + translation = [bs / 2.0 for bs in base_bin] + except Exception: + scale = [1.0] + translation = None + upsert_level_transform( + root, level, scale=scale, translation=translation, + ) + elif level_metadata.bin_ratio is not None or level_metadata.bin_shape is not None: + try: + root_meta = read_root_metadata(root) + ndim = root_meta.sid_ndim + except Exception: + ndim = ( + len(level_metadata.bin_ratio) if level_metadata.bin_ratio + else (len(level_metadata.bin_shape) if level_metadata.bin_shape else 1) + ) + if level_metadata.bin_ratio is not None: + scale = [float(r) for r in level_metadata.bin_ratio] + else: + scale = [1.0] * ndim + translation = ( + [bs / 2.0 for bs in level_metadata.bin_shape] + if level_metadata.bin_shape is not None + else None + ) + upsert_level_transform(root, level, scale=scale, translation=translation) return level_group @@ -1184,12 +1233,31 @@ def read_root_metadata(root: Group) -> RootMetadata: def read_level_metadata(root: Group, level: int) -> LevelMetadata: """Read and parse level metadata. 
+ ``bin_shape`` and ``bin_ratio`` are read from the NGFF + ``multiscales[0].datasets`` block (the authoritative source under + 0.5+). Legacy stores that still carry them under + ``zarr_vectors_level`` are honoured as a fallback. + Raises: StoreError: If the level does not exist. MetadataError: If metadata is malformed. """ level_group = get_resolution_level(root, level) - return LevelMetadata.from_dict(level_group.attrs.to_dict()) + lm = LevelMetadata.from_dict(level_group.attrs.to_dict()) + + # NGFF takes precedence over the legacy fields when present. + from zarr_vectors.core.multiscale import read_level_transform + scale, translation = read_level_transform(root, level) + if scale is not None: + if level == 0: + # Level 0 has scale == 1.0 by convention; bin_ratio stays None. + lm.bin_ratio = None + lm.bin_shape = None + else: + lm.bin_ratio = tuple(int(round(s)) for s in scale) + if translation is not None: + lm.bin_shape = tuple(2.0 * t for t in translation) + return lm def write_parametric_types( @@ -1292,10 +1360,10 @@ def add_resolution_level( parent_level=parent_level, ) level_meta.validate() - - level_group = root.require_group(group_name) - level_group.attrs.update(level_meta.to_dict()) - return level_group + # Route through create_resolution_level so the NGFF block is updated + # via :func:`zarr_vectors.core.multiscale.upsert_level_transform` in + # the same way as a writer-driven create. 
+ return create_resolution_level(root, level_index, level_meta) def remove_resolution_level(root: Group, level_index: int) -> None: diff --git a/zarr_vectors/encoding/compression.py b/zarr_vectors/encoding/compression.py index 4127b07..0a1f8be 100644 --- a/zarr_vectors/encoding/compression.py +++ b/zarr_vectors/encoding/compression.py @@ -8,15 +8,15 @@ from __future__ import annotations from zarr_vectors.constants import ( - ATTRIBUTES, CROSS_CHUNK_LINKS, DEFAULT_COMPRESSOR_OPTS, - GROUPINGS, - GROUPINGS_ATTRIBUTES, + GROUP_ATTRIBUTES, + GROUPS, LINK_ATTRIBUTES, LINKS, OBJECT_ATTRIBUTES, OBJECT_INDEX, + VERTEX_ATTRIBUTES, VERTEX_GROUP_OFFSETS, VERTICES, ) @@ -27,7 +27,7 @@ def get_default_compressor(array_type: str) -> dict[str, object]: Args: array_type: One of the canonical array name constants (e.g. - ``"vertices"``, ``"links"``, ``"attributes"``). + ``"vertices"``, ``"links"``, ``"vertex_attributes"``). Returns: Dict with compressor settings suitable for ``numcodecs`` or @@ -52,7 +52,7 @@ def get_default_compressor(array_type: str) -> dict[str, object]: } # Vertex positions and attributes — byte shuffle works well on floats - if array_type in (VERTICES, ATTRIBUTES): + if array_type in (VERTICES, VERTEX_ATTRIBUTES): return { "id": "blosc", "cname": "zstd", @@ -60,8 +60,8 @@ def get_default_compressor(array_type: str) -> dict[str, object]: "shuffle": 1, } - # Dense arrays (object attributes, grouping attributes) - if array_type in (OBJECT_ATTRIBUTES, GROUPINGS_ATTRIBUTES): + # Dense arrays (object attributes, group attributes) + if array_type in (OBJECT_ATTRIBUTES, GROUP_ATTRIBUTES): return { "id": "blosc", "cname": "zstd", diff --git a/zarr_vectors/lazy/arrays.py b/zarr_vectors/lazy/arrays.py index 7f8caa6..e9d7f18 100644 --- a/zarr_vectors/lazy/arrays.py +++ b/zarr_vectors/lazy/arrays.py @@ -331,9 +331,9 @@ def _read_attribute_chunk( pass # Fallback: read raw bytes and decode as flat array - from zarr_vectors.constants import ATTRIBUTES + from 
zarr_vectors.constants import VERTEX_ATTRIBUTES key = f"{chunk_coords[0]}" + "".join(f".{c}" for c in chunk_coords[1:]) - raw = group.read_bytes(f"{ATTRIBUTES}/{attr_name}", key) + raw = group.read_bytes(f"{VERTEX_ATTRIBUTES}/{attr_name}", key) if len(raw) == 0: return np.array([], dtype=dtype) arr = np.frombuffer(raw, dtype=dtype) diff --git a/zarr_vectors/lazy/store.py b/zarr_vectors/lazy/store.py index 054f042..3fb64b0 100644 --- a/zarr_vectors/lazy/store.py +++ b/zarr_vectors/lazy/store.py @@ -129,10 +129,6 @@ def __getitem__(self, level: int) -> ZVRLevel: ) return self._levels_cache[level] - def level(self, index: int) -> ZVRLevel: - """Alias for ``self[index]``.""" - return self[index] - # --------------------------------------------------------------- # Repr # --------------------------------------------------------------- @@ -220,9 +216,3 @@ def open_zvr( return ZVRStore(root, meta) -def object_levels(zvr: ZVRStore, oid: int) -> list[int]: - """Return the levels at which ``oid`` is present. - - Module-level convenience around :meth:`ZVRStore.object_levels`. - """ - return zvr.object_levels(oid) diff --git a/zarr_vectors/lazy/writer.py b/zarr_vectors/lazy/writer.py index 8d31367..d909c7b 100644 --- a/zarr_vectors/lazy/writer.py +++ b/zarr_vectors/lazy/writer.py @@ -112,7 +112,7 @@ async def add_attribute( dtype: Override the on-disk dtype (default: ``values.dtype``). """ await self._write_per_vertex_attribute( - subpath="attributes", name=name, values=values, dtype=dtype, + subpath="vertex_attributes", name=name, values=values, dtype=dtype, ) async def add_node_attribute( @@ -240,8 +240,8 @@ async def _write_one(cc: ChunkCoords) -> None: cursor += s # If the writer is targeting non-default subpath (e.g. # face attributes), patch the array name; otherwise the - # standard helper writes under "attributes/". - if subpath == "attributes": + # standard helper writes under "vertex_attributes/". 
+ if subpath == "vertex_attributes": await asyncio.to_thread( write_chunk_attributes, self._group, name, cc, attr_groups, arr.dtype, diff --git a/zarr_vectors/multiresolution/layers.py b/zarr_vectors/multiresolution/layers.py deleted file mode 100644 index a3bd497..0000000 --- a/zarr_vectors/multiresolution/layers.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Bin-ratio helpers for multi-resolution pyramids. - -The level-emission logic lives in -:mod:`zarr_vectors.multiresolution.coarsen` (factor-based interface); -this module is purely a small helpers shelf for computing bin ratios -from target volume reductions and for describing one resolution level -in the sparsity-aware data class form. -""" - -from __future__ import annotations - -import math -from dataclasses import dataclass - - -# =================================================================== -# Bin ratio helpers -# =================================================================== - -def select_bin_ratio_for_reduction( - target_reduction: float, - ndim: int = 3, -) -> tuple[int, ...]: - """Find the integer bin ratio whose volume is closest to a target reduction. - - For 3D with target 8: returns ``(2, 2, 2)`` since ``2³ = 8``. - For 3D with target 4: returns ``(2, 2, 1)`` since ``2×2×1 = 4``. - For 3D with target 27: returns ``(3, 3, 3)`` since ``3³ = 27``. - - The ratio is isotropic when possible (all dims the same). When - ``target_reduction`` is not a perfect power, the algorithm finds - the per-dim factor ``r`` such that ``r^ndim`` is closest, then - adjusts individual dimensions to match the target more precisely. - - Args: - target_reduction: Desired volume reduction factor (>= 1). - ndim: Number of spatial dimensions. - - Returns: - Integer ratio per dimension. 
- """ - if target_reduction < 1: - return tuple(1 for _ in range(ndim)) - - # Try isotropic first - r_float = target_reduction ** (1.0 / ndim) - r_floor = max(1, int(math.floor(r_float))) - r_ceil = r_floor + 1 - - vol_floor = r_floor ** ndim - vol_ceil = r_ceil ** ndim - - # Pick whichever is closer - if abs(vol_floor - target_reduction) <= abs(vol_ceil - target_reduction): - r_base = r_floor - else: - r_base = r_ceil - - # Check if isotropic is close enough - vol_iso = r_base ** ndim - if abs(vol_iso - target_reduction) / target_reduction < 0.2: - return tuple(r_base for _ in range(ndim)) - - # Non-isotropic: start with r_base per dim and bump individual dims - ratios = [r_base] * ndim - current_vol = r_base ** ndim - - if current_vol < target_reduction: - for d in range(ndim): - if current_vol >= target_reduction: - break - ratios[d] += 1 - current_vol = 1 - for r in ratios: - current_vol *= r - elif current_vol > target_reduction: - for d in range(ndim - 1, -1, -1): - if current_vol <= target_reduction: - break - if ratios[d] > 1: - ratios[d] -= 1 - current_vol = 1 - for r in ratios: - current_vol *= r - - ratios.sort(reverse=True) - return tuple(ratios) - - -def compute_level_ratios( - base_vertex_count: int, - ndim: int = 3, - *, - target_reductions: list[float] | None = None, - reduction_factor: int = 8, - max_levels: int = 10, - min_vertices: int = 8, -) -> list[tuple[int, ...]]: - """Compute bin ratios for a multi-resolution pyramid. - - If ``target_reductions`` is given (e.g. ``[8, 64, 512]``), returns - a ratio per level that achieves approximately that cumulative - vertex reduction. - - If ``target_reductions`` is None, auto-generates levels using - ``reduction_factor`` as the per-level multiplier. - - Args: - base_vertex_count: Vertex count at level 0. - ndim: Number of spatial dimensions. - target_reductions: List of cumulative reduction factors. - reduction_factor: Per-level target when auto-generating. - max_levels: Maximum levels to generate. 
- min_vertices: Stop below this count. - - Returns: - List of bin ratio tuples, one per coarsened level. - """ - if target_reductions is not None: - return [ - select_bin_ratio_for_reduction(r, ndim) - for r in target_reductions - ] - - ratios: list[tuple[int, ...]] = [] - cumulative = 1 - for _ in range(max_levels): - cumulative *= reduction_factor - est_count = max(1, int(base_vertex_count / cumulative)) - if est_count < min_vertices: - break - ratio = select_bin_ratio_for_reduction(cumulative, ndim) - ratios.append(ratio) - - return ratios - - -# =================================================================== -# Level specification dataclass -# =================================================================== - -@dataclass -class LevelReductionSpec: - """Full specification for a resolution level including object sparsity. - - Attributes: - level_index: Target level number (1, 2, ...). - bin_ratio: Integer fold-change per axis relative to level 0. - bin_shape: Supervoxel edge lengths at this level (computed). - object_sparsity: Fraction of objects to retain at this level. - expected_vertex_reduction: From binning alone (product of bin_ratio). - expected_object_reduction: From sparsity alone (1 / sparsity). - expected_volume_reduction: vertex × object reduction. - """ - - level_index: int - bin_ratio: tuple[int, ...] - bin_shape: tuple[float, ...] | None = None - object_sparsity: float = 1.0 - expected_vertex_reduction: float = 1.0 - expected_object_reduction: float = 1.0 - expected_volume_reduction: float = 1.0 - - -def auto_plan_sparsity( - target_volume_reduction: float, - bin_ratio: tuple[int, ...], - ndim: int = 3, -) -> float: - """Compute the object sparsity needed to hit a target volume reduction. - - Total volume reduction = vertex_reduction × object_reduction. - Vertex reduction = product(bin_ratio). - Object reduction = 1 / sparsity. - - So sparsity = vertex_reduction / target_volume_reduction. 
- - Args: - target_volume_reduction: Desired total reduction (e.g. 16). - bin_ratio: Bin ratio giving the vertex reduction. - ndim: Not used directly but kept for API consistency. - - Returns: - Object sparsity in (0, 1]. Clamped to 1.0 if binning alone - already exceeds the target. - """ - vertex_reduction = 1.0 - for r in bin_ratio: - vertex_reduction *= r - - if vertex_reduction >= target_volume_reduction: - return 1.0 - - sparsity = vertex_reduction / target_volume_reduction - return max(sparsity, 1e-6) diff --git a/zarr_vectors/multiresolution/strategies/points.py b/zarr_vectors/multiresolution/strategies/points.py index 41f51ad..e7f2c7e 100644 --- a/zarr_vectors/multiresolution/strategies/points.py +++ b/zarr_vectors/multiresolution/strategies/points.py @@ -105,7 +105,7 @@ def coarsen_points( out: dict[str, Any] = { "positions": meta_positions, - "attributes": meta_attrs, + "vertex_attributes": meta_attrs, "children": children, "vertex_count": n_meta, "reduction_ratio": n_input / max(n_meta, 1), diff --git a/zarr_vectors/sharding/layout.py b/zarr_vectors/sharding/layout.py index 71dce87..d3a7637 100644 --- a/zarr_vectors/sharding/layout.py +++ b/zarr_vectors/sharding/layout.py @@ -1,19 +1,22 @@ -"""Shard layout definitions and codec for reading/writing sharded stores. +"""Shard layout enum and chunk-to-shard mapping. -A shard file contains multiple chunks packed sequentially, with a -JSON index mapping chunk keys to ``(offset, nbytes)`` byte ranges -within the shard file. +The actual on-disk shard format is owned by :mod:`zarr_vectors.sharding.io`, +which packs each shard's chunks into a single 1D uint8 Zarr array named +``__shard_`` under the per-array group, with the +``{chunk_key: [offset, nbytes]}`` index stored on the shard array's attrs. + +This module only declares the enum of supported layouts and the function +that maps a chunk coordinate to a shard id under each layout (Morton for +``OCTREE``, Hilbert for ``SNAKE``, Morton for ``INDEX_TABLE``). 
""" from __future__ import annotations -import json from enum import Enum -from pathlib import Path from typing import Any -from zarr_vectors.sharding.morton import morton_encode from zarr_vectors.sharding.hilbert import hilbert_encode +from zarr_vectors.sharding.morton import morton_encode from zarr_vectors.typing import ChunkCoords @@ -30,19 +33,21 @@ class ShardLayout(str, Enum): """Chunks ordered by Hilbert curve within each shard.""" INDEX_TABLE = "index_table" - """Zarr v3 shard index — arbitrary order with binary index.""" + """Arbitrary order with binary index.""" class ShardCodec: - """Manages reading and writing chunks within shard files. + """Maps chunk coordinates to shard ids under a given layout. - Each shard file packs up to ``shard_size`` chunks. The shard - assignment is determined by the layout's space-filling curve. + The on-disk packing is done by :mod:`zarr_vectors.sharding.io`; this + class only encapsulates the layout choice and the coord→shard-id + mapping. ``shard_size`` is the maximum number of chunks per shard. Args: layout: Shard layout strategy. shard_size: Maximum chunks per shard file. - ndim: Number of spatial dimensions (for encoding). + ndim: Number of spatial dimensions (kept for serialisation / + external readers; not used by ``chunk_to_shard_id``). """ def __init__( @@ -56,16 +61,8 @@ def __init__( self.ndim = ndim def chunk_to_shard_id(self, chunk_coords: ChunkCoords) -> int: - """Map a chunk coordinate to its shard number. - - Args: - chunk_coords: Spatial chunk coordinate tuple. - - Returns: - Integer shard ID. 
- """ + """Map a chunk coordinate to its shard number.""" if self.layout == ShardLayout.FLAT: - # Flat: each chunk is its own "shard" — use a unique hash return hash(chunk_coords) if self.layout == ShardLayout.OCTREE: @@ -73,203 +70,12 @@ def chunk_to_shard_id(self, chunk_coords: ChunkCoords) -> int: elif self.layout == ShardLayout.SNAKE: code = hilbert_encode(chunk_coords, order=16) elif self.layout == ShardLayout.INDEX_TABLE: - code = morton_encode(chunk_coords) # default ordering + code = morton_encode(chunk_coords) else: code = hash(chunk_coords) return code // max(self.shard_size, 1) - def shard_filename(self, shard_id: int) -> str: - """Generate shard file name.""" - return f"shard_{shard_id:06d}.bin" - - def index_filename(self, shard_id: int) -> str: - """Generate shard index file name.""" - return f"shard_{shard_id:06d}.idx" - - # --------------------------------------------------------------- - # Write - # --------------------------------------------------------------- - - def write_shard( - self, - base_dir: Path, - shard_id: int, - chunk_data: dict[str, bytes], - ) -> None: - """Write multiple chunks into a single shard file. - - For INDEX_TABLE layout, the binary index is appended to the - shard file (Zarr v3 sharding codec format). For other layouts, - a separate JSON index file is written. - - Args: - base_dir: Directory for the array. - shard_id: Shard number. - chunk_data: ``{chunk_key: raw_bytes}`` for chunks in this shard. 
- """ - base_dir.mkdir(parents=True, exist_ok=True) - - shard_path = base_dir / self.shard_filename(shard_id) - - if self.layout == ShardLayout.INDEX_TABLE: - self._write_shard_v3(base_dir, shard_id, chunk_data) - else: - self._write_shard_json(base_dir, shard_id, chunk_data) - - def _write_shard_json( - self, base_dir: Path, shard_id: int, chunk_data: dict[str, bytes], - ) -> None: - """Write shard with separate JSON index.""" - shard_path = base_dir / self.shard_filename(shard_id) - index: dict[str, list[int]] = {} - - offset = 0 - parts: list[bytes] = [] - for chunk_key in sorted(chunk_data.keys()): - data = chunk_data[chunk_key] - nbytes = len(data) - index[chunk_key] = [offset, nbytes] - parts.append(data) - offset += nbytes - - shard_path.write_bytes(b"".join(parts)) - - idx_path = base_dir / self.index_filename(shard_id) - idx_path.write_text(json.dumps(index)) - - def _write_shard_v3( - self, base_dir: Path, shard_id: int, chunk_data: dict[str, bytes], - ) -> None: - """Write shard with Zarr v3 binary index appended to the file. - - The binary index is ``N × 16`` bytes at the end of the shard - file. Each entry is ``(uint64 offset, uint64 nbytes)``. - Empty slots have ``offset = 0xFFFFFFFFFFFFFFFF``. - The chunk's position in the index is determined by a hash - of its key modulo shard_size. - - A companion JSON index is also written for fast key lookup. 
- """ - import struct - - n_slots = max(self.shard_size, len(chunk_data)) - EMPTY = 0xFFFFFFFFFFFFFFFF - - # Build slot assignments (open addressing by hash) - slot_map: dict[int, str] = {} # slot → chunk_key - for chunk_key in chunk_data: - slot = hash(chunk_key) % n_slots - while slot in slot_map: - slot = (slot + 1) % n_slots - slot_map[slot] = chunk_key - - # Write chunk data sequentially - offset = 0 - parts: list[bytes] = [] - index_entries: list[tuple[int, int]] = [(EMPTY, 0)] * n_slots - json_index: dict[str, list[int]] = {} - - for slot in sorted(slot_map.keys()): - chunk_key = slot_map[slot] - data = chunk_data[chunk_key] - nbytes = len(data) - index_entries[slot] = (offset, nbytes) - json_index[chunk_key] = [offset, nbytes] - parts.append(data) - offset += nbytes - - # Append binary index - index_bytes = b"" - for off, nb in index_entries: - index_bytes += struct.pack(" bytes: - """Read a single chunk from its shard file. - - Args: - base_dir: Array directory. - chunk_key: Chunk key string (e.g. ``"0.0.0"``). - chunk_coords: Chunk coordinate tuple. - - Returns: - Raw chunk bytes. - - Raises: - FileNotFoundError: If the shard or chunk is not found. 
- """ - shard_id = self.chunk_to_shard_id(chunk_coords) - idx_path = base_dir / self.index_filename(shard_id) - shard_path = base_dir / self.shard_filename(shard_id) - - if not idx_path.exists(): - raise FileNotFoundError( - f"Shard index not found: {idx_path}" - ) - - with open(idx_path) as f: - index = json.load(f) - - if chunk_key not in index: - raise FileNotFoundError( - f"Chunk '{chunk_key}' not in shard {shard_id}" - ) - - offset, nbytes = index[chunk_key] - with open(shard_path, "rb") as f: - f.seek(offset) - return f.read(nbytes) - - def list_chunks_in_shard( - self, - base_dir: Path, - shard_id: int, - ) -> list[str]: - """List chunk keys in a shard.""" - idx_path = base_dir / self.index_filename(shard_id) - if not idx_path.exists(): - return [] - with open(idx_path) as f: - index = json.load(f) - return sorted(index.keys()) - - def list_all_shards(self, base_dir: Path) -> list[int]: - """List all shard IDs in a directory.""" - if not base_dir.is_dir(): - return [] - shard_ids: list[int] = [] - for f in base_dir.iterdir(): - if f.name.endswith(".idx"): - try: - sid = int(f.stem.split("_")[1]) - shard_ids.append(sid) - except (ValueError, IndexError): - continue - return sorted(shard_ids) - - def list_all_chunk_keys(self, base_dir: Path) -> list[str]: - """List all chunk keys across all shards.""" - keys: list[str] = [] - for sid in self.list_all_shards(base_dir): - keys.extend(self.list_chunks_in_shard(base_dir, sid)) - return sorted(keys) - - # --------------------------------------------------------------- - # Serialisation - # --------------------------------------------------------------- - def to_dict(self) -> dict[str, Any]: return { "layout": self.layout.value, diff --git a/zarr_vectors/types/graphs.py b/zarr_vectors/types/graphs.py index e5360d2..bc20d97 100644 --- a/zarr_vectors/types/graphs.py +++ b/zarr_vectors/types/graphs.py @@ -23,11 +23,14 @@ from zarr_vectors.constants import ( CROSS_CHUNK_EXPLICIT, + CROSS_CHUNK_LINKS, GEOM_GRAPH, 
GEOM_SKELETON, + LINKS, LINKS_EXPLICIT, LINKS_IMPLICIT_BRANCHES, OBJIDX_STANDARD, + VERTEX_GROUP_OFFSETS, VERTICES, ) from zarr_vectors.core.arrays import ( @@ -104,15 +107,19 @@ def write_graph( chunk_shape: ChunkShape, bin_shape: BinShape | None = None, bounds: tuple[list[float], list[float]] | None = None, - is_tree: bool = False, - node_attributes: dict[str, npt.NDArray] | None = None, - edge_attributes: dict[str, npt.NDArray] | None = None, + kind: str = "graph", + vertex_attributes: dict[str, npt.NDArray] | None = None, + link_attributes: dict[str, npt.NDArray] | None = None, object_attributes: dict[str, npt.NDArray] | None = None, object_ids: npt.NDArray[np.integer] | None = None, dtype: str = "float32", backend: str | None = None, chunk_by_attribute: str | None = None, out_of_bounds: str = DEFAULT_OOB_POLICY, + # Deprecated aliases (will be removed): + is_tree: bool | None = None, + node_attributes: dict[str, npt.NDArray] | None = None, + edge_attributes: dict[str, npt.NDArray] | None = None, ) -> dict[str, Any]: """Write a graph or skeleton to a new zarr vectors store. @@ -121,10 +128,14 @@ def write_graph( positions: ``(N, D)`` node positions. edges: ``(M, 2)`` edge list (global vertex indices). chunk_shape: Spatial chunk size per dimension. - is_tree: If True, treat as skeleton — reorder depth-first and - use implicit sequential with branches convention. - node_attributes: Per-node attributes ``{name: (N,) or (N,C)}``. - edge_attributes: Per-edge attributes ``{name: (M,) or (M,C)}``. + kind: ``"graph"`` (default — general graph, explicit links + convention) or ``"skeleton"`` (depth-first reorder, implicit + sequential with branches convention). Replaces the boolean + ``is_tree`` kwarg. + vertex_attributes: Per-node attributes ``{name: (N,) or (N,C)}``. + (Spec name; replaces ``node_attributes``.) + link_attributes: Per-edge attributes ``{name: (M,) or (M,C)}``. + (Spec name; replaces ``edge_attributes``.) 
object_ids: ``(N,)`` array assigning nodes to objects. If None, all nodes belong to object 0. dtype: Numpy dtype for positions. @@ -132,6 +143,52 @@ def write_graph( Returns: Summary dict. """ + # Back-compat: accept the legacy kwarg names. + if is_tree is not None: + if kind != "graph": + raise TypeError( + "got both `is_tree` and `kind`; pass only `kind`." + ) + import warnings + warnings.warn( + "`is_tree` is deprecated; use `kind=\"skeleton\"` for trees, " + "`kind=\"graph\"` (default) for general graphs.", + DeprecationWarning, stacklevel=2, + ) + kind = "skeleton" if is_tree else "graph" + if node_attributes is not None: + if vertex_attributes is not None: + raise TypeError( + "got both `node_attributes` and `vertex_attributes`; " + "pass only `vertex_attributes`." + ) + import warnings + warnings.warn( + "`node_attributes` is deprecated; use `vertex_attributes`.", + DeprecationWarning, stacklevel=2, + ) + vertex_attributes = node_attributes + if edge_attributes is not None: + if link_attributes is not None: + raise TypeError( + "got both `edge_attributes` and `link_attributes`; " + "pass only `link_attributes`." + ) + import warnings + warnings.warn( + "`edge_attributes` is deprecated; use `link_attributes`.", + DeprecationWarning, stacklevel=2, + ) + link_attributes = edge_attributes + if kind not in ("graph", "skeleton"): + raise ValueError( + f"kind must be 'graph' or 'skeleton', got {kind!r}" + ) + # Internal aliases so the rest of the body stays unchanged. 
+ is_tree = kind == "skeleton" + node_attributes = vertex_attributes + edge_attributes = link_attributes + np_dtype = np.dtype(dtype) positions = np.asarray(positions, dtype=np_dtype) edges = np.asarray(edges, dtype=np.int64) @@ -395,7 +452,7 @@ def write_graph( "intra_edge_count": sum(len(e) for e in intra_edges.values()), "cross_edge_count": len(all_cross_links), "object_count": len(object_manifests), - "is_tree": is_tree, + "kind": "skeleton" if is_tree else "graph", } @@ -489,155 +546,170 @@ def read_graph( return _empty_graph_result(ndim) chunk_keys = [k for k in chunk_keys if k and k[0] == filter_bin] - # Read all vertices and build global index mapping - all_positions: list[npt.NDArray] = [] - # Map (chunk_coords, local_idx) → global output index - global_idx_map: dict[tuple[ChunkCoords, int], int] = {} - current_global = 0 - - for chunk_coords in chunk_keys: - try: - groups = read_chunk_vertices( - level_group, chunk_coords, dtype=dtype, ndim=ndim - ) - except ArrayError: - continue + # Prefetch every chunk (vertices, offsets, edges) and the cross-chunk + # edges in one async gather. Subsequent ``read_bytes`` calls below + # hit the cache instead of paying one round-trip per chunk. 
+ _chunk_key_strs = [".".join(str(c) for c in cc) for cc in chunk_keys] + _prefetch_plan: list[tuple[str, list[str]]] = [ + (VERTICES, _chunk_key_strs), + (VERTEX_GROUP_OFFSETS, _chunk_key_strs), + (f"{LINKS}/0", _chunk_key_strs), + (f"{CROSS_CHUNK_LINKS}/0", ["data"]), + ] + _batched_reads_cm = level_group.batched_reads(_prefetch_plan) + _batched_reads_cm.__enter__() + try: + # Read all vertices and build global index mapping + all_positions: list[npt.NDArray] = [] + # Map (chunk_coords, local_idx) → global output index + global_idx_map: dict[tuple[ChunkCoords, int], int] = {} + current_global = 0 - for vg in groups: - for local_i in range(len(vg)): - global_idx_map[(chunk_coords, current_global + local_i)] = ( - current_global + local_i + for chunk_coords in chunk_keys: + try: + groups = read_chunk_vertices( + level_group, chunk_coords, dtype=dtype, ndim=ndim ) - all_positions.append(vg) - current_global += len(vg) + except ArrayError: + continue - if not all_positions: - return _empty_graph_result(ndim) + for vg in groups: + for local_i in range(len(vg)): + global_idx_map[(chunk_coords, current_global + local_i)] = ( + current_global + local_i + ) + all_positions.append(vg) + current_global += len(vg) - positions_out = np.concatenate(all_positions, axis=0) + if not all_positions: + return _empty_graph_result(ndim) - # Read intra-chunk edges - all_edges: list[npt.NDArray] = [] - offset = 0 - for chunk_coords in chunk_keys: - try: - groups = read_chunk_vertices( - level_group, chunk_coords, dtype=dtype, ndim=ndim - ) - except ArrayError: - groups = [] + positions_out = np.concatenate(all_positions, axis=0) - try: - link_groups = read_chunk_links( - level_group, chunk_coords, link_width=link_width, delta=0, - ) - except ArrayError: - link_groups = [] - - # Compute offset: sum of all vertex group sizes up to this chunk - chunk_offset = 0 - for prev_cc in chunk_keys: - if prev_cc == chunk_coords: - break + # Read intra-chunk edges + all_edges: list[npt.NDArray] = [] 
+ offset = 0 + for chunk_coords in chunk_keys: try: - prev_groups = read_chunk_vertices( - level_group, prev_cc, dtype=dtype, ndim=ndim + groups = read_chunk_vertices( + level_group, chunk_coords, dtype=dtype, ndim=ndim ) - chunk_offset += sum(len(g) for g in prev_groups) except ArrayError: - pass + groups = [] - for lg in link_groups: - if len(lg) > 0: - remapped = lg.copy() - remapped[:, 0] += chunk_offset - remapped[:, 1] += chunk_offset - all_edges.append(remapped) - - offset = chunk_offset + sum(len(g) for g in groups) - - # Read cross-chunk edges (delta=0; cross-pyramid-level edges live - # under delta != 0 and are not part of a single-level read). - try: - ccl = read_cross_chunk_links(level_group, delta=0) - # Build chunk→offset map - chunk_offsets: dict[ChunkCoords, int] = {} - running = 0 - for cc in chunk_keys: - chunk_offsets[cc] = running try: - grps = read_chunk_vertices( - level_group, cc, dtype=dtype, ndim=ndim + link_groups = read_chunk_links( + level_group, chunk_coords, link_width=link_width, delta=0, ) - running += sum(len(g) for g in grps) except ArrayError: - pass - - for (chunk_a, vi_a), (chunk_b, vi_b) in ccl: - if chunk_a in chunk_offsets and chunk_b in chunk_offsets: - ga = chunk_offsets[chunk_a] + vi_a - gb = chunk_offsets[chunk_b] + vi_b - all_edges.append(np.array([[ga, gb]], dtype=np.int64)) - except (ArrayError, Exception): - pass - - # For skeletons: reconstruct full edge set from implicit sequential + branch links - if is_tree: - total_nodes = len(positions_out) - # Start with implicit parents: parent[i] = i-1, parent[0] = -1 - parent_arr = np.arange(-1, total_nodes - 1, dtype=np.int64) - # Override with explicit branch links (stored in all_edges) - for edge_block in all_edges: - for e in edge_block: - child, par = int(e[0]), int(e[1]) - if 0 <= child < total_nodes: - parent_arr[child] = par - # Build edge list from parent array - all_edges = [] - mask = parent_arr >= 0 - children_idx = np.flatnonzero(mask) - if len(children_idx) > 
0: - tree_edges = np.stack( - [children_idx, parent_arr[children_idx]], axis=1 - ).astype(np.int64) - all_edges.append(tree_edges) - - if all_edges: - edges_out = np.concatenate(all_edges, axis=0) - else: - edges_out = np.zeros((0, 2), dtype=np.int64) - - # Filter by bbox on positions - if bbox is not None: - bbox_min, bbox_max = np.asarray(bbox[0]), np.asarray(bbox[1]) - node_mask = np.all( - (positions_out >= bbox_min) & (positions_out <= bbox_max), - axis=1, - ) - if not np.all(node_mask): - keep_indices = np.flatnonzero(node_mask) - keep_set = set(keep_indices.tolist()) - positions_out = positions_out[keep_indices] - - # Remap edges - old_to_new = {int(old): new for new, old in enumerate(keep_indices)} - filtered_edges: list[list[int]] = [] - for e in edges_out: - s, d = int(e[0]), int(e[1]) - if s in old_to_new and d in old_to_new: - filtered_edges.append([old_to_new[s], old_to_new[d]]) - edges_out = ( - np.array(filtered_edges, dtype=np.int64) - if filtered_edges - else np.zeros((0, 2), dtype=np.int64) + link_groups = [] + + # Compute offset: sum of all vertex group sizes up to this chunk + chunk_offset = 0 + for prev_cc in chunk_keys: + if prev_cc == chunk_coords: + break + try: + prev_groups = read_chunk_vertices( + level_group, prev_cc, dtype=dtype, ndim=ndim + ) + chunk_offset += sum(len(g) for g in prev_groups) + except ArrayError: + pass + + for lg in link_groups: + if len(lg) > 0: + remapped = lg.copy() + remapped[:, 0] += chunk_offset + remapped[:, 1] += chunk_offset + all_edges.append(remapped) + + offset = chunk_offset + sum(len(g) for g in groups) + + # Read cross-chunk edges (delta=0; cross-pyramid-level edges live + # under delta != 0 and are not part of a single-level read). 
+ try: + ccl = read_cross_chunk_links(level_group, delta=0) + # Build chunk→offset map + chunk_offsets: dict[ChunkCoords, int] = {} + running = 0 + for cc in chunk_keys: + chunk_offsets[cc] = running + try: + grps = read_chunk_vertices( + level_group, cc, dtype=dtype, ndim=ndim + ) + running += sum(len(g) for g in grps) + except ArrayError: + pass + + for (chunk_a, vi_a), (chunk_b, vi_b) in ccl: + if chunk_a in chunk_offsets and chunk_b in chunk_offsets: + ga = chunk_offsets[chunk_a] + vi_a + gb = chunk_offsets[chunk_b] + vi_b + all_edges.append(np.array([[ga, gb]], dtype=np.int64)) + except (ArrayError, Exception): + pass + + # For skeletons: reconstruct full edge set from implicit sequential + branch links + if is_tree: + total_nodes = len(positions_out) + # Start with implicit parents: parent[i] = i-1, parent[0] = -1 + parent_arr = np.arange(-1, total_nodes - 1, dtype=np.int64) + # Override with explicit branch links (stored in all_edges) + for edge_block in all_edges: + for e in edge_block: + child, par = int(e[0]), int(e[1]) + if 0 <= child < total_nodes: + parent_arr[child] = par + # Build edge list from parent array + all_edges = [] + mask = parent_arr >= 0 + children_idx = np.flatnonzero(mask) + if len(children_idx) > 0: + tree_edges = np.stack( + [children_idx, parent_arr[children_idx]], axis=1 + ).astype(np.int64) + all_edges.append(tree_edges) + + if all_edges: + edges_out = np.concatenate(all_edges, axis=0) + else: + edges_out = np.zeros((0, 2), dtype=np.int64) + + # Filter by bbox on positions + if bbox is not None: + bbox_min, bbox_max = np.asarray(bbox[0]), np.asarray(bbox[1]) + node_mask = np.all( + (positions_out >= bbox_min) & (positions_out <= bbox_max), + axis=1, ) + if not np.all(node_mask): + keep_indices = np.flatnonzero(node_mask) + keep_set = set(keep_indices.tolist()) + positions_out = positions_out[keep_indices] + + # Remap edges + old_to_new = {int(old): new for new, old in enumerate(keep_indices)} + filtered_edges: list[list[int]] = [] 
+ for e in edges_out: + s, d = int(e[0]), int(e[1]) + if s in old_to_new and d in old_to_new: + filtered_edges.append([old_to_new[s], old_to_new[d]]) + edges_out = ( + np.array(filtered_edges, dtype=np.int64) + if filtered_edges + else np.zeros((0, 2), dtype=np.int64) + ) - return { - "positions": positions_out, - "edges": edges_out, - "node_count": len(positions_out), - "edge_count": len(edges_out), - } + return { + "positions": positions_out, + "edges": edges_out, + "node_count": len(positions_out), + "edge_count": len(edges_out), + } + finally: + _batched_reads_cm.__exit__(None, None, None) def _empty_graph_result(ndim: int) -> dict[str, Any]: diff --git a/zarr_vectors/types/lines.py b/zarr_vectors/types/lines.py index ec615b9..8747fd7 100644 --- a/zarr_vectors/types/lines.py +++ b/zarr_vectors/types/lines.py @@ -21,6 +21,7 @@ GEOM_LINE, LINKS_IMPLICIT_SEQUENTIAL, OBJIDX_STANDARD, + VERTEX_GROUP_OFFSETS, VERTICES, ) from zarr_vectors.core.arrays import ( @@ -84,12 +85,15 @@ def write_lines( chunk_shape: ChunkShape, bin_shape: BinShape | None = None, bounds: tuple[list[float], list[float]] | None = None, - attributes: dict[str, npt.NDArray] | None = None, - line_attributes: dict[str, npt.NDArray] | None = None, + vertex_attributes: dict[str, npt.NDArray] | None = None, + object_attributes: dict[str, npt.NDArray] | None = None, dtype: str = "float32", backend: str | None = None, chunk_by_attribute: str | None = None, out_of_bounds: str = DEFAULT_OOB_POLICY, + # Deprecated aliases (will be removed): + attributes: dict[str, npt.NDArray] | None = None, + line_attributes: dict[str, npt.NDArray] | None = None, ) -> dict[str, Any]: """Write finite line segments to a new zarr vectors store. @@ -98,16 +102,47 @@ def write_lines( endpoints: ``(N, 2, D)`` array — N lines, each with 2 endpoints of D dimensions. chunk_shape: Spatial chunk size per dimension. - attributes: Per-vertex attributes as ``{name: (N, 2) or (N, 2, C)}``. - Two values per line (one per endpoint). 
- line_attributes: Per-line (per-object) attributes as - ``{name: (N,) or (N, C)}``. + vertex_attributes: Per-vertex attributes as + ``{name: (N, 2) or (N, 2, C)}``. Two values per line (one + per endpoint). (Spec name; replaces ``attributes``.) + object_attributes: Per-line (per-object) attributes as + ``{name: (N,) or (N, C)}``. (Spec name; replaces + ``line_attributes``.) dtype: Numpy dtype string for positions. Returns: Summary dict with ``line_count``, ``chunk_count``, ``cross_chunk_count``. """ + # Back-compat: accept the legacy kwarg names. + if attributes is not None: + if vertex_attributes is not None: + raise TypeError( + "got both `attributes` and `vertex_attributes`; " + "pass only `vertex_attributes`." + ) + import warnings + warnings.warn( + "`attributes` is deprecated; use `vertex_attributes`.", + DeprecationWarning, stacklevel=2, + ) + vertex_attributes = attributes + if line_attributes is not None: + if object_attributes is not None: + raise TypeError( + "got both `line_attributes` and `object_attributes`; " + "pass only `object_attributes`." + ) + import warnings + warnings.warn( + "`line_attributes` is deprecated; use `object_attributes`.", + DeprecationWarning, stacklevel=2, + ) + object_attributes = line_attributes + # Internal aliases so the rest of the body stays unchanged. + attributes = vertex_attributes + line_attributes = object_attributes + np_dtype = np.dtype(dtype) endpoints = np.asarray(endpoints, dtype=np_dtype) @@ -381,24 +416,40 @@ def read_lines( result_endpoints: list[npt.NDArray] = [] - for oid in object_ids: - try: - vg_list = read_object_vertices( - level_group, oid, dtype=dtype, ndim=ndim - ) - except ArrayError: - continue + # Prefetch every vertex chunk (and its offsets sidecar) in one async + # gather so the per-object read loop hits the cache instead of + # paying one round-trip per chunk. 
+ _chunk_key_strs = [ + ".".join(str(c) for c in cc) + for cc in list_chunk_keys(level_group, VERTICES) + ] + _prefetch_plan: list[tuple[str, list[str]]] = [ + (VERTICES, _chunk_key_strs), + (VERTEX_GROUP_OFFSETS, _chunk_key_strs), + ] + _batched_reads_cm = level_group.batched_reads(_prefetch_plan) + _batched_reads_cm.__enter__() + try: + for oid in object_ids: + try: + vg_list = read_object_vertices( + level_group, oid, dtype=dtype, ndim=ndim + ) + except ArrayError: + continue - # Concatenate vertex groups to get the full line (2 vertices) - all_verts = np.concatenate(vg_list, axis=0) + # Concatenate vertex groups to get the full line (2 vertices) + all_verts = np.concatenate(vg_list, axis=0) - if len(all_verts) < 2: - continue + if len(all_verts) < 2: + continue - # Take first and last as endpoints (handles both same-chunk - # and cross-chunk cases) - ep = np.stack([all_verts[0], all_verts[-1]], axis=0) # (2, D) - result_endpoints.append(ep) + # Take first and last as endpoints (handles both same-chunk + # and cross-chunk cases) + ep = np.stack([all_verts[0], all_verts[-1]], axis=0) # (2, D) + result_endpoints.append(ep) + finally: + _batched_reads_cm.__exit__(None, None, None) if not result_endpoints: return { diff --git a/zarr_vectors/types/meshes.py b/zarr_vectors/types/meshes.py index 266a1f9..efa93ef 100644 --- a/zarr_vectors/types/meshes.py +++ b/zarr_vectors/types/meshes.py @@ -19,11 +19,14 @@ from zarr_vectors.constants import ( CROSS_CHUNK_EXPLICIT, + CROSS_CHUNK_LINKS, ENCODING_DRACO, ENCODING_RAW, GEOM_MESH, + LINKS, LINKS_EXPLICIT, OBJIDX_STANDARD, + VERTEX_GROUP_OFFSETS, VERTICES, ) from zarr_vectors.core.arrays import ( @@ -416,94 +419,109 @@ def read_mesh( return _empty_mesh_result(ndim, link_width) chunk_keys = [k for k in chunk_keys if k and k[0] == filter_bin] - # Read vertices and build offset map - all_positions: list[npt.NDArray] = [] - chunk_offsets: dict[ChunkCoords, int] = {} - running = 0 - - for chunk_coords in chunk_keys: - 
chunk_offsets[chunk_coords] = running - try: - groups = read_chunk_vertices( - level_group, chunk_coords, dtype=dtype, ndim=ndim - ) - for vg in groups: - all_positions.append(vg) - running += len(vg) - except ArrayError: - pass + # Prefetch every chunk (vertices, offsets, faces) and the cross-chunk + # face records in one async gather. Subsequent ``read_bytes`` calls + # below hit the cache instead of paying one round-trip per chunk. + chunk_key_strs = [".".join(str(c) for c in cc) for cc in chunk_keys] + _prefetch_plan: list[tuple[str, list[str]]] = [ + (VERTICES, chunk_key_strs), + (VERTEX_GROUP_OFFSETS, chunk_key_strs), + (f"{LINKS}/0", chunk_key_strs), + (f"{CROSS_CHUNK_LINKS}/0", ["data"]), + ] + _batched_reads_cm = level_group.batched_reads(_prefetch_plan) + _batched_reads_cm.__enter__() + try: + # Read vertices and build offset map + all_positions: list[npt.NDArray] = [] + chunk_offsets: dict[ChunkCoords, int] = {} + running = 0 + + for chunk_coords in chunk_keys: + chunk_offsets[chunk_coords] = running + try: + groups = read_chunk_vertices( + level_group, chunk_coords, dtype=dtype, ndim=ndim + ) + for vg in groups: + all_positions.append(vg) + running += len(vg) + except ArrayError: + pass - if not all_positions: - return _empty_mesh_result(ndim, link_width) + if not all_positions: + return _empty_mesh_result(ndim, link_width) - positions_out = np.concatenate(all_positions, axis=0) + positions_out = np.concatenate(all_positions, axis=0) - # Read intra-chunk faces - all_faces: list[npt.NDArray] = [] - for chunk_coords in chunk_keys: - try: - link_groups = read_chunk_links( - level_group, chunk_coords, link_width=link_width, delta=0, - ) - offset = chunk_offsets.get(chunk_coords, 0) - for lg in link_groups: - if len(lg) > 0: - remapped = lg.copy() + offset - all_faces.append(remapped) - except ArrayError: - pass - - # Cross-chunk faces are stored as variable-width records under - # ``cross_chunk_links//`` (link_width = face arity). 
Map - # each (chunk, local_idx) endpoint into the global vertex index - # via ``chunk_offsets`` built above. - cross_face_records = read_cross_chunk_links(level_group, delta=0) - for face in cross_face_records: - if len(face) != link_width: - continue # not a face record (e.g. edge-arity, ignore) - vertex_ids: list[int] = [] - for cc, local_idx in face: - if cc not in chunk_offsets: - vertex_ids = [] - break - vertex_ids.append(int(chunk_offsets[cc]) + int(local_idx)) - if len(vertex_ids) == link_width: - all_faces.append(np.asarray(vertex_ids, dtype=np.int64)[None, :]) - - if all_faces: - faces_out = np.concatenate(all_faces, axis=0) - else: - faces_out = np.zeros((0, link_width), dtype=np.int64) - - # Apply bbox filter on vertices - if bbox is not None: - bbox_min, bbox_max = np.asarray(bbox[0]), np.asarray(bbox[1]) - node_mask = np.all( - (positions_out >= bbox_min) & (positions_out <= bbox_max), - axis=1, - ) - if not np.all(node_mask): - keep = np.flatnonzero(node_mask) - keep_set = set(keep.tolist()) - positions_out = positions_out[keep] - - old_to_new = {int(old): new for new, old in enumerate(keep)} - filtered: list[npt.NDArray] = [] - for f in faces_out: - if all(int(v) in keep_set for v in f): - filtered.append(np.array([old_to_new[int(v)] for v in f])) - faces_out = ( - np.stack(filtered).astype(np.int64) - if filtered - else np.zeros((0, link_width), dtype=np.int64) + # Read intra-chunk faces + all_faces: list[npt.NDArray] = [] + for chunk_coords in chunk_keys: + try: + link_groups = read_chunk_links( + level_group, chunk_coords, link_width=link_width, delta=0, + ) + offset = chunk_offsets.get(chunk_coords, 0) + for lg in link_groups: + if len(lg) > 0: + remapped = lg.copy() + offset + all_faces.append(remapped) + except ArrayError: + pass + + # Cross-chunk faces are stored as variable-width records under + # ``cross_chunk_links//`` (link_width = face arity). 
Map + # each (chunk, local_idx) endpoint into the global vertex index + # via ``chunk_offsets`` built above. + cross_face_records = read_cross_chunk_links(level_group, delta=0) + for face in cross_face_records: + if len(face) != link_width: + continue # not a face record (e.g. edge-arity, ignore) + vertex_ids: list[int] = [] + for cc, local_idx in face: + if cc not in chunk_offsets: + vertex_ids = [] + break + vertex_ids.append(int(chunk_offsets[cc]) + int(local_idx)) + if len(vertex_ids) == link_width: + all_faces.append(np.asarray(vertex_ids, dtype=np.int64)[None, :]) + + if all_faces: + faces_out = np.concatenate(all_faces, axis=0) + else: + faces_out = np.zeros((0, link_width), dtype=np.int64) + + # Apply bbox filter on vertices + if bbox is not None: + bbox_min, bbox_max = np.asarray(bbox[0]), np.asarray(bbox[1]) + node_mask = np.all( + (positions_out >= bbox_min) & (positions_out <= bbox_max), + axis=1, ) + if not np.all(node_mask): + keep = np.flatnonzero(node_mask) + keep_set = set(keep.tolist()) + positions_out = positions_out[keep] + + old_to_new = {int(old): new for new, old in enumerate(keep)} + filtered: list[npt.NDArray] = [] + for f in faces_out: + if all(int(v) in keep_set for v in f): + filtered.append(np.array([old_to_new[int(v)] for v in f])) + faces_out = ( + np.stack(filtered).astype(np.int64) + if filtered + else np.zeros((0, link_width), dtype=np.int64) + ) - return { - "vertices": positions_out, - "faces": faces_out, - "vertex_count": len(positions_out), - "face_count": len(faces_out), - } + return { + "vertices": positions_out, + "faces": faces_out, + "vertex_count": len(positions_out), + "face_count": len(faces_out), + } + finally: + _batched_reads_cm.__exit__(None, None, None) def _empty_mesh_result(ndim: int, link_width: int) -> dict[str, Any]: diff --git a/zarr_vectors/types/parametric.py b/zarr_vectors/types/parametric.py index ad9b0c2..07b5492 100644 --- a/zarr_vectors/types/parametric.py +++ b/zarr_vectors/types/parametric.py @@ 
-205,7 +205,7 @@ def write_parametric_objects( "shape": list(arr.shape), }) - # Write groupings + # Write groups if groups: from zarr_vectors.encoding.ragged import encode_ragged_ints max_gid = max(groups.keys()) @@ -214,17 +214,17 @@ def write_parametric_objects( for gid in range(max_gid + 1) ] raw, offsets = encode_ragged_ints(group_list) - para.write_bytes("groupings", "data", raw) - para.write_bytes("groupings", "offsets", offsets.tobytes()) - para.write_array_meta("groupings", { + para.write_bytes("groups", "data", raw) + para.write_bytes("groups", "offsets", offsets.tobytes()) + para.write_array_meta("groups", { "num_groups": max_gid + 1, }) if group_attributes: - para.require_group("groupings_attributes") + para.require_group("group_attributes") for attr_name, attr_data in group_attributes.items(): arr = np.asarray(attr_data) - full_name = f"groupings_attributes/{attr_name}" + full_name = f"group_attributes/{attr_name}" para.write_bytes(full_name, "data", arr.tobytes()) para.write_array_meta(full_name, { "dtype": str(arr.dtype), diff --git a/zarr_vectors/types/points.py b/zarr_vectors/types/points.py index d7d508a..b898028 100644 --- a/zarr_vectors/types/points.py +++ b/zarr_vectors/types/points.py @@ -25,6 +25,8 @@ LINKS_IMPLICIT_SEQUENTIAL, OBJIDX_IDENTITY, OBJIDX_STANDARD, + VERTEX_ATTRIBUTES, + VERTEX_GROUP_OFFSETS, VERTICES, ) from zarr_vectors.core.arrays import ( @@ -97,7 +99,7 @@ def write_points( chunk_shape: ChunkShape | None = None, bin_shape: BinShape | None = None, bounds: tuple[list[float], list[float]] | None = None, - attributes: dict[str, npt.NDArray] | None = None, + vertex_attributes: dict[str, npt.NDArray] | None = None, object_ids: npt.NDArray[np.integer] | None = None, object_attributes: dict[str, npt.NDArray] | None = None, groups: dict[int, list[int]] | None = None, @@ -106,6 +108,8 @@ def write_points( backend: str | None = None, chunk_by_attribute: str | None = None, out_of_bounds: str = DEFAULT_OOB_POLICY, + # Deprecated alias for 
``vertex_attributes``; will be removed. + attributes: dict[str, npt.NDArray] | None = None, ) -> dict[str, Any]: """Write a point cloud to a new ZV store. @@ -116,14 +120,15 @@ def write_points( single chunk containing all points is used. bin_shape: Supervoxel bin edge lengths. If None, defaults to ``chunk_shape`` (one bin per chunk — backward compatible). - attributes: Per-vertex attributes ``{name: array}``. + vertex_attributes: Per-vertex attributes ``{name: array}``. + (Spec name; replaces the deprecated ``attributes`` kwarg.) object_ids: ``(N,)`` integer per-point object assignment. object_attributes: Per-object attributes ``{name: array}``. groups: Group memberships ``{group_id: [object_id, ...]}``. group_attributes: Per-group attributes ``{name: array}``. dtype: Numpy dtype string for vertex positions. chunk_by_attribute: If set, the named per-vertex categorical - attribute (which must appear in ``attributes``) becomes the + attribute (which must appear in ``vertex_attributes``) becomes the **leading chunk axis**. Chunk keys gain a prefix, e.g. ``gene_bin.z.y.x``. Vertices with mixed values within the same object are split across multiple attribute chunks. @@ -137,6 +142,22 @@ def write_points( Returns: Summary dict. """ + # Back-compat: accept the legacy ``attributes`` kwarg. + if attributes is not None: + if vertex_attributes is not None: + raise TypeError( + "got both `attributes` and `vertex_attributes`; " + "pass only `vertex_attributes` (the spec name)." 
+ ) + import warnings + warnings.warn( + "`attributes` is deprecated; use `vertex_attributes` " + "(matches the on-disk `vertex_attributes/` directory).", + DeprecationWarning, stacklevel=2, + ) + vertex_attributes = attributes + attributes = vertex_attributes # internal alias for the rest of the body + np_dtype = np.dtype(dtype) positions = np.asarray(positions, dtype=np_dtype) n_vertices, ndim = positions.shape @@ -241,7 +262,7 @@ def write_points( arrays_present = [VERTICES] if attributes: - arrays_present.append("attributes") + arrays_present.append(VERTEX_ATTRIBUTES) if needs_objects: arrays_present.append("object_index") @@ -519,7 +540,7 @@ def read_points( result: dict[str, Any] = { "positions": positions_out, "object_ids": np.concatenate(all_obj_labels) if all_obj_labels else np.array([], dtype=np.int64), - "attributes": {}, + "vertex_attributes": {}, "vertex_count": len(positions_out), } return result @@ -571,129 +592,143 @@ def read_points( chunk_vg_targets: dict[ChunkCoords, list[int]] | None = None chunk_keys_set: set[ChunkCoords] = set() - if bbox is not None and has_bins: - # Bin-level targeting: only decode matching vertex groups - from zarr_vectors.spatial.chunking import ( - bins_intersecting_bbox, - bin_to_chunk, - bin_to_vg_index, - ) - target_bins = bins_intersecting_bbox( - np.asarray(bbox[0]), np.asarray(bbox[1]), - effective_bin, - ) - # Group target bins by chunk - chunk_vg_targets = {} - for bc in target_bins: - cc = bin_to_chunk(bc, bins_per_chunk) - vgi = bin_to_vg_index(bc, cc, bins_per_chunk) - if cc not in chunk_vg_targets: - chunk_vg_targets[cc] = [] - chunk_vg_targets[cc].append(vgi) - - # Only read from chunks that have data - chunk_keys_set = set(chunk_keys) - all_positions = [] - for cc, vg_indices in chunk_vg_targets.items(): - if cc not in chunk_keys_set: - continue - for vgi in vg_indices: + # Build the prefetch plan: VERTICES + VERTEX_GROUP_OFFSETS for every + # chunk we may touch, plus each requested attribute array. 
Cache + # misses fall through to the sync ``read_bytes`` path so this is + # purely a perf optimisation — correctness is unaffected. + chunk_key_strs = [".".join(str(c) for c in cc) for cc in chunk_keys] + prefetch_plan: list[tuple[str, list[str]]] = [ + (VERTICES, chunk_key_strs), + (VERTEX_GROUP_OFFSETS, chunk_key_strs), + ] + if attribute_names: + for attr_name in attribute_names: + prefetch_plan.append((f"{VERTEX_ATTRIBUTES}/{attr_name}", chunk_key_strs)) + + with level_group.batched_reads(prefetch_plan): + if bbox is not None and has_bins: + # Bin-level targeting: only decode matching vertex groups + from zarr_vectors.spatial.chunking import ( + bins_intersecting_bbox, + bin_to_chunk, + bin_to_vg_index, + ) + target_bins = bins_intersecting_bbox( + np.asarray(bbox[0]), np.asarray(bbox[1]), + effective_bin, + ) + # Group target bins by chunk + chunk_vg_targets = {} + for bc in target_bins: + cc = bin_to_chunk(bc, bins_per_chunk) + vgi = bin_to_vg_index(bc, cc, bins_per_chunk) + if cc not in chunk_vg_targets: + chunk_vg_targets[cc] = [] + chunk_vg_targets[cc].append(vgi) + + # Only read from chunks that have data + chunk_keys_set = set(chunk_keys) + all_positions = [] + for cc, vg_indices in chunk_vg_targets.items(): + if cc not in chunk_keys_set: + continue + for vgi in vg_indices: + try: + vg = read_vertex_group( + level_group, cc, vgi, dtype=dtype, ndim=ndim, + ) + if len(vg) > 0: + all_positions.append(vg) + except ArrayError: + continue + + elif bbox is not None: + # Chunk-level targeting (old stores without bins) + target_chunks = set(chunks_intersecting_bbox( + np.asarray(bbox[0]), np.asarray(bbox[1]), + root_meta.chunk_shape, + )) + chunk_keys = [k for k in chunk_keys if k in target_chunks] + + all_positions = [] + for chunk_coords in chunk_keys: try: - vg = read_vertex_group( - level_group, cc, vgi, dtype=dtype, ndim=ndim, + groups = read_chunk_vertices( + level_group, chunk_coords, dtype=dtype, ndim=ndim ) + except ArrayError: + continue + for vg in 
groups: if len(vg) > 0: all_positions.append(vg) + + else: + # Read all + all_positions = [] + for chunk_coords in chunk_keys: + try: + groups = read_chunk_vertices( + level_group, chunk_coords, dtype=dtype, ndim=ndim + ) except ArrayError: continue + for vg in groups: + if len(vg) > 0: + all_positions.append(vg) - elif bbox is not None: - # Chunk-level targeting (old stores without bins) - target_chunks = set(chunks_intersecting_bbox( - np.asarray(bbox[0]), np.asarray(bbox[1]), - root_meta.chunk_shape, - )) - chunk_keys = [k for k in chunk_keys if k in target_chunks] - - all_positions = [] - for chunk_coords in chunk_keys: - try: - groups = read_chunk_vertices( - level_group, chunk_coords, dtype=dtype, ndim=ndim - ) - except ArrayError: - continue - for vg in groups: - if len(vg) > 0: - all_positions.append(vg) - - else: - # Read all - all_positions = [] - for chunk_coords in chunk_keys: - try: - groups = read_chunk_vertices( - level_group, chunk_coords, dtype=dtype, ndim=ndim - ) - except ArrayError: - continue - for vg in groups: - if len(vg) > 0: - all_positions.append(vg) - - if not all_positions: - return _empty_result(ndim) + if not all_positions: + return _empty_result(ndim) - positions_out = np.concatenate(all_positions, axis=0) + positions_out = np.concatenate(all_positions, axis=0) - # Apply precise bbox filter (chunk-level is coarse) - if bbox is not None: - mask = np.all( - (positions_out >= bbox[0]) & (positions_out <= bbox[1]), - axis=1, - ) - positions_out = positions_out[mask] + # Apply precise bbox filter (chunk-level is coarse) + if bbox is not None: + mask = np.all( + (positions_out >= bbox[0]) & (positions_out <= bbox[1]), + axis=1, + ) + positions_out = positions_out[mask] - # Read attributes - attrs_out: dict[str, npt.NDArray] = {} - if attribute_names: - for attr_name in attribute_names: - attr_parts: list[npt.NDArray] = [] - if chunk_vg_targets is not None: - # Bin-level bbox: read the same vertex groups as positions - for cc, vg_indices in 
chunk_vg_targets.items(): - if cc not in chunk_keys_set: - continue - try: - attr_groups = read_chunk_attributes( - level_group, attr_name, cc, - dtype=np.float32, ncols=1, - ) - for vgi in vg_indices: - if vgi < len(attr_groups) and len(attr_groups[vgi]) > 0: - attr_parts.append(attr_groups[vgi]) - except ArrayError: - continue - else: - for chunk_coords in chunk_keys: - try: - attr_groups = read_chunk_attributes( - level_group, attr_name, chunk_coords, - dtype=np.float32, ncols=1, - ) - for ag in attr_groups: - attr_parts.append(ag) - except ArrayError: - continue - if attr_parts: - attr_all = np.concatenate(attr_parts, axis=0) - if bbox is not None: - attr_all = attr_all[mask] - attrs_out[attr_name] = attr_all + # Read attributes + attrs_out: dict[str, npt.NDArray] = {} + if attribute_names: + for attr_name in attribute_names: + attr_parts: list[npt.NDArray] = [] + if chunk_vg_targets is not None: + # Bin-level bbox: read the same vertex groups as positions + for cc, vg_indices in chunk_vg_targets.items(): + if cc not in chunk_keys_set: + continue + try: + attr_groups = read_chunk_attributes( + level_group, attr_name, cc, + dtype=np.float32, ncols=1, + ) + for vgi in vg_indices: + if vgi < len(attr_groups) and len(attr_groups[vgi]) > 0: + attr_parts.append(attr_groups[vgi]) + except ArrayError: + continue + else: + for chunk_coords in chunk_keys: + try: + attr_groups = read_chunk_attributes( + level_group, attr_name, chunk_coords, + dtype=np.float32, ncols=1, + ) + for ag in attr_groups: + attr_parts.append(ag) + except ArrayError: + continue + if attr_parts: + attr_all = np.concatenate(attr_parts, axis=0) + if bbox is not None: + attr_all = attr_all[mask] + attrs_out[attr_name] = attr_all return { "positions": positions_out, - "attributes": attrs_out, + "vertex_attributes": attrs_out, "vertex_count": len(positions_out), } @@ -702,6 +737,6 @@ def _empty_result(ndim: int) -> dict[str, Any]: """Return an empty result dict.""" return { "positions": np.zeros((0, 
ndim), dtype=np.float32), - "attributes": {}, + "vertex_attributes": {}, "vertex_count": 0, } diff --git a/zarr_vectors/types/polylines.py b/zarr_vectors/types/polylines.py index 31882cf..f85ffbf 100644 --- a/zarr_vectors/types/polylines.py +++ b/zarr_vectors/types/polylines.py @@ -24,6 +24,7 @@ GEOM_STREAMLINE, LINKS_IMPLICIT_SEQUENTIAL, OBJIDX_STANDARD, + VERTEX_GROUP_OFFSETS, VERTICES, ) from zarr_vectors.core.arrays import ( @@ -511,93 +512,111 @@ def read_polylines( except Exception: manifests = None - for oid in object_ids: - # ------------------------------------------------------------------ - # Segment-level crop mode (chunks=). - # ------------------------------------------------------------------ - if chunk_whitelist is not None: - if manifests is None or oid >= len(manifests): + # Prefetch every vertex chunk (and its offsets sidecar) in one async + # gather so the per-object read loop hits the cache instead of + # paying one round-trip per chunk. Cache misses fall through to + # the sync path, so this is a perf-only optimisation. + chunk_key_strs = [ + ".".join(str(c) for c in cc) + for cc in list_chunk_keys(level_group, VERTICES) + ] + prefetch_plan: list[tuple[str, list[str]]] = [ + (VERTICES, chunk_key_strs), + (VERTEX_GROUP_OFFSETS, chunk_key_strs), + ] + + _batched_reads_cm = level_group.batched_reads(prefetch_plan) + _batched_reads_cm.__enter__() + try: + for oid in object_ids: + # ------------------------------------------------------------------ + # Segment-level crop mode (chunks=). + # ------------------------------------------------------------------ + if chunk_whitelist is not None: + if manifests is None or oid >= len(manifests): + continue + obj_manifest = manifests[oid] + if filter_bin is not None: + obj_manifest = [ + (cc, vg) for (cc, vg) in obj_manifest + if cc and cc[0] == filter_bin + ] + + # Split the manifest into runs of consecutive entries whose + # chunk lies in the whitelist. Each run becomes its own + # output polyline. 
+ run: list[VertexGroupRef] = [] + for cc, vg_idx in obj_manifest: + if cc in chunk_whitelist: + run.append((cc, vg_idx)) + else: + if run: + vg_list = _read_manifest_run( + level_group, run, dtype, ndim, + ) + if vg_list: + result_polylines.append(vg_list) + total_verts += sum(len(vg) for vg in vg_list) + run = [] + if run: + vg_list = _read_manifest_run( + level_group, run, dtype, ndim, + ) + if vg_list: + result_polylines.append(vg_list) + total_verts += sum(len(vg) for vg in vg_list) continue - obj_manifest = manifests[oid] + + # ------------------------------------------------------------------ + # Existing whole-object paths (attribute_filter, bbox, no filter). + # ------------------------------------------------------------------ if filter_bin is not None: - obj_manifest = [ + if manifests is None or oid >= len(manifests): + continue + obj_manifest = manifests[oid] + matching = [ (cc, vg) for (cc, vg) in obj_manifest if cc and cc[0] == filter_bin ] - - # Split the manifest into runs of consecutive entries whose - # chunk lies in the whitelist. Each run becomes its own - # output polyline. - run: list[VertexGroupRef] = [] - for cc, vg_idx in obj_manifest: - if cc in chunk_whitelist: - run.append((cc, vg_idx)) - else: - if run: - vg_list = _read_manifest_run( - level_group, run, dtype, ndim, - ) - if vg_list: - result_polylines.append(vg_list) - total_verts += sum(len(vg) for vg in vg_list) - run = [] - if run: - vg_list = _read_manifest_run( - level_group, run, dtype, ndim, - ) - if vg_list: - result_polylines.append(vg_list) - total_verts += sum(len(vg) for vg in vg_list) - continue - - # ------------------------------------------------------------------ - # Existing whole-object paths (attribute_filter, bbox, no filter). 
- # ------------------------------------------------------------------ - if filter_bin is not None: - if manifests is None or oid >= len(manifests): - continue - obj_manifest = manifests[oid] - matching = [ - (cc, vg) for (cc, vg) in obj_manifest - if cc and cc[0] == filter_bin - ] - if not matching: - continue - vg_list = [] - for cc, vg_idx in matching: + if not matching: + continue + vg_list = [] + for cc, vg_idx in matching: + try: + vg_list.append(read_vertex_group( + level_group, cc, vg_idx, dtype=dtype, ndim=ndim, + )) + except ArrayError: + continue + else: try: - vg_list.append(read_vertex_group( - level_group, cc, vg_idx, dtype=dtype, ndim=ndim, - )) + vg_list = read_object_vertices( + level_group, oid, dtype=dtype, ndim=ndim + ) except ArrayError: continue - else: - try: - vg_list = read_object_vertices( - level_group, oid, dtype=dtype, ndim=ndim - ) - except ArrayError: - continue - if not vg_list: - continue + if not vg_list: + continue - # If bbox filter, check if any segment is in a target chunk - if target_chunks is not None: - try: - manifest = read_all_object_manifests(level_group) - obj_manifest = manifest[oid] if oid < len(manifest) else [] - has_match = any( - chunk_coords in target_chunks - for chunk_coords, _ in obj_manifest - ) - if not has_match: + # If bbox filter, check if any segment is in a target chunk + if target_chunks is not None: + try: + manifest = read_all_object_manifests(level_group) + obj_manifest = manifest[oid] if oid < len(manifest) else [] + has_match = any( + chunk_coords in target_chunks + for chunk_coords, _ in obj_manifest + ) + if not has_match: + continue + except Exception: continue - except Exception: - continue - result_polylines.append(vg_list) - total_verts += sum(len(vg) for vg in vg_list) + result_polylines.append(vg_list) + total_verts += sum(len(vg) for vg in vg_list) + finally: + _batched_reads_cm.__exit__(None, None, None) return { "polylines": result_polylines, diff --git 
a/zarr_vectors/validate/structure.py b/zarr_vectors/validate/structure.py index 4fb15f9..2d3d780 100644 --- a/zarr_vectors/validate/structure.py +++ b/zarr_vectors/validate/structure.py @@ -104,8 +104,8 @@ def _is_level_dir(d): else: result.add_warning(f"{ln}/ has no metadata file") - for opt in ["attributes", "object_index", "object_attributes", - "groupings"]: + for opt in ["vertex_attributes", "object_index", "object_attributes", + "groups"]: if (level_dir / opt).exists(): result.add_pass(f"{ln}/{opt}/ exists") # Multiscale link layout (0.4+): list every segment. From 0d089eb03da9a4019a5fed386604479b7e1e23f0 Mon Sep 17 00:00:00 2001 From: Andrew-Keenlyside Date: Thu, 14 May 2026 15:50:05 -0700 Subject: [PATCH 4/4] refactor ZVF and ZVR names to ZV only --- benchmarks/01_size_scaling.ipynb | 2 +- docs/spec/chunking/attribute_chunking.md | 2 +- docs/spec/foundations/store_types.md | 6 +- docs/tutorials/io/cloud_stores.md | 8 +-- docs/tutorials/multiscale/lazy_loading.md | 30 ++++----- examples/01_point_clouds.ipynb | 10 +-- examples/02_lines.ipynb | 4 +- examples/03_polylines_streamlines.ipynb | 4 +- examples/04_graphs.ipynb | 4 +- .../integration/test_lazy_sharding_rechunk.py | 40 ++++++------ tests/test_attr_chunking.py | 62 +++++++++---------- tests/test_backends.py | 18 +++--- tests/test_chunk_helpers.py | 4 +- tests/test_cross_chunk_faces.py | 4 +- tests/test_headers.py | 6 +- tests/test_lazy_writer.py | 46 +++++++------- tests/test_per_object_pyramid.py | 10 +-- tests/test_rechunk_by_attribute.py | 12 ++-- tests/test_zv_rename.py | 2 +- zarr_vectors/__init__.py | 4 +- zarr_vectors/lazy/__init__.py | 16 ++--- zarr_vectors/lazy/arrays.py | 14 ++--- zarr_vectors/lazy/level.py | 54 ++++++++-------- zarr_vectors/lazy/store.py | 24 +++---- zarr_vectors/lazy/views.py | 30 ++++----- zarr_vectors/lazy/writer.py | 20 +++--- 26 files changed, 218 insertions(+), 218 deletions(-) diff --git a/benchmarks/01_size_scaling.ipynb b/benchmarks/01_size_scaling.ipynb index 
6018cf8..e5110a9 100644 --- a/benchmarks/01_size_scaling.ipynb +++ b/benchmarks/01_size_scaling.ipynb @@ -65,7 +65,7 @@ "id": "5bcdba96", "metadata": {}, "outputs": [], - "source": "from zarr_vectors.types.points import write_points, read_points\nfrom zarr_vectors.lazy import open_zvr\n\nSIZES = [1_000, 10_000, 100_000, 1_000_000]\nCHUNK = (200.0, 200.0, 200.0)\nBIN = (50.0, 50.0, 50.0)\nSEED = 0\n\n\ndef _csv_path(prefix):\n \"\"\"Fresh tempdir + CSV path.\"\"\"\n return Path(tempfile.mkdtemp(prefix=f'csvbench_{prefix}_')) / 'points.csv'\n\n\ndef _csv_write(path, positions, intensity):\n \"\"\"Baseline: write x,y,z,intensity columns to a CSV.\"\"\"\n pd.DataFrame({\n 'x': positions[:, 0],\n 'y': positions[:, 1],\n 'z': positions[:, 2],\n 'intensity': intensity,\n }).to_csv(path, index=False)\n\n\ndef _csv_read_all(path):\n \"\"\"Read every row back into memory.\"\"\"\n return pd.read_csv(path)\n\n\ndef _csv_read_one(path):\n \"\"\"Best-case single-row read: only parse the first data row.\n\n CSV has no random access, so this is the cheapest single-record\n read the format admits.\"\"\"\n return pd.read_csv(path, nrows=1)\n\n\ndef _zv_read_one(store_path):\n \"\"\"Read just one chunk's worth of vertices via the lazy API.\n\n Touches a single chunk on disk (vs. 
the full materialisation in\n ``read_points``).\"\"\"\n zvr = open_zvr(store_path)\n chunk_keys = zvr[0].vertices._chunk_keys # noqa: SLF001 — minimal demo\n if not chunk_keys:\n return None\n return zvr[0].vertices[chunk_keys[0]].compute()" + "source": "from zarr_vectors.types.points import write_points, read_points\nfrom zarr_vectors.lazy import open_zv\n\nSIZES = [1_000, 10_000, 100_000, 1_000_000]\nCHUNK = (200.0, 200.0, 200.0)\nBIN = (50.0, 50.0, 50.0)\nSEED = 0\n\n\ndef _csv_path(prefix):\n \"\"\"Fresh tempdir + CSV path.\"\"\"\n return Path(tempfile.mkdtemp(prefix=f'csvbench_{prefix}_')) / 'points.csv'\n\n\ndef _csv_write(path, positions, intensity):\n \"\"\"Baseline: write x,y,z,intensity columns to a CSV.\"\"\"\n pd.DataFrame({\n 'x': positions[:, 0],\n 'y': positions[:, 1],\n 'z': positions[:, 2],\n 'intensity': intensity,\n }).to_csv(path, index=False)\n\n\ndef _csv_read_all(path):\n \"\"\"Read every row back into memory.\"\"\"\n return pd.read_csv(path)\n\n\ndef _csv_read_one(path):\n \"\"\"Best-case single-row read: only parse the first data row.\n\n CSV has no random access, so this is the cheapest single-record\n read the format admits.\"\"\"\n return pd.read_csv(path, nrows=1)\n\n\ndef _zv_read_one(store_path):\n \"\"\"Read just one chunk's worth of vertices via the lazy API.\n\n Touches a single chunk on disk (vs. the full materialisation in\n ``read_points``).\"\"\"\n zv = open_zv(store_path)\n chunk_keys = zv[0].vertices._chunk_keys # noqa: SLF001 — minimal demo\n if not chunk_keys:\n return None\n return zv[0].vertices[chunk_keys[0]].compute()" }, { "cell_type": "markdown", diff --git a/docs/spec/chunking/attribute_chunking.md b/docs/spec/chunking/attribute_chunking.md index 28c118a..1558f21 100644 --- a/docs/spec/chunking/attribute_chunking.md +++ b/docs/spec/chunking/attribute_chunking.md @@ -154,7 +154,7 @@ a bin index via the stored `chunk_attribute_values` list, and the chunk scan is restricted to keys with that leading coord. 
Unknown values yield an empty result rather than an error. -Lazy readers (`ZVRLevel`) expose: +Lazy readers (`ZVLevel`) expose: - `chunk_dims` — the level's chunk-axis names, or `None` for legacy. - `chunk_attribute_name` — the leading-axis attribute name. diff --git a/docs/spec/foundations/store_types.md b/docs/spec/foundations/store_types.md index b8eae06..6ffdec0 100644 --- a/docs/spec/foundations/store_types.md +++ b/docs/spec/foundations/store_types.md @@ -40,7 +40,7 @@ ## Introduction ZV stores are backend-agnostic: the same `create_store` / `open_store` / -`open_zvr` calls work whether the data lives on a local SSD, a ZIP +`open_zv` calls work whether the data lives on a local SSD, a ZIP archive, an in-memory dict, or a cloud object store. The backing store type affects performance characteristics (latency, throughput, cost per request) but not the data model or the semantics of any operation. @@ -69,12 +69,12 @@ All three entry points accept `backend=` and `**backend_kwargs`: ```python from zarr_vectors.core.store import create_store, open_store -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv create_store(path, *, bounds=None, chunk_shape=None, axes=None, geometry_types=None, ..., backend=None, **backend_kwargs) -> Group open_store(path, mode="r", *, backend=None, **backend_kwargs) -> Group -open_zvr(path, *, backend=None, **backend_kwargs) -> ZVRStore +open_zv(path, *, backend=None, **backend_kwargs) -> ZVStore ``` `backend` is one of `"local"` / `"obstore"` / `"fsspec"` or `None` for diff --git a/docs/tutorials/io/cloud_stores.md b/docs/tutorials/io/cloud_stores.md index 726b0d2..e920375 100644 --- a/docs/tutorials/io/cloud_stores.md +++ b/docs/tutorials/io/cloud_stores.md @@ -27,7 +27,7 @@ to `fsspec` for any URL scheme it can't handle. 
## Backend resolution at a glance When you pass a cloud URL to any `read_*` / `write_*` / `open_store` / -`open_zvr` call, the backend is chosen in this order: +`open_zv` call, the backend is chosen in this order: 1. **Explicit `backend=` kwarg** — e.g. `backend="fsspec"` forces fsspec even if obstore is installed. @@ -262,9 +262,9 @@ a resolution level, writing new attributes). ```python import numpy as np -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv -store = open_zvr("s3://open-neuro/scan.zarrvectors") +store = open_zv("s3://open-neuro/scan.zarrvectors") print(store.levels) # metadata only — no chunk I/O print(store[2].vertex_count) # one metadata request @@ -281,7 +281,7 @@ detail = read_points( ) ``` -`open_zvr` accepts the same `backend=` / `**backend_kwargs` as +`open_zv` accepts the same `backend=` / `**backend_kwargs` as `open_store`. --- diff --git a/docs/tutorials/multiscale/lazy_loading.md b/docs/tutorials/multiscale/lazy_loading.md index 5882507..817412f 100644 --- a/docs/tutorials/multiscale/lazy_loading.md +++ b/docs/tutorials/multiscale/lazy_loading.md @@ -4,7 +4,7 @@ The ZVF read functions (`read_points`, `read_polylines`, etc.) are eager: they fetch and return all requested data immediately. For large stores or remote datasets, an eager read of the full store is impractical. -The **lazy API** provides a `open_zvr` object that opens the store +The **lazy API** provides a `open_zv` object that opens the store metadata without reading any array data. Array slices are fetched on demand — only when accessed. This is the recommended access pattern for: @@ -20,10 +20,10 @@ metadata without reading any array data. 
Array slices are fetched on demand ## Opening a store lazily ```python -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv # Opens metadata only — no vertex data fetched -store = open_zvr("synchrotron.zarrvectors") +store = open_zv("synchrotron.zarrvectors") print(store.geometry_type) # "point_cloud" print(store.spatial_dims) # 3 @@ -38,7 +38,7 @@ Opening a remote store is identical — pass an fsspec URL: ```python import s3fs -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv # 0.4+: backend layer auto-routes cloud URLs via obstore (or fsspec). # Public access works without explicit anon=True. @@ -140,7 +140,7 @@ print(f"Mean intensity over {total_count} points: {mean_intensity:.4f}") ## Lazy array access -The `open_zvr` exposes each array as a lazy `zarr.Array` that can +The `open_zv` exposes each array as a lazy `zarr.Array` that can be sliced directly: ```python @@ -178,18 +178,18 @@ print(f"{len(high_fa_ids)} high-FA streamlines") ```python import s3fs -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv -store = open_zvr("s3://my-bucket/dataset/tracts.zarrvectors") +store = open_zv("s3://my-bucket/dataset/tracts.zarrvectors") ``` ### GCS ```python import gcsfs -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv -store = open_zvr("gs://my-bucket/tracts.zarrvectors") +store = open_zv("gs://my-bucket/tracts.zarrvectors") ``` ### Performance on object stores @@ -203,7 +203,7 @@ minimises requests by: 3. Caching decompressed chunks in an LRU cache (configurable size). 
```python -store = open_zvr( +store = open_zv( "s3://my-bucket/tracts.zarrvectors", cache_size=256, # cache up to 256 decompressed chunks in memory n_workers=8, # fetch up to 8 chunks in parallel @@ -223,12 +223,12 @@ for chunk_coord, chunk_data in store.iter_chunks(level=1, prefetch=4): --- -## open_zvr API summary +## open_zv API summary ```python -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv -store = open_zvr(path_or_store) +store = open_zv(path_or_store) # Metadata (no data I/O) store.geometry_type # str @@ -258,7 +258,7 @@ store.__enter__() / store.__exit__() # context manager ### Using as a context manager ```python -with open_zvr("scan.zarrvectors") as store: +with open_zv("scan.zarrvectors") as store: result = store.read(level=2) # Store is closed and cache is freed on exit ``` @@ -272,7 +272,7 @@ with open_zvr("scan.zarrvectors") as store: Load the coarsest level for a quick full-volume thumbnail: ```python -store = open_zvr("scan.zarrvectors") +store = open_zv("scan.zarrvectors") coarsest = store.levels[-1] result = store.read(level=coarsest) # Use result["positions"] to render a low-density overview diff --git a/examples/01_point_clouds.ipynb b/examples/01_point_clouds.ipynb index 1ae8834..83c0102 100644 --- a/examples/01_point_clouds.ipynb +++ b/examples/01_point_clouds.ipynb @@ -136,14 +136,14 @@ "output_type": "stream", "text": [ "Opened lazily — no vertex data loaded yet.\n", - "ZVRStore('/tmp/zvf_examples_hb6sxv9m/scan.zarrvectors', levels=[0], geometry=[point_cloud], chunk=(200.0, 200.0, 200.0), bin=(50.0, 50.0, 50.0))\n" + "ZVStore('/tmp/zvf_examples_hb6sxv9m/scan.zarrvectors', levels=[0], geometry=[point_cloud], chunk=(200.0, 200.0, 200.0), bin=(50.0, 50.0, 50.0))\n" ] } ], "source": [ - "from zarr_vectors.lazy import open_zvr\n", + "from zarr_vectors.lazy import open_zv\n", "\n", - "store = open_zvr(STORE)\n", + "store = open_zv(STORE)\n", "print(\"Opened lazily — no vertex data loaded yet.\")\n", "print(store)" 
] @@ -374,7 +374,7 @@ "id": "9b51718e", "metadata": {}, "source": [ - "## 9 · Lazy materialisation — accessing vertices via `ZVRStore`" + "## 9 · Lazy materialisation — accessing vertices via `ZVStore`" ] }, { @@ -435,7 +435,7 @@ "cell_type": "markdown", "id": "f32b2fa6", "metadata": {}, - "source": "## Summary\n\n| Step | API used |\n|------|----------|\n| Write | `write_points(path, positions, chunk_shape, bin_shape, attributes)` |\n| Lazy open | `open_zvr(path)` → `ZVRStore` |\n| Inspect metadata | `store.geometry_types`, `store.ndim`, `store.bounds`, `store[level].vertex_count` |\n| Read all | `read_points(path)` |\n| Bbox query | `read_points(path, bbox=(lo, hi))` |\n| Pyramid | `build_pyramid(path, level_configs)` |\n| Materialise vertices | `store[level].vertices.compute()` |\n| Validate | `validate(path, level=3)` |\n\nCSV/PLY export and other format converters live in the companion package\n[`zarr-vectors-tools`](https://github.com/BRIDGE-Neuroscience/zarr-vectors-tools)." + "source": "## Summary\n\n| Step | API used |\n|------|----------|\n| Write | `write_points(path, positions, chunk_shape, bin_shape, attributes)` |\n| Lazy open | `open_zv(path)` → `ZVStore` |\n| Inspect metadata | `store.geometry_types`, `store.ndim`, `store.bounds`, `store[level].vertex_count` |\n| Read all | `read_points(path)` |\n| Bbox query | `read_points(path, bbox=(lo, hi))` |\n| Pyramid | `build_pyramid(path, level_configs)` |\n| Materialise vertices | `store[level].vertices.compute()` |\n| Validate | `validate(path, level=3)` |\n\nCSV/PLY export and other format converters live in the companion package\n[`zarr-vectors-tools`](https://github.com/BRIDGE-Neuroscience/zarr-vectors-tools)." 
} ], "metadata": { diff --git a/examples/02_lines.ipynb b/examples/02_lines.ipynb index ccb50a9..8c54cf0 100644 --- a/examples/02_lines.ipynb +++ b/examples/02_lines.ipynb @@ -26,7 +26,7 @@ "id": "cd0f85bc", "metadata": {}, "outputs": [], - "source": "import numpy as np, tempfile, os\nfrom zarr_vectors.lazy import open_zvr\nfrom zarr_vectors.types.lines import write_lines, read_lines\nfrom zarr_vectors.validate import validate\n\n_tmpdir = tempfile.mkdtemp(prefix=\"zvf_examples_\")\nSTORE = os.path.join(_tmpdir, \"synapses.zarrvectors\")\nprint(\"Store path:\", STORE)" + "source": "import numpy as np, tempfile, os\nfrom zarr_vectors.lazy import open_zv\nfrom zarr_vectors.types.lines import write_lines, read_lines\nfrom zarr_vectors.validate import validate\n\n_tmpdir = tempfile.mkdtemp(prefix=\"zvf_examples_\")\nSTORE = os.path.join(_tmpdir, \"synapses.zarrvectors\")\nprint(\"Store path:\", STORE)" }, { "cell_type": "markdown", @@ -74,7 +74,7 @@ "id": "ffc13a52", "metadata": {}, "outputs": [], - "source": "store = open_zvr(STORE)\n\nprint(f\"geometry_types: {store.geometry_types}\")\nprint(f\"ndim : {store.ndim}\")\nprint(f\"chunk_shape : {store.chunk_shape}\")\nprint(f\"vertex count : {store[0].vertex_count:,} (= {N} lines × 2 endpoints)\")" + "source": "store = open_zv(STORE)\n\nprint(f\"geometry_types: {store.geometry_types}\")\nprint(f\"ndim : {store.ndim}\")\nprint(f\"chunk_shape : {store.chunk_shape}\")\nprint(f\"vertex count : {store[0].vertex_count:,} (= {N} lines × 2 endpoints)\")" }, { "cell_type": "markdown", diff --git a/examples/03_polylines_streamlines.ipynb b/examples/03_polylines_streamlines.ipynb index a76908a..1354100 100644 --- a/examples/03_polylines_streamlines.ipynb +++ b/examples/03_polylines_streamlines.ipynb @@ -39,7 +39,7 @@ ], "source": [ "import numpy as np, tempfile, os\n", - "from zarr_vectors.lazy import open_zvr\n", + "from zarr_vectors.lazy import open_zv\n", "from zarr_vectors.types.polylines import write_polylines, 
read_polylines\n", "from zarr_vectors.validate import validate\n", "\n", @@ -176,7 +176,7 @@ } ], "source": [ - "store = open_zvr(STORE)\n", + "store = open_zv(STORE)\n", "print(f\"geometry_types : {store.geometry_types}\")\n", "print(f\"ndim : {store.ndim}\")\n", "print(f\"chunk_shape : {store.chunk_shape}\")\n", diff --git a/examples/04_graphs.ipynb b/examples/04_graphs.ipynb index cf3cdf5..1b03cf3 100644 --- a/examples/04_graphs.ipynb +++ b/examples/04_graphs.ipynb @@ -36,7 +36,7 @@ ], "source": [ "import numpy as np, tempfile, os\n", - "from zarr_vectors.lazy import open_zvr\n", + "from zarr_vectors.lazy import open_zv\n", "from zarr_vectors.types.graphs import write_graph, read_graph\n", "from zarr_vectors.validate import validate\n", "\n", @@ -170,7 +170,7 @@ } ], "source": [ - "store = open_zvr(STORE)\n", + "store = open_zv(STORE)\n", "print(f\"geometry_types: {store.geometry_types}\")\n", "print(f\"ndim : {store.ndim}\")\n", "print(f\"chunk_shape : {store.chunk_shape}\")\n", diff --git a/tests/integration/test_lazy_sharding_rechunk.py b/tests/integration/test_lazy_sharding_rechunk.py index c59ea35..faee66a 100644 --- a/tests/integration/test_lazy_sharding_rechunk.py +++ b/tests/integration/test_lazy_sharding_rechunk.py @@ -29,7 +29,7 @@ class TestLazyFilterChain: def test_lazy_filter_pipeline(self, tmp_path: Path) -> None: from zarr_vectors.types.polylines import write_polylines - from zarr_vectors.lazy import open_zvr + from zarr_vectors.lazy import open_zv rng = np.random.default_rng(42) polys = _make_streamlines(rng, 100) @@ -39,23 +39,23 @@ def test_lazy_filter_pipeline(self, tmp_path: Path) -> None: groups={0: list(range(50)), 1: list(range(50, 100))}, ) - zvr = open_zvr(store) - assert zvr[0].vertex_count > 0 + zv = open_zv(store) + assert zv[0].vertex_count > 0 # Chain: group → bbox - view = zvr[0].filter(group_ids=[0]).filter( + view = zv[0].filter(group_ids=[0]).filter( bbox=(np.array([0, 0, 0]), np.array([200, 200, 200])), ) result = 
view.compute() assert result["vertex_count"] > 0 - assert result["vertex_count"] < zvr[0].vertex_count + assert result["vertex_count"] < zv[0].vertex_count # Polyline access - poly_5 = zvr[0].polylines[5].compute() + poly_5 = zv[0].polylines[5].compute() assert poly_5.ndim == 2 and poly_5.shape[1] == 3 # Compute all polylines in parallel - all_polys = zvr[0].polylines.compute() + all_polys = zv[0].polylines.compute() assert len(all_polys) == 100 @@ -65,15 +65,15 @@ class TestLazyDaskParallel: def test_dask_compute_chunks(self, tmp_path: Path) -> None: import dask from zarr_vectors.types.points import write_points - from zarr_vectors.lazy import open_zvr + from zarr_vectors.lazy import open_zv rng = np.random.default_rng(42) store = str(tmp_path / "pts.zv") positions = rng.uniform(0, 400, size=(5000, 3)).astype(np.float32) write_points(store, positions, chunk_shape=(100., 100., 100.)) - zvr = open_zvr(store) - delayed_chunks = zvr[0].vertices.to_delayed() + zv = open_zv(store) + delayed_chunks = zv[0].vertices.to_delayed() assert len(delayed_chunks) > 1 # Custom per-chunk processing @@ -237,11 +237,11 @@ def test_length_rechunk(self, tmp_path: Path) -> None: class TestRechunkViaLazy: - """Rechunk via lazy API: zvr[0].rechunk().""" + """Rechunk via lazy API: zv[0].rechunk().""" def test_lazy_rechunk(self, tmp_path: Path) -> None: from zarr_vectors.types.polylines import write_polylines - from zarr_vectors.lazy import open_zvr + from zarr_vectors.lazy import open_zv rng = np.random.default_rng(42) polys = _make_streamlines(rng, 40) @@ -251,14 +251,14 @@ def test_lazy_rechunk(self, tmp_path: Path) -> None: groups={0: list(range(20)), 1: list(range(20, 40))}, ) - zvr = open_zvr(store) + zv = open_zv(store) out = str(tmp_path / "rechunked.zv") - result = zvr[0].rechunk(by="group", output=out) + result = zv[0].rechunk(by="group", output=out) assert result["bins_created"] == 2 # Read rechunked store via lazy API - zvr_rc = open_zvr(out) - assert 
zvr_rc[0].vertices.compute().shape[0] > 0 + zv_rc = open_zv(out) + assert zv_rc[0].vertices.compute().shape[0] > 0 class TestCompositeStore: @@ -268,7 +268,7 @@ def test_composite_pipeline(self, tmp_path: Path) -> None: from zarr_vectors.types.points import write_points, read_points from zarr_vectors.composite import add_geometry, read_composite from zarr_vectors.validate import validate - from zarr_vectors.lazy import open_zvr + from zarr_vectors.lazy import open_zv rng = np.random.default_rng(42) store = str(tmp_path / "brain.zv") @@ -300,9 +300,9 @@ def test_composite_pipeline(self, tmp_path: Path) -> None: assert validate(store, level=4).ok # Lazy API - zvr = open_zvr(store) - assert len(zvr.geometry_types) == 3 - assert zvr[0].vertices.compute().shape[0] == 1000 + zv = open_zv(store) + assert len(zv.geometry_types) == 3 + assert zv[0].vertices.compute().shape[0] == 1000 class TestBackwardCompat: diff --git a/tests/test_attr_chunking.py b/tests/test_attr_chunking.py index b07c0a1..27ded7a 100644 --- a/tests/test_attr_chunking.py +++ b/tests/test_attr_chunking.py @@ -16,7 +16,7 @@ from zarr_vectors.core.attr_chunking import assign_attribute_bins from zarr_vectors.core.store import open_store, read_level_metadata from zarr_vectors.exceptions import ArrayError -from zarr_vectors.lazy.store import open_zvr +from zarr_vectors.lazy.store import open_zv from zarr_vectors.types.graphs import read_graph, write_graph from zarr_vectors.types.lines import read_lines, write_lines from zarr_vectors.types.meshes import read_mesh, write_mesh @@ -69,7 +69,7 @@ def _make_two_gene_cloud(seed: int = 0, n: int = 300): def test_points_attr_chunking_round_trip(tmp_path): pos, gene = _make_two_gene_cloud(seed=0, n=300) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, @@ -96,7 +96,7 @@ def test_points_attr_chunking_round_trip(tmp_path): def test_points_attr_chunking_full_read_returns_all(tmp_path): pos, gene = _make_two_gene_cloud(seed=1, n=400) - 
store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, @@ -110,7 +110,7 @@ def test_points_attr_chunking_full_read_returns_all(tmp_path): def test_points_attr_filter_selectivity(tmp_path): pos, gene = _make_two_gene_cloud(seed=2, n=600) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, @@ -129,7 +129,7 @@ def test_points_attr_filter_selectivity(tmp_path): def test_points_attr_filter_unknown_value_returns_empty(tmp_path): pos, gene = _make_two_gene_cloud(seed=3, n=200) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), @@ -144,7 +144,7 @@ def test_points_chunk_by_attribute_missing_attribute_raises(tmp_path): pos = np.random.default_rng(0).uniform(0, 10, (50, 3)).astype("f4") with pytest.raises(ArrayError, match="must name a key"): write_points( - str(tmp_path / "x.zvr"), pos, + str(tmp_path / "x.zv"), pos, chunk_shape=(10.0, 10.0, 10.0), chunk_by_attribute="nonexistent", ) @@ -154,7 +154,7 @@ def test_points_chunk_by_attribute_rejects_float(tmp_path): pos = np.random.default_rng(0).uniform(0, 10, (50, 3)).astype("f4") with pytest.raises(ArrayError, match="categorical-only"): write_points( - str(tmp_path / "x.zvr"), pos, + str(tmp_path / "x.zv"), pos, chunk_shape=(10.0, 10.0, 10.0), vertex_attributes={"score": np.random.default_rng(0).uniform(0, 1, 50)}, chunk_by_attribute="score", @@ -164,7 +164,7 @@ def test_points_chunk_by_attribute_rejects_float(tmp_path): def test_points_attribute_filter_on_non_attr_store_raises(tmp_path): """attribute_filter only makes sense for attribute-chunked stores.""" pos = np.random.default_rng(0).uniform(0, 10, (50, 3)).astype("f4") - store = tmp_path / "plain.zvr" + store = tmp_path / "plain.zv" write_points(str(store), pos, chunk_shape=(10.0, 10.0, 10.0)) with pytest.raises(ArrayError, match="chunk_attribute_name"): read_points(str(store), attribute_filter={"gene": "A"}) @@ -172,7 +172,7 @@ def 
test_points_attribute_filter_on_non_attr_store_raises(tmp_path): def test_points_attribute_filter_mismatched_name_raises(tmp_path): pos, gene = _make_two_gene_cloud(seed=4, n=100) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), @@ -204,7 +204,7 @@ def test_polylines_attr_chunking_splits_mixed_polyline(tmp_path): np.array(["A"] * 10), np.array(["A"] * 5 + ["B"] * 5), ] - store = tmp_path / "tr.zvr" + store = tmp_path / "tr.zv" write_polylines( str(store), polys, chunk_shape=(50.0, 50.0, 50.0), @@ -239,7 +239,7 @@ def test_polylines_attr_chunking_chunk_keys_are_4d(tmp_path): rng = np.random.default_rng(5) polys = [rng.uniform(0, 100, (8, 3)).astype("f4") for _ in range(4)] labels = [np.array(["X"] * 8) if i % 2 == 0 else np.array(["Y"] * 8) for i in range(4)] - store = tmp_path / "tr.zvr" + store = tmp_path / "tr.zv" write_polylines( str(store), polys, chunk_shape=(50.0, 50.0, 50.0), @@ -259,49 +259,49 @@ def test_polylines_attr_chunking_chunk_keys_are_4d(tmp_path): # =================================================================== -def test_zvr_level_attribute_values(tmp_path): +def test_zv_level_attribute_values(tmp_path): pos, gene = _make_two_gene_cloud(seed=6, n=120) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) - zvr = open_zvr(str(store)) - lvl = zvr[0] + zv = open_zv(str(store)) + lvl = zv[0] assert lvl.chunk_attribute_name == "gene" assert lvl.attribute_values == ["A", "B"] assert lvl.chunk_dims is not None assert lvl.chunk_dims[0] == "gene" -def test_zvr_level_read_attribute_chunk(tmp_path): +def test_zv_level_read_attribute_chunk(tmp_path): pos, gene = _make_two_gene_cloud(seed=7, n=200) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), vertex_attributes={"gene": gene}, 
chunk_by_attribute="gene", ) - zvr = open_zvr(str(store)) - a_groups = zvr[0].read_attribute_chunk("A") + zv = open_zv(str(store)) + a_groups = zv[0].read_attribute_chunk("A") total_a_verts = sum(len(g) for g in a_groups) assert total_a_verts == int((gene == "A").sum()) -def test_zvr_level_read_attribute_chunk_unknown_value(tmp_path): +def test_zv_level_read_attribute_chunk_unknown_value(tmp_path): pos, gene = _make_two_gene_cloud(seed=8, n=80) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), vertex_attributes={"gene": gene}, chunk_by_attribute="gene", ) - zvr = open_zvr(str(store)) - assert zvr[0].read_attribute_chunk("missing") == [] + zv = open_zv(str(store)) + assert zv[0].read_attribute_chunk("missing") == [] # =================================================================== @@ -322,7 +322,7 @@ def _make_categorised_lines(seed: int = 0, n: int = 12): def test_lines_attr_chunking_line_attribute_round_trip(tmp_path): eps, cat = _make_categorised_lines(seed=0, n=16) - store = tmp_path / "lines.zvr" + store = tmp_path / "lines.zv" write_lines( str(store), eps, chunk_shape=(50.0, 50.0, 50.0), @@ -349,7 +349,7 @@ def test_lines_per_endpoint_attribute_must_match_per_line(tmp_path): bad = np.array([["A", "B"]] * 8) with pytest.raises(ArrayError, match="endpoints"): write_lines( - str(tmp_path / "x.zvr"), eps, + str(tmp_path / "x.zv"), eps, chunk_shape=(50.0, 50.0, 50.0), vertex_attributes={"cat": bad}, chunk_by_attribute="cat", @@ -360,7 +360,7 @@ def test_lines_chunk_by_attribute_missing_raises(tmp_path): eps, _ = _make_categorised_lines(seed=2, n=6) with pytest.raises(ArrayError, match="must name a key"): write_lines( - str(tmp_path / "x.zvr"), eps, + str(tmp_path / "x.zv"), eps, chunk_shape=(50.0, 50.0, 50.0), chunk_by_attribute="nonexistent", ) @@ -387,7 +387,7 @@ def _make_categorised_graph(seed: int = 0, n_objs: int = 4, per_obj: int = 5): def test_graph_attr_chunking_round_trip(tmp_path): 
pos, edges, obj_ids, cell_type = _make_categorised_graph(seed=0) - store = tmp_path / "graph.zvr" + store = tmp_path / "graph.zv" write_graph( str(store), pos, edges, chunk_shape=(50.0, 50.0, 50.0), @@ -414,7 +414,7 @@ def test_graph_attr_chunking_rejects_mixed_object(tmp_path): cell_type[1] = "E" # object 0 now has nodes "I", "E", "I", "I", "I" with pytest.raises(ArrayError, match="per-object uniformity"): write_graph( - str(tmp_path / "x.zvr"), pos, edges, + str(tmp_path / "x.zv"), pos, edges, chunk_shape=(50.0, 50.0, 50.0), object_ids=obj_ids, vertex_attributes={"cell_type": cell_type}, @@ -430,7 +430,7 @@ def test_graph_attr_chunking_default_object_per_node(tmp_path): pos = rng.uniform(0, 100, (n, 3)).astype("f4") edges = np.array([[i, i + 1] for i in range(n - 1)], dtype=np.int64) cell_type = np.array(["A"] * 4 + ["B"] * 4) - store = tmp_path / "g.zvr" + store = tmp_path / "g.zv" write_graph( str(store), pos, edges, chunk_shape=(50.0, 50.0, 50.0), @@ -458,7 +458,7 @@ def test_mesh_attr_chunking_round_trip(tmp_path): obj_ids = np.array([0, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) tissue = np.array(["cortex"] * 4 + ["stem"] * 4) - store = tmp_path / "m.zvr" + store = tmp_path / "m.zv" write_mesh( str(store), verts, faces, chunk_shape=(50.0, 50.0, 50.0), @@ -484,7 +484,7 @@ def test_mesh_attr_chunking_rejects_mixed_object(tmp_path): tissue = np.array(["cortex", "stem", "cortex", "cortex"]) with pytest.raises(ArrayError, match="per-object uniformity"): write_mesh( - str(tmp_path / "x.zvr"), verts, faces, + str(tmp_path / "x.zv"), verts, faces, chunk_shape=(50.0, 50.0, 50.0), object_ids=obj_ids, vertex_attributes={"tissue": tissue}, diff --git a/tests/test_backends.py b/tests/test_backends.py index 3c6c0b2..6fc0c2b 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -40,11 +40,11 @@ @pytest.mark.parametrize( "url,expected", [ - ("/abs/path/store.zvr", ""), + ("/abs/path/store.zv", ""), ("relative/path", ""), - (r"C:\Users\me\store.zvr", ""), # bare 
Windows drive — not a scheme - ("file:///C:/Users/me/store.zvr", "file"), - ("file:///tmp/store.zvr", "file"), + (r"C:\Users\me\store.zv", ""), # bare Windows drive — not a scheme + ("file:///C:/Users/me/store.zv", "file"), + ("file:///tmp/store.zv", "file"), ("s3://bucket/path", "s3"), ("gs://bucket/path", "gs"), ("gcs://bucket/path", "gcs"), @@ -241,7 +241,7 @@ def test_rebind_swap_local_for_local(tmp_path): Trivial but proves the rebind mechanics: same URL must be preserved, cached handles must continue to resolve. """ - store_path = tmp_path / "test.zvr" + store_path = tmp_path / "test.zv" root = create_store(store_path, **_minimal_root_kwargs()) original_url = root.url @@ -255,10 +255,10 @@ def test_rebind_swap_local_for_local(tmp_path): def test_rebind_url_mismatch_raises(tmp_path): """Rebinding to a different URL is a programming error.""" - store_path = tmp_path / "test.zvr" + store_path = tmp_path / "test.zv" root = create_store(store_path, **_minimal_root_kwargs()) - other = LocalBackend(tmp_path / "other.zvr") + other = LocalBackend(tmp_path / "other.zv") with pytest.raises(StoreError, match="matching URLs"): rebind(root, other) @@ -269,12 +269,12 @@ def test_rebind_url_mismatch_raises(tmp_path): def test_create_store_with_explicit_local_backend(tmp_path): - root = create_store(tmp_path / "x.zvr", **_minimal_root_kwargs(), backend="local") + root = create_store(tmp_path / "x.zv", **_minimal_root_kwargs(), backend="local") assert "zarr_vectors" in root.attrs def test_open_store_with_explicit_local_backend(tmp_path): - p = tmp_path / "x.zvr" + p = tmp_path / "x.zv" create_store(p, **_minimal_root_kwargs()) root = open_store(p, backend="local") assert "zarr_vectors" in root.attrs diff --git a/tests/test_chunk_helpers.py b/tests/test_chunk_helpers.py index e4e0b5b..1311b1e 100644 --- a/tests/test_chunk_helpers.py +++ b/tests/test_chunk_helpers.py @@ -70,7 +70,7 @@ def test_neighbours_works_for_4d_keys(): def test_offsets_round_trip(tmp_path): rng = 
np.random.default_rng(0) pos = rng.uniform(0, 100, (777, 3)).astype("f4") - store = tmp_path / "p.zvr" + store = tmp_path / "p.zv" write_points(str(store), pos, chunk_shape=(50.0, 50.0, 50.0)) root = open_store(str(store)) @@ -88,7 +88,7 @@ def test_offsets_empty_store_safe(tmp_path): """An empty level should report 0 chunks and 0 vertices, not raise.""" rng = np.random.default_rng(0) pos = rng.uniform(0, 10, (5, 3)).astype("f4") # one chunk - store = tmp_path / "p.zvr" + store = tmp_path / "p.zv" write_points(str(store), pos, chunk_shape=(100.0, 100.0, 100.0)) root = open_store(str(store)) lvl = get_resolution_level(root, 0) diff --git a/tests/test_cross_chunk_faces.py b/tests/test_cross_chunk_faces.py index 48aca1d..039aa30 100644 --- a/tests/test_cross_chunk_faces.py +++ b/tests/test_cross_chunk_faces.py @@ -31,7 +31,7 @@ def _tetra_straddling_chunks(tmp_path): [1, 2, 3], [0, 1, 3], ], dtype=np.int64) - store = tmp_path / "m.zvr" + store = tmp_path / "m.zv" write_mesh( str(store), verts, faces, chunk_shape=(50.0, 50.0, 50.0), @@ -69,7 +69,7 @@ def test_intra_chunk_mesh_writes_no_cross_chunk_links(tmp_path): faces = np.array([ [0, 1, 2], [0, 1, 3], [1, 2, 3], [0, 2, 3], ], dtype=np.int64) - store = tmp_path / "m.zvr" + store = tmp_path / "m.zv" write_mesh(str(store), verts, faces, chunk_shape=(50.0, 50.0, 50.0)) root = open_store(str(store)) lvl = get_resolution_level(root, 0) diff --git a/tests/test_headers.py b/tests/test_headers.py index 6ffd5d0..44546d6 100644 --- a/tests/test_headers.py +++ b/tests/test_headers.py @@ -7,7 +7,7 @@ import numpy as np from zarr_vectors.headers import HeaderRegistry -from zarr_vectors.lazy import open_zvr +from zarr_vectors.lazy import open_zv from zarr_vectors.types.points import write_points @@ -82,8 +82,8 @@ def test_headers_property_returns_dicts(self, tmp_path: Path) -> None: reg = HeaderRegistry(store) reg.add("trk", {"format_name": "trk", "voxel_size": [1.0, 1.0, 1.0]}) - zvr = open_zvr(store) - headers = zvr.headers + zv 
= open_zv(store) + headers = zv.headers assert "trk" in headers assert isinstance(headers["trk"], dict) assert headers["trk"]["voxel_size"] == [1.0, 1.0, 1.0] diff --git a/tests/test_lazy_writer.py b/tests/test_lazy_writer.py index 0d81b65..165e957 100644 --- a/tests/test_lazy_writer.py +++ b/tests/test_lazy_writer.py @@ -1,4 +1,4 @@ -"""Tests for the ZVRWriter (Tier A + append_vertices).""" +"""Tests for the ZVWriter (Tier A + append_vertices).""" from __future__ import annotations @@ -13,7 +13,7 @@ open_store, read_root_metadata, ) -from zarr_vectors.lazy.store import open_zvr +from zarr_vectors.lazy.store import open_zv from zarr_vectors.types.points import read_points, write_points @@ -24,7 +24,7 @@ def _run(coro): def _make_store(tmp_path, n=200): rng = np.random.default_rng(0) pos = rng.uniform(0, 100, (n, 3)).astype("f4") - store = tmp_path / "p.zvr" + store = tmp_path / "p.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), @@ -43,8 +43,8 @@ def test_add_attribute_round_trip(tmp_path): normals = np.random.default_rng(1).normal(size=(200, 3)).astype("f4") async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.add_attribute("normal", normals) _run(go()) @@ -60,8 +60,8 @@ def test_add_attribute_sync_mirror(tmp_path): rng = np.random.default_rng(2) intensities = rng.uniform(0, 1, 120).astype("f4") - zvr = open_zvr(str(store)) - with zvr[0].writer() as w: + zv = open_zv(str(store)) + with zv[0].writer() as w: w.add_attribute_sync("intensity", intensities) out = read_points(str(store), attribute_names=["intensity"]) @@ -73,8 +73,8 @@ def test_add_attribute_length_mismatch_raises(tmp_path): bad = np.zeros(51, dtype="f4") # one too many async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.add_attribute("bad", bad) from zarr_vectors.exceptions import ArrayError @@ 
-87,8 +87,8 @@ def test_add_object_attribute(tmp_path): store, _ = _make_store(tmp_path, n=80) async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.add_object_attribute("score", np.arange(80, dtype="f4")) _run(go()) @@ -110,8 +110,8 @@ def test_append_vertices_grows_store(tmp_path): new_pos = np.random.default_rng(3).uniform(0, 100, (40, 3)).astype("f4") async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: result = await w.append_vertices(new_pos) return result @@ -132,8 +132,8 @@ def test_append_then_compact_is_a_no_op(tmp_path): store, _ = _make_store(tmp_path, n=60) async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.append_vertices( np.random.default_rng(4).uniform(0, 100, (10, 3)).astype("f4") ) @@ -141,8 +141,8 @@ async def go(): _run(go()) async def do_compact(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: return await w.compact() result = _run(do_compact()) @@ -156,13 +156,13 @@ def test_two_sequential_appends_merge_into_object_index(tmp_path): store, _ = _make_store(tmp_path, n=30) async def go(): - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.append_vertices( np.random.default_rng(5).uniform(0, 100, (5, 3)).astype("f4") ) - zvr = open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.append_vertices( np.random.default_rng(6).uniform(0, 100, (7, 3)).astype("f4") ) @@ -180,8 +180,8 @@ def test_append_vertices_overlap_oid_raises(tmp_path): overlap = np.array([5, 6, 7], dtype=np.int64) # collide with existing async def go(): - zvr = 
open_zvr(str(store)) - async with zvr[0].writer() as w: + zv = open_zv(str(store)) + async with zv[0].writer() as w: await w.append_vertices( np.zeros((3, 3), dtype="f4"), object_ids=overlap, diff --git a/tests/test_per_object_pyramid.py b/tests/test_per_object_pyramid.py index 10c2133..ea4aa2b 100644 --- a/tests/test_per_object_pyramid.py +++ b/tests/test_per_object_pyramid.py @@ -36,7 +36,7 @@ read_level_metadata, read_root_metadata, ) -from zarr_vectors.lazy.store import open_zvr +from zarr_vectors.lazy.store import open_zv from zarr_vectors.multiresolution.coarsen import build_pyramid, coarsen_level from zarr_vectors.spatial.chunking import neighbouring_chunk_keys from zarr_vectors.types.polylines import write_polylines @@ -55,7 +55,7 @@ def _make_streamlines(seed=0, n=20, vpp=30, extent=100.0): def _build_store(tmp_path, seed=0, n=20): - store = tmp_path / "tr.zvr" + store = tmp_path / "tr.zv" write_polylines( str(store), _make_streamlines(seed=seed, n=n, vpp=30), @@ -121,15 +121,15 @@ def test_monotone_oid_drop_across_levels(tmp_path): sparsity_seed=42, ) - zvr = open_zvr(str(store)) + zv = open_zv(str(store)) levels = list_resolution_levels(open_store(str(store))) assert levels == [0, 1, 2] - present_sets = {L: set(zvr[L].present_oids.tolist()) for L in levels} + present_sets = {L: set(zv[L].present_oids.tolist()) for L in levels} assert present_sets[2] <= present_sets[1] <= present_sets[0] # Object_levels for any surviving level-2 OID is a contiguous prefix. for oid in present_sets[2]: - visible = zvr.object_levels(oid) + visible = zv.object_levels(oid) assert visible == list(range(max(visible) + 1)) diff --git a/tests/test_rechunk_by_attribute.py b/tests/test_rechunk_by_attribute.py index d5d0575..fc6d7e1 100644 --- a/tests/test_rechunk_by_attribute.py +++ b/tests/test_rechunk_by_attribute.py @@ -75,7 +75,7 @@ def _make_points_store_with_object_attr(tmp_path, n_objects=30, attr="cluster"): # Assign each object to one of 5 cluster IDs. 
clusters = rng.integers(0, 5, size=n_objects) - store = tmp_path / "src.zvr" + store = tmp_path / "src.zv" write_points( str(store), pos, chunk_shape=(50.0, 50.0, 50.0), @@ -87,7 +87,7 @@ def _make_points_store_with_object_attr(tmp_path, n_objects=30, attr="cluster"): def test_rechunk_by_attribute_wrapper_categorical(tmp_path): src, clusters = _make_points_store_with_object_attr(tmp_path, n_objects=40) - out = tmp_path / "rechunked.zvr" + out = tmp_path / "rechunked.zv" summary = rechunk_by_attribute(str(src), "cluster", output=str(out)) @@ -108,8 +108,8 @@ def test_rechunk_by_attribute_matches_explicit_spec(tmp_path): src1, _ = _make_points_store_with_object_attr(tmp_path / "a", n_objects=20) src2, _ = _make_points_store_with_object_attr(tmp_path / "b", n_objects=20) - out1 = tmp_path / "out1.zvr" - out2 = tmp_path / "out2.zvr" + out1 = tmp_path / "out1.zv" + out2 = tmp_path / "out2.zv" rechunk_by_attribute(str(src1), "cluster", output=str(out1)) rechunk( @@ -138,7 +138,7 @@ def test_rechunk_by_attribute_high_cardinality(tmp_path): clusters = np.arange(n_objects, dtype=np.int64) # 40 unique values obj_ids = np.arange(n_objects, dtype=np.int64) - src = tmp_path / "src.zvr" + src = tmp_path / "src.zv" write_points( str(src), pos, chunk_shape=(50.0, 50.0, 50.0), @@ -146,7 +146,7 @@ def test_rechunk_by_attribute_high_cardinality(tmp_path): object_attributes={"cluster": clusters}, ) - out = tmp_path / "rechunked.zvr" + out = tmp_path / "rechunked.zv" summary = rechunk_by_attribute(str(src), "cluster", output=str(out)) # Without categorical=True this would have been 4 (quartile fallback). 
assert summary["bins_created"] == n_objects diff --git a/tests/test_zv_rename.py b/tests/test_zv_rename.py index d65e04b..501ded5 100644 --- a/tests/test_zv_rename.py +++ b/tests/test_zv_rename.py @@ -71,7 +71,7 @@ def test_new_writes_emit_zv_array_key(tmp_path): """``write_chunk_vertices`` writes the new ``zv_array`` discriminator.""" rng = np.random.default_rng(0) pos = rng.uniform(0, 100, (50, 3)).astype("f4") - store = tmp_path / "p.zvr" + store = tmp_path / "p.zv" write_points(str(store), pos, chunk_shape=(50.0, 50.0, 50.0)) root = open_store(str(store)) diff --git a/zarr_vectors/__init__.py b/zarr_vectors/__init__.py index 66fd5d6..a580c1b 100644 --- a/zarr_vectors/__init__.py +++ b/zarr_vectors/__init__.py @@ -13,7 +13,7 @@ open_store, rebind, ) -from zarr_vectors.lazy.writer import ZVRWriter +from zarr_vectors.lazy.writer import ZVWriter from zarr_vectors.rechunk import RechunkSpec, rechunk, rechunk_by_attribute __version__ = "0.1.0" @@ -29,6 +29,6 @@ "RechunkSpec", "rechunk", "rechunk_by_attribute", - "ZVRWriter", + "ZVWriter", "__version__", ] diff --git a/zarr_vectors/lazy/__init__.py b/zarr_vectors/lazy/__init__.py index 00bc341..ad6370b 100644 --- a/zarr_vectors/lazy/__init__.py +++ b/zarr_vectors/lazy/__init__.py @@ -6,15 +6,15 @@ Usage:: - from zarr_vectors.lazy import open_zvr + from zarr_vectors.lazy import open_zv - zvr = open_zvr("scan.zarrvectors") - zvr[0].vertices.compute() # materialise all level-0 vertices - zvr[0].vertices[0, 0, 0].compute() # single chunk - zvr[0].attributes["intensity"].compute() + zv = open_zv("scan.zarrvectors") + zv[0].vertices.compute() # materialise all level-0 vertices + zv[0].vertices[0, 0, 0].compute() # single chunk + zv[0].attributes["intensity"].compute() """ -from zarr_vectors.lazy.store import ZVRStore, open_zvr -from zarr_vectors.lazy.views import ZVRView, ZVRPolylineCollection +from zarr_vectors.lazy.store import ZVStore, open_zv +from zarr_vectors.lazy.views import ZVView, ZVPolylineCollection -__all__ = 
["ZVRStore", "ZVRView", "ZVRPolylineCollection", "open_zvr"] +__all__ = ["ZVStore", "ZVView", "ZVPolylineCollection", "open_zv"] diff --git a/zarr_vectors/lazy/arrays.py b/zarr_vectors/lazy/arrays.py index e9d7f18..a0018fd 100644 --- a/zarr_vectors/lazy/arrays.py +++ b/zarr_vectors/lazy/arrays.py @@ -60,7 +60,7 @@ def __call__(self, *args, **kwargs): # =================================================================== -class ZVRVertexCollection: +class ZVVertexCollection: """Lazy collection of vertices across chunks. No data is read until ``.compute()`` or ``.to_delayed()`` is called. @@ -98,7 +98,7 @@ def __getitem__(self, *coords) -> Any: verts[0, 0, 0].compute() # read chunk (0,0,0) """ - # Handle zvr[0].vertices[0, 0, 0] and zvr[0].vertices[(0, 0, 0)] + # Handle zv[0].vertices[0, 0, 0] and zv[0].vertices[(0, 0, 0)] if len(coords) == 1 and isinstance(coords[0], tuple): chunk_coords = coords[0] else: @@ -155,7 +155,7 @@ def __iter__(self): def __repr__(self) -> str: return ( - f"ZVRVertexCollection(" + f"ZVVertexCollection(" f"chunks={len(self._chunk_keys)}, " f"vertices={self._vertex_count}, " f"ndim={self._ndim}, " @@ -168,7 +168,7 @@ def __repr__(self) -> str: # =================================================================== -class ZVRAttributeCollection: +class ZVAttributeCollection: """Lazy collection of per-vertex attributes across chunks. Args: @@ -213,7 +213,7 @@ def compute(self) -> npt.NDArray: def __repr__(self) -> str: return ( - f"ZVRAttributeCollection('{self._attr_name}', " + f"ZVAttributeCollection('{self._attr_name}', " f"chunks={len(self._chunk_keys)})" ) @@ -223,7 +223,7 @@ def __repr__(self) -> str: # =================================================================== -class ZVRObjectIndex: +class ZVObjectIndex: """Lazy accessor for object manifests. The full manifest list is loaded on first access and cached. 
@@ -269,7 +269,7 @@ def __len__(self) -> int: return self.object_count def __repr__(self) -> str: - return f"ZVRObjectIndex(objects={self.object_count})" + return f"ZVObjectIndex(objects={self.object_count})" # =================================================================== diff --git a/zarr_vectors/lazy/level.py b/zarr_vectors/lazy/level.py index 3c85e82..c0bb071 100644 --- a/zarr_vectors/lazy/level.py +++ b/zarr_vectors/lazy/level.py @@ -1,4 +1,4 @@ -"""ZVRLevel — lazy handle to a single resolution level.""" +"""ZVLevel — lazy handle to a single resolution level.""" from __future__ import annotations @@ -7,14 +7,14 @@ from zarr_vectors.core.arrays import list_chunk_keys from zarr_vectors.core.metadata import LevelMetadata, RootMetadata from zarr_vectors.core.store import FsGroup -from zarr_vectors.lazy.arrays import ZVRAttributeCollection, ZVRVertexCollection, ZVRObjectIndex +from zarr_vectors.lazy.arrays import ZVAttributeCollection, ZVVertexCollection, ZVObjectIndex from zarr_vectors.typing import ChunkCoords import numpy.typing as npt import numpy as np -class ZVRLevel: +class ZVLevel: """Lazy handle to one resolution level. Chunk listings are cached on first access. 
Vertex and attribute @@ -40,9 +40,9 @@ def __init__( self._root_meta = root_meta self._level_meta = level_meta self._chunk_keys_cache: list[ChunkCoords] | None = None - self._vertices_cache: ZVRVertexCollection | None = None - self._attributes_cache: dict[str, ZVRAttributeCollection] = {} - self._object_index_cache: ZVRObjectIndex | None = None + self._vertices_cache: ZVVertexCollection | None = None + self._attributes_cache: dict[str, ZVAttributeCollection] = {} + self._object_index_cache: ZVObjectIndex | None = None # --------------------------------------------------------------- # Metadata properties @@ -226,10 +226,10 @@ def read_attribute_chunk(self, value: Any) -> list[npt.NDArray]: # --------------------------------------------------------------- @property - def vertices(self) -> ZVRVertexCollection: + def vertices(self) -> ZVVertexCollection: """Lazy vertex collection for this level.""" if self._vertices_cache is None: - self._vertices_cache = ZVRVertexCollection( + self._vertices_cache = ZVVertexCollection( level_group=self._group, chunk_keys=self.chunk_keys, ndim=self._root_meta.sid_ndim, @@ -248,9 +248,9 @@ def attributes(self) -> _AttributeAccessor: """ return _AttributeAccessor(self) - def _get_attribute(self, name: str) -> ZVRAttributeCollection: + def _get_attribute(self, name: str) -> ZVAttributeCollection: if name not in self._attributes_cache: - self._attributes_cache[name] = ZVRAttributeCollection( + self._attributes_cache[name] = ZVAttributeCollection( level_group=self._group, attr_name=name, chunk_keys=self.chunk_keys, @@ -258,10 +258,10 @@ def _get_attribute(self, name: str) -> ZVRAttributeCollection: return self._attributes_cache[name] @property - def object_index(self) -> ZVRObjectIndex: + def object_index(self) -> ZVObjectIndex: """Lazy object index accessor.""" if self._object_index_cache is None: - self._object_index_cache = ZVRObjectIndex(self._group) + self._object_index_cache = ZVObjectIndex(self._group) return 
self._object_index_cache # --------------------------------------------------------------- @@ -274,7 +274,7 @@ def filter( bbox: tuple[npt.NDArray, npt.NDArray] | None = None, object_ids: list[int] | None = None, group_ids: list[int] | None = None, - ) -> "ZVRView": + ) -> "ZVView": """Apply filter constraints, returning a lazy filtered view. Filters can be chained: ``level.filter(group_ids=[0]).filter(bbox=roi)``. @@ -285,10 +285,10 @@ def filter( group_ids: Keep only objects in these groups. Returns: - A :class:`ZVRView` with the specified constraints. + A :class:`ZVView` with the specified constraints. """ - from zarr_vectors.lazy.views import ZVRView, FilterSpec - view = ZVRView( + from zarr_vectors.lazy.views import ZVView, FilterSpec + view = ZVView( self._group, self._root_meta, self._level_meta, self.chunk_keys, FilterSpec(), ) @@ -300,34 +300,34 @@ def filter( # Mutation (write-back) handle # --------------------------------------------------------------- - def writer(self) -> "ZVRWriter": - """Return a :class:`ZVRWriter` for mutating this level. + def writer(self) -> "ZVWriter": + """Return a :class:`ZVWriter` for mutating this level. Use as an async or sync context manager:: - async with zvr[0].writer() as w: + async with zv[0].writer() as w: await w.add_attribute("normal", normals) Single-writer-only — concurrent writers on the same level can race on object_index sidecar batch numbering. """ - from zarr_vectors.lazy.writer import ZVRWriter - return ZVRWriter(self) + from zarr_vectors.lazy.writer import ZVWriter + return ZVWriter(self) # --------------------------------------------------------------- # Geometry-specific collections # --------------------------------------------------------------- @property - def polylines(self) -> "ZVRPolylineCollection": + def polylines(self) -> "ZVPolylineCollection": """Lazy polyline collection for streamline/polyline geometry. 
Each polyline is accessible by object ID:: level.polylines[42].compute() # full streamline """ - from zarr_vectors.lazy.views import ZVRPolylineCollection - return ZVRPolylineCollection(self._group, ndim=self._root_meta.sid_ndim) + from zarr_vectors.lazy.views import ZVPolylineCollection + return ZVPolylineCollection(self._group, ndim=self._root_meta.sid_ndim) def rechunk( self, @@ -366,7 +366,7 @@ def rechunk( def __repr__(self) -> str: bs = self.bin_shape or self._root_meta.effective_bin_shape return ( - f"ZVRLevel({self._level_index}, " + f"ZVLevel({self._level_index}, " f"vertices={self.vertex_count}, " f"chunks={self.chunk_count}, " f"bin_shape={bs})" @@ -376,10 +376,10 @@ def __repr__(self) -> str: class _AttributeAccessor: """Dict-like proxy for lazy attribute access.""" - def __init__(self, level: ZVRLevel) -> None: + def __init__(self, level: ZVLevel) -> None: self._level = level - def __getitem__(self, name: str) -> ZVRAttributeCollection: + def __getitem__(self, name: str) -> ZVAttributeCollection: return self._level._get_attribute(name) def __contains__(self, name: str) -> bool: diff --git a/zarr_vectors/lazy/store.py b/zarr_vectors/lazy/store.py index 3fb64b0..d4a73cc 100644 --- a/zarr_vectors/lazy/store.py +++ b/zarr_vectors/lazy/store.py @@ -1,4 +1,4 @@ -"""ZVRStore — lazy wrapper around an open zarr vectors store. +"""ZVStore — lazy wrapper around an open zarr vectors store. No data is read until ``.compute()`` is called on a collection. Metadata (root attrs, level attrs, chunk listings) is loaded on @@ -21,10 +21,10 @@ rebind, ) from zarr_vectors.core.metadata import RootMetadata, LevelMetadata -from zarr_vectors.lazy.level import ZVRLevel +from zarr_vectors.lazy.level import ZVLevel -class ZVRStore: +class ZVStore: """Lazy handle to a zarr vectors store. Attributes are read from ``.zattrs`` on first access and cached. 
@@ -38,7 +38,7 @@ class ZVRStore: def __init__(self, root: Group, meta: RootMetadata) -> None: self._root = root self._meta = meta - self._levels_cache: dict[int, ZVRLevel] = {} + self._levels_cache: dict[int, ZVLevel] = {} self._level_list: list[int] | None = None # --------------------------------------------------------------- @@ -115,7 +115,7 @@ def headers(self) -> dict[str, dict[str, Any]]: # Level access # --------------------------------------------------------------- - def __getitem__(self, level: int) -> ZVRLevel: + def __getitem__(self, level: int) -> ZVLevel: """Get a lazy handle to a resolution level.""" if level not in self._levels_cache: from zarr_vectors.core.store import get_resolution_level @@ -124,7 +124,7 @@ def __getitem__(self, level: int) -> ZVRLevel: level_meta = read_level_metadata(self._root, level) except Exception: level_meta = None - self._levels_cache[level] = ZVRLevel( + self._levels_cache[level] = ZVLevel( level_group, level, self._meta, level_meta, ) return self._levels_cache[level] @@ -136,7 +136,7 @@ def __getitem__(self, level: int) -> ZVRLevel: def __repr__(self) -> str: types = ", ".join(self.geometry_types) return ( - f"ZVRStore('{self._root.url}', " + f"ZVStore('{self._root.url}', " f"levels={self.levels}, " f"geometry=[{types}], " f"chunk={self.chunk_shape}, " @@ -189,12 +189,12 @@ def object_levels(self, oid: int) -> list[int]: return out -def open_zvr( +def open_zv( path: str | Path, *, backend: str | None = None, **backend_kwargs: Any, -) -> ZVRStore: +) -> ZVStore: """Open a zarr vectors store lazily. Reads only root metadata (a few KB). No vertex data is loaded. @@ -206,13 +206,13 @@ def open_zvr( **backend_kwargs: Forwarded to the backend constructor. Returns: - A :class:`ZVRStore` handle for lazy access. + A :class:`ZVStore` handle for lazy access. 
""" - # mode="r+" so the writer() handles returned by ZVRLevel / ZVRStore + # mode="r+" so the writer() handles returned by ZVLevel / ZVStore # can mutate without an extra reopen. Pure readers pay no cost for # this — the actual reads still touch only the chunks they need. root = open_store(str(path), mode="r+", backend=backend, **backend_kwargs) meta = read_root_metadata(root) - return ZVRStore(root, meta) + return ZVStore(root, meta) diff --git a/zarr_vectors/lazy/views.py b/zarr_vectors/lazy/views.py index 811750e..3042c17 100644 --- a/zarr_vectors/lazy/views.py +++ b/zarr_vectors/lazy/views.py @@ -1,11 +1,11 @@ """Lazy filtered views and geometry-specific collections. -``ZVRView`` is a filtered projection of a ``ZVRLevel`` that narrows +``ZVView`` is a filtered projection of a ``ZVLevel`` that narrows which chunks, bins, objects, or groups will be read. Filters chain: each ``.filter()`` returns a new view with the intersection of all constraints. Data is materialised only on ``.compute()``. -``ZVRPolylineCollection`` provides per-object lazy access to +``ZVPolylineCollection`` provides per-object lazy access to polylines/streamlines. """ @@ -83,14 +83,14 @@ def _intersect_sets(a, b): # =================================================================== -# ZVRView — filtered lazy view +# ZVView — filtered lazy view # =================================================================== -class ZVRView: +class ZVView: """A filtered lazy view of a resolution level. - Created by calling ``.filter()`` on a ``ZVRLevel`` or another - ``ZVRView``. Each filter narrows the read plan; data is only + Created by calling ``.filter()`` on a ``ZVLevel`` or another + ``ZVView``. Each filter narrows the read plan; data is only loaded on ``.compute()``. 
Args: @@ -121,7 +121,7 @@ def filter( bbox: tuple[npt.NDArray, npt.NDArray] | None = None, object_ids: list[int] | None = None, group_ids: list[int] | None = None, - ) -> ZVRView: + ) -> ZVView: """Apply additional filter constraints, returning a new view. Args: @@ -131,7 +131,7 @@ def filter( to object IDs via groupings). Returns: - A new ``ZVRView`` with the intersection of all constraints. + A new ``ZVView`` with the intersection of all constraints. """ new_spec = FilterSpec() @@ -172,7 +172,7 @@ def filter( new_spec.target_object_ids = resolved merged = self._spec.intersect(new_spec) - return ZVRView( + return ZVView( self._group, self._root_meta, self._level_meta, self._all_chunk_keys, merged, ) @@ -311,13 +311,13 @@ def __repr__(self) -> str: if self._spec.bbox is not None: parts.append("bbox=set") desc = ", ".join(parts) if parts else "unfiltered" - return f"ZVRView({desc})" + return f"ZVView({desc})" class _FilteredVertices: """Vertex accessor on a filtered view.""" - def __init__(self, view: ZVRView) -> None: + def __init__(self, view: ZVView) -> None: self._view = view def compute(self) -> npt.NDArray[np.floating]: @@ -329,10 +329,10 @@ def __repr__(self) -> str: # =================================================================== -# ZVRPolylineCollection — per-object lazy polyline access +# ZVPolylineCollection — per-object lazy polyline access # =================================================================== -class ZVRPolylineCollection: +class ZVPolylineCollection: """Lazy collection of polylines accessible by object ID. 
Each polyline is reconstructed by following its object_index @@ -421,7 +421,7 @@ def filter( ) def __repr__(self) -> str: - return f"ZVRPolylineCollection(count={self.count})" + return f"ZVPolylineCollection(count={self.count})" class FilteredPolylineCollection: @@ -429,7 +429,7 @@ class FilteredPolylineCollection: def __init__( self, - parent: ZVRPolylineCollection, + parent: ZVPolylineCollection, *, object_ids: list[int] | None = None, length_range: tuple[float, float] | None = None, diff --git a/zarr_vectors/lazy/writer.py b/zarr_vectors/lazy/writer.py index d909c7b..11e70a4 100644 --- a/zarr_vectors/lazy/writer.py +++ b/zarr_vectors/lazy/writer.py @@ -1,4 +1,4 @@ -"""ZVRWriter — lazy / async mutation handle for a single ``ZVRLevel``. +"""ZVWriter — lazy / async mutation handle for a single ``ZVLevel``. Adds the write-back surface the algorithms package needs: @@ -43,28 +43,28 @@ from zarr_vectors.typing import ChunkCoords, ObjectManifest if TYPE_CHECKING: - from zarr_vectors.lazy.level import ZVRLevel + from zarr_vectors.lazy.level import ZVLevel -class ZVRWriter: - """Mutation handle for one :class:`ZVRLevel`. +class ZVWriter: + """Mutation handle for one :class:`ZVLevel`. - Acquire one via ``zvr[0].writer()``. Holds a reference to the + Acquire one via ``zv[0].writer()``. Holds a reference to the level's :class:`Group` so all mutations go through the same backend the reader uses. 
Usage:: # Async — recommended for cloud stores - async with zvr[0].writer() as w: + async with zv[0].writer() as w: await w.add_attribute("normal", normals) # Sync — convenient for scripts - with zvr[0].writer() as w: + with zv[0].writer() as w: w.add_attribute_sync("normal", normals) """ - def __init__(self, level: ZVRLevel) -> None: + def __init__(self, level: ZVLevel) -> None: self._level = level self._group = level._group self._committed = False @@ -75,14 +75,14 @@ def __init__(self, level: ZVRLevel) -> None: # ---------------- context manager ----------------------------------- - async def __aenter__(self) -> ZVRWriter: + async def __aenter__(self) -> ZVWriter: return self async def __aexit__(self, exc_type, exc, tb) -> None: if exc is None: await self.commit() - def __enter__(self) -> ZVRWriter: + def __enter__(self) -> ZVWriter: return self def __exit__(self, exc_type, exc, tb) -> None: