AllenInstitute · Andrew-Keenlyside · May 15, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/benchmarks/01_size_scaling.ipynb b/benchmarks/01_size_scaling.ipynb
@@ -4,21 +4,14 @@
    "cell_type": "markdown",
    "id": "9b315ef7",
    "metadata": {},
-   "source": [
-    "# Size scaling — point cloud\n",
-    "\n",
-    "Write/read/disk-size of point clouds at increasing `N`. Same\n",
-    "`chunk_shape` across runs so the only variable is vertex count.\n",
-    "\n",
-    "Runtime: a few minutes on a laptop (the 1M case dominates)."
-   ]
+   "source": "# Size scaling — point cloud\n\nWrite/read/disk-size of point clouds at increasing `N`, with **CSV as a\nbaseline** for context. Same `chunk_shape` across runs so the only\nvariable is vertex count.\n\nFor each `N` we measure:\n\n| Operation | zarr-vectors | CSV (baseline) |\n| --- | --- | --- |\n| Write   | `write_points` | `DataFrame.to_csv` |\n| Read all  | `read_points` | `pandas.read_csv` |\n| Read one  | one chunk via lazy API | `read_csv(nrows=1)` (best case) |\n| Disk size | store directory | CSV file |\n\nRuntime: a few minutes on a laptop (the 1M case dominates)."
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "b97bfc29",
    "metadata": {},
    "outputs": [],
    "source": [
     "import os, time, tempfile, shutil\n",
     "from pathlib import Path\n",
@@ -60,14 +65,7 @@
    "id": "5bcdba96",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from zarr_vectors.types.points import write_points, read_points\n",
-    "\n",
-    "SIZES = [1_000, 10_000, 100_000, 1_000_000]\n",
-    "CHUNK = (200.0, 200.0, 200.0)\n",
-    "BIN   = (50.0, 50.0, 50.0)\n",
-    "SEED  = 0"
-   ]
+   "source": "from zarr_vectors.types.points import write_points, read_points\nfrom zarr_vectors.lazy import open_zv\n\nSIZES = [1_000, 10_000, 100_000, 1_000_000]\nCHUNK = (200.0, 200.0, 200.0)\nBIN   = (50.0, 50.0, 50.0)\nSEED  = 0\n\n\ndef _csv_path(prefix):\n    \"\"\"Fresh tempdir + CSV path.\"\"\"\n    return Path(tempfile.mkdtemp(prefix=f'csvbench_{prefix}_')) / 'points.csv'\n\n\ndef _csv_write(path, positions, intensity):\n    \"\"\"Baseline: write x,y,z,intensity columns to a CSV.\"\"\"\n    pd.DataFrame({\n        'x': positions[:, 0],\n        'y': positions[:, 1],\n        'z': positions[:, 2],\n        'intensity': intensity,\n    }).to_csv(path, index=False)\n\n\ndef _csv_read_all(path):\n    \"\"\"Read every row back into memory.\"\"\"\n    return pd.read_csv(path)\n\n\ndef _csv_read_one(path):\n    \"\"\"Best-case single-row read: only parse the first data row.\n\n    CSV has no random access, so this is the cheapest single-record\n    read the format admits.\"\"\"\n    return pd.read_csv(path, nrows=1)\n\n\ndef _zv_read_one(store_path):\n    \"\"\"Read just one chunk's worth of vertices via the lazy API.\n\n    Touches a single chunk on disk (vs. the full materialisation in\n    ``read_points``).\"\"\"\n    zv = open_zv(store_path)\n    chunk_keys = zv[0].vertices._chunk_keys  # noqa: SLF001 — minimal demo\n    if not chunk_keys:\n        return None\n    return zv[0].vertices[chunk_keys[0]].compute()"
   },
   {
    "cell_type": "markdown",
@@ -83,30 +81,7 @@
    "id": "d0b220e0",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "rng = np.random.default_rng(SEED)\n",
-    "rows = []\n",
-    "for n in SIZES:\n",
-    "    positions = rng.uniform(0, 1000, (n, 3)).astype(np.float32)\n",
-    "    intensity = rng.uniform(0, 1, n).astype(np.float32)\n",
-    "\n",
-    "    store = _new_store(f'size_{n}')\n",
-    "    t_write, _ = _time(\n",
-    "        write_points, store, positions,\n",
-    "        chunk_shape=CHUNK, bin_shape=BIN,\n",
-    "        attributes={'intensity': intensity},\n",
-    "    )\n",
-    "    t_read, _ = _time(read_points, store, attribute_names=['intensity'])\n",
-    "    rows.append({\n",
-    "        'N': n,\n",
-    "        'write_s': round(t_write, 3),\n",
-    "        'read_s':  round(t_read,  3),\n",
-    "        'size_MB': round(_store_bytes(store) / 1e6, 2),\n",
-    "    })\n",
-    "    shutil.rmtree(Path(store).parent, ignore_errors=True)\n",
-    "\n",
-    "df = pd.DataFrame(rows)"
-   ]
+   "source": "rng = np.random.default_rng(SEED)\nrows = []\nfor n in SIZES:\n    positions = rng.uniform(0, 1000, (n, 3)).astype(np.float32)\n    intensity = rng.uniform(0, 1, n).astype(np.float32)\n\n    # ---- ZV ----\n    store = _new_store(f'size_{n}')\n    t_zv_write, _ = _time(\n        write_points, store, positions,\n        chunk_shape=CHUNK, bin_shape=BIN,\n        attributes={'intensity': intensity},\n    )\n    t_zv_read_all, _ = _time(read_points, store, attribute_names=['intensity'])\n    t_zv_read_one, _ = _time(_zv_read_one, store)\n    size_zv_MB = _store_bytes(store) / 1e6\n\n    # ---- CSV baseline ----\n    csv = _csv_path(f'size_{n}')\n    t_csv_write, _ = _time(_csv_write, csv, positions, intensity)\n    t_csv_read_all, _ = _time(_csv_read_all, csv)\n    t_csv_read_one, _ = _time(_csv_read_one, csv)\n    size_csv_MB = csv.stat().st_size / 1e6\n\n    rows.append({\n        'N': n,\n        'zv_write_s':    round(t_zv_write,    4),\n        'csv_write_s':   round(t_csv_write,   4),\n        'zv_read_all_s': round(t_zv_read_all, 4),\n        'csv_read_all_s':round(t_csv_read_all,4),\n        'zv_read_one_s': round(t_zv_read_one, 4),\n        'csv_read_one_s':round(t_csv_read_one,4),\n        'zv_size_MB':    round(size_zv_MB,  2),\n        'csv_size_MB':   round(size_csv_MB, 2),\n    })\n\n    shutil.rmtree(Path(store).parent, ignore_errors=True)\n    shutil.rmtree(csv.parent, ignore_errors=True)\n\ndf = pd.DataFrame(rows)"
   },
   {
    "cell_type": "markdown",
@@ -140,22 +115,12 @@
    "id": "6ca88043",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "fig, ax = plt.subplots(figsize=(6, 4))\n",
-    "ax.loglog(df['N'], df['write_s'], 'o-', label='write (s)')\n",
-    "ax.loglog(df['N'], df['read_s'],  's-', label='read (s)')\n",
-    "ax.loglog(df['N'], df['size_MB'], '^-', label='size (MB)')\n",
-    "ax.set_xlabel('N (vertices)')\n",
-    "ax.set_title('Point cloud: write/read time + disk footprint vs N')\n",
-    "ax.legend()\n",
-    "ax.grid(True, which='both', alpha=0.3)\n",
-    "plt.tight_layout()"
-   ]
+   "source": "fig, axes = plt.subplots(1, 4, figsize=(20, 4.5), sharex=True)\n\npanels = [\n    ('Write time', 'write_s',    'zv_write_s',    'csv_write_s',    's'),\n    ('Read all',   'read_all_s', 'zv_read_all_s', 'csv_read_all_s', 's'),\n    ('Read one',   'read_one_s', 'zv_read_one_s', 'csv_read_one_s', 's'),\n    ('Disk size',  'size_MB',    'zv_size_MB',    'csv_size_MB',    'MB'),\n]\nfor ax, (title, _key, zv_col, csv_col, unit) in zip(axes, panels):\n    ax.loglog(df['N'], df[zv_col],  'o-', label='zarr-vectors', color='tab:blue')\n    ax.loglog(df['N'], df[csv_col], 's-', label='csv',          color='tab:orange')\n    ax.set_title(title)\n    ax.set_xlabel('N (vertices)')\n    ax.set_ylabel(unit)\n    ax.grid(True, which='both', alpha=0.3)\n    ax.legend()\n\nfig.suptitle('zarr-vectors vs CSV — point cloud scaling', y=1.02)\nplt.tight_layout()"
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "zarr-vectors",
+   "display_name": ".venv (3.13.13)",
    "language": "python",
    "name": "python3"
   },
@@ -169,9 +134,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.15"
+   "version": "3.13.13"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/docs/spec/chunking/attribute_chunking.md b/docs/spec/chunking/attribute_chunking.md
@@ -154,7 +154,7 @@ a bin index via the stored `chunk_attribute_values` list, and the chunk
 scan is restricted to keys with that leading coord. Unknown values
 yield an empty result rather than an error.
 
-Lazy readers (`ZVRLevel`) expose:
+Lazy readers (`ZVLevel`) expose:
 
 - `chunk_dims` — the level's chunk-axis names, or `None` for legacy.
 - `chunk_attribute_name` — the leading-axis attribute name.

diff --git a/docs/spec/foundations/store_types.md b/docs/spec/foundations/store_types.md
@@ -40,7 +40,7 @@
 ## Introduction
 
 ZV stores are backend-agnostic: the same `create_store` / `open_store` /
-`open_zvr` calls work whether the data lives on a local SSD, a ZIP
+`open_zv` calls work whether the data lives on a local SSD, a ZIP
 archive, an in-memory dict, or a cloud object store. The backing store
 type affects performance characteristics (latency, throughput, cost per
 request) but not the data model or the semantics of any operation.
@@ -69,11 +69,12 @@ All three entry points accept `backend=` and `**backend_kwargs`:
 
 ```python
 from zarr_vectors.core.store import create_store, open_store
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
-create_store(path, root_metadata, *, backend=None, **backend_kwargs) -> Group
+create_store(path, *, bounds=None, chunk_shape=None, axes=None,
+              geometry_types=None, ..., backend=None, **backend_kwargs) -> Group
 open_store(path, mode="r", *, backend=None, **backend_kwargs)         -> Group
-open_zvr(path, *, backend=None, **backend_kwargs)                     -> ZVRStore
+open_zv(path, *, backend=None, **backend_kwargs)                     -> ZVStore
 ```
 
 `backend` is one of `"local"` / `"obstore"` / `"fsspec"` or `None` for
@@ -301,5 +302,5 @@ a resolution level).
 The backend layer is independent of the
 [format capability tokens](../layout/root_metadata.md) stamped on
 `RootMetadata.format_capabilities` — backends carry data bytes, not
-format semantics. See the capability list for `CAP_CROSS_CHUNK_FACES`,
-`CAP_VERTEX_COUNT_CACHE`, `CAP_MULTISCALE_LINKS`, etc.
+format semantics. See the capability list for `CAP_MULTISCALE_LINKS`,
+`CAP_PRESERVED_OBJECT_IDS`, `CAP_SHARED_VERTEX_GROUPS`.
diff --git a/docs/spec/multiscale/pyramid_construction.md b/docs/spec/multiscale/pyramid_construction.md
@@ -287,7 +287,8 @@ which:
 
 1. Walks every adjacent `(fine, coarse)` level pair.
 2. Reconstructs the fine→parent map from the coarse level's
-   `metanode_children` sidecar.
+   `cross_chunk_links/<delta=-1>/` records (each record pairs a
+   coarse metanode to one of its fine children).
 3. Builds the trivial edge list `[(i, parent[i]) for i in range(n_fine)]`.
 4. Partitions via
    [`partition_cross_level_edges`](../../../zarr_vectors/spatial/boundary.py)

diff --git a/docs/spec/object_model/cross_chunk_links.md b/docs/spec/object_model/cross_chunk_links.md
@@ -126,28 +126,30 @@ chunk at the **owning level**, and column 1 is a local vertex index in
 the **same chunk key** at level `owning_level + N`. The reader doesn't
 need any cross-chunk-coords information — both sides share `<chunk_key>`.
 
-**Paired vertex-group offsets:** for `delta == 0` only, the byte
-offset of each link group is paired into the matching
-`vertex_group_offsets/<chunk_key>` table so a reader can fetch one
-vertex group's edges without rescanning the chunk. For `delta != 0`
-the source vertex groups and link groups belong to different levels,
-so the pairing is meaningless and the writer skips it. See the
-guardrail in
-[`write_chunk_links`](../../../zarr_vectors/core/arrays.py).
+**Self-describing blob.** Each `links/<delta>/<chunk_key>` file is a
+self-describing ragged blob: an int64 header with `K` followed by the
+`K` per-group byte offsets, then the concatenated link bytes. Readers
+recover the per-vertex-group partition without consulting any sibling
+table.
 
 ### `cross_chunk_links/<delta>/data` — global flat blob
 
-Each link is `2 * (sid_ndim + 1)` int64s laid out as
+Each record is `link_width * (sid_ndim + 1)` int64s laid out as
+`link_width` (written `L` below) back-to-back `(chunk_coords, vertex_idx)` endpoints:
 
 ```
-[chunk_a_0, ..., chunk_a_{ndim-1}, vi_a,
- chunk_b_0, ..., chunk_b_{ndim-1}, vi_b]
+[chunk_0_0, ..., chunk_0_{ndim-1}, vi_0,
+ chunk_1_0, ..., chunk_1_{ndim-1}, vi_1,
+ ...
+ chunk_{L-1}_0, ..., vi_{L-1}]
 ```
 
-— i.e. the two endpoints written back-to-back. `chunk_a` is a chunk
-coordinate at the **owning level**; `chunk_b` is a chunk coordinate at
-the **target level** (`owning_level + level_delta`). `vi_a` and `vi_b`
-are local vertex indices within their respective chunks.
+`link_width=2` (the default) encodes a classic cross-chunk edge;
+`link_width=3` encodes a triangle face spanning chunks (used by mesh
+writers); `link_width=1` encodes a single parent→child reference for
+pyramid metanode drill-down. Endpoint 0 lives at the **owning level**;
+endpoints 1..L-1 live at the **target level** (`owning_level +
+level_delta`).
 
 **`.zattrs` schema** (see
 [`zarr_vectors/core/arrays.py:write_cross_chunk_links`](../../../zarr_vectors/core/arrays.py)):
@@ -157,7 +159,8 @@ are local vertex indices within their respective chunks.
   "zv_array":    "cross_chunk_links",
   "num_links":   12,
   "sid_ndim":    3,
-  "level_delta": 1
+  "level_delta": 1,
+  "link_width":  2
 }
 ```
 
@@ -233,8 +236,9 @@ chunk → bucket into per-chunk `(M_local, link_width)` rows for
 [`_write_cross_level_edges`](../../../zarr_vectors/multiresolution/coarsen.py)
 during pyramid construction. For each adjacent (fine, coarse) pair,
 every fine vertex has exactly one trivial edge to its coarse parent
-metanode (the parent map is reconstructed from `metanode_children`).
-The edges are then partitioned via
+metanode (the parent map is recovered from the coarse level's own
+`cross_chunk_links/<delta=-1>/` records). The edges are then
+partitioned via
 [`partition_cross_level_edges`](../../../zarr_vectors/spatial/boundary.py):
 chunk-aligned edges (source chunk_key == target chunk_key when
 re-evaluated against the coarser grid) become rows in

diff --git a/docs/tutorials/io/cloud_stores.md b/docs/tutorials/io/cloud_stores.md
@@ -27,7 +27,7 @@ to `fsspec` for any URL scheme it can't handle.
 ## Backend resolution at a glance
 
 When you pass a cloud URL to any `read_*` / `write_*` / `open_store` /
-`open_zvr` call, the backend is chosen in this order:
+`open_zv` call, the backend is chosen in this order:
 
 1. **Explicit `backend=` kwarg** — e.g. `backend="fsspec"` forces fsspec
    even if obstore is installed.
@@ -262,9 +262,9 @@ a resolution level, writing new attributes).
 
 ```python
 import numpy as np
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
-store = open_zvr("s3://open-neuro/scan.zarrvectors")
+store = open_zv("s3://open-neuro/scan.zarrvectors")
 
 print(store.levels)                          # metadata only — no chunk I/O
 print(store[2].vertex_count)                 # one metadata request
@@ -281,7 +281,7 @@ detail = read_points(
 )
 ```
 
-`open_zvr` accepts the same `backend=` / `**backend_kwargs` as
+`open_zv` accepts the same `backend=` / `**backend_kwargs` as
 `open_store`.
 
 ---

diff --git a/docs/tutorials/multiscale/lazy_loading.md b/docs/tutorials/multiscale/lazy_loading.md
@@ -4,7 +4,7 @@ The ZVF read functions (`read_points`, `read_polylines`, etc.) are eager:
 they fetch and return all requested data immediately. For large stores or
 remote datasets, an eager read of the full store is impractical.
 
-The **lazy API** provides a `open_zvr` object that opens the store
+The **lazy API** provides an `open_zv` function that opens the store
 metadata without reading any array data. Array slices are fetched on demand
 — only when accessed. This is the recommended access pattern for:
 
@@ -20,10 +20,10 @@ metadata without reading any array data. Array slices are fetched on demand
 ## Opening a store lazily
 
 ```python
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
 # Opens metadata only — no vertex data fetched
-store = open_zvr("synchrotron.zarrvectors")
+store = open_zv("synchrotron.zarrvectors")
 
 print(store.geometry_type)           # "point_cloud"
 print(store.spatial_dims)            # 3
@@ -38,7 +38,7 @@ Opening a remote store is identical — pass an fsspec URL:
 
 ```python
 import s3fs
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
 # 0.4+: backend layer auto-routes cloud URLs via obstore (or fsspec).
 # Public access works without explicit anon=True.
@@ -140,7 +140,7 @@ print(f"Mean intensity over {total_count} points: {mean_intensity:.4f}")
 
 ## Lazy array access
 
-The `open_zvr` exposes each array as a lazy `zarr.Array` that can
+The store returned by `open_zv` exposes each array as a lazy `zarr.Array` that can
 be sliced directly:
 
 ```python
@@ -178,18 +178,18 @@ print(f"{len(high_fa_ids)} high-FA streamlines")
 
 ```python
 import s3fs
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
-store = open_zvr("s3://my-bucket/dataset/tracts.zarrvectors")
+store = open_zv("s3://my-bucket/dataset/tracts.zarrvectors")
 ```
 
 ### GCS
 
 ```python
 import gcsfs
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
-store = open_zvr("gs://my-bucket/tracts.zarrvectors")
+store = open_zv("gs://my-bucket/tracts.zarrvectors")
 ```
 
 ### Performance on object stores
@@ -203,7 +203,7 @@ minimises requests by:
 3. Caching decompressed chunks in an LRU cache (configurable size).
 
 ```python
-store = open_zvr(
+store = open_zv(
     "s3://my-bucket/tracts.zarrvectors",
     cache_size=256,     # cache up to 256 decompressed chunks in memory
     n_workers=8,        # fetch up to 8 chunks in parallel
@@ -223,12 +223,12 @@ for chunk_coord, chunk_data in store.iter_chunks(level=1, prefetch=4):
 
 ---
 
-## open_zvr API summary
+## open_zv API summary
 
 ```python
-from zarr_vectors.lazy import open_zvr
+from zarr_vectors.lazy import open_zv
 
-store = open_zvr(path_or_store)
+store = open_zv(path_or_store)
 
 # Metadata (no data I/O)
 store.geometry_type           # str
@@ -258,7 +258,7 @@ store.__enter__() / store.__exit__()   # context manager
 ### Using as a context manager
 
 ```python
-with open_zvr("scan.zarrvectors") as store:
+with open_zv("scan.zarrvectors") as store:
     result = store.read(level=2)
 # Store is closed and cache is freed on exit
 ```
@@ -272,7 +272,7 @@ with open_zvr("scan.zarrvectors") as store:
 Load the coarsest level for a quick full-volume thumbnail:
 
 ```python
-store    = open_zvr("scan.zarrvectors")
+store    = open_zv("scan.zarrvectors")
 coarsest = store.levels[-1]
 result   = store.read(level=coarsest)
 # Use result["positions"] to render a low-density overview