From 3d6b9b12df421396d1a7d3d6e0296665c2cc0bdb Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Sun, 28 Dec 2025 16:10:55 -0500 Subject: [PATCH 1/7] chore: release v0.1.0a3 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d52857c..b8e3f85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "uv_build" [project] name = "jsonlt-python" -version = "0.1.0a2" +version = "0.1.0a3" description = "Reference implementation of the JSONLT (JSON Lines Table) specification for Python." readme = "README.md" license = "MIT" diff --git a/uv.lock b/uv.lock index 495801f..d52b255 100644 --- a/uv.lock +++ b/uv.lock @@ -852,7 +852,7 @@ wheels = [ [[package]] name = "jsonlt-python" -version = "0.1.0a2" +version = "0.1.0a3" source = { editable = "." } dependencies = [ { name = "typing-extensions" }, From 73c36c9315459f1414eb0915daf4602cb5a5e6dc Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Sun, 28 Dec 2025 16:11:58 -0500 Subject: [PATCH 2/7] feat(benchmarks): improve measurement accuracy and add missing benchmarks - Restructure compact benchmarks to pre-populate history/tombstones in setup - Fix delete benchmark to use unique keys per iteration (no restore cycle) - Add benchmarks for find_one, items, and reload methods - Add EDGE_PARAMS (scale=0, scale=1) for boundary testing - Add generator helpers: create_extended_test_table, create_table_with_history, create_table_with_tombstones --- tests/benchmarks/_generators.py | 427 +++++++++++++++++ tests/benchmarks/test_bench_memory.py | 246 ++++++++++ tests/benchmarks/test_bench_table.py | 661 ++++++++++++++++++++++++++ 3 files changed, 1334 insertions(+) create mode 100644 tests/benchmarks/_generators.py create mode 100644 tests/benchmarks/test_bench_memory.py create mode 100644 tests/benchmarks/test_bench_table.py diff --git a/tests/benchmarks/_generators.py b/tests/benchmarks/_generators.py new file mode 100644 index 0000000..bcadf2f --- /dev/null +++ b/tests/benchmarks/_generators.py @@ -0,0 +1,427 @@ +"""Benchmark data generators for JSONLT benchmarks. + +This module provides deterministic data generation functions for benchmark +tests. All generators use seeded random instances for reproducibility. +""" + +import random +from typing import TYPE_CHECKING, Literal + +from jsonlt import Table +from jsonlt._header import Header, serialize_header +from jsonlt._json import JSONObject, serialize_json + +if TYPE_CHECKING: + from pathlib import Path + + from jsonlt._keys import Key, KeySpecifier + + +def generate_key( + key_type: Literal["string", "integer", "tuple"], + index: int, +) -> "Key": + """Generate a deterministic key for a given index. + + Args: + key_type: The type of key to generate. + index: The index used to generate the key value. + + Returns: + A key of the specified type. + """ + if key_type == "string": + return f"key_{index:08d}" + if key_type == "integer": + return index + # tuple: distribute across 10 orgs + org_index = index % 10 + return (f"org_{org_index}", index) + + +def get_key_specifier( + key_type: Literal["string", "integer", "tuple"], +) -> "KeySpecifier": + """Get the key specifier for a given key type. + + Args: + key_type: The type of key. + + Returns: + The corresponding key specifier. 
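+
+    Example:
+        A quick doctest covering both return branches:
+
+        >>> get_key_specifier("tuple")
+        ('org', 'seq')
+        >>> get_key_specifier("string")
+        'id'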
+ """ + if key_type == "tuple": + return ("org", "seq") + return "id" + + +def _generate_small_record( + key_type: Literal["string", "integer", "tuple"], + index: int, + rng: random.Random, +) -> JSONObject: + """Generate a small record (~5 fields). + + Args: + key_type: The type of key to use. + index: The index for deterministic key generation. + rng: Random instance for deterministic value generation. + + Returns: + A JSONObject with ~5 fields. + """ + record: JSONObject = {} + + # Add key field(s) + if key_type == "tuple": + org_index = index % 10 + record["org"] = f"org_{org_index}" + record["seq"] = index + elif key_type == "integer": + record["id"] = index + else: + record["id"] = f"key_{index:08d}" + + # Add additional fields + record["name"] = f"Record {index}" + record["active"] = rng.choice([True, False]) + record["count"] = rng.randint(0, 10000) + record["score"] = round(rng.uniform(0.0, 100.0), 2) + + return record + + +def _generate_medium_record( + key_type: Literal["string", "integer", "tuple"], + index: int, + rng: random.Random, +) -> JSONObject: + """Generate a medium record (~20 fields). + + Args: + key_type: The type of key to use. + index: The index for deterministic key generation. + rng: Random instance for deterministic value generation. + + Returns: + A JSONObject with ~20 fields. + """ + record = _generate_small_record(key_type, index, rng) + + # Add more fields to reach ~20 total + record["description"] = f"This is a detailed description for record {index}." + record["tags"] = [f"tag_{rng.randint(1, 100)}" for _ in range(5)] + record["address"] = { + "street": f"{rng.randint(1, 9999)} Main St", + "city": rng.choice( + ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"] + ), + "state": rng.choice(["NY", "CA", "IL", "TX", "AZ"]), + "zip": f"{rng.randint(10000, 99999)}", + } + record["created_at"] = f"2024-{rng.randint(1, 12):02d}-{rng.randint(1, 28):02d}" + record["updated_at"] = f"2024-{rng.randint(1, 12):02d}-{rng.randint(1, 28):02d}" + record["priority"] = rng.randint(1, 5) + record["category"] = rng.choice(["A", "B", "C", "D", "E"]) + record["status"] = rng.choice(["pending", "active", "completed", "archived"]) + record["version"] = rng.randint(1, 100) + record["weight"] = round(rng.uniform(0.1, 100.0), 3) + record["rating"] = round(rng.uniform(1.0, 5.0), 1) + record["views"] = rng.randint(0, 1000000) + record["likes"] = rng.randint(0, 100000) + + return record + + +def _generate_large_record( + key_type: Literal["string", "integer", "tuple"], + index: int, + rng: random.Random, +) -> JSONObject: + """Generate a large record (~100 fields with 1KB+ text blobs). + + Args: + key_type: The type of key to use. + index: The index for deterministic key generation. + rng: Random instance for deterministic value generation. + + Returns: + A JSONObject with ~100 fields including large text blobs. 
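+
+    Note:
+        The three blob fields alone contribute at least ~4 KB per record
+        (1 KB + 1 KB + 2 KB minimums), so blob text dominates record size.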
+ """ + record = _generate_medium_record(key_type, index, rng) + + # Generate large text blobs (1KB+ each) + words = [ + "lorem", + "ipsum", + "dolor", + "sit", + "amet", + "consectetur", + "adipiscing", + "elit", + "sed", + "do", + "eiusmod", + "tempor", + "incididunt", + "ut", + "labore", + "et", + "dolore", + "magna", + "aliqua", + "enim", + ] + + def generate_blob(min_chars: int) -> str: + result: list[str] = [] + current_len = 0 + while current_len < min_chars: + word = rng.choice(words) + result.append(word) + current_len += len(word) + 1 + return " ".join(result) + + # Add large text blobs (these make up most of the record size) + record["long_description"] = generate_blob(1024) + record["notes"] = generate_blob(1024) + record["content"] = generate_blob(2048) + + # Add many additional fields to reach ~100 total + for i in range(80): + field_name = f"field_{i:02d}" + field_type = i % 5 + if field_type == 0: + record[field_name] = f"value_{rng.randint(1, 10000)}" + elif field_type == 1: + record[field_name] = rng.randint(0, 1000000) + elif field_type == 2: + record[field_name] = round(rng.uniform(0.0, 1000.0), 4) + elif field_type == 3: + record[field_name] = rng.choice([True, False]) + else: + record[field_name] = [rng.randint(1, 100) for _ in range(3)] + + return record + + +def generate_record( + key_type: Literal["string", "integer", "tuple"], + size: Literal["small", "medium", "large"], + index: int, + *, + seed: int = 42, +) -> JSONObject: + """Generate a single deterministic record. + + Args: + key_type: The type of key to use ("string", "integer", or "tuple"). + size: The size of record to generate ("small", "medium", or "large"). + index: The index for deterministic generation. + seed: Random seed for reproducibility. + + Returns: + A JSONObject of the specified size and key type. + """ + rng = random.Random(seed + index) # noqa: S311 + + if size == "small": + return _generate_small_record(key_type, index, rng) + if size == "medium": + return _generate_medium_record(key_type, index, rng) + return _generate_large_record(key_type, index, rng) + + +def generate_records( + key_type: Literal["string", "integer", "tuple"], + size: Literal["small", "medium", "large"], + count: int, + *, + seed: int = 42, +) -> list[JSONObject]: + """Generate a list of deterministic records. + + Args: + key_type: The type of key to use ("string", "integer", or "tuple"). + size: The size of records to generate ("small", "medium", or "large"). + count: Number of records to generate. + seed: Random seed for reproducibility. + + Returns: + A list of JSONObjects of the specified size and key type. + """ + return [generate_record(key_type, size, i, seed=seed) for i in range(count)] + + +def write_table_file( + path: "Path", + records: list[JSONObject], + key_specifier: "KeySpecifier", +) -> None: + """Write records to a JSONLT file. + + This creates a new file with a header and all records serialized. + + Args: + path: Path to write the file to. + records: List of records to write. + key_specifier: The key specifier for the table. 
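+
+    Note:
+        The file layout is a single serialized header line followed by one
+        serialized record per line, ending with a trailing newline.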
+ """ + lines: list[str] = [] + + # Add header + header = Header(version=1, key=key_specifier) + lines.append(serialize_header(header)) + + # Add records + lines.extend(serialize_json(record) for record in records) + + # Write to file + content = "\n".join(lines) + "\n" + _ = path.write_text(content, encoding="utf-8") + + +def create_test_table( + tmp_path: "Path", + key_type: Literal["string", "integer", "tuple"], + record_size: Literal["small", "medium", "large"], + scale: int, + *, + auto_reload: bool = False, +) -> Table: + """Create a test table with generated records. + + Args: + tmp_path: pytest tmp_path fixture. + key_type: Type of keys to generate. + record_size: Size of records to generate. + scale: Number of records to generate. + auto_reload: Whether to enable auto-reload on the table. + + Returns: + A Table instance with the generated records. + """ + records = generate_records(key_type, record_size, scale) + key_spec = get_key_specifier(key_type) + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + return Table(file_path, key=key_spec, auto_reload=auto_reload) + + +def add_history_to_table( + table: Table, + key_type: Literal["string", "integer", "tuple"], + record_size: Literal["small", "medium", "large"], + count: int, +) -> None: + """Add update history to a table. + + Args: + table: The table to add history to. + key_type: Type of keys. + record_size: Size of records. + count: Number of updates to add. + """ + for i in range(count): + updated_record = generate_record(key_type, record_size, i, seed=99) + table.put(updated_record) + + +def create_extended_test_table( # noqa: PLR0913 + tmp_path: "Path", + key_type: Literal["string", "integer", "tuple"], + record_size: Literal["small", "medium", "large"], + base_scale: int, + extra_keys: int, + *, + auto_reload: bool = False, +) -> Table: + """Create a test table with additional keys beyond base scale. + + The table contains keys from index 0 to (base_scale + extra_keys - 1). + Use this for delete benchmarks where each iteration needs a unique key. + + Args: + tmp_path: pytest tmp_path fixture. + key_type: Type of keys to generate. + record_size: Size of records to generate. + base_scale: Base number of records. + extra_keys: Additional keys for benchmark iterations. + auto_reload: Whether to enable auto-reload on the table. + + Returns: + A Table instance with the generated records. + """ + total_count = base_scale + extra_keys + records = generate_records(key_type, record_size, total_count) + key_spec = get_key_specifier(key_type) + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + return Table(file_path, key=key_spec, auto_reload=auto_reload) + + +def create_table_with_history( # noqa: PLR0913 + tmp_path: "Path", + key_type: Literal["string", "integer", "tuple"], + record_size: Literal["small", "medium", "large"], + scale: int, + history_count: int, + *, + auto_reload: bool = False, +) -> Table: + """Create a table pre-populated with update history. + + Creates a table with `scale` records, then applies `history_count` + updates to existing records. Useful for compact benchmarks. + + Args: + tmp_path: pytest tmp_path fixture. + key_type: Type of keys to generate. + record_size: Size of records to generate. + scale: Number of records to generate. + history_count: Number of updates to apply. + auto_reload: Whether to enable auto-reload on the table. + + Returns: + A Table instance with the generated records and history. 
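+
+    Example:
+        A 1000-record table where the first 100 records each carry one
+        superseded version:
+
+            create_table_with_history(tmp_path, "string", "small", 1000, 100)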
+ """ + table = create_test_table( + tmp_path, key_type, record_size, scale, auto_reload=auto_reload + ) + add_history_to_table(table, key_type, record_size, history_count) + return table + + +def create_table_with_tombstones( # noqa: PLR0913 + tmp_path: "Path", + key_type: Literal["string", "integer", "tuple"], + record_size: Literal["small", "medium", "large"], + scale: int, + tombstone_count: int, + *, + auto_reload: bool = False, +) -> Table: + """Create a table pre-populated with tombstones. + + Creates a table with `scale` records, then deletes `tombstone_count` + records starting from index 0. Useful for compact benchmarks. + + Args: + tmp_path: pytest tmp_path fixture. + key_type: Type of keys to generate. + record_size: Size of records to generate. + scale: Number of records to generate. + tombstone_count: Number of records to delete. + auto_reload: Whether to enable auto-reload on the table. + + Returns: + A Table instance with tombstones. + """ + table = create_test_table( + tmp_path, key_type, record_size, scale, auto_reload=auto_reload + ) + for i in range(tombstone_count): + key = generate_key(key_type, i) + _ = table.delete(key) + return table diff --git a/tests/benchmarks/test_bench_memory.py b/tests/benchmarks/test_bench_memory.py new file mode 100644 index 0000000..034f2e0 --- /dev/null +++ b/tests/benchmarks/test_bench_memory.py @@ -0,0 +1,246 @@ +"""Memory profiling benchmarks for JSONLT. + +This module contains memory usage benchmarks using pytest-memray to ensure +memory consumption stays within expected bounds. +""" + +import sys +from typing import TYPE_CHECKING + +import pytest + +from jsonlt import Table + +from ._generators import generate_records, get_key_specifier, write_table_file + +if TYPE_CHECKING: + from pathlib import Path + + from jsonlt._json import JSONObject + +# Skip entire module on Windows (memray not available) +pytestmark = pytest.mark.skipif( + sys.platform == "win32", + reason="pytest-memray not available on Windows", +) + + +class TestMemoryLoad: + """Memory benchmarks for loading tables.""" + + @pytest.mark.limit_memory("10 MB") + def test_load_1k_small_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("50 MB") + @pytest.mark.slow + def test_load_10k_small_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 10000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("500 MB") + @pytest.mark.slow + def test_load_100k_small_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 100000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("20 MB") + def test_load_1k_medium_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "medium", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("100 MB") + 
@pytest.mark.slow + def test_load_10k_medium_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "medium", 10000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("100 MB") + @pytest.mark.slow + def test_load_1k_large_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "large", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + +class TestMemoryLoadKeyTypes: + """Memory benchmarks for loading with different key types.""" + + @pytest.mark.limit_memory("10 MB") + def test_load_1k_integer_keys(self, tmp_path: "Path") -> None: + records = generate_records("integer", "small", 1000) + key_spec = get_key_specifier("integer") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("10 MB") + def test_load_1k_tuple_keys(self, tmp_path: "Path") -> None: + records = generate_records("tuple", "small", 1000) + key_spec = get_key_specifier("tuple") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("50 MB") + @pytest.mark.slow + def test_load_10k_integer_keys(self, tmp_path: "Path") -> None: + records = generate_records("integer", "small", 10000) + key_spec = get_key_specifier("integer") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + @pytest.mark.limit_memory("50 MB") + @pytest.mark.slow + def test_load_10k_tuple_keys(self, tmp_path: "Path") -> None: + records = generate_records("tuple", "small", 10000) + key_spec = get_key_specifier("tuple") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + _ = Table(file_path, key=key_spec, auto_reload=False) + + +class TestMemoryRead: + """Memory benchmarks for read operations.""" + + @pytest.mark.limit_memory("15 MB") + def test_all_1k_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + _ = table.all() + + @pytest.mark.limit_memory("75 MB") + @pytest.mark.slow + def test_all_10k_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 10000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + _ = table.all() + + @pytest.mark.limit_memory("15 MB") + def test_find_1k_records(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + _ = table.find(lambda r: r.get("active") is True) + + @pytest.mark.limit_memory("15 MB") + def test_keys_1k_records(self, tmp_path: "Path") -> None: + records = 
generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + _ = table.keys() + + +class TestMemoryWrite: + """Memory benchmarks for write operations.""" + + @pytest.mark.limit_memory("15 MB") + def test_put_to_1k_table(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + new_record: JSONObject = { + "id": "new_key", + "name": "New Record", + "active": True, + "count": 1, + "score": 1.0, + } + table.put(new_record) + + @pytest.mark.limit_memory("15 MB") + def test_delete_from_1k_table(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + _ = table.delete("key_00000000") + + @pytest.mark.limit_memory("20 MB") + def test_compact_1k_table(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 1000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + # Add some updates to create history + for i in range(100): + updated: JSONObject = { + "id": f"key_{i:08d}", + "name": f"Updated {i}", + "active": True, + "count": i, + "score": float(i), + } + table.put(updated) + table.compact() + + @pytest.mark.limit_memory("100 MB") + @pytest.mark.slow + def test_compact_10k_table(self, tmp_path: "Path") -> None: + records = generate_records("string", "small", 10000) + key_spec = get_key_specifier("string") + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + table = Table(file_path, key=key_spec, auto_reload=False) + # Add some updates to create history + for i in range(1000): + updated: JSONObject = { + "id": f"key_{i:08d}", + "name": f"Updated {i}", + "active": True, + "count": i, + "score": float(i), + } + table.put(updated) + table.compact() diff --git a/tests/benchmarks/test_bench_table.py b/tests/benchmarks/test_bench_table.py new file mode 100644 index 0000000..b649aec --- /dev/null +++ b/tests/benchmarks/test_bench_table.py @@ -0,0 +1,661 @@ +"""Benchmarks for Table operations. + +This module contains performance benchmarks for all Table operations, +organized by operation type and parametrized by record size, key type, +and scale. 
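+
+In CI, the suite runs under CodSpeed:
+`uv run pytest -m benchmark --codspeed` (see .github/workflows/benchmark.yml).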
+""" + +from typing import TYPE_CHECKING, Literal + +import pytest + +from jsonlt import Table + +from ._generators import ( + create_extended_test_table, + create_table_with_history, + create_table_with_tombstones, + create_test_table, + generate_key, + generate_record, + generate_records, + get_key_specifier, + write_table_file, +) + +if TYPE_CHECKING: + from pathlib import Path + + from pytest_codspeed.plugin import BenchmarkFixture + + from jsonlt._json import JSONObject + + +# Type aliases for parametrization +RecordSize = Literal["small", "medium", "large"] +KeyType = Literal["string", "integer", "tuple"] + +# Scale and size parameters for CI (fast benchmarks) +CI_PARAMS: list[object] = [ + pytest.param("small", "string", 100, id="small-str-100"), + pytest.param("small", "string", 1000, id="small-str-1k"), + pytest.param("small", "integer", 100, id="small-int-100"), + pytest.param("small", "integer", 1000, id="small-int-1k"), + pytest.param("small", "tuple", 100, id="small-tuple-100"), + pytest.param("small", "tuple", 1000, id="small-tuple-1k"), +] + +# Larger scale parameters (marked slow) +SLOW_PARAMS: list[object] = [ + pytest.param("small", "string", 10000, id="small-str-10k", marks=pytest.mark.slow), + pytest.param( + "small", "string", 100000, id="small-str-100k", marks=pytest.mark.slow + ), + pytest.param("medium", "string", 1000, id="med-str-1k", marks=pytest.mark.slow), + pytest.param("medium", "string", 10000, id="med-str-10k", marks=pytest.mark.slow), + pytest.param("large", "string", 1000, id="large-str-1k", marks=pytest.mark.slow), + pytest.param("small", "integer", 10000, id="small-int-10k", marks=pytest.mark.slow), + pytest.param( + "small", "integer", 100000, id="small-int-100k", marks=pytest.mark.slow + ), + pytest.param("small", "tuple", 10000, id="small-tuple-10k", marks=pytest.mark.slow), + pytest.param( + "small", "tuple", 100000, id="small-tuple-100k", marks=pytest.mark.slow + ), + pytest.param("medium", "integer", 1000, id="med-int-1k", marks=pytest.mark.slow), + pytest.param("medium", "tuple", 1000, id="med-tuple-1k", marks=pytest.mark.slow), + pytest.param("large", "integer", 1000, id="large-int-1k", marks=pytest.mark.slow), + pytest.param("large", "tuple", 1000, id="large-tuple-1k", marks=pytest.mark.slow), +] + +# Edge case parameters for boundary testing +EDGE_PARAMS: list[object] = [ + pytest.param("small", "string", 0, id="small-str-0"), + pytest.param("small", "string", 1, id="small-str-1"), + pytest.param("small", "integer", 0, id="small-int-0"), + pytest.param("small", "integer", 1, id="small-int-1"), + pytest.param("small", "tuple", 0, id="small-tuple-0"), + pytest.param("small", "tuple", 1, id="small-tuple-1"), +] + +ALL_PARAMS: list[object] = CI_PARAMS + SLOW_PARAMS +ALL_WITH_EDGE_PARAMS: list[object] = ALL_PARAMS + EDGE_PARAMS + +# Buffer size for delete benchmarks to ensure unique keys per iteration +DELETE_ITERATION_BUFFER: int = 10000 + + +class TestBenchLoad: + """Benchmarks for Table constructor loading pre-existing files.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_PARAMS) + def test_load( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + records = generate_records(key_type, record_size, scale) + key_spec = get_key_specifier(key_type) + file_path = tmp_path / "bench.jsonlt" + write_table_file(file_path, records, key_spec) + + def load_table() -> None: + _ = Table(file_path, key=key_spec, auto_reload=False) + + 
benchmark(load_table) + + +class TestBenchReload: + """Benchmarks for table.reload() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_PARAMS) + def test_reload( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Force reload table from disk.""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + def reload_table() -> None: + table.reload() + + benchmark(reload_table) + + +class TestBenchGet: + """Benchmarks for table.get() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_PARAMS) + def test_get_existing_key( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Get key from middle of dataset + middle_index = scale // 2 + target_key = generate_key(key_type, middle_index) + + def get_record() -> None: + _ = table.get(target_key) + + benchmark(get_record) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_get_nonexistent_key( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Generate a key that doesn't exist + nonexistent_key = generate_key(key_type, scale + 1000) + + def get_missing() -> None: + _ = table.get(nonexistent_key) + + benchmark(get_missing) + + +class TestBenchAll: + """Benchmarks for table.all() returning sorted records.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_WITH_EDGE_PARAMS) + def test_all( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + def get_all() -> None: + # Invalidate cache to measure full sort + table._cached_sorted_keys = None # noqa: SLF001 # pyright: ignore[reportPrivateUsage] + _ = table.all() + + benchmark(get_all) + + +class TestBenchFind: + """Benchmarks for table.find() with various selectivity.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_PARAMS) + def test_find_high_selectivity( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # High selectivity: match ~10% of records (count > 9000) + def predicate_high_count(r: "JSONObject") -> bool: + count = r.get("count", 0) + return isinstance(count, int) and count > 9000 + + def find_high_count() -> None: + _ = table.find(predicate_high_count) + + benchmark(find_high_count) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_low_selectivity( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Low selectivity: match ~90% of records (count < 9000) + def predicate_low_count(r: "JSONObject") -> bool: + count = r.get("count", 0) + return isinstance(count, int) and count < 9000 + + def find_low_count() -> None: + _ = table.find(predicate_low_count) + + benchmark(find_low_count) + + @pytest.mark.parametrize(("record_size", 
"key_type", "scale"), CI_PARAMS) + def test_find_very_high_selectivity( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Find with ~1% selectivity (matches few records).""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Very high selectivity: match ~1% of records (count > 9900) + def predicate_very_selective(r: "JSONObject") -> bool: + count = r.get("count", 0) + return isinstance(count, int) and count > 9900 + + def find_very_selective() -> None: + _ = table.find(predicate_very_selective) + + benchmark(find_very_selective) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_all_records( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Find with 100% selectivity (matches all records).""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + def predicate_all(_r: "JSONObject") -> bool: + return True + + def find_all() -> None: + _ = table.find(predicate_all) + + benchmark(find_all) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_with_limit( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Find with limit - should short-circuit early + def predicate_active(r: "JSONObject") -> bool: + return r.get("active") is True + + def find_limited() -> None: + _ = table.find(predicate_active, limit=10) + + benchmark(find_limited) + + +class TestBenchFindOne: + """Benchmarks for table.find_one() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_one_match_early( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Find first record matching predicate (best case).""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + def predicate_any(_r: "JSONObject") -> bool: + return True + + def find_first() -> None: + _ = table.find_one(predicate_any) + + benchmark(find_first) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_one_match_late( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Find record with predicate matching late in dataset.""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Match only high count values (~1% of records) + def predicate_high_count(r: "JSONObject") -> bool: + count = r.get("count", 0) + return isinstance(count, int) and count > 9900 + + def find_late() -> None: + _ = table.find_one(predicate_high_count) + + benchmark(find_late) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_find_one_no_match( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Find with predicate that matches nothing (full scan).""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + def predicate_never(_r: "JSONObject") -> bool: + return False + + def find_none() -> None: + _ = table.find_one(predicate_never) + + benchmark(find_none) + + +class TestBenchPut: + """Benchmarks for 
table.put() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_put_new_record( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Generate new records to put (beyond existing range) + new_record_index = scale + 1 + counter = [new_record_index] + + def put_new() -> None: + new_record = generate_record(key_type, record_size, counter[0], seed=42) + table.put(new_record) + counter[0] += 1 + + benchmark(put_new) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_put_update_record( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + # Update existing records (cycling through them) + counter = [0] + + def put_update() -> None: + index = counter[0] % scale + updated_record = generate_record(key_type, record_size, index, seed=99) + table.put(updated_record) + counter[0] += 1 + + benchmark(put_update) + + +class TestBenchBatchWrite: + """Benchmarks for batched write operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_batch_put_10( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + batch_size = 10 + counter = [scale + 1] + + def batch_put() -> None: + start = counter[0] + for i in range(batch_size): + new_record = generate_record(key_type, record_size, start + i, seed=42) + table.put(new_record) + counter[0] += batch_size + + benchmark(batch_put) + + @pytest.mark.parametrize( + ("record_size", "key_type", "scale"), + [ + pytest.param("small", "string", 100, id="small-str-100"), + pytest.param("small", "integer", 100, id="small-int-100"), + ], + ) + def test_batch_put_100( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + batch_size = 100 + counter = [scale + 1] + + def batch_put() -> None: + start = counter[0] + for i in range(batch_size): + new_record = generate_record(key_type, record_size, start + i, seed=42) + table.put(new_record) + counter[0] += batch_size + + benchmark(batch_put) + + +class TestBenchCompact: + """Benchmarks for table.compact() operations. + + These benchmarks measure pure compact() performance by pre-populating + tables with history or tombstones during setup. + + Note: The benchmark fixture runs compact() multiple times. After the first + iteration, the table is already compacted, so subsequent iterations measure + the fast path (compacting a clean table). The reported time is amortized + across all iterations, with the first iteration doing the meaningful work. 
+ """ + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_compact_with_history( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Compact a table with update history (superseded records).""" + history_count = max(scale // 10, 1) + table = create_table_with_history( + tmp_path, key_type, record_size, scale, history_count + ) + + def compact_only() -> None: + table.compact() + + benchmark(compact_only) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_compact_with_tombstones( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Compact a table with tombstones (deleted records).""" + tombstone_count = max(scale // 10, 1) + table = create_table_with_tombstones( + tmp_path, key_type, record_size, scale, tombstone_count + ) + + def compact_only() -> None: + table.compact() + + benchmark(compact_only) + + +class TestBenchKeys: + """Benchmarks for table.keys() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_WITH_EDGE_PARAMS) + def test_keys( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + def get_keys() -> None: + # Invalidate cache to measure full sort + table._cached_sorted_keys = None # noqa: SLF001 # pyright: ignore[reportPrivateUsage] + _ = table.keys() + + benchmark(get_keys) + + +class TestBenchItems: + """Benchmarks for table.items() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_WITH_EDGE_PARAMS) + def test_items( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Get all key-value pairs.""" + table = create_test_table(tmp_path, key_type, record_size, scale) + + def get_items() -> None: + # Invalidate cache to measure full sort + table._cached_sorted_keys = None # noqa: SLF001 # pyright: ignore[reportPrivateUsage] + _ = table.items() + + benchmark(get_items) + + +class TestBenchCount: + """Benchmarks for table.count() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_WITH_EDGE_PARAMS) + def test_count( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + def count() -> None: + _ = table.count() + + benchmark(count) + + +class TestBenchHas: + """Benchmarks for table.has() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), ALL_WITH_EDGE_PARAMS) + def test_has_existing( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + if scale == 0: + pytest.skip("Cannot test has_existing with scale=0 (no records)") + table = create_test_table(tmp_path, key_type, record_size, scale) + + middle_key = generate_key(key_type, scale // 2) + + def has_key() -> None: + _ = table.has(middle_key) + + benchmark(has_key) + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_has_nonexistent( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: 
int, + ) -> None: + table = create_test_table(tmp_path, key_type, record_size, scale) + + missing_key = generate_key(key_type, scale + 1000) + + def has_missing() -> None: + _ = table.has(missing_key) + + benchmark(has_missing) + + +class TestBenchDelete: + """Benchmarks for table.delete() operations.""" + + @pytest.mark.parametrize(("record_size", "key_type", "scale"), CI_PARAMS) + def test_delete_existing( + self, + benchmark: "BenchmarkFixture", + tmp_path: "Path", + record_size: RecordSize, + key_type: KeyType, + scale: int, + ) -> None: + """Delete existing records using unique keys per iteration.""" + # Create table with extra keys for benchmark iterations + table = create_extended_test_table( + tmp_path, key_type, record_size, scale, DELETE_ITERATION_BUFFER + ) + + # Counter starts at base scale (first extra key) + counter = [scale] + + def delete_unique() -> None: + key = generate_key(key_type, counter[0]) + _ = table.delete(key) + counter[0] += 1 + + benchmark(delete_unique) From 30ba69ec2667df400450b09d4cca52fc19f9b6dc Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Sun, 28 Dec 2025 16:24:06 -0500 Subject: [PATCH 3/7] fix: register limit_memory pytest marker for memray --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b8e3f85..a563e89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ markers = [ "conformance: mark a test as a conformance test.", "example: mark a test as a documentation example test.", "fuzz: mark a test as a fuzz test.", + "limit_memory: mark a test with memory limit (pytest-memray marker).", "slow: mark a test as slow (excluded by default, run with -m slow).", "integration: mark a test as an integration test.", "property: mark a test as a property test.", From 68580f7b39ef6f3fdbf0a9517897091193e2e1e9 Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Sun, 28 Dec 2025 16:26:13 -0500 Subject: [PATCH 4/7] perf(ci): shard benchmarks into 4 parallel jobs for faster CI --- .github/workflows/benchmark.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d3d8a03..f8e834d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -29,12 +29,24 @@ permissions: jobs: benchmark: - name: "Benchmark" + name: "Benchmark (${{ matrix.shard }})" runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 5 permissions: contents: read id-token: write + strategy: + fail-fast: false + matrix: + include: + - shard: "load-reload-get" + filter: "TestBenchLoad or TestBenchReload or TestBenchGet" + - shard: "all-find" + filter: "TestBenchAll or TestBenchFind" + - shard: "write-compact" + filter: "TestBenchPut or TestBenchBatchWrite or TestBenchCompact" + - shard: "keys-items-count-has-delete" + filter: "TestBenchKeys or TestBenchItems or TestBenchCount or TestBenchHas or TestBenchDelete" steps: - name: Checkout code @@ -50,4 +62,4 @@ jobs: uses: CodSpeedHQ/action@346a2d8a8d9d38909abd0bc3d23f773110f076ad # v4.4.1 with: mode: simulation - run: uv run pytest -m benchmark --codspeed + run: uv run pytest -m benchmark -k "${{ matrix.filter }}" --codspeed From 516e1342926ef82a914c3778f9a6e5988f7b3004 Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Sun, 28 Dec 2025 17:05:28 -0500 Subject: [PATCH 5/7] ci(benchmark): rebalance shards to prevent timeout Split benchmarks into 11 shards (from 4) to ensure each completes within the 6-minute timeout. 
Max shard size is now 43 tests (find) which should complete in ~5.7 minutes at ~8s per test. Shard distribution: - load-reload: 38 tests - get: 25 tests - all: 25 tests - find: 43 tests - find-one: 18 tests - write-compact: 32 tests - keys-delete: 31 tests - items: 25 tests - count: 25 tests - has: 31 tests - memory: 18 tests --- .github/workflows/benchmark.yml | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index f8e834d..957dabc 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,7 +31,7 @@ jobs: benchmark: name: "Benchmark (${{ matrix.shard }})" runs-on: ubuntu-latest - timeout-minutes: 5 + timeout-minutes: 6 permissions: contents: read id-token: write @@ -39,14 +39,39 @@ jobs: fail-fast: false matrix: include: - - shard: "load-reload-get" - filter: "TestBenchLoad or TestBenchReload or TestBenchGet" - - shard: "all-find" - filter: "TestBenchAll or TestBenchFind" + # ~38 tests each for load/reload + - shard: "load-reload" + filter: "TestBenchLoad or TestBenchReload" + # 25 tests + - shard: "get" + filter: "TestBenchGet" + # 25 tests + - shard: "all" + filter: "TestBenchAll" + # 43 tests (find_high=19, find_other=24) + - shard: "find" + filter: "TestBenchFind" + # 18 tests + - shard: "find-one" + filter: "TestBenchFindOne" + # 32 tests - shard: "write-compact" filter: "TestBenchPut or TestBenchBatchWrite or TestBenchCompact" - - shard: "keys-items-count-has-delete" - filter: "TestBenchKeys or TestBenchItems or TestBenchCount or TestBenchHas or TestBenchDelete" + # 31 tests + - shard: "keys-delete" + filter: "TestBenchKeys or TestBenchDelete" + # 25 tests + - shard: "items" + filter: "TestBenchItems" + # 25 tests + - shard: "count" + filter: "TestBenchCount" + # 31 tests + - shard: "has" + filter: "TestBenchHas" + # 18 tests + - shard: "memory" + filter: "TestMemory" steps: - name: Checkout code From 593ddf146e26fc8e0a7ffa33fbb2ca3e29b14c98 Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Mon, 29 Dec 2025 12:30:36 -0500 Subject: [PATCH 6/7] ci(benchmark): aggressive shard rebalancing for 5-minute target Split benchmarks into 15 shards to ensure each completes within 5 minutes. Separated slow vs non-slow tests for expensive operations (all, items, keys) using the slow marker. Shard distribution (estimated times): - get: 25 tests (~3m) - count: 25 tests (~3m) - has: 31 tests (~3m) - find-one-delete: 24 tests (~2m) - write-compact: 32 tests (~5m) - load: 19 tests (~4m) - reload: 19 tests (~4m) - keys-ci: 12 tests (~2.5m) - keys-slow: 13 tests (~2.5m) - all-ci: 12 tests (~3m) - all-slow: 13 tests (~3.5m) - items-ci: 12 tests (~3m) - items-slow: 13 tests (~3.5m) - find-high: 19 tests (~3m) - find-other: 24 tests (~4m) Memory tests excluded for now due to memray profiling overhead. 
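
For reference, each shard expands to a single pytest invocation, e.g. for
the "all-ci" shard:

    uv run pytest -m "benchmark and not slow" -k "TestBenchAll" --codspeed

Shards without an explicit marker fall back to -m benchmark via the
matrix.marker || 'benchmark' expression.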
--- .github/workflows/benchmark.yml | 66 ++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 957dabc..d6adce9 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,7 +31,7 @@ jobs: benchmark: name: "Benchmark (${{ matrix.shard }})" runs-on: ubuntu-latest - timeout-minutes: 6 + timeout-minutes: 5 permissions: contents: read id-token: write @@ -39,39 +39,47 @@ jobs: fail-fast: false matrix: include: - # ~38 tests each for load/reload - - shard: "load-reload" - filter: "TestBenchLoad or TestBenchReload" - # 25 tests + # Fast tests (~7s/test) - can have more tests per shard - shard: "get" filter: "TestBenchGet" - # 25 tests - - shard: "all" - filter: "TestBenchAll" - # 43 tests (find_high=19, find_other=24) - - shard: "find" - filter: "TestBenchFind" - # 18 tests - - shard: "find-one" - filter: "TestBenchFindOne" - # 32 tests - - shard: "write-compact" - filter: "TestBenchPut or TestBenchBatchWrite or TestBenchCompact" - # 31 tests - - shard: "keys-delete" - filter: "TestBenchKeys or TestBenchDelete" - # 25 tests - - shard: "items" - filter: "TestBenchItems" - # 25 tests - shard: "count" filter: "TestBenchCount" - # 31 tests - shard: "has" filter: "TestBenchHas" - # 18 tests - - shard: "memory" - filter: "TestMemory" + - shard: "find-one-delete" + filter: "TestBenchFindOne or TestBenchDelete" + # Moderate tests (~9s/test) + - shard: "write-compact" + filter: "TestBenchPut or TestBenchBatchWrite or TestBenchCompact" + # I/O-heavy tests (~12s/test) + - shard: "load" + filter: "TestBenchLoad" + - shard: "reload" + filter: "TestBenchReload" + - shard: "keys-ci" + filter: "TestBenchKeys" + marker: "benchmark and not slow" + - shard: "keys-slow" + filter: "TestBenchKeys" + marker: "benchmark and slow" + # Expensive tests (~15s/test) - split by slow marker + - shard: "all-ci" + filter: "TestBenchAll" + marker: "benchmark and not slow" + - shard: "all-slow" + filter: "TestBenchAll" + marker: "benchmark and slow" + - shard: "items-ci" + filter: "TestBenchItems" + marker: "benchmark and not slow" + - shard: "items-slow" + filter: "TestBenchItems" + marker: "benchmark and slow" + # Find tests (~10s/test) + - shard: "find-high" + filter: "TestBenchFind and not TestBenchFindOne and test_find_high_selectivity" + - shard: "find-other" + filter: "TestBenchFind and not TestBenchFindOne and not test_find_high_selectivity" steps: - name: Checkout code @@ -87,4 +95,4 @@ jobs: uses: CodSpeedHQ/action@346a2d8a8d9d38909abd0bc3d23f773110f076ad # v4.4.1 with: mode: simulation - run: uv run pytest -m benchmark -k "${{ matrix.filter }}" --codspeed + run: uv run pytest -m "${{ matrix.marker || 'benchmark' }}" -k "${{ matrix.filter }}" --codspeed From 4fb6a339c878ce3a45de7b4625059d6e47e69b47 Mon Sep 17 00:00:00 2001 From: Tony Burns Date: Tue, 30 Dec 2025 23:19:28 -0500 Subject: [PATCH 7/7] ci(benchmark): run only CI tests for fast PR feedback Exclude slow tests from PR benchmarks to ensure completion under 5 minutes. Slow tests take ~24s each vs ~7s for CI tests. 
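
Slow coverage remains runnable on demand, e.g. locally via:

    uv run pytest -m "benchmark and slow" --codspeed
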
Shard distribution (7 shards, ~176 tests total): - load-reload: 12 tests (~1.5m) - get: 12 tests (~1.5m) - find: 30 tests (~3.5m) - find-one-delete: 24 tests (~2m) - write-compact: 32 tests (~5m) - all-keys-items: 36 tests (~4m) - count-has: 30 tests (~3.5m) --- .github/workflows/benchmark.yml | 46 ++++++++++----------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d6adce9..fb004e3 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -39,47 +39,27 @@ jobs: fail-fast: false matrix: include: - # Fast tests (~7s/test) - can have more tests per shard + # CI tests only (not slow) for fast PR feedback + # Slow tests excluded - they take ~24s each vs ~7s for CI tests + - shard: "load-reload" + filter: "TestBenchLoad or TestBenchReload" + marker: "benchmark and not slow" - shard: "get" filter: "TestBenchGet" - - shard: "count" - filter: "TestBenchCount" - - shard: "has" - filter: "TestBenchHas" + marker: "benchmark and not slow" + - shard: "find" + filter: "TestBenchFind and not TestBenchFindOne" + marker: "benchmark and not slow" - shard: "find-one-delete" filter: "TestBenchFindOne or TestBenchDelete" - # Moderate tests (~9s/test) - shard: "write-compact" filter: "TestBenchPut or TestBenchBatchWrite or TestBenchCompact" - # I/O-heavy tests (~12s/test) - - shard: "load" - filter: "TestBenchLoad" - - shard: "reload" - filter: "TestBenchReload" - - shard: "keys-ci" - filter: "TestBenchKeys" - marker: "benchmark and not slow" - - shard: "keys-slow" - filter: "TestBenchKeys" - marker: "benchmark and slow" - # Expensive tests (~15s/test) - split by slow marker - - shard: "all-ci" - filter: "TestBenchAll" + - shard: "all-keys-items" + filter: "TestBenchAll or TestBenchKeys or TestBenchItems" marker: "benchmark and not slow" - - shard: "all-slow" - filter: "TestBenchAll" - marker: "benchmark and slow" - - shard: "items-ci" - filter: "TestBenchItems" + - shard: "count-has" + filter: "TestBenchCount or TestBenchHas" marker: "benchmark and not slow" - - shard: "items-slow" - filter: "TestBenchItems" - marker: "benchmark and slow" - # Find tests (~10s/test) - - shard: "find-high" - filter: "TestBenchFind and not TestBenchFindOne and test_find_high_selectivity" - - shard: "find-other" - filter: "TestBenchFind and not TestBenchFindOne and not test_find_high_selectivity" steps: - name: Checkout code