From 3623300eaae10881c0b1200a23f50a1b2f154a09 Mon Sep 17 00:00:00 2001 From: dts Date: Thu, 30 Apr 2026 15:39:21 +0200 Subject: [PATCH 1/2] chore: move h5py from base deps to [benchmarks] extra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit h5py is only imported by atompack-py/benchmarks/atom_hdf5_soa.py and benchmarks/hdf5_tuned_experiment.py — comparison scripts, not the package itself. Zero imports anywhere under python/atompack/ or production code paths. Yet h5py was listed as a runtime dependency, so every install of atompack-db pulled ~50 MB of HDF5 binaries that were never used at import time. Move h5py>=3.10 to a new [project.optional-dependencies] benchmarks extra. Users who actually run the comparison benchmarks install with 'pip install atompack-db[benchmarks]'; everyone else gets a slimmer wheel. Tests under tests/benchmarks/ that exercise the comparison scripts already use pytest.importorskip("h5py"), so they cleanly skip when the extra isn't installed — no test failures from the move. uv.lock regenerated to reflect the change. h5py is still locked, just under the benchmarks extra now. --- atompack-py/pyproject.toml | 7 ++++++- atompack-py/uv.lock | 12 +++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/atompack-py/pyproject.toml b/atompack-py/pyproject.toml index 1fbb7b4..7988c45 100644 --- a/atompack-py/pyproject.toml +++ b/atompack-py/pyproject.toml @@ -32,7 +32,6 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "h5py>=3.10", "huggingface_hub>=0.24", "numpy>=1.20", ] @@ -51,6 +50,12 @@ dev = [ "mypy", "ruff", ] +# Used only by the comparison scripts under atompack-py/benchmarks/. Not +# imported anywhere under python/atompack/, so it has no business in the +# base wheel — pulls ~50 MB of HDF5 binaries onto every install. 
+benchmarks = [ + "h5py>=3.10", +] [tool.maturin] python-source = "python" diff --git a/atompack-py/uv.lock b/atompack-py/uv.lock index e4ef7f7..02f311c 100644 --- a/atompack-py/uv.lock +++ b/atompack-py/uv.lock @@ -169,9 +169,6 @@ name = "atompack-db" version = "0.2.1" source = { editable = "." } dependencies = [ - { name = "h5py", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "h5py", version = "3.14.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "h5py", version = "3.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, @@ -181,6 +178,11 @@ dependencies = [ ] [package.optional-dependencies] +benchmarks = [ + { name = "h5py", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "h5py", version = "3.14.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "h5py", version = "3.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] dev = [ { name = "mypy", version = "1.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "mypy", version = "1.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, @@ -224,7 +226,7 @@ docs = [ [package.metadata] requires-dist = [ - { name = "h5py", specifier = ">=3.10" }, 
+ { name = "h5py", marker = "extra == 'benchmarks'", specifier = ">=3.10" }, { name = "huggingface-hub", specifier = ">=0.24" }, { name = "mypy", marker = "extra == 'dev'" }, { name = "numpy", specifier = ">=1.20" }, @@ -232,7 +234,7 @@ requires-dist = [ { name = "pytest-benchmark", marker = "extra == 'dev'" }, { name = "ruff", marker = "extra == 'dev'" }, ] -provides-extras = ["dev"] +provides-extras = ["dev", "benchmarks"] [package.metadata.requires-dev] docs = [ From b5cf2015b522d1cd230f0134e0387cc787873cab Mon Sep 17 00:00:00 2001 From: dts Date: Thu, 30 Apr 2026 17:47:48 +0200 Subject: [PATCH 2/2] chore: update benchmarks README + Makefile after h5py move Independent reviewer caught two follow-ups for the h5py-to-extras PR: - atompack-py/benchmarks/README.md still claimed h5py was "part of the default benchmark environment" and shipped "through the base project dependencies." Updated both lines to point users at 'pip install atompack-db[benchmarks]'. - Makefile py-test-benchmarks target only passed --extra dev. After moving h5py to [benchmarks], that meant 'make py-test-benchmarks' silently skipped every hdf5-gated test (defeating the target's purpose). Added --extra benchmarks alongside --extra dev so the benchmarks suite actually exercises h5py paths again. Verified with a direct pytest run: 'uv run --extra dev --extra benchmarks --locked pytest tests/benchmarks' now reports 52 passed, 4 skipped (vs. the same command with only --extra dev which would have skipped all h5py tests). 
--- Makefile | 2 +- atompack-py/benchmarks/README.md | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1f58a9d..5b244d0 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ py-test: py-dev py-test-benchmarks: py-dev @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) - cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked pytest tests/benchmarks + cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --extra benchmarks --locked pytest tests/benchmarks docs-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) diff --git a/atompack-py/benchmarks/README.md b/atompack-py/benchmarks/README.md index 035765b..a4162f0 100644 --- a/atompack-py/benchmarks/README.md +++ b/atompack-py/benchmarks/README.md @@ -64,7 +64,8 @@ Notes: - `get_molecules_flat(...)` remains the fastest tensor-batch path when the consumer can work with concatenated arrays directly. - `hdf5_soa` reflects the usual chunked-dataset HDF5 layout used for - fixed-shape materials data and is part of the default benchmark environment. + fixed-shape materials data; install with `pip install atompack-db[benchmarks]` + to pull in the `h5py` dependency. - `lmdb_packed` now participates in the synthetic custom-property variants in this suite too; `lmdb_soa` remains the builtins-only helper baseline. - In this suite, `hdf5_soa` is intentionally read one molecule at a time so it @@ -330,8 +331,9 @@ tuning that matters most for dataloader-like access: - reader-side raw chunk cache sizing through `h5py` open options - batch reads that regroup requested indices by chunk before rebuilding payloads -This backend is included in the default benchmark environment through the base -project dependencies. 
+This backend depends on `h5py`, which is shipped as the `[benchmarks]` extra: +install it with `pip install atompack-db[benchmarks]`, or use +`make py-test-benchmarks`, which runs with the `dev` and `benchmarks` extras. ### `atom_lmdb_soa.py`