From e2255634d9bac2fc6bc84077727699c513ab1829 Mon Sep 17 00:00:00 2001 From: dts Date: Wed, 29 Apr 2026 15:27:39 +0200 Subject: [PATCH 1/2] test: cover behavior gaps surfaced by external review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure test additions; no production code changes. Each test pins or exercises a documented public-API contract that previously had no coverage. test_database.py: - test_database_open_mmap_populate covers Database.open(populate=True), the documented Linux page-prefault path which had no smoke test. - test_database_negative_indexing_raises_index_error pins that Database[-1] is not currently supported (raises IndexError / OverflowError / ValueError) — locks in the contract until someone intentionally adds wraparound. - test_database_empty_molecule_roundtrip exercises n_atoms == 0 through the SOA encoder + parser (the empty-positions slice edge case). test_atom_molecule.py: - test_from_arrays_rejects_wrong_dtype_positions and the analogous atomic_numbers test confirm that wrong dtypes raise cleanly across the PyO3 boundary instead of silently truncating or panicking. - test_set_property_python_bool_pins_current_behavior locks in that Python bool is stored as Int(1/0) because bool is a subclass of int and PyO3 extracts i64 before f64. If anyone wants real bool storage, this test will fail loudly. test_from_ase.py: - test_from_ase_expands_voigt6_stress_to_3x3 covers the bridge's _voigt6_to_mat3x3 path; ASE's get_stress(voigt=True) returns (6,) and the bridge must expand it before storing as a 3x3 tensor. --- atompack-py/tests/test_atom_molecule.py | 35 ++++++++++++++++++ atompack-py/tests/test_database.py | 48 +++++++++++++++++++++++++ atompack-py/tests/test_from_ase.py | 26 ++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/atompack-py/tests/test_atom_molecule.py b/atompack-py/tests/test_atom_molecule.py index 0ff034c..9b12293 100644 --- a/atompack-py/tests/test_atom_molecule.py +++ b/atompack-py/tests/test_atom_molecule.py @@ -262,3 +262,38 @@ def test_molecule_getitem_validation() -> None: _ = mol["does_not_exist"] with pytest.raises(TypeError, match=r"integers or strings"): _ = mol[1.5] + + +def test_from_arrays_rejects_wrong_dtype_positions() -> None: + # positions must be float32; passing float64 should be rejected cleanly, + # not silently truncated or panic across the FFI boundary. + positions = np.zeros((2, 3), dtype=np.float64) # wrong dtype + atomic_numbers = np.array([6, 8], dtype=np.uint8) + with pytest.raises((TypeError, ValueError)): + atompack.Molecule.from_arrays(positions, atomic_numbers) + + +def test_from_arrays_rejects_wrong_dtype_atomic_numbers() -> None: + # atomic_numbers must be uint8; passing int64 should be rejected cleanly. + positions = np.zeros((2, 3), dtype=np.float32) + atomic_numbers = np.array([6, 8], dtype=np.int64) # wrong dtype + with pytest.raises((TypeError, ValueError)): + atompack.Molecule.from_arrays(positions, atomic_numbers) + + +def test_set_property_python_bool_pins_current_behavior() -> None: + # Pin the (perhaps surprising) current behavior: Python bool is a subclass + # of int, and PyO3 extracts it as i64 before f64. So set_property(True) + # stores Int(1), not Bool. This test locks in the current contract; if + # anyone wants real bool storage they should add a TYPE_BOOL tag and + # reorder extract attempts. + mol = _make_molecule() + mol.set_property("flag_true", True) + mol.set_property("flag_false", False) + + flag_true = mol.get_property("flag_true") + flag_false = mol.get_property("flag_false") + assert isinstance(flag_true, int) and not isinstance(flag_true, bool) + assert flag_true == 1 + assert isinstance(flag_false, int) and not isinstance(flag_false, bool) + assert flag_false == 0 diff --git a/atompack-py/tests/test_database.py b/atompack-py/tests/test_database.py index 7fbb075..10fcbce 100644 --- a/atompack-py/tests/test_database.py +++ b/atompack-py/tests/test_database.py @@ -577,3 +577,51 @@ def test_get_molecules_flat_empty(tmp_path: Path) -> None: assert batch["n_atoms"].shape == (0,) assert batch["positions"].shape == (0, 3) assert batch["atomic_numbers"].shape == (0,) + + +def test_database_open_mmap_populate(tmp_path: Path) -> None: + # populate=True prefaults mapped pages on Linux. On other platforms it's a + # no-op. Either way the open path must succeed and return readable data. + path = tmp_path / "populate.atp" + db = atompack.Database(str(path)) + db.add_molecule(_make_molecule(-3.0)) + db.flush() + + db_r = atompack.Database.open(str(path), mmap=True, populate=True) + assert len(db_r) == 1 + assert db_r[0].energy == pytest.approx(-3.0) + + +def test_database_negative_indexing_raises_index_error(tmp_path: Path) -> None: + # Database does not support negative indexing today; a clear IndexError + # is preferable to silent wraparound. + path = tmp_path / "negidx.atp" + db = atompack.Database(str(path)) + db.add_molecule(_make_molecule(-1.0)) + db.flush() + + db_r = atompack.Database.open(str(path)) + with pytest.raises((IndexError, OverflowError, ValueError)): + _ = db_r[-1] + + +def test_database_empty_molecule_roundtrip(tmp_path: Path) -> None: + # n_atoms == 0 is the SOA parser edge case; positions/atomic_numbers slices + # become zero-length and most code paths must still work. + path = tmp_path / "empty_mol.atp" + mol = atompack.Molecule.from_arrays( + np.zeros((0, 3), dtype=np.float32), + np.zeros((0,), dtype=np.uint8), + energy=0.0, + ) + + db = atompack.Database(str(path)) + db.add_molecule(mol) + db.flush() + + db_r = atompack.Database.open(str(path)) + read = db_r[0] + assert len(read) == 0 + assert read.positions.shape == (0, 3) + assert read.atomic_numbers.shape == (0,) + assert read.energy == pytest.approx(0.0) diff --git a/atompack-py/tests/test_from_ase.py b/atompack-py/tests/test_from_ase.py index 542cdd4..2a2a34a 100644 --- a/atompack-py/tests/test_from_ase.py +++ b/atompack-py/tests/test_from_ase.py @@ -502,3 +502,29 @@ def test_to_ase_batch_none_calc_mode_preserves_results(tmp_path) -> None: np.testing.assert_allclose(atoms_batch[1].info["stress"], stress[1]) np.testing.assert_allclose(atoms_batch[1].arrays["forces"], forces[1]) np.testing.assert_allclose(atoms_batch[1].arrays["charges"], charges[1]) + + +def test_from_ase_expands_voigt6_stress_to_3x3() -> None: + # ASE's get_stress(voigt=True) returns a (6,) Voigt-form array; the bridge + # must expand it to a (3,3) symmetric tensor before storing. + voigt = np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype=np.float64) + atoms = FakeASEAtoms( + positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64), + atomic_numbers=np.array([6, 8], dtype=np.int64), + info={"ase_stress": voigt}, + ) + + mol = atompack.from_ase(atoms) + assert mol.stress is not None + assert mol.stress.shape == (3, 3) + # Voigt order is (xx, yy, zz, yz, xz, xy); expanded matrix is symmetric. + expected = np.array( + [ + [voigt[0], voigt[5], voigt[4]], + [voigt[5], voigt[1], voigt[3]], + [voigt[4], voigt[3], voigt[2]], + ], + dtype=np.float64, + ) + np.testing.assert_allclose(mol.stress, expected) + np.testing.assert_allclose(mol.stress, mol.stress.T) # symmetry sanity From 063b51b2565a02a68bed2c83cc4e94a7666310c4 Mon Sep 17 00:00:00 2001 From: dts Date: Wed, 29 Apr 2026 16:00:56 +0200 Subject: [PATCH 2/2] test: tighten gaps flagged by independent review - test_database_negative_indexing_raises_overflow_error pins the actual current exception (OverflowError from PyO3 usize extraction) and the message; previously the test caught a permissive (IndexError, OverflowError, ValueError) tuple that would have masked any future silent change. - test_database_open_mmap_populate now documents that the assertion is smoke-only on macOS (memmap2 PopulateRead advise is best-effort there); pre-faulting performance is a benchmark concern, not a unit-test one. - Add "lz4" to the four mmap-parametrized tests in test_database.py (single_item_reads_are_view_compatible, custom_array_properties_..., single_item_mutation_is_copy_on_write, single_item_pickle_...). The basic lz4+mmap=True path was covered by test_database_roundtrip[lz4] but the deeper mutation/pickle/view round-trips were not. --- atompack-py/tests/test_database.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/atompack-py/tests/test_database.py b/atompack-py/tests/test_database.py index 10fcbce..65cafb5 100644 --- a/atompack-py/tests/test_database.py +++ b/atompack-py/tests/test_database.py @@ -249,7 +249,7 @@ def test_database_add_arrays_batch_roundtrip_with_custom_properties(tmp_path: Pa @pytest.mark.parametrize("mmap", [False, True]) -@pytest.mark.parametrize("compression", ["none", "zstd"]) +@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_single_item_reads_are_view_compatible( tmp_path: Path, mmap: bool, @@ -283,7 +283,7 @@ def test_database_single_item_reads_are_view_compatible( @pytest.mark.parametrize("mmap", [False, True]) -@pytest.mark.parametrize("compression", ["none", "zstd"]) +@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_custom_array_properties_roundtrip_all_numeric_tags( tmp_path: Path, mmap: bool, @@ -349,7 +349,7 @@ def test_database_custom_array_properties_roundtrip_all_numeric_tags( @pytest.mark.parametrize("mmap", [False, True]) -@pytest.mark.parametrize("compression", ["none", "zstd"]) +@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_single_item_mutation_is_copy_on_write( tmp_path: Path, mmap: bool, @@ -392,7 +392,7 @@ def test_database_single_item_mutation_is_copy_on_write( @pytest.mark.parametrize("mmap", [False, True]) -@pytest.mark.parametrize("compression", ["none", "zstd"]) +@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_single_item_pickle_materializes_owned_roundtrip( tmp_path: Path, mmap: bool, @@ -580,8 +580,12 @@ def test_get_molecules_flat_empty(tmp_path: Path) -> None: def test_database_open_mmap_populate(tmp_path: Path) -> None: - # populate=True prefaults mapped pages on Linux. On other platforms it's a - # no-op. Either way the open path must succeed and return readable data. + # Smoke test for the documented populate=True path. On Linux this + # prefaults mapped pages via memmap2's PopulateRead advise; on macOS the + # advise is best-effort and silently ignored, so the assertion here is + # only that the open path succeeds and returns correct data — not that + # pages are actually faulted in. Pre-faulting performance is exercised + # in benchmarks/, not unit tests. path = tmp_path / "populate.atp" db = atompack.Database(str(path)) db.add_molecule(_make_molecule(-3.0)) @@ -592,16 +596,18 @@ def test_database_open_mmap_populate(tmp_path: Path) -> None: assert db_r[0].energy == pytest.approx(-3.0) -def test_database_negative_indexing_raises_index_error(tmp_path: Path) -> None: - # Database does not support negative indexing today; a clear IndexError - # is preferable to silent wraparound. +def test_database_negative_indexing_raises_overflow_error(tmp_path: Path) -> None: + # Database does not support negative indexing today. PyO3 extracts the + # index argument as `usize`, so a negative integer raises OverflowError + # at the FFI boundary. If wraparound semantics are ever added, this + # test will fail loudly so the intent is explicit. path = tmp_path / "negidx.atp" db = atompack.Database(str(path)) db.add_molecule(_make_molecule(-1.0)) db.flush() db_r = atompack.Database.open(str(path)) - with pytest.raises((IndexError, OverflowError, ValueError)): + with pytest.raises(OverflowError, match=r"negative"): _ = db_r[-1]