From da0115e75bb7427e7dd538928b3651066826d8f9 Mon Sep 17 00:00:00 2001 From: dts Date: Thu, 30 Apr 2026 15:13:37 +0200 Subject: [PATCH 1/2] fix: skip builtin field names when copying atoms.arrays/info _extract_ase_record copied keys from atoms.arrays and atoms.info into custom properties without filtering builtin field names. This caused silent duplicate state for any molecule whose ASE Atoms object had "forces" stashed in atoms.arrays or "energy" in atoms.info: the bridge stored the calculator-derived value in builtins["forces"] AND the arrays/info value in properties["forces"], producing two divergent values per builtin field. The two affected paths in _extract_ase_record: - atoms.arrays loop only excluded {"positions", "numbers"}. Now also skips _BUILTIN_FIELDS (energy/forces/charges/velocities/cell/stress/ pbc). - _merge_properties (called for atoms.info and the info-override kwarg) only special-cased "stress". Now skips all _BUILTIN_FIELDS, with the existing stress-as-(3,3) builtin override behavior preserved. The atoms.calc.results path (line 186) already filtered _BUILTIN_FIELDS correctly; this PR brings the other two paths into line. New test_from_ase_does_not_duplicate_builtins_into_custom_properties constructs an ASE Atoms object with builtin keys deliberately stashed into both arrays and info, and asserts the builtins remain calculator-derived while none of the builtin keys leak into custom properties. --- atompack-py/python/atompack/ase_bridge.py | 27 ++++++++++---- atompack-py/tests/test_from_ase.py | 45 +++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/atompack-py/python/atompack/ase_bridge.py b/atompack-py/python/atompack/ase_bridge.py index 15ef29d..b268580 100644 --- a/atompack-py/python/atompack/ase_bridge.py +++ b/atompack-py/python/atompack/ase_bridge.py @@ -81,10 +81,16 @@ def _coerce_property(value, n_atoms): def _merge_properties(properties, builtins, values, n_atoms): for key, value in values.items(): - if key == "stress": - arr = np.asarray(value) - if arr.shape == (3, 3) and arr.dtype.kind == "f": - builtins["stress"] = arr.astype(np.float64, copy=False) + if key in _BUILTIN_FIELDS: + # Builtin keys in atoms.info / info-override go to the builtins + # dict (when shape/dtype matches), never into custom properties. + # Without this guard, info["energy"] would land in both + # builtins["energy"] (from get_potential_energy) and + # properties["energy"], producing divergent state on round-trip. + if key == "stress": + arr = np.asarray(value) + if arr.shape == (3, 3) and arr.dtype.kind == "f": + builtins["stress"] = arr.astype(np.float64, copy=False) continue coerced = _coerce_property(value, n_atoms) if coerced is not None: @@ -174,10 +180,15 @@ def _extract_ase_record( arrays = getattr(atoms, "arrays", None) if isinstance(arrays, dict): for key, value in arrays.items(): - if key not in {"positions", "numbers"}: - coerced = _coerce_property(value, n_atoms) - if coerced is not None: - properties[key] = coerced + # Skip both ASE-reserved geometry keys ("positions", "numbers") + # and atompack builtin field names. A user who stashes "forces" + # in atoms.arrays must not have it duplicated into both + # builtins["forces"] (from get_forces()) and properties["forces"]. + if key in _ASE_RESERVED_ARRAYS or key in _BUILTIN_FIELDS: + continue + coerced = _coerce_property(value, n_atoms) + if coerced is not None: + properties[key] = coerced calc = getattr(atoms, "calc", None) results = getattr(calc, "results", None) diff --git a/atompack-py/tests/test_from_ase.py b/atompack-py/tests/test_from_ase.py index 542cdd4..b6d43be 100644 --- a/atompack-py/tests/test_from_ase.py +++ b/atompack-py/tests/test_from_ase.py @@ -192,6 +192,51 @@ def test_from_ase_extracts_arrays_and_calc_results() -> None: mol.get_property("forces") +def test_from_ase_does_not_duplicate_builtins_into_custom_properties() -> None: + # When a user stashes a builtin field name (e.g. "forces", "energy", + # "pbc") inside atoms.arrays or atoms.info, the bridge must NOT also + # store it as a custom property. Otherwise round-tripping produces + # two divergent values per builtin field — one from the calculator + # path and one from the custom-property path. + forces_in_arrays = np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float64) + atoms = FakeASEAtoms( + positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64), + atomic_numbers=np.array([6, 8], dtype=np.int64), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64), + energy=-1.0, + arrays={ + "positions": np.zeros((2, 3), dtype=np.float64), + "numbers": np.zeros((2,), dtype=np.int64), + # Hostile content: same key as a builtin. Should be ignored. + "forces": forces_in_arrays, + "charges": np.array([99.0, 99.0], dtype=np.float64), + # Plus a legitimate custom array. + "tags": np.array([1, 2], dtype=np.int32), + }, + info={ + "energy": -999.0, # collides with builtin + "forces": forces_in_arrays, # collides with builtin + "pbc": (True, True, True), # collides with builtin + # Plus a legitimate custom info key. + "method": "DFT", + }, + ) + + mol = atompack.from_ase(atoms) + + # Builtins come from the calculator/getter path, not from arrays/info. + np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32)) + assert mol.energy == pytest.approx(-1.0) + # Legitimate custom keys still flow through. + np.testing.assert_array_equal(mol.get_property("tags"), np.array([1, 2], dtype=np.int32)) + assert mol.get_property("method") == "DFT" + # Builtin keys must NOT show up as custom properties. + for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"): + assert not mol.has_property(builtin_key), ( + f"builtin key {builtin_key!r} leaked into custom properties" + ) + + def test_add_ase_batch_roundtrip(tmp_path) -> None: path = tmp_path / "ase_batch.atp" atoms_list = [ From 6e15a0c5253ff6d3c96bcad7c3eaa842bbe0fbb4 Mon Sep 17 00:00:00 2001 From: dts Date: Thu, 30 Apr 2026 15:28:59 +0200 Subject: [PATCH 2/2] fix: also filter builtins in to_ase direction (symmetric fix) Reviewer flagged a symmetric duplicate-state path on the to_ase side: is_reserved_ase_array_key in molecule_helpers.rs only excluded "numbers"/"positions", so a custom property named after a builtin field (e.g. mol.set_property("forces", X)) would land in atoms.arrays["forces"] right next to the calculator-attached builtin forces. Same divergence class as the from_ase bug closed in the parent commit. Extended the matches! list to also reject "energy", "forces", "charges", "velocities", "cell", "stress", "pbc". The set_property side still allows the user to set a custom property named after a builtin (only "stress" is reserved at set_property time today), but to_ase will no longer surface it in atoms.arrays. Tightening set_property to reject all builtin keys is a more invasive behavior change worth its own PR. New tests: - test_from_ase_info_override_kwarg_filters_builtins covers the second caller of _merge_properties (the from_ase info= kwarg) which the earlier test only reached transitively. - test_to_ase_does_not_duplicate_builtins_in_arrays sets a hostile "forces" custom property and confirms to_ase keeps the calculator- attached forces as the source of truth, while a legitimate custom key ("tags") still flows into atoms.arrays. --- atompack-py/src/molecule_helpers.rs | 17 +++++++++- atompack-py/tests/test_from_ase.py | 49 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/atompack-py/src/molecule_helpers.rs b/atompack-py/src/molecule_helpers.rs index 4512f7f..1f6bdf3 100644 --- a/atompack-py/src/molecule_helpers.rs +++ b/atompack-py/src/molecule_helpers.rs @@ -621,7 +621,22 @@ fn section_is_atom_array(view: &SoaMoleculeView, section: &LazySection) -> bool } fn is_reserved_ase_array_key(key: &str) -> bool { - matches!(key, "numbers" | "positions") + // ASE-reserved geometry keys plus atompack's builtin field names. A custom + // property named "forces" must not be shoveled into atoms.arrays["forces"] + // alongside the calculator-attached forces — that's the to_ase mirror of + // the from_ase duplicate-state bug. + matches!( + key, + "numbers" + | "positions" + | "energy" + | "forces" + | "charges" + | "velocities" + | "cell" + | "stress" + | "pbc" + ) } fn owned_vec3_array<'py>( diff --git a/atompack-py/tests/test_from_ase.py b/atompack-py/tests/test_from_ase.py index b6d43be..1c477dc 100644 --- a/atompack-py/tests/test_from_ase.py +++ b/atompack-py/tests/test_from_ase.py @@ -237,6 +237,55 @@ def test_from_ase_does_not_duplicate_builtins_into_custom_properties() -> None: ) +def test_from_ase_info_override_kwarg_filters_builtins() -> None: + # The same filter must apply to the explicit info= kwarg of from_ase + # (the second caller of _merge_properties), not just atoms.info. + atoms = FakeASEAtoms( + positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64), + atomic_numbers=np.array([6, 8], dtype=np.int64), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64), + energy=-1.0, + ) + mol = atompack.from_ase( + atoms, + info={ + "energy": -777.0, # collides; must be ignored + "forces": np.zeros((2, 3), dtype=np.float64), # collides; must be ignored + "method": "DFT", # legitimate + }, + ) + np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32)) + assert mol.energy == pytest.approx(-1.0) + assert mol.get_property("method") == "DFT" + for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"): + assert not mol.has_property(builtin_key) + + +def test_to_ase_does_not_duplicate_builtins_in_arrays() -> None: + # to_ase mirror of the from_ase fix: even if a user explicitly stuffs a + # builtin name into custom properties via mol.set_property("forces", ...), + # to_ase must NOT shovel that custom value into atoms.arrays["forces"] + # next to the calculator-attached builtin forces. Otherwise round-tripping + # contaminates the ASE side. + mol = atompack.Molecule.from_arrays( + np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float32), + np.array([6, 8], dtype=np.uint8), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32), + energy=-1.0, + ) + # Hostile: name a custom property after a builtin field. + mol.set_property("forces", np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float32)) + mol.set_property("tags", np.array([1, 2], dtype=np.int32)) + + ase_atoms = mol.to_ase() + # Calculator-attached forces are the source of truth. + np.testing.assert_allclose(ase_atoms.get_forces(), mol.forces) + # The hostile custom "forces" must NOT have landed in atoms.arrays. + assert "forces" not in ase_atoms.arrays + # Legitimate non-builtin custom property still flows through. + np.testing.assert_array_equal(ase_atoms.arrays["tags"], np.array([1, 2], dtype=np.int32)) + + def test_add_ase_batch_roundtrip(tmp_path) -> None: path = tmp_path / "ase_batch.atp" atoms_list = [