diff --git a/atompack-py/python/atompack/ase_bridge.py b/atompack-py/python/atompack/ase_bridge.py index 15ef29d..b268580 100644 --- a/atompack-py/python/atompack/ase_bridge.py +++ b/atompack-py/python/atompack/ase_bridge.py @@ -81,10 +81,16 @@ def _coerce_property(value, n_atoms): def _merge_properties(properties, builtins, values, n_atoms): for key, value in values.items(): - if key == "stress": - arr = np.asarray(value) - if arr.shape == (3, 3) and arr.dtype.kind == "f": - builtins["stress"] = arr.astype(np.float64, copy=False) + if key in _BUILTIN_FIELDS: + # Builtin keys in atoms.info / info-override go to the builtins + # dict (when shape/dtype matches), never into custom properties. + # Without this guard, info["energy"] would land in both + # builtins["energy"] (from get_potential_energy) and + # properties["energy"], producing divergent state on round-trip. + if key == "stress": + arr = np.asarray(value) + if arr.shape == (3, 3) and arr.dtype.kind == "f": + builtins["stress"] = arr.astype(np.float64, copy=False) continue coerced = _coerce_property(value, n_atoms) if coerced is not None: @@ -174,10 +180,15 @@ def _extract_ase_record( arrays = getattr(atoms, "arrays", None) if isinstance(arrays, dict): for key, value in arrays.items(): - if key not in {"positions", "numbers"}: - coerced = _coerce_property(value, n_atoms) - if coerced is not None: - properties[key] = coerced + # Skip both ASE-reserved geometry keys ("positions", "numbers") + # and atompack builtin field names. A user who stashes "forces" + # in atoms.arrays must not have it duplicated into both + # builtins["forces"] (from get_forces()) and properties["forces"]. + if key in _ASE_RESERVED_ARRAYS or key in _BUILTIN_FIELDS: + continue + coerced = _coerce_property(value, n_atoms) + if coerced is not None: + properties[key] = coerced calc = getattr(atoms, "calc", None) results = getattr(calc, "results", None) diff --git a/atompack-py/src/molecule_helpers.rs b/atompack-py/src/molecule_helpers.rs index 4512f7f..1f6bdf3 100644 --- a/atompack-py/src/molecule_helpers.rs +++ b/atompack-py/src/molecule_helpers.rs @@ -621,7 +621,22 @@ fn section_is_atom_array(view: &SoaMoleculeView, section: &LazySection) -> bool } fn is_reserved_ase_array_key(key: &str) -> bool { - matches!(key, "numbers" | "positions") + // ASE-reserved geometry keys plus atompack's builtin field names. A custom + // property named "forces" must not be shoveled into atoms.arrays["forces"] + // alongside the calculator-attached forces — that's the to_ase mirror of + // the from_ase duplicate-state bug. + matches!( + key, + "numbers" + | "positions" + | "energy" + | "forces" + | "charges" + | "velocities" + | "cell" + | "stress" + | "pbc" + ) } fn owned_vec3_array<'py>( diff --git a/atompack-py/tests/test_from_ase.py b/atompack-py/tests/test_from_ase.py index 542cdd4..1c477dc 100644 --- a/atompack-py/tests/test_from_ase.py +++ b/atompack-py/tests/test_from_ase.py @@ -192,6 +192,100 @@ def test_from_ase_extracts_arrays_and_calc_results() -> None: mol.get_property("forces") +def test_from_ase_does_not_duplicate_builtins_into_custom_properties() -> None: + # When a user stashes a builtin field name (e.g. "forces", "energy", + # "pbc") inside atoms.arrays or atoms.info, the bridge must NOT also + # store it as a custom property. Otherwise round-tripping produces + # two divergent values per builtin field — one from the calculator + # path and one from the custom-property path. + forces_in_arrays = np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float64) + atoms = FakeASEAtoms( + positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64), + atomic_numbers=np.array([6, 8], dtype=np.int64), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64), + energy=-1.0, + arrays={ + "positions": np.zeros((2, 3), dtype=np.float64), + "numbers": np.zeros((2,), dtype=np.int64), + # Hostile content: same key as a builtin. Should be ignored. + "forces": forces_in_arrays, + "charges": np.array([99.0, 99.0], dtype=np.float64), + # Plus a legitimate custom array. + "tags": np.array([1, 2], dtype=np.int32), + }, + info={ + "energy": -999.0, # collides with builtin + "forces": forces_in_arrays, # collides with builtin + "pbc": (True, True, True), # collides with builtin + # Plus a legitimate custom info key. + "method": "DFT", + }, + ) + + mol = atompack.from_ase(atoms) + + # Builtins come from the calculator/getter path, not from arrays/info. + np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32)) + assert mol.energy == pytest.approx(-1.0) + # Legitimate custom keys still flow through. + np.testing.assert_array_equal(mol.get_property("tags"), np.array([1, 2], dtype=np.int32)) + assert mol.get_property("method") == "DFT" + # Builtin keys must NOT show up as custom properties. + for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"): + assert not mol.has_property(builtin_key), ( + f"builtin key {builtin_key!r} leaked into custom properties" + ) + + +def test_from_ase_info_override_kwarg_filters_builtins() -> None: + # The same filter must apply to the explicit info= kwarg of from_ase + # (the second caller of _merge_properties), not just atoms.info. + atoms = FakeASEAtoms( + positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64), + atomic_numbers=np.array([6, 8], dtype=np.int64), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64), + energy=-1.0, + ) + mol = atompack.from_ase( + atoms, + info={ + "energy": -777.0, # collides; must be ignored + "forces": np.zeros((2, 3), dtype=np.float64), # collides; must be ignored + "method": "DFT", # legitimate + }, + ) + np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32)) + assert mol.energy == pytest.approx(-1.0) + assert mol.get_property("method") == "DFT" + for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"): + assert not mol.has_property(builtin_key) + + +def test_to_ase_does_not_duplicate_builtins_in_arrays() -> None: + # to_ase mirror of the from_ase fix: even if a user explicitly stuffs a + # builtin name into custom properties via mol.set_property("forces", ...), + # to_ase must NOT shovel that custom value into atoms.arrays["forces"] + # next to the calculator-attached builtin forces. Otherwise round-tripping + # contaminates the ASE side. + mol = atompack.Molecule.from_arrays( + np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float32), + np.array([6, 8], dtype=np.uint8), + forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32), + energy=-1.0, + ) + # Hostile: name a custom property after a builtin field. + mol.set_property("forces", np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float32)) + mol.set_property("tags", np.array([1, 2], dtype=np.int32)) + + ase_atoms = mol.to_ase() + # Calculator-attached forces are the source of truth. + np.testing.assert_allclose(ase_atoms.get_forces(), mol.forces) + # The hostile custom "forces" must NOT have landed in atoms.arrays. + assert "forces" not in ase_atoms.arrays + # Legitimate non-builtin custom property still flows through. + np.testing.assert_array_equal(ase_atoms.arrays["tags"], np.array([1, 2], dtype=np.int32)) + + def test_add_ase_batch_roundtrip(tmp_path) -> None: path = tmp_path / "ase_batch.atp" atoms_list = [