Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions atompack-py/python/atompack/ase_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,16 @@ def _coerce_property(value, n_atoms):

def _merge_properties(properties, builtins, values, n_atoms):
for key, value in values.items():
if key == "stress":
arr = np.asarray(value)
if arr.shape == (3, 3) and arr.dtype.kind == "f":
builtins["stress"] = arr.astype(np.float64, copy=False)
if key in _BUILTIN_FIELDS:
# Builtin keys in atoms.info / info-override go to the builtins
# dict (when shape/dtype matches), never into custom properties.
# Without this guard, info["energy"] would land in both
# builtins["energy"] (from get_potential_energy) and
# properties["energy"], producing divergent state on round-trip.
if key == "stress":
arr = np.asarray(value)
if arr.shape == (3, 3) and arr.dtype.kind == "f":
builtins["stress"] = arr.astype(np.float64, copy=False)
continue
coerced = _coerce_property(value, n_atoms)
if coerced is not None:
Expand Down Expand Up @@ -174,10 +180,15 @@ def _extract_ase_record(
arrays = getattr(atoms, "arrays", None)
if isinstance(arrays, dict):
for key, value in arrays.items():
if key not in {"positions", "numbers"}:
coerced = _coerce_property(value, n_atoms)
if coerced is not None:
properties[key] = coerced
# Skip both ASE-reserved geometry keys ("positions", "numbers")
# and atompack builtin field names. A user who stashes "forces"
# in atoms.arrays must not have it duplicated into both
# builtins["forces"] (from get_forces()) and properties["forces"].
if key in _ASE_RESERVED_ARRAYS or key in _BUILTIN_FIELDS:
continue
coerced = _coerce_property(value, n_atoms)
if coerced is not None:
properties[key] = coerced

calc = getattr(atoms, "calc", None)
results = getattr(calc, "results", None)
Expand Down
17 changes: 16 additions & 1 deletion atompack-py/src/molecule_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,22 @@ fn section_is_atom_array(view: &SoaMoleculeView, section: &LazySection) -> bool
}

fn is_reserved_ase_array_key(key: &str) -> bool {
matches!(key, "numbers" | "positions")
// ASE-reserved geometry keys plus atompack's builtin field names. A custom
// property named "forces" must not be shoveled into atoms.arrays["forces"]
// alongside the calculator-attached forces — that's the to_ase mirror of
// the from_ase duplicate-state bug.
matches!(
key,
"numbers"
| "positions"
| "energy"
| "forces"
| "charges"
| "velocities"
| "cell"
| "stress"
| "pbc"
)
}

fn owned_vec3_array<'py>(
Expand Down
94 changes: 94 additions & 0 deletions atompack-py/tests/test_from_ase.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,100 @@ def test_from_ase_extracts_arrays_and_calc_results() -> None:
mol.get_property("forces")


def test_from_ase_does_not_duplicate_builtins_into_custom_properties() -> None:
# When a user stashes a builtin field name (e.g. "forces", "energy",
# "pbc") inside atoms.arrays or atoms.info, the bridge must NOT also
# store it as a custom property. Otherwise round-tripping produces
# two divergent values per builtin field — one from the calculator
# path and one from the custom-property path.
forces_in_arrays = np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float64)
atoms = FakeASEAtoms(
positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64),
atomic_numbers=np.array([6, 8], dtype=np.int64),
forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64),
energy=-1.0,
arrays={
"positions": np.zeros((2, 3), dtype=np.float64),
"numbers": np.zeros((2,), dtype=np.int64),
# Hostile content: same key as a builtin. Should be ignored.
"forces": forces_in_arrays,
"charges": np.array([99.0, 99.0], dtype=np.float64),
# Plus a legitimate custom array.
"tags": np.array([1, 2], dtype=np.int32),
},
info={
"energy": -999.0, # collides with builtin
"forces": forces_in_arrays, # collides with builtin
"pbc": (True, True, True), # collides with builtin
# Plus a legitimate custom info key.
"method": "DFT",
},
)

mol = atompack.from_ase(atoms)

# Builtins come from the calculator/getter path, not from arrays/info.
np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32))
assert mol.energy == pytest.approx(-1.0)
# Legitimate custom keys still flow through.
np.testing.assert_array_equal(mol.get_property("tags"), np.array([1, 2], dtype=np.int32))
assert mol.get_property("method") == "DFT"
# Builtin keys must NOT show up as custom properties.
for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"):
assert not mol.has_property(builtin_key), (
f"builtin key {builtin_key!r} leaked into custom properties"
)


def test_from_ase_info_override_kwarg_filters_builtins() -> None:
# The same filter must apply to the explicit info= kwarg of from_ase
# (the second caller of _merge_properties), not just atoms.info.
atoms = FakeASEAtoms(
positions=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float64),
atomic_numbers=np.array([6, 8], dtype=np.int64),
forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64),
energy=-1.0,
)
mol = atompack.from_ase(
atoms,
info={
"energy": -777.0, # collides; must be ignored
"forces": np.zeros((2, 3), dtype=np.float64), # collides; must be ignored
"method": "DFT", # legitimate
},
)
np.testing.assert_allclose(mol.forces, atoms.forces.astype(np.float32))
assert mol.energy == pytest.approx(-1.0)
assert mol.get_property("method") == "DFT"
for builtin_key in ("forces", "energy", "charges", "velocities", "cell", "stress", "pbc"):
assert not mol.has_property(builtin_key)


def test_to_ase_does_not_duplicate_builtins_in_arrays() -> None:
# to_ase mirror of the from_ase fix: even if a user explicitly stuffs a
# builtin name into custom properties via mol.set_property("forces", ...),
# to_ase must NOT shovel that custom value into atoms.arrays["forces"]
# next to the calculator-attached builtin forces. Otherwise round-tripping
# contaminates the ASE side.
mol = atompack.Molecule.from_arrays(
np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float32),
np.array([6, 8], dtype=np.uint8),
forces=np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32),
energy=-1.0,
)
# Hostile: name a custom property after a builtin field.
mol.set_property("forces", np.array([[9.9, 9.9, 9.9], [8.8, 8.8, 8.8]], dtype=np.float32))
mol.set_property("tags", np.array([1, 2], dtype=np.int32))

ase_atoms = mol.to_ase()
# Calculator-attached forces are the source of truth.
np.testing.assert_allclose(ase_atoms.get_forces(), mol.forces)
# The hostile custom "forces" must NOT have landed in atoms.arrays.
assert "forces" not in ase_atoms.arrays
# Legitimate non-builtin custom property still flows through.
np.testing.assert_array_equal(ase_atoms.arrays["tags"], np.array([1, 2], dtype=np.int32))


def test_add_ase_batch_roundtrip(tmp_path) -> None:
path = tmp_path / "ase_batch.atp"
atoms_list = [
Expand Down
Loading