diff --git a/src/dynsight/_internal/analysis/dim_reduction.py b/src/dynsight/_internal/analysis/dim_reduction.py new file mode 100644 index 00000000..55c5941c --- /dev/null +++ b/src/dynsight/_internal/analysis/dim_reduction.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from numpy.typing import NDArray + +import numpy as np +from scipy.spatial.distance import cdist + + +def two_nn_estimator( + data: NDArray[np.float64], + metric: Callable[[NDArray[np.float64], NDArray[np.float64]], float], +) -> float: + """Computes the intrinsic dimension with Two-NN estimator. + + See for reference: Facco, E., d'Errico, M., Rodriguez, A. et al., + Sci Rep 7, 12140 (2017). https://doi.org/10.1038/s41598-017-11873-y + + Parameters: + data: + Has shape (n_atoms, n_frames, n_dims) + + metric: + Distance metric for scipy.spatial.distance.cdist. + + Example: + + .. testcode:: 2nn-test + + from sklearn.datasets import make_swiss_roll + import numpy as np + from dynsight.analysis import two_nn_estimator + from scipy.spatial.distance import euclidean + + # Swiss roll is intrinsically 2-dimensional + frame, _ = make_swiss_roll( + n_samples=500, + noise=0.05, + random_state=42, + ) + + # Create artificial trajectory with 3 identical frames + data = np.stack([frame, frame, frame], axis=1) + + intr_dim = two_nn_estimator(data, euclidean) + + .. testcode:: 2nn-test + :hide: + + assert np.isclose(intr_dim, 2.0, rtol=0.02) + """ + list_of_ratios = [] + for time in range(data.shape[1]): + frame = data[:, time, :] + distances = cdist(frame, frame, metric=metric) + sorted_dist = np.sort(distances, axis=1) + d1 = sorted_dist[:, 1] + d2 = sorted_dist[:, 2] + mask = d1 > 0 + mu = d2[mask] / d1[mask] + list_of_ratios.append(mu) + + ratios = np.concatenate(list_of_ratios) + return ratios.size / np.sum(np.log(ratios)) diff --git a/src/dynsight/_internal/lens/lens.py b/src/dynsight/_internal/lens/lens.py index 8358c909..30c7a8bb 100644 --- a/src/dynsight/_internal/lens/lens.py +++ b/src/dynsight/_internal/lens/lens.py @@ -14,7 +14,7 @@ from numba import njit, prange -@njit(cache=True, fastmath=True) # type: ignore[misc] +@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator] def _pbc_diff( dx: NDArray[np.float64], box: NDArray[np.float64], @@ -26,7 +26,7 @@ def _pbc_diff( return dx -@njit(cache=True, fastmath=True) # type: ignore[misc] +@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator] def build_cell_list( positions: NDArray[np.float64], box: NDArray[np.float64], @@ -66,7 +66,7 @@ def build_cell_list( # We need a function this complex and deep for numba to work # This is why we are ignoring ruff complaints C901, PLR0912 -@njit(cache=True, fastmath=True, parallel=True) # type: ignore[misc] +@njit(cache=True, fastmath=True, parallel=True) # type: ignore[untyped-decorator] def neighbor_list_celllist_centers( # noqa: C901, PLR0912 positions_env: NDArray[np.float64], positions_cent: NDArray[np.float64], @@ -147,7 +147,7 @@ def neighbor_list_celllist_centers( # noqa: C901, PLR0912 return indptr, indices -@njit(cache=True, fastmath=True) # type: ignore[misc] +@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator] def lens_from_two_csr( indptr1: NDArray[np.int32], indices1: NDArray[np.int32], diff --git a/src/dynsight/_internal/trajectory/trajectory.py b/src/dynsight/_internal/trajectory/trajectory.py index 03807359..1df2da3c 100644 --- a/src/dynsight/_internal/trajectory/trajectory.py +++ b/src/dynsight/_internal/trajectory/trajectory.py @@ -500,3 +500,42 @@ def dump_colored_trj( f" {y:.5f} {z:.5f} {label}\n" ) logger.log(f"Colored trj saved to {file_path}.") + + def dump_xyz_with_insight( + self, + insight_list: list[Insight], + file_path: Path, + ) -> None: + """Save an .xyz file with the insights values for each atom. + + The output file has columns: atom_type, x, y, z, ins_1, ins_2, ... + """ + trajslice = slice(None) if self.trajslice is None else self.trajslice + + for ins in insight_list: + if ins.dataset.shape != (self.n_atoms, self.n_frames): + msg = ( + f"Shape mismatch: expected ({self.n_atoms}, " + f"{self.n_frames}), but got {ins.dataset.shape}" + ) + logger.log(msg) + raise ValueError(msg) + + with file_path.open("w") as f: + for i, ts in enumerate(self.universe.trajectory[trajslice]): + f.write(f"{self.n_atoms}\n") + if ts.dimensions is not None: + box_str = " ".join(f"{x:.5f}" for x in ts.dimensions) + else: + box_str = "0 0 0 0 0 0" + f.write(f'Lattice="{box_str}" Frame={i}\n') + for n in range(self.n_atoms): + x, y, z = ts.positions[n] + desc_values = [ins.dataset[n, i] for ins in insight_list] + descriptors = " ".join(f"{d:.5f}" for d in desc_values) + f.write( + f"{self.universe.atoms[n].name} " + f"{x:.5f} {y:.5f} {z:.5f} " + f"{descriptors}\n" + ) + logger.log(f"Trajectory with insights saved to {file_path}.") diff --git a/src/dynsight/_internal/utilities/utilities.py b/src/dynsight/_internal/utilities/utilities.py index 43f8363b..55a42780 100644 --- a/src/dynsight/_internal/utilities/utilities.py +++ b/src/dynsight/_internal/utilities/utilities.py @@ -173,3 +173,46 @@ def read_xyz( row += 1 return pd.DataFrame(data) + + +def save_xyz_from_ndarray( + output_path: Path | str, + coords: npt.NDArray[np.float64], + comment_line: str = "# comment line", + atom_type: str = "C", +) -> None: + """Saves a .xyz file with the coordinates from a numpy.ndarray. + + This saves an array of coordinates as an .xyz file. This is useful to + visualize simple systems' trajectory with visualization tools like Ovito. + If your system requires more information that what is stored in a simple + .xyz format, you should consider building a Trj object. + + Parameters: + output_file: + The path to the .xyz output trajectory. + + coords: + The array containing the coordinates of all the particles at each + frame. Has shape (n_frames, n_atoms, 3). + + comment_line: + The second line in each frame. + + atom_type: + The type to assign to all atoms in the trajectory. + """ + if isinstance(output_path, str): + output_path = Path(output_path) + + n_coordinates = 3 + if coords.shape[2] != n_coordinates: + msg = "coords array must have shape (n_frames, n_atoms, 3)." + raise ValueError(msg) + + with output_path.open("w+") as file: + for _, frame in enumerate(coords): + print(coords.shape[1], file=file) + print(comment_line, file=file) + for _, atom in enumerate(frame): + print(atom_type, atom[0], atom[1], atom[2], file=file) diff --git a/src/dynsight/analysis.py b/src/dynsight/analysis.py index a76d3946..ab782215 100644 --- a/src/dynsight/analysis.py +++ b/src/dynsight/analysis.py @@ -1,5 +1,6 @@ """analysis package.""" +from dynsight._internal.analysis.dim_reduction import two_nn_estimator from dynsight._internal.analysis.entropy import ( compute_entropy_gain, compute_entropy_gain_multi, @@ -36,4 +37,5 @@ "self_time_correlation", "shannon", "spatialaverage", + "two_nn_estimator", ] diff --git a/src/dynsight/utilities.py b/src/dynsight/utilities.py index ccd1b422..c480b0d2 100644 --- a/src/dynsight/utilities.py +++ b/src/dynsight/utilities.py @@ -5,6 +5,7 @@ load_or_compute_soap, normalize_array, read_xyz, + save_xyz_from_ndarray, ) __all__ = [ @@ -12,4 +13,5 @@ "load_or_compute_soap", "normalize_array", "read_xyz", + "save_xyz_from_ndarray", ] diff --git a/tests/lens/utilities.py b/tests/lens/utilities.py index 6ba7211c..299a0d0c 100644 --- a/tests/lens/utilities.py +++ b/tests/lens/utilities.py @@ -1,10 +1,10 @@ import re -import numba import numpy as np +from numba import njit -@numba.jit # type: ignore[misc] +@njit # type: ignore[untyped-decorator] def is_sorted(a: np.ndarray, /) -> bool: # type: ignore[type-arg] """Checks if an array is sorted. diff --git a/tests/trajectory/files/insight.xyz b/tests/trajectory/files/insight.xyz new file mode 100644 index 00000000..07be8746 --- /dev/null +++ b/tests/trajectory/files/insight.xyz @@ -0,0 +1,84 @@ +2 +Lattice="0 0 0 0 0 0" Frame=0 +C 8.51500 5.72000 20.20100 1.00000 +C 18.27900 9.65000 15.46500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=1 +C 8.84000 5.91000 20.08000 1.00000 +C 17.95500 9.45900 15.58500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=2 +C 9.16400 6.10000 19.96000 1.00000 +C 17.63000 9.26900 15.70500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=3 +C 9.48900 6.29000 19.84000 1.00000 +C 17.30500 9.07900 15.82500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=4 +C 9.81400 6.48000 19.72000 1.00000 +C 16.98100 8.88900 15.94500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=5 +C 10.13800 6.67100 19.60000 1.00000 +C 16.65600 8.69900 16.06600 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=6 +C 10.46300 6.86100 19.47900 1.00000 +C 16.33100 8.50900 16.18600 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=7 +C 10.78900 7.05100 19.35900 1.00000 +C 16.00600 8.31800 16.30700 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=8 +C 11.11500 7.24200 19.23800 1.00000 +C 15.68000 8.12800 16.42800 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=9 +C 11.44300 7.43300 19.11500 1.00000 +C 15.35200 7.93700 16.55000 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=10 +C 11.77600 7.62400 18.99000 1.00000 +C 15.01900 7.74500 16.67600 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=11 +C 12.12400 7.81600 18.85300 1.00000 +C 14.67000 7.55400 16.81200 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=12 +C 12.49800 8.00500 18.69600 1.00000 +C 14.29700 7.36400 16.96900 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=13 +C 12.66000 8.29900 18.76000 1.00000 +C 14.13400 7.07100 16.90600 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=14 +C 12.67300 8.68100 18.99000 1.00000 +C 14.12100 6.68900 16.67600 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=15 +C 12.70100 9.04300 19.19500 1.00000 +C 14.09300 6.32700 16.47000 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=16 +C 12.73500 9.39400 19.39000 1.00000 +C 14.06000 5.97600 16.27500 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=17 +C 12.77000 9.74100 19.58100 1.00000 +C 14.02400 5.62800 16.08400 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=18 +C 12.80600 10.08700 19.77200 1.00000 +C 13.98900 5.28200 15.89400 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=19 +C 12.84200 10.43300 19.96100 1.00000 +C 13.95300 4.93700 15.70400 1.00000 +2 +Lattice="0 0 0 0 0 0" Frame=20 +C 12.87800 10.77800 20.15100 1.00000 +C 13.91700 4.59200 15.51500 1.00000 diff --git a/tests/trajectory/files/labels.xyz b/tests/trajectory/files/labels.xyz new file mode 100644 index 00000000..9f69b87c --- /dev/null +++ b/tests/trajectory/files/labels.xyz @@ -0,0 +1,84 @@ +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 8.51500 5.72000 20.20100 1 +C 18.27900 9.65000 15.46500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 8.84000 5.91000 20.08000 1 +C 17.95500 9.45900 15.58500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 9.16400 6.10000 19.96000 1 +C 17.63000 9.26900 15.70500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 9.48900 6.29000 19.84000 1 +C 17.30500 9.07900 15.82500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 9.81400 6.48000 19.72000 1 +C 16.98100 8.88900 15.94500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 10.13800 6.67100 19.60000 1 +C 16.65600 8.69900 16.06600 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 10.46300 6.86100 19.47900 1 +C 16.33100 8.50900 16.18600 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 10.78900 7.05100 19.35900 1 +C 16.00600 8.31800 16.30700 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 11.11500 7.24200 19.23800 1 +C 15.68000 8.12800 16.42800 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 11.44300 7.43300 19.11500 1 +C 15.35200 7.93700 16.55000 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 11.77600 7.62400 18.99000 1 +C 15.01900 7.74500 16.67600 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.12400 7.81600 18.85300 1 +C 14.67000 7.55400 16.81200 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.49800 8.00500 18.69600 1 +C 14.29700 7.36400 16.96900 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.66000 8.29900 18.76000 1 +C 14.13400 7.07100 16.90600 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.67300 8.68100 18.99000 1 +C 14.12100 6.68900 16.67600 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.70100 9.04300 19.19500 1 +C 14.09300 6.32700 16.47000 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.73500 9.39400 19.39000 1 +C 14.06000 5.97600 16.27500 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.77000 9.74100 19.58100 1 +C 14.02400 5.62800 16.08400 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.80600 10.08700 19.77200 1 +C 13.98900 5.28200 15.89400 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.84200 10.43300 19.96100 1 +C 13.95300 4.93700 15.70400 1 +2 +Lattice=0.0 0.0 0.0 0.0 0.0 0.0 Properties=species:S:1:pos:R:3:type:I:1 +C 12.87800 10.77800 20.15100 1 +C 13.91700 4.59200 15.51500 1 diff --git a/tests/trajectory/test_trj.py b/tests/trajectory/test_trj.py index a8bf5589..650eabfc 100644 --- a/tests/trajectory/test_trj.py +++ b/tests/trajectory/test_trj.py @@ -2,6 +2,7 @@ from __future__ import annotations +import filecmp from pathlib import Path import MDAnalysis @@ -183,3 +184,34 @@ def test_insight_load_errors(file_paths: dict[str, Path]) -> None: ) logger.get() + + +def test_dump_xyz_functions( + tmp_path: Path, + universe: MDAnalysis.Universe, + file_paths: dict[str, Path], +) -> None: + """Test methods to dump Trj to .xyz files.""" + trj = Trj(universe) + _, insight = trj.get_coord_number(r_cut=10.0) + onion_ins = insight.get_onion_smooth(delta_t=5) + + trj.dump_colored_trj( + labels=onion_ins.labels, + file_path=tmp_path / "labels.xyz", + ) + assert filecmp.cmp( + tmp_path / "labels.xyz", + file_paths["files_dir"] / "labels.xyz", + shallow=False, + ) + + trj.dump_xyz_with_insight( + insight_list=[insight], + file_path=tmp_path / "insight.xyz", + ) + assert filecmp.cmp( + tmp_path / "insight.xyz", + file_paths["files_dir"] / "insight.xyz", + shallow=False, + ) diff --git a/tests/utilities/test_utilities.py b/tests/utilities/test_utilities.py index ec03e65d..6d8574b0 100644 --- a/tests/utilities/test_utilities.py +++ b/tests/utilities/test_utilities.py @@ -1,9 +1,13 @@ """Pytest for dynsight.utilities.""" +from pathlib import Path + import numpy as np +import pytest from dynsight.utilities import ( find_extrema_points, + save_xyz_from_ndarray, ) @@ -30,3 +34,22 @@ def test_find_extrema_points() -> None: assert np.allclose(min_points, expected_min, atol=1e-2, rtol=1e-2) assert np.allclose(max_points, expected_max, atol=1e-2, rtol=1e-2) + + +def test_save_xyz_from_array(tmp_path: Path) -> None: + output_path = tmp_path / "tmp.xyz" + rng = np.random.default_rng(42) + coords = rng.random((10, 10, 3)) + save_xyz_from_ndarray( + output_path=output_path, + coords=coords, + ) + coords = rng.random((10, 10, 2)) + with pytest.raises( + ValueError, + match=r"coords array must have shape \(n_frames, n_atoms, 3\)\.", + ): + save_xyz_from_ndarray( + output_path=output_path, + coords=coords, + )