Skip to content
66 changes: 66 additions & 0 deletions src/dynsight/_internal/analysis/dim_reduction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
from numpy.typing import NDArray

import numpy as np
from scipy.spatial.distance import cdist


def two_nn_estimator(
    data: NDArray[np.float64],
    metric: Callable[[NDArray[np.float64], NDArray[np.float64]], float],
) -> float:
    """Computes the intrinsic dimension with Two-NN estimator.

    See for reference: Facco, E., d'Errico, M., Rodriguez, A. et al.,
    Sci Rep 7, 12140 (2017). https://doi.org/10.1038/s41598-017-11873-y

    Parameters:
        data:
            Has shape (n_atoms, n_frames, n_dims)

        metric:
            Distance metric for scipy.spatial.distance.cdist.

    Raises:
        ValueError:
            If a frame contains fewer than 3 points (two neighbors are
            needed per point), or if no valid distance ratios remain
            (e.g., every point coincides with its nearest neighbor),
            which would otherwise cause a division by zero.

    Example:

    .. testcode:: 2nn-test

        from sklearn.datasets import make_swiss_roll
        import numpy as np
        from dynsight.analysis import two_nn_estimator
        from scipy.spatial.distance import euclidean

        # Swiss roll is intrinsically 2-dimensional
        frame, _ = make_swiss_roll(
            n_samples=500,
            noise=0.05,
            random_state=42,
        )

        # Create artificial trajectory with 3 identical frames
        data = np.stack([frame, frame, frame], axis=1)

        intr_dim = two_nn_estimator(data, euclidean)

    .. testcode:: 2nn-test
        :hide:

        assert np.isclose(intr_dim, 2.0, rtol=0.02)
    """
    # Each point needs a first AND a second nearest neighbor.
    min_points = 3
    if data.shape[0] < min_points:
        msg = "two_nn_estimator requires at least 3 points per frame."
        raise ValueError(msg)

    list_of_ratios = []
    for time in range(data.shape[1]):
        frame = data[:, time, :]
        # Sort each row of the distance matrix: column 0 is the zero
        # self-distance, columns 1 and 2 are the first- and
        # second-nearest-neighbor distances.
        distances = cdist(frame, frame, metric=metric)
        sorted_dist = np.sort(distances, axis=1)
        d1 = sorted_dist[:, 1]
        d2 = sorted_dist[:, 2]
        # Discard points whose nearest neighbor is at zero distance
        # (duplicate points): the ratio d2/d1 is undefined there.
        mask = d1 > 0
        mu = d2[mask] / d1[mask]
        list_of_ratios.append(mu)

    ratios = np.concatenate(list_of_ratios)
    log_sum = np.sum(np.log(ratios))
    # Guard the maximum-likelihood division: log_sum is 0 when every
    # surviving ratio equals 1 (degenerate configurations), and ratios
    # can be empty if every point was a duplicate.
    if ratios.size == 0 or log_sum <= 0.0:
        msg = (
            "Cannot estimate the intrinsic dimension: no valid "
            "nearest-neighbor distance ratios were found."
        )
        raise ValueError(msg)
    return ratios.size / log_sum
8 changes: 4 additions & 4 deletions src/dynsight/_internal/lens/lens.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from numba import njit, prange


@njit(cache=True, fastmath=True) # type: ignore[misc]
@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator]
def _pbc_diff(
dx: NDArray[np.float64],
box: NDArray[np.float64],
Expand All @@ -26,7 +26,7 @@ def _pbc_diff(
return dx


@njit(cache=True, fastmath=True) # type: ignore[misc]
@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator]
def build_cell_list(
positions: NDArray[np.float64],
box: NDArray[np.float64],
Expand Down Expand Up @@ -66,7 +66,7 @@ def build_cell_list(

# We need a function this complex and deep for numba to work
# This is why we are ignoring ruff complaints C901, PLR0912
@njit(cache=True, fastmath=True, parallel=True) # type: ignore[misc]
@njit(cache=True, fastmath=True, parallel=True) # type: ignore[untyped-decorator]
def neighbor_list_celllist_centers( # noqa: C901, PLR0912
positions_env: NDArray[np.float64],
positions_cent: NDArray[np.float64],
Expand Down Expand Up @@ -147,7 +147,7 @@ def neighbor_list_celllist_centers( # noqa: C901, PLR0912
return indptr, indices


@njit(cache=True, fastmath=True) # type: ignore[misc]
@njit(cache=True, fastmath=True) # type: ignore[untyped-decorator]
def lens_from_two_csr(
indptr1: NDArray[np.int32],
indices1: NDArray[np.int32],
Expand Down
39 changes: 39 additions & 0 deletions src/dynsight/_internal/trajectory/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,42 @@ def dump_colored_trj(
f" {y:.5f} {z:.5f} {label}\n"
)
logger.log(f"Colored trj saved to {file_path}.")

def dump_xyz_with_insight(
    self,
    insight_list: list[Insight],
    file_path: Path,
) -> None:
    """Save an .xyz file with the insights values for each atom.

    The output file has columns: atom_type, x, y, z, ins_1, ins_2, ...
    """
    # Validate every insight dataset before touching the output file,
    # so a shape mismatch never leaves a partially written trajectory.
    expected_shape = (self.n_atoms, self.n_frames)
    for insight in insight_list:
        if insight.dataset.shape != expected_shape:
            msg = (
                f"Shape mismatch: expected ({self.n_atoms}, "
                f"{self.n_frames}), but got {insight.dataset.shape}"
            )
            logger.log(msg)
            raise ValueError(msg)

    frames = slice(None) if self.trajslice is None else self.trajslice

    with file_path.open("w") as xyz_file:
        for frame_idx, timestep in enumerate(self.universe.trajectory[frames]):
            xyz_file.write(f"{self.n_atoms}\n")
            # A lattice of zeros signals that no box information is
            # available for this frame.
            box = timestep.dimensions
            box_str = (
                "0 0 0 0 0 0"
                if box is None
                else " ".join(f"{edge:.5f}" for edge in box)
            )
            xyz_file.write(f'Lattice="{box_str}" Frame={frame_idx}\n')
            for atom_idx in range(self.n_atoms):
                x, y, z = timestep.positions[atom_idx]
                # One extra column per insight, in list order.
                columns = " ".join(
                    f"{insight.dataset[atom_idx, frame_idx]:.5f}"
                    for insight in insight_list
                )
                xyz_file.write(
                    f"{self.universe.atoms[atom_idx].name} "
                    f"{x:.5f} {y:.5f} {z:.5f} "
                    f"{columns}\n"
                )
    logger.log(f"Trajectory with insights saved to {file_path}.")
43 changes: 43 additions & 0 deletions src/dynsight/_internal/utilities/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,46 @@ def read_xyz(
row += 1

return pd.DataFrame(data)


def save_xyz_from_ndarray(
    output_path: Path | str,
    coords: npt.NDArray[np.float64],
    comment_line: str = "# comment line",
    atom_type: str = "C",
) -> None:
    """Saves a .xyz file with the coordinates from a numpy.ndarray.

    This saves an array of coordinates as an .xyz file. This is useful to
    visualize simple systems' trajectory with visualization tools like
    Ovito. If your system requires more information than what is stored
    in a simple .xyz format, you should consider building a Trj object.

    Parameters:
        output_path:
            The path to the .xyz output trajectory.

        coords:
            The array containing the coordinates of all the particles at
            each frame. Has shape (n_frames, n_atoms, 3).

        comment_line:
            The second line in each frame.

        atom_type:
            The type to assign to all atoms in the trajectory.

    Raises:
        ValueError:
            If coords does not have shape (n_frames, n_atoms, 3).
    """
    output_path = Path(output_path)

    # Check the rank as well as the last axis: indexing coords.shape[2]
    # on a 1-D/2-D array would otherwise raise a raw IndexError instead
    # of the documented ValueError.
    n_coordinates = 3
    if coords.ndim != n_coordinates or coords.shape[2] != n_coordinates:
        msg = "coords array must have shape (n_frames, n_atoms, 3)."
        raise ValueError(msg)

    n_atoms = coords.shape[1]
    with output_path.open("w") as file:
        for frame in coords:
            # Standard .xyz layout: atom count, comment, then one line
            # per atom ("type x y z").
            print(n_atoms, file=file)
            print(comment_line, file=file)
            for atom in frame:
                print(atom_type, atom[0], atom[1], atom[2], file=file)
2 changes: 2 additions & 0 deletions src/dynsight/analysis.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""analysis package."""

from dynsight._internal.analysis.dim_reduction import two_nn_estimator
from dynsight._internal.analysis.entropy import (
compute_entropy_gain,
compute_entropy_gain_multi,
Expand Down Expand Up @@ -36,4 +37,5 @@
"self_time_correlation",
"shannon",
"spatialaverage",
"two_nn_estimator",
]
2 changes: 2 additions & 0 deletions src/dynsight/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
load_or_compute_soap,
normalize_array,
read_xyz,
save_xyz_from_ndarray,
)

__all__ = [
"find_extrema_points",
"load_or_compute_soap",
"normalize_array",
"read_xyz",
"save_xyz_from_ndarray",
]
4 changes: 2 additions & 2 deletions tests/lens/utilities.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import re

import numba
import numpy as np
from numba import njit


@numba.jit # type: ignore[misc]
@njit # type: ignore[untyped-decorator]
def is_sorted(a: np.ndarray, /) -> bool: # type: ignore[type-arg]
"""Checks if an array is sorted.

Expand Down
84 changes: 84 additions & 0 deletions tests/trajectory/files/insight.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
2
Lattice="0 0 0 0 0 0" Frame=0
C 8.51500 5.72000 20.20100 1.00000
C 18.27900 9.65000 15.46500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=1
C 8.84000 5.91000 20.08000 1.00000
C 17.95500 9.45900 15.58500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=2
C 9.16400 6.10000 19.96000 1.00000
C 17.63000 9.26900 15.70500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=3
C 9.48900 6.29000 19.84000 1.00000
C 17.30500 9.07900 15.82500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=4
C 9.81400 6.48000 19.72000 1.00000
C 16.98100 8.88900 15.94500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=5
C 10.13800 6.67100 19.60000 1.00000
C 16.65600 8.69900 16.06600 1.00000
2
Lattice="0 0 0 0 0 0" Frame=6
C 10.46300 6.86100 19.47900 1.00000
C 16.33100 8.50900 16.18600 1.00000
2
Lattice="0 0 0 0 0 0" Frame=7
C 10.78900 7.05100 19.35900 1.00000
C 16.00600 8.31800 16.30700 1.00000
2
Lattice="0 0 0 0 0 0" Frame=8
C 11.11500 7.24200 19.23800 1.00000
C 15.68000 8.12800 16.42800 1.00000
2
Lattice="0 0 0 0 0 0" Frame=9
C 11.44300 7.43300 19.11500 1.00000
C 15.35200 7.93700 16.55000 1.00000
2
Lattice="0 0 0 0 0 0" Frame=10
C 11.77600 7.62400 18.99000 1.00000
C 15.01900 7.74500 16.67600 1.00000
2
Lattice="0 0 0 0 0 0" Frame=11
C 12.12400 7.81600 18.85300 1.00000
C 14.67000 7.55400 16.81200 1.00000
2
Lattice="0 0 0 0 0 0" Frame=12
C 12.49800 8.00500 18.69600 1.00000
C 14.29700 7.36400 16.96900 1.00000
2
Lattice="0 0 0 0 0 0" Frame=13
C 12.66000 8.29900 18.76000 1.00000
C 14.13400 7.07100 16.90600 1.00000
2
Lattice="0 0 0 0 0 0" Frame=14
C 12.67300 8.68100 18.99000 1.00000
C 14.12100 6.68900 16.67600 1.00000
2
Lattice="0 0 0 0 0 0" Frame=15
C 12.70100 9.04300 19.19500 1.00000
C 14.09300 6.32700 16.47000 1.00000
2
Lattice="0 0 0 0 0 0" Frame=16
C 12.73500 9.39400 19.39000 1.00000
C 14.06000 5.97600 16.27500 1.00000
2
Lattice="0 0 0 0 0 0" Frame=17
C 12.77000 9.74100 19.58100 1.00000
C 14.02400 5.62800 16.08400 1.00000
2
Lattice="0 0 0 0 0 0" Frame=18
C 12.80600 10.08700 19.77200 1.00000
C 13.98900 5.28200 15.89400 1.00000
2
Lattice="0 0 0 0 0 0" Frame=19
C 12.84200 10.43300 19.96100 1.00000
C 13.95300 4.93700 15.70400 1.00000
2
Lattice="0 0 0 0 0 0" Frame=20
C 12.87800 10.77800 20.15100 1.00000
C 13.91700 4.59200 15.51500 1.00000
Loading