diff --git a/.gitignore b/.gitignore index bc788ed5..ff34315b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ docs/source/_autosummary examples/onion_analysis/data.json examples/analysis_workflow/lens.json examples/analysis_workflow/onion.json +examples/analysis_workflow/.oxygens.xtc_offsets.* examples/analysis_workflow/colored_trj.xyz examples/info_gain/trj_*.npy tests/systems/coex/.* diff --git a/docs/source/_static/recipes/soap_dim_red.py b/docs/source/_static/recipes/soap_dim_red.py index b93d6314..5b600fd6 100644 --- a/docs/source/_static/recipes/soap_dim_red.py +++ b/docs/source/_static/recipes/soap_dim_red.py @@ -84,7 +84,7 @@ def compute_soap_tica( soap_path=soap_path, ) - rel_times, _, tica_ds = dynsight.tica.many_body_tica( + rel_times, _, tica_ds = dynsight.descriptors.many_body_tica( soap.dataset, lag_time=lag_time, tica_dim=tica_dim, diff --git a/docs/source/analysis.rst b/docs/source/analysis.rst index 45fd5016..7f5fd089 100644 --- a/docs/source/analysis.rst +++ b/docs/source/analysis.rst @@ -3,17 +3,30 @@ analysis The ``analysis`` modulus contains functions for miscellaneous analyses on many-body trajectories. -Functions ---------- + +Entropy +------- + +The ``analysis`` module offers a variety of functions for entropy- and +information-based calculations. .. toctree:: :maxdepth: 1 compute_shannon <_autosummary/dynsight.analysis.compute_shannon> + compute_kl_entropy <_autosummary/dynsight.analysis.compute_kl_entropy> + compute_negentropy <_autosummary/dynsight.analysis.compute_negentropy> compute_entropy_gain <_autosummary/dynsight.analysis.compute_entropy_gain> compute_shannon_multi <_autosummary/dynsight.analysis.compute_shannon_multi> compute_entropy_gain_multi <_autosummary/dynsight.analysis.compute_entropy_gain_multi> sample_entropy <_autosummary/dynsight.analysis.sample_entropy> + +Other functions +--------------- + +.. toctree:: + :maxdepth: 1 + compute_rdf <_autosummary/dynsight.analysis.compute_rdf> self_time_correlation <_autosummary/dynsight.analysis.self_time_correlation> cross_time_correlation <_autosummary/dynsight.analysis.cross_time_correlation> diff --git a/docs/source/descriptors.rst b/docs/source/descriptors.rst new file mode 100644 index 00000000..1c3217e3 --- /dev/null +++ b/docs/source/descriptors.rst @@ -0,0 +1,50 @@ +Descriptors +=========== + +.. toctree:: + :maxdepth: 1 + + SOAP + timeSOAP + LENS + +tICA +---- + +.. toctree:: + :maxdepth: 1 + + many_body_tica <_autosummary/dynsight.descriptors.many_body_tica> + +tICA (time-lagged Independent Component Analysis) is a dimensionality reduction method. Time-series data are mapped to components characterizing the slowest processes, by maximizing the data correlation function at a given lag-time. + +This module allows to perform tICA on trajectories from many-body systems, where the observables under analysis are single-particle descriptors, which should not be mixed within the tICs. + +This module uses the TICA class from the deeptime.decomposition package (see the `documentation page `_). +:mod:`deeptime` requires numpy <= 2.1. + + +Velocity alignment +------------------ + +.. toctree:: + :maxdepth: 1 + + velocity_alignment <_autosummary/dynsight.descriptors.velocity_alignment> + +Computes the average velocity alignment between the central particle and the +neighboring ones. The alignment is computed as the cosine between the velocities. +Thus, the output is a number between 1 (perfect alignment) and -1 (perfect +anti-alignment). + +Orinetational order parameter +----------------------------- + +.. toctree:: + :maxdepth: 1 + + orientational_order_param <_autosummary/dynsight.descriptors.orientational_order_param> + +Computes orientational order parameter for the neighbors of each atom in the +trajectory. The output is a real number between 0 and 1, where 1 corresponds +to a perfect order, and 0 to completely random positions. diff --git a/docs/source/index.rst b/docs/source/index.rst index e68865ab..a4d0e965 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,15 +1,17 @@ +.. toctree:: + :hidden: + :maxdepth: 1 + + How to get started + .. toctree:: :hidden: :caption: dynsight :maxdepth: 2 - trajectory - SOAP - timeSOAP - LENS - tICA - onion clustering vision + descriptors + onion clustering analysis data processing HDF5er diff --git a/docs/source/soap_dim_red.rst b/docs/source/soap_dim_red.rst index c230b678..59740a65 100644 --- a/docs/source/soap_dim_red.rst +++ b/docs/source/soap_dim_red.rst @@ -121,7 +121,7 @@ More details on the algorithm here: .. toctree:: :maxdepth: 1 - many_body_tica <_autosummary/dynsight.tica.many_body_tica> + many_body_tica <_autosummary/dynsight.descriptors.many_body_tica> This function takes as input a :class:`.trajectory.Trj` and all the relevant parameters, and performs the TICA of the corresponding SOAP dataset. @@ -160,7 +160,7 @@ parameters, and performs the TICA of the corresponding SOAP dataset. soap_path=soap_path, ) - rel_times, _, tica_ds = dynsight.tica.many_body_tica( + rel_times, _, tica_ds = dynsight.descriptors.many_body_tica( soap.dataset, lag_time=lag_time, tica_dim=tica_dim, diff --git a/docs/source/tica.rst b/docs/source/tica.rst deleted file mode 100644 index 8ddac6fc..00000000 --- a/docs/source/tica.rst +++ /dev/null @@ -1,27 +0,0 @@ -tICA -==== - -tICA (time-lagged Independent Component Analysis) is a dimensionality reduction method. Time-series data are mapped to components characterizing the slowest processes, by maximizing the data correlation function at a given lag-time. - -This module allows to perform tICA on trajectories from many-body systems, where the observables under analysis are single-particle descriptors, which should not be mixed within the tICs. - -Installation ------------- - -This module uses the TICA class from the deeptime.decomposition package (see the `documentation page `_). You will need to install :mod:`deeptime`. This can be done with with pip:: - - $ pip install deeptime - -or with conda:: - - $ conda install -c conda-forge deeptime - -:mod:`deeptime` requires numpy <= 2.1. - -Functions ---------- - -.. toctree:: - :maxdepth: 1 - - many_body_tica <_autosummary/dynsight.tica.many_body_tica> diff --git a/docs/source/trajectory.rst b/docs/source/trajectory.rst index 07a3f4df..c2488627 100644 --- a/docs/source/trajectory.rst +++ b/docs/source/trajectory.rst @@ -1,19 +1,46 @@ -trajectory -========== +How to get started: the ``trajectory`` module +============================================= + +The easiest way to use ``dynsight`` is via the ``trajectory`` module, which +offers an object-oriented implementation of most of the functions and utilities, +to facilitate the workflow of any analysis. -This module offers an object-oriented implementation of the ``dynsight`` -utilities, to facilitate the workflow of any analysis. Moreover, the ``trajectory`` classes make it easier to save and share results, -and have a built-in logging module. +and has a built-in logging module. The :class:`.trajectory.Trj` class is a container for a MDAnalysis.Universe, -which allows for the computation of all the descriptors implemented in ``dynsight``. +which has methods for the computation of all the descriptors implemented in +``dynsight``. These descriptors, as well as the output of subsequent analyses, are stored in :class:`.trajectory.Insight` or :class:`.trajectory.ClusterInsight` objects. We recommend the users, when possible, to write code using this module. +As a minimal example, a typical code for the computation of the SOAP descriptor +may look like this: + +.. code-block:: + + from pathlib import Path + from dynsight import Trj + + traj_file = Path("path/to/the/traj.xtc") + topo_file = Path("path/to/the/traj.gro") + + trj = Trj.init_from_xtc( + traj_file=traj_file, + topo_file=topo_file, + ) # This is a Trj object + + soap = trj.get_soap( + r_cut=10.0, + n_max=4, + l_max=4 + ) # This is an Insight object + + soap_values = soap.dataset # The np.ndarray with the computed values + Complete examples can be found in the recipes section of this documentation. Classes diff --git a/examples/analysis_workflow.py b/examples/analysis_workflow.py index 2a869aa5..25725d6a 100644 --- a/examples/analysis_workflow.py +++ b/examples/analysis_workflow.py @@ -1,12 +1,12 @@ -"""Creating an example for the new trajectory module.""" +"""An example for the trajectory module.""" from pathlib import Path -from dynsight.trajectory import Insight, OnionInsight, Trj +from dynsight.trajectory import Insight, OnionSmoothInsight, Trj def main() -> None: - """Creating an example for the new trajectory module.""" + """An example for the trajectory module.""" files_path = Path("analysis_workflow") # Create a Trj object @@ -19,27 +19,25 @@ def main() -> None: # From here, we work with an Insight, containing data computed from a Trj lens_file = files_path / "lens.json" if lens_file.exists(): - lens = Insight.load_from_json(lens_file) + lens = Insight.load_from_json(lens_file, mmap_mode="r") else: _, lens = trj.get_lens(r_cut=7.5) lens.dump_to_json(lens_file) # We can do spatial average on the computed LENS trj_lens = trj.with_slice(slice(0, -1, 1)) - lens_smooth = lens.spatial_average( - trj=trj_lens, r_cut=7.5, num_processes=6 - ) + lens_aver = lens.spatial_average(trj=trj_lens, r_cut=7.5, num_processes=6) # And we can perform onion-clustering - lens_onion = lens_smooth.get_onion_smooth(delta_t=10) + lens_onion = lens_aver.get_onion_smooth(delta_t=10) lens_onion.plot_output( - file_path=files_path / "tmp_fig1.png", - data_insight=lens_smooth, + file_path=files_path / "fig1.png", + data_insight=lens_aver, ) lens_onion.plot_one_trj( - file_path=files_path / "tmp_fig2.png", - data_insight=lens_smooth, + file_path=files_path / "fig2.png", + data_insight=lens_aver, particle_id=1234, ) lens_onion.dump_colored_trj( @@ -49,7 +47,9 @@ def main() -> None: # Save/load the Insight with all the results lens_onion.dump_to_json(files_path / "onion.json") - _ = OnionInsight.load_from_json(files_path / "onion.json") + _ = OnionSmoothInsight.load_from_json( + files_path / "onion.json", mmap_mode="r" + ) if __name__ == "__main__": diff --git a/examples/analysis_workflow/tmp_fig1.png b/examples/analysis_workflow/fig1.png similarity index 100% rename from examples/analysis_workflow/tmp_fig1.png rename to examples/analysis_workflow/fig1.png diff --git a/examples/analysis_workflow/tmp_fig2.png b/examples/analysis_workflow/fig2.png similarity index 100% rename from examples/analysis_workflow/tmp_fig2.png rename to examples/analysis_workflow/fig2.png diff --git a/examples/analysis_workflow/lens.npy b/examples/analysis_workflow/lens.npy new file mode 100644 index 00000000..2b4643af Binary files /dev/null and b/examples/analysis_workflow/lens.npy differ diff --git a/examples/analysis_workflow/onion_labels.npy b/examples/analysis_workflow/onion_labels.npy new file mode 100644 index 00000000..be616cc9 Binary files /dev/null and b/examples/analysis_workflow/onion_labels.npy differ diff --git a/src/dynsight/__init__.py b/src/dynsight/__init__.py index 0d970042..5d0f9ee0 100644 --- a/src/dynsight/__init__.py +++ b/src/dynsight/__init__.py @@ -3,12 +3,12 @@ from dynsight import ( analysis, data_processing, + descriptors, hdf5er, lens, logs, onion, soap, - tica, trajectory, utilities, vision, @@ -17,12 +17,12 @@ __all__ = [ "analysis", "data_processing", + "descriptors", "hdf5er", "lens", "logs", "onion", "soap", - "tica", "trajectory", "utilities", "vision", diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 3466b100..878a63d9 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -8,10 +8,11 @@ import numpy as np import numpy.typing as npt from scipy.spatial.distance import cdist +from scipy.special import digamma def compute_shannon( - data: npt.NDArray[np.float64], + data: NDArray[np.float64], data_range: tuple[float, float], n_bins: int, ) -> float: @@ -31,9 +32,7 @@ def compute_shannon( The number of bins with which the data histogram must be computed. Returns: - float: - entropy: - The value of the normalized Shannon entropy of the dataset. + The value of the normalized Shannon entropy of the dataset. Example: @@ -56,6 +55,7 @@ def compute_shannon( :hide: assert np.isclose(data_entropy, 0.9993954419344714) + """ if data.size == 0: msg = "data is empty" @@ -71,8 +71,100 @@ def compute_shannon( return entropy +def compute_kl_entropy(data: NDArray[np.float64], n_neigh: int = 1) -> float: + """Estimate Shannon differential entropy using Kozachenko-Leonenko. + + The Kozachenko-Leonenko k-nearest neighbors method approximates + differential entropy based on distances to nearest neighbors + in the sample space. It's main advantage is being parameter-free. + + Parameters: + data: + The dataset for which the entropy is to be computed. + Shape (n_data,) + + n_neigh: + The number of neighbors considered in the KL estimator. + + Returns: + The Shannon differential entropy of the dataset, in bits. + + Example: + + .. testcode:: kl-entropy-test + + import numpy as np + from dynsight.analysis import compute_kl_entropy + + np.random.seed(1234) + data = np.random.rand(10000) + + data_entropy = compute_kl_entropy(data) + + .. testcode:: kl-entropy-test + :hide: + + assert np.isclose(data_entropy, -3.3437736767342194) + + """ + data = np.sort(data.flatten()) + n_data = len(data) + eps = data[n_neigh:] - data[:-n_neigh] # n_neigh-th neighbor distances + eps = np.clip(eps, 1e-10, None) # avoid log(0) + const = digamma(n_data) - digamma(n_neigh) + 1 + return const + np.mean(np.log2(eps)) + + +def compute_negentropy(data: NDArray[np.float64]) -> float: + """Estimate negentropy of a dataset. + + Negentropy is a measure of non-Gaussianity representing the distance + from a Gaussian distribution; it's used to quantify the amount of + information in a signal, the Gaussian being the less informative + distribution for a given variance. + + .. math:: + + Neg(X) = H(X_{Gauss}) - H(X) + + Parameters: + data: + The dataset for which the entropy is to be computed. + + Returns: + The negentropy of the dataset, in bits. + + + Example: + + .. testcode:: negentropy-test + + import numpy as np + from dynsight.analysis import compute_negentropy + + np.random.seed(1234) + data = np.random.rand(10000) + + negentropy = compute_negentropy(data) + + .. testcode:: negentropy-test + :hide: + + assert np.isclose(negentropy, 0.2609932580146541) + + """ + data = data.flatten() + rng = np.random.default_rng(seed=1234) + data_norm = (data - np.mean(data)) / np.std(data, ddof=1) + sigma = np.std(data_norm, ddof=1) + data_gauss = rng.normal(loc=0.0, scale=sigma, size=data.size) + h_gauss = compute_kl_entropy(data_gauss) + h_data = compute_kl_entropy(data_norm) + return h_gauss - h_data + + def compute_shannon_multi( - data: npt.NDArray[np.float64], + data: NDArray[np.float64], data_ranges: list[tuple[float, float]], n_bins: list[int], ) -> float: @@ -95,9 +187,7 @@ def compute_shannon_multi( dimension. Returns: - float: - entropy: - The value of the normalized Shannon entropy of the dataset. + The value of the normalized Shannon entropy of the dataset. Example: @@ -121,6 +211,7 @@ def compute_shannon_multi( :hide: assert np.isclose(data_entropy, 0.8837924363474094) + """ if data.size == 0: msg = "data is empty" @@ -157,11 +248,10 @@ def compute_entropy_gain( Default is 20. Returns: - tuple[float, float, float, float] - * The absolute information gain :math:`H_0 - H_{clust}` - * The relative information gain :math:`(H_0 - H_{clust}) / H_0` - * The Shannon entropy of the initial data :math:`H_0` - * The shannon entropy of the clustered data :math:`H_{clust}` + * The absolute information gain :math:`H_0 - H_{clust}` + * The relative information gain :math:`(H_0 - H_{clust}) / H_0` + * The Shannon entropy of the initial data :math:`H_0` + * The shannon entropy of the clustered data :math:`H_{clust}` Example: @@ -184,6 +274,7 @@ def compute_entropy_gain( :hide: assert np.isclose(entropy_gain, 0.0010065005804883983) + """ if data.shape[0] != labels.shape[0]: msg = ( @@ -246,11 +337,10 @@ def compute_entropy_gain_multi( one for each dimension. Returns: - tuple[float, float, float, float] - * The absolute information gain :math:`H_0 - H_{clust}` - * The relative information gain :math:`(H_0 - H_{clust}) / H_0` - * The Shannon entropy of the initial data :math:`H_0` - * The shannon entropy of the clustered data :math:`H_{clust}` + * The absolute information gain :math:`H_0 - H_{clust}` + * The relative information gain :math:`(H_0 - H_{clust}) / H_0` + * The Shannon entropy of the initial data :math:`H_0` + * The shannon entropy of the clustered data :math:`H_{clust}` Example: @@ -274,6 +364,7 @@ def compute_entropy_gain_multi( :hide: assert np.isclose(entropy_gain, 0.13171418273750357) + """ if data.shape[0] != labels.shape[0]: msg = ( @@ -338,8 +429,7 @@ def sample_entropy( The m parameter (length of the considered overlapping windows). Returns: - float - Sample entropy of the input time-series. + Sample entropy of the input time-series. Example: @@ -362,6 +452,7 @@ def sample_entropy( :hide: assert np.isclose(sampen, 1.6094379124341003) + """ n_sum = len(time_series) - m_par diff --git a/src/dynsight/_internal/tica/__init__.py b/src/dynsight/_internal/descriptors/__init__.py similarity index 100% rename from src/dynsight/_internal/tica/__init__.py rename to src/dynsight/_internal/descriptors/__init__.py diff --git a/src/dynsight/_internal/descriptors/misc.py b/src/dynsight/_internal/descriptors/misc.py new file mode 100644 index 00000000..b9af561c --- /dev/null +++ b/src/dynsight/_internal/descriptors/misc.py @@ -0,0 +1,198 @@ +"""Miscellaneous descriptors.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from MDAnalysis import AtomGroup, Universe + from numpy.typing import NDArray + +import numpy as np +from scipy.spatial.distance import cosine + + +def orientational_order_param( + universe: Universe, + neigh_list_per_frame: list[list[AtomGroup]], + order: int = 6, +) -> NDArray[np.float64]: + r"""Compute the magnitude of the orientational order parameter. + + .. math:: + + | \psi^{(n)}_i | = \frac{1}{N_i} \sum_{j=1}^{N_{i}} e^{i n \theta_{ij}} + + where n is the symmetry order. + + .. warning:: + + Particles are considered as laying in the (x, y) plane. z-coordinates + are ignored. + + Parameters: + universe: contains the coordinates at each frame. + + neigh_list_per_frame: A frame-by-frame list of the neighbors of each + atom, output of :func:`listNeighboursAlongTrajectory`. + + order: the order of the symmetry measured by the descriptor. Default + is 6, corresponding to the hexatic order parameter. + + Returns: + An array of shape (n_atoms, n_frames), with the values of psi. + + Example: + + .. testsetup:: psi-test + + import pathlib + + path = pathlib.Path('source/_static/ex_test_files') + + .. testcode:: psi-test + + import numpy as np + from dynsight.trajectory import Trj + from dynsight.descriptors import orientational_order_param + + trj = Trj.init_from_xyz(path / "trajectory.xyz", dt=1.0) + neig_counts, _ = trj.get_coord_number(r_cut=3.0) + + psi = orientational_order_param( + universe=trj.universe, + neigh_list_per_frame=neig_counts, + ) + + .. testcode:: psi-test + :hide: + + assert np.isclose(psi[0][0], 0.09556616097688675) + + """ + n_atoms = universe.atoms.n_atoms + n_frames = len(universe.trajectory) + + psi = np.zeros((n_atoms, n_frames)) + + for t, _ in enumerate(universe.trajectory): + frame = universe.atoms.positions[:, :2].copy() + + for i, atom_i in enumerate(frame): + tmp = 0.0 + neighbors = neigh_list_per_frame[t][i] + for j in neighbors: + if j != i: + x, y = frame[j] - atom_i + theta = np.mod(np.arctan2(y, x), 2 * np.pi) + tmp += np.exp(1j * order * theta) + + if len(neighbors) > 1: + tmp /= len(neighbors) - 1 + + psi[i][t] = np.abs(tmp) + + return psi + + +def _compute_aver_align( + neigh_list_t: list[AtomGroup], + frame_vel: NDArray[np.float64], +) -> NDArray[np.float64]: + """Computes the average alignment for all the atoms in a frame.""" + phi_t = np.zeros(len(frame_vel)) + for i, atom_i in enumerate(frame_vel): + if not np.any(atom_i): # skip if zero velocity vector + continue + neighbors = neigh_list_t[i] + if len(neighbors) <= 1: + continue # no meaningful averaging if 0 neighbors + valid_neighbors = [ + j for j in neighbors if j != i and np.any(frame_vel[j]) + ] + if not valid_neighbors: + continue # no self-counting, no neighbors with v = 0.0 + + alignments = np.array( + [1 - cosine(atom_i, frame_vel[j]) for j in valid_neighbors] + ) + phi_t[i] = np.mean(alignments) + return phi_t + + +def velocity_alignment( + universe: Universe, + neigh_list_per_frame: list[list[AtomGroup]], +) -> NDArray[np.float64]: + """Compute average velocity alignment phi. + + If the Universe includes velocities, those are used. Otherwise, the + displacements are used. + + Parameters: + universe: contains the coordinates at each frame. + + neigh_list_per_frame: A frame-by-frame list of the neighbors of each + atom, output of :func:`listNeighboursAlongTrajectory`. + + Returns: + If the Universe inclused velocities, the output has shape + (n_atoms, n_frames), otherwise it has shape (n_atoms, n_frames - 1). + + Example: + + .. testsetup:: phi-test + + import pathlib + + path = pathlib.Path('source/_static/ex_test_files') + + .. testcode:: phi-test + + import numpy as np + from dynsight.trajectory import Trj + from dynsight.descriptors import velocity_alignment + + trj = Trj.init_from_xyz(path / "trajectory.xyz", dt=1.0) + neig_counts, _ = trj.get_coord_number(r_cut=3.0) + + phi = velocity_alignment( + universe=trj.universe, + neigh_list_per_frame=neig_counts, + ) + + .. testcode:: phi-test + :hide: + + assert np.isclose(phi[0][0], 0.38268664479255676) + + """ + n_atoms = universe.atoms.n_atoms + n_frames = len(universe.trajectory) + + if ( + hasattr(universe.atoms, "velocities") + and universe.atoms.velocities is not None + ): # If the Universe has velocities, use them + phi = np.zeros((n_frames, n_atoms)) + for t, _ in enumerate(universe.trajectory): + phi[t] = _compute_aver_align( + neigh_list_per_frame[t], + frame_vel=universe.atoms.velocities, + ) + return phi.T + + # If the Universe does not has velocities, use the displacements + r_0 = None + phi = np.zeros((n_frames - 1, n_atoms)) + for t, _ in enumerate(universe.trajectory): + r_1 = universe.atoms.positions.copy() + if t == 0: + r_0 = r_1 + continue + frame_vel = r_1 - r_0 + phi[t - 1] = _compute_aver_align( + neigh_list_per_frame[t - 1], + frame_vel, + ) + return phi.T diff --git a/src/dynsight/_internal/tica/tica.py b/src/dynsight/_internal/descriptors/tica.py similarity index 94% rename from src/dynsight/_internal/tica/tica.py rename to src/dynsight/_internal/descriptors/tica.py index b53fdce0..284936d5 100644 --- a/src/dynsight/_internal/tica/tica.py +++ b/src/dynsight/_internal/descriptors/tica.py @@ -1,4 +1,4 @@ -"""tICA package.""" +"""tICA functions.""" from __future__ import annotations @@ -54,12 +54,12 @@ def many_body_tica( .. code-block:: python import numpy as np - import dynsight + from dynsight.descriptors import many_body_tica np.random.seed(42) random_array = np.random.rand(100, 100, 10) - relax_times, coeffs, tica_data = dynsight.tica.many_body_tica( + relax_times, coeffs, tica_data = many_body_tica( random_array, lag_time=10, tica_dim=3) """ *_, n_dim = data.shape diff --git a/src/dynsight/_internal/lens/lens.py b/src/dynsight/_internal/lens/lens.py index 31261573..7aba2acf 100644 --- a/src/dynsight/_internal/lens/lens.py +++ b/src/dynsight/_internal/lens/lens.py @@ -86,6 +86,7 @@ def list_neighbours_along_trajectory( def neighbour_change_in_time( neigh_list_per_frame: list[list[AtomGroup]], + delay: int = 1, ) -> tuple[ NDArray[np.float64], NDArray[np.int64], @@ -95,24 +96,25 @@ def neighbour_change_in_time( """Return, listed per atom, the LENS values at each frame. * Original author: Martina Crippa - * Mantainer: Matteo Becchi Parameters: neigh_list_per_frame: A frame-by-frame list of the neighbors of each atom, output - of :func:`listNeighboursAlongTrajectory`. + of :func:`listNeighboursAlongTrajecøtory`. + + delay: + The delay between frames on which LENS is computed. Default is 1. Returns: tuple: - A tuple of the following elements: - - lensArray: The calculated LENS parameter. - It's a numpy.array of shape (n_particles, n_frames - 1) - - numberOfNeighs: The count of neighbors per frame. - It's a numpy.array of shape (n_particles, n_frames) - - lensNumerators: The numerators used for calculating LENS. - It's a numpy.array of shape (n_particles, n_frames - 1) - - lensDenominators: The denominators used for calculating LENS. - It's a numpy.array of shape (n_particles, n_frames - 1) + - lens_array: The calculated LENS parameter. + It's a numpy.array of shape (n_particles, n_frames - 1) + - number_of_neighs: The count of neighbors per frame. + It's a numpy.array of shape (n_particles, n_frames) + - lens_numerators: The numerators used for calculating LENS. + It's a numpy.array of shape (n_particles, n_frames - 1) + - lens_denominators: The denominators used for calculating LENS. + It's a numpy.array of shape (n_particles, n_frames - 1) Example: @@ -146,40 +148,39 @@ def neighbour_change_in_time( All supported input file formats by MDAnalysis can be used to set up the Universe. """ - nat = np.asarray(neigh_list_per_frame, dtype=object).shape[1] - nframes = np.asarray(neigh_list_per_frame, dtype=object).shape[0] - # this is the number of common NN between frames - lensarray = np.zeros((nat, nframes)) - # this is the number of NN at that frame - numberofneighs = np.zeros((nat, nframes), dtype=int) - # this is the numerator of LENS - lensnumerators = np.zeros((nat, nframes)) - # this is the denominator of lens - lensdenominators = np.zeros((nat, nframes)) + n_atoms = np.asarray(neigh_list_per_frame, dtype=object).shape[1] + n_frames = np.asarray(neigh_list_per_frame, dtype=object).shape[0] + + lens_array = np.zeros((n_atoms, n_frames)) # The LENS values + number_of_neighs = np.zeros((n_atoms, n_frames), dtype=int) # The NN + lens_numerators = np.zeros((n_atoms, n_frames)) # LENS numerator + lens_denominators = np.zeros((n_atoms, n_frames)) # LENS denominator + # each nnlist contains also the atom that generates them, # so 0 nn is a 1 element list - for atom_id in range(nat): - numberofneighs[atom_id, 0] = ( + for atom_id in range(n_atoms): + number_of_neighs[atom_id, 0] = ( neigh_list_per_frame[0][atom_id].shape[0] - 1 ) # let's calculate the numerators and the denominators - for frame in range(1, nframes): - numberofneighs[atom_id, frame] = ( + for frame in range(delay, n_frames - delay + 1): + number_of_neighs[atom_id, frame] = ( neigh_list_per_frame[frame][atom_id].shape[0] - 1 ) - lensdenominators[atom_id, frame] = ( + lens_denominators[atom_id, frame] = ( neigh_list_per_frame[frame][atom_id].shape[0] - + neigh_list_per_frame[frame - 1][atom_id].shape[0] + + neigh_list_per_frame[frame - delay][atom_id].shape[0] - 2 ) - lensnumerators[atom_id, frame] = np.setxor1d( + lens_numerators[atom_id, frame] = np.setxor1d( neigh_list_per_frame[frame][atom_id], - neigh_list_per_frame[frame - 1][atom_id], + neigh_list_per_frame[frame - delay][atom_id], ).shape[0] - den_not_0 = lensdenominators != 0 - # lens - lensarray[den_not_0] = ( - lensnumerators[den_not_0] / lensdenominators[den_not_0] + den_not_0 = lens_denominators != 0 + + lens_array[den_not_0] = ( + lens_numerators[den_not_0] / lens_denominators[den_not_0] ) - return lensarray, numberofneighs, lensnumerators, lensdenominators + + return lens_array, number_of_neighs, lens_numerators, lens_denominators diff --git a/src/dynsight/_internal/trajectory/cluster_insight.py b/src/dynsight/_internal/trajectory/cluster_insight.py index 70d641cb..94594af8 100644 --- a/src/dynsight/_internal/trajectory/cluster_insight.py +++ b/src/dynsight/_internal/trajectory/cluster_insight.py @@ -1,8 +1,8 @@ from __future__ import annotations import json -from dataclasses import asdict, dataclass, field, fields -from typing import TYPE_CHECKING, Any +from dataclasses import dataclass, field, fields +from typing import TYPE_CHECKING, Any, Literal import numpy as np @@ -32,29 +32,57 @@ class ClusterInsight: labels: NDArray[np.int64] def dump_to_json(self, file_path: Path) -> None: - """Save the ClusterInsight object as .json file.""" - data = asdict(self) - data["labels"] = data["labels"].tolist() + """Save the ClusterInsight to a JSON file and .npy file.""" + npy_path = file_path.with_suffix(".npy") + np.save(npy_path, self.labels) + + json_data = { + "labels_file": npy_path.name, + } + with file_path.open("w") as file: - json.dump(data, file, indent=4) - logger.log(f"ClusterInsight saved to {file_path}.") + json.dump(json_data, file, indent=4) + + logger.log( + f"ClusterInsight saved to {file_path} and labels to {npy_path}." + ) @classmethod - def load_from_json(cls, file_path: Path) -> ClusterInsight: - """Load the ClusterInsight object from .json file. + def load_from_json( + cls, + file_path: Path, + mmap_mode: Literal["r", "r+", "w+", "c"] | None = None, + ) -> ClusterInsight: + """Load the ClusterInsight object from JSON and associated .npy file. + + Parameters: + file_path: + Path to the .json file. + mmap_mode: + If given, used as np.load(..., mmap_mode=mmap_mode) for memory + mapping. Raises: - ValueError if the input file does not have a key "labels". + ValueError: if required keys are missing. """ with file_path.open("r") as file: data = json.load(file) - if "labels" not in data: - msg = "'labels' key not found in JSON file." + + labels_file = data.get("labels_file") + if not labels_file: + msg = "'labels_file' key not found in JSON file." logger.log(msg) raise ValueError(msg) - logger.log(f"ClusterInsight loaded from {file_path}.") - return cls(labels=np.array(data.get("labels"), dtype=np.int64)) + labels_path = file_path.with_name(labels_file) + labels = np.load(labels_path, mmap_mode=mmap_mode) + + logger.log( + f"ClusterInsight loaded from {file_path}, " + f"labels from {labels_path}." + ) + + return cls(labels=labels) @dataclass(frozen=True) @@ -73,49 +101,85 @@ class OnionInsight(ClusterInsight): meta: dict[str, Any] = field(default_factory=dict) def dump_to_json(self, file_path: Path) -> None: - """Save the OnionInsight object as .json file.""" + """Save the OnionInsight to a JSON file and .npy file.""" + # File paths + base = file_path.with_suffix("") + labels_path = base.with_name(base.name + "_labels.npy") + reshaped_path = base.with_name(base.name + "_reshaped.npy") + + # Save large arrays + np.save(labels_path, self.labels) + np.save(reshaped_path, self.reshaped_data) + + # Serialize state_list + serialized_states = [] + for state in self.state_list: + state_dict = {} + for f in fields(state): + val = getattr(state, f.name) + state_dict[f.name] = ( + val.tolist() if isinstance(val, np.ndarray) else val + ) + serialized_states.append(state_dict) + + # Compose JSON data = { - "labels": self.labels.tolist(), - "reshaped_data": self.reshaped_data.tolist(), + "labels_file": labels_path.name, + "reshaped_data_file": reshaped_path.name, + "state_list": serialized_states, "meta": self.meta, } - new_state_list = [] - for state in self.state_list: - tmp = {} - for f in fields(state): - value = getattr(state, f.name) - if isinstance(value, np.ndarray): - tmp[f.name] = value.tolist() - else: - tmp[f.name] = value - new_state_list.append(tmp) - - data["state_list"] = new_state_list with file_path.open("w") as file: json.dump(data, file, indent=4) - logger.log(f"OnionInsight saved to {file_path}.") + + logger.log( + f"OnionInsight saved to {file_path}, labels to {labels_path}, " + f"reshaped data to {reshaped_path}." + ) @classmethod - def load_from_json(cls, file_path: Path) -> OnionInsight: - """Load the OnionInsight object from .json file. + def load_from_json( + cls, + file_path: Path, + mmap_mode: Literal["r", "r+", "w+", "c"] | None = None, + ) -> OnionInsight: + """Load the OnionInsight object from JSON and .npy files. + + Parameters: + file_path: + Path to the .json file. + mmap_mode: + If given, used as np.load(..., mmap_mode=mmap_mode) for memory + mapping. Raises: - ValueError if the input file does not have a key "state_list". + ValueError: if required keys are missing. """ with file_path.open("r") as file: data = json.load(file) - if "state_list" not in data: - msg = "'state_list' key not found in JSON file." - logger.log(msg) - raise ValueError(msg) + + # Validate presence of keys + required_keys = ["labels_file", "reshaped_data_file", "state_list"] + for key in required_keys: + if key not in data: + msg = f"'{key}' key not found in JSON file." + logger.log(msg) + raise ValueError(msg) + + base_dir = file_path.parent + labels = np.load(base_dir / data["labels_file"], mmap_mode=mmap_mode) + reshaped = np.load( + base_dir / data["reshaped_data_file"], mmap_mode=mmap_mode + ) logger.log(f"OnionInsight loaded from {file_path}.") + return cls( - labels=np.array(data.get("labels")), - state_list=data.get("state_list"), - reshaped_data=np.array(data.get("reshaped_data")), - meta=data.get("meta"), + labels=labels, + reshaped_data=reshaped, + state_list=data["state_list"], + meta=data.get("meta", {}), ) def plot_output(self, file_path: Path, data_insight: Insight) -> None: @@ -230,47 +294,77 @@ class OnionSmoothInsight(ClusterInsight): meta: dict[str, Any] = field(default_factory=dict) def dump_to_json(self, file_path: Path) -> None: - """Save the OnionSmoothInsight object as .json file.""" + """Save the OnionSmoothInsight object to JSON and .npy for labels.""" + base = file_path.with_suffix("") + labels_path = base.with_name(base.name + "_labels.npy") + + # Save labels to .npy + np.save(labels_path, self.labels) + + # Serialize state_list + serialized_states = [] + for state in self.state_list: + state_dict = {} + for f in fields(state): + val = getattr(state, f.name) + state_dict[f.name] = ( + val.tolist() if isinstance(val, np.ndarray) else val + ) + serialized_states.append(state_dict) + + # Compose JSON data = { - "labels": self.labels.tolist(), + "labels_file": labels_path.name, + "state_list": serialized_states, "meta": self.meta, } - new_state_list = [] - for state in self.state_list: - tmp = {} - for f in fields(state): - value = getattr(state, f.name) - if isinstance(value, np.ndarray): - tmp[f.name] = value.tolist() - else: - tmp[f.name] = value - new_state_list.append(tmp) - - data["state_list"] = new_state_list with file_path.open("w") as file: json.dump(data, file, indent=4) - logger.log(f"OnionSmoothInsight saved to {file_path}.") + + logger.log( + f"OnionSmoothInsight saved to {file_path}, " + f"labels to {labels_path}." + ) @classmethod - def load_from_json(cls, file_path: Path) -> OnionSmoothInsight: - """Load the OnionSmoothInsight object from .json file. + def load_from_json( + cls, + file_path: Path, + mmap_mode: Literal["r", "r+", "w+", "c"] | None = None, + ) -> OnionSmoothInsight: + """Load the OnionSmoothInsight from JSON and associated .npy file. + + Parameters: + file_path: + Path to the .json file. + mmap_mode: + If given, used as np.load(..., mmap_mode=mmap_mode) for memory + mapping. Raises: - ValueError if the input file does not have a key "state_list". + ValueError: if required keys are missing. """ with file_path.open("r") as file: data = json.load(file) - if "state_list" not in data: - msg = "'state_list' key not found in JSON file." + + if "labels_file" not in data or "state_list" not in data: + msg = "'labels_file' or 'state_list' key not found in JSON file." logger.log(msg) raise ValueError(msg) - logger.log(f"OnionSmoothInsight loaded from {file_path}.") + labels_path = file_path.parent / data["labels_file"] + labels = np.load(labels_path, mmap_mode=mmap_mode) + + logger.log( + f"OnionSmoothInsight loaded from {file_path}, " + f"labels from {labels_path}." + ) + return cls( - labels=np.array(data.get("labels")), - state_list=data.get("state_list"), - meta=data.get("meta"), + labels=labels, + state_list=data["state_list"], + meta=data.get("meta", {}), ) def plot_output(self, file_path: Path, data_insight: Insight) -> None: diff --git a/src/dynsight/_internal/trajectory/insight.py b/src/dynsight/_internal/trajectory/insight.py index 181b30d9..577a7cce 100644 --- a/src/dynsight/_internal/trajectory/insight.py +++ b/src/dynsight/_internal/trajectory/insight.py @@ -1,8 +1,8 @@ from __future__ import annotations import json -from dataclasses import asdict, dataclass, field -from typing import TYPE_CHECKING, Any +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Literal import numpy as np @@ -33,32 +33,58 @@ class Insight: meta: dict[str, Any] = field(default_factory=dict) def dump_to_json(self, file_path: Path) -> None: - """Save the Insight object as .json file.""" - data = asdict(self) - data["dataset"] = data["dataset"].tolist() + """Save the Insight to a JSON file and .npy file.""" + # Save dataset as .npy + npy_path = file_path.with_suffix(".npy") + np.save(npy_path, self.dataset.astype(np.float64)) + + # Prepare JSON data + json_data = { + "dataset_file": npy_path.name, + "meta": self.meta, + } + with file_path.open("w") as file: - json.dump(data, file, indent=4) - logger.log(f"Insight saved to {file_path}.") + json.dump(json_data, file, indent=4) + logger.log(f"Insight saved to {file_path} and dataset to {npy_path}.") @classmethod - def load_from_json(cls, file_path: Path) -> Insight: + def load_from_json( + cls, + file_path: Path, + mmap_mode: Literal["r", "r+", "w+", "c"] | None = None, + ) -> Insight: """Load the Insight object from .json file. + Parameters: + file_path: + Path to the .json file. + mmap_mode: + If given, used as np.load(..., mmap_mode=mmap_mode) for memory + mapping. + Raises: - ValueError if the input file does not have a key "dataset". + ValueError: if required keys are missing. """ with file_path.open("r") as file: data = json.load(file) - if "dataset" not in data: - msg = "'dataset' key not found in JSON file." + dataset_file = data.get("dataset_file") + if not dataset_file: + msg = "'dataset_file' key not found in JSON file." logger.log(msg) raise ValueError(msg) - logger.log(f"Insight loaded from {file_path}.") + dataset_path = file_path.with_name(dataset_file) + dataset = np.load(dataset_path, mmap_mode=mmap_mode) + + logger.log( + f"Insight loaded from {file_path}, dataset from {dataset_path}." + ) + return cls( - dataset=np.array(data.get("dataset"), dtype=np.float64), - meta=data.get("meta"), + dataset=dataset, + meta=data.get("meta", {}), ) def spatial_average( @@ -120,6 +146,36 @@ def get_angular_velocity(self, delay: int = 1) -> Insight: logger.log(f"Computed angular velocity with args {attr_dict}.") return Insight(dataset=theta, meta=attr_dict) + def get_tica( + self, + lag_time: int, + tica_dim: int, + ) -> tuple[NDArray[np.float64], NDArray[np.float64], Insight]: + """Perform tICA on trajectories from a many-body system. + + The attributes "lag_time" and "tica_dim" are added to the meta. + + Raises: + ValueError if the dataset does not have the right dimensions. + """ + if self.dataset.ndim != UNIVAR_DIM + 1: + msg = "dataset.ndim != 3." + logger.log(msg) + raise ValueError(msg) + + relax_times, coeffs, tica = dynsight.descriptors.many_body_tica( + self.dataset, + lag_time=lag_time, + tica_dim=tica_dim, + ) + + attr_dict = self.meta.copy() + attr_dict.update({"lag_time": lag_time, "tica_dim": tica_dim}) + + logger.log(f"Computed many-body tICA with args {attr_dict}.") + + return relax_times, coeffs, Insight(dataset=tica, meta=attr_dict) + def get_onion( self, delta_t: int, @@ -222,32 +278,30 @@ def get_onion_analysis( for details). Parameters: - delta_t_min: Smaller value for delta_t_list. - - delta_t_max: Larger value for delta_t_list, - - delta_t_num: Number of values in delta_t_list, - - fig1_path: If is not None, the time resolution analysis plot is - saved in this location. - - fig2_path: If is not None, the populations fractions plot is - saved in this location. - - bins: The 'bins' parameter for onion clustering. - - number_of_sigmas: The 'number_of_sigmas' parameter for onion - clustering. - - max_area_overlap: The 'max_area_overlap' parameter for onion - clustering. + delta_t_min: + Smaller value for delta_t_list. + delta_t_max: + Larger value for delta_t_list. + delta_t_num: + Number of values in delta_t_list. + fig1_path: + If is not None, the time resolution analysis plot is saved in + this location. + fig2_path: + If is not None, the populations fractions plot is saved in + this location. + bins: + The 'bins' parameter for onion clustering. + number_of_sigmas: + The 'number_of_sigmas' parameter for onion clustering. + max_area_overlap: + The 'max_area_overlap' parameter for onion clustering. Returns: - delta_t_list: The list of delta_t used. - - n_clust: The number of clusters at each delta_t. - - unclass_frac: The fraction of unclassified data at each delta_t. + tuple: + * delta_t_list: The list of ∆t used. + * n_clust: The number of clusters at each ∆t. + * unclass_frac: The fraction of unclassified data at each ∆t. """ if delta_t_max is None: delta_t_max = self.dataset.shape[1] diff --git a/src/dynsight/_internal/trajectory/trajectory.py b/src/dynsight/_internal/trajectory/trajectory.py index bc80e888..ec6df090 100644 --- a/src/dynsight/_internal/trajectory/trajectory.py +++ b/src/dynsight/_internal/trajectory/trajectory.py @@ -64,7 +64,8 @@ def init_from_xyz(cls, traj_file: Path, dt: float) -> Trj: See https://docs.mdanalysis.org/2.9.0/documentation_pages/core/universe.html#MDAnalysis.core.universe.Universe. Parameters: - dt: the trajectory's time-step. + dt: + the trajectory's time-step. """ logger.log(f"Created Trj from {traj_file} with dt = {dt}.") universe = MDAnalysis.Universe(traj_file, dt=dt) @@ -130,16 +131,18 @@ def get_slice(self, start: int, stop: int, step: int) -> Trj: def get_coord_number( self, r_cut: float, + delay: int = 1, selection: str = "all", neigcounts: list[list[AtomGroup]] | None = None, ) -> tuple[list[list[AtomGroup]], Insight]: """Compute coordination number on the trajectory. Returns: - neighcounts: a list[list[AtomGroup]], it can be used to speed up - subsequent descriptors' computations. - An Insight containing the number of neighbors. It has the following - meta: r_cut, selection. + tuple: + * neighcounts: a list[list[AtomGroup]], it can be used to + speed up subsequent descriptors' computations. + * An Insight containing the number of neighbors. It has the + following meta: r_cut, selection. """ if neigcounts is None: neigcounts = dynsight.lens.list_neighbours_along_trajectory( @@ -148,9 +151,12 @@ def get_coord_number( selection=selection, trajslice=self.trajslice, ) - _, nn, *_ = dynsight.lens.neighbour_change_in_time(neigcounts) + _, nn, *_ = dynsight.lens.neighbour_change_in_time( + neigh_list_per_frame=neigcounts, + delay=delay, + ) - attr_dict = {"r_cut": r_cut, "selection": selection} + attr_dict = {"r_cut": r_cut, "delay": delay, "selection": selection} logger.log(f"Computed coord_number using args {attr_dict}.") return neigcounts, Insight( dataset=nn.astype(np.float64), @@ -160,16 +166,18 @@ def get_coord_number( def get_lens( self, r_cut: float, + delay: int = 1, selection: str = "all", neigcounts: list[list[AtomGroup]] | None = None, ) -> tuple[list[list[AtomGroup]], Insight]: """Compute LENS on the trajectory. Returns: - neighcounts: a list[list[AtomGroup]], it can be used to speed up - subsequent descriptors' computations. - An Insight containing LENS. It has the following meta: r_cut, - selection. + tuple: + * neighcounts: a list[list[AtomGroup]], it can be used to + speed up subsequent descriptors' computations. + * An Insight containing LENS. It has the following meta: + r_cut, selection. """ if neigcounts is None: neigcounts = dynsight.lens.list_neighbours_along_trajectory( @@ -178,9 +186,12 @@ def get_lens( selection=selection, trajslice=self.trajslice, ) - lens, *_ = dynsight.lens.neighbour_change_in_time(neigcounts) + lens, *_ = dynsight.lens.neighbour_change_in_time( + neigh_list_per_frame=neigcounts, + delay=delay, + ) - attr_dict = {"r_cut": r_cut, "selection": selection} + attr_dict = {"r_cut": r_cut, "delay": delay, "selection": selection} logger.log(f"Computed LENS using args {attr_dict}.") return neigcounts, Insight( @@ -225,6 +236,84 @@ def get_soap( logger.log(f"Computed SOAP with args {attr_dict}.") return Insight(dataset=soap, meta=attr_dict) + def get_orientational_op( + self, + r_cut: float, + order: int = 6, + selection: str = "all", + neigcounts: list[list[AtomGroup]] | None = None, + ) -> tuple[list[list[AtomGroup]], Insight]: + """Compute the magnitude of the orientational order parameter. + + Returns: + tuple: + * neighcounts: a list[list[AtomGroup]], it can be used to + speed up subsequent descriptors' computations. + * An Insight containing the orientational order parameter. + It has the following meta: r_cut, order, selection. + """ + if neigcounts is None: + neigcounts = dynsight.lens.list_neighbours_along_trajectory( + input_universe=self.universe, + cutoff=r_cut, + selection=selection, + trajslice=self.trajslice, + ) + psi = dynsight.descriptors.orientational_order_param( + self.universe, + neigh_list_per_frame=neigcounts, + order=order, + ) + + attr_dict = {"r_cut": r_cut, "order": order, "selection": selection} + + logger.log( + f"Computed orientational order parameter using args {attr_dict}." + ) + + return neigcounts, Insight( + dataset=psi, + meta=attr_dict, + ) + + def get_velocity_alignment( + self, + r_cut: float, + selection: str = "all", + neigcounts: list[list[AtomGroup]] | None = None, + ) -> tuple[list[list[AtomGroup]], Insight]: + """Compute the average velocity alignment. + + Returns: + tuple: + * neighcounts: a list[list[AtomGroup]], it can be used to + speed up subsequent descriptors' computations. + * An Insight containing the average velocities alignment. + It has the following meta: r_cut, selection. + """ + if neigcounts is None: + neigcounts = dynsight.lens.list_neighbours_along_trajectory( + input_universe=self.universe, + cutoff=r_cut, + selection=selection, + trajslice=self.trajslice, + ) + phi = dynsight.descriptors.velocity_alignment( + self.universe, + neigh_list_per_frame=neigcounts, + ) + + attr_dict = {"r_cut": r_cut, "selection": selection} + + logger.log( + f"Computed average velocity alignment using args {attr_dict}." + ) + + return neigcounts, Insight( + dataset=phi, + meta=attr_dict, + ) + def get_rdf( self, distances_range: list[float], @@ -233,13 +322,15 @@ def get_rdf( exclusion_block: list[int] | None = None, nbins: int = 200, norm: Literal["rdf", "density", "none"] = "rdf", - ) -> Insight: + ) -> tuple[NDArray[np.float64], NDArray[np.float64]]: """Compute the radial distribution function g(r). See https://docs.mdanalysis.org/1.1.1/documentation_pages/analysis/rdf.html. - The returned Insight contains the following meta: distances_range, s1, - s2, exclusion_block, nbins, norm. + Returns: + tuple: + * A list of values of the interparticle distance r + * The corresponding list of values of g(r) """ trajslice = slice(None) if self.trajslice is None else self.trajslice bins, rdf = dynsight.analysis.compute_rdf( @@ -254,7 +345,6 @@ def get_rdf( stop=trajslice.stop, step=trajslice.step, ) - dataset = np.array([bins, rdf]) attr_dict = { "distances_range": distances_range, "s1": s1, @@ -264,4 +354,4 @@ def get_rdf( "norm": norm, } logger.log(f"Computed g(r) with args {attr_dict}.") - return Insight(dataset=dataset, meta=attr_dict) + return bins, rdf diff --git a/src/dynsight/analysis.py b/src/dynsight/analysis.py index 687bcadb..3b59bcfa 100644 --- a/src/dynsight/analysis.py +++ b/src/dynsight/analysis.py @@ -3,6 +3,8 @@ from dynsight._internal.analysis.entropy import ( compute_entropy_gain, compute_entropy_gain_multi, + compute_kl_entropy, + compute_negentropy, compute_shannon, compute_shannon_multi, sample_entropy, @@ -19,6 +21,8 @@ __all__ = [ "compute_entropy_gain", "compute_entropy_gain_multi", + "compute_kl_entropy", + "compute_negentropy", "compute_rdf", "compute_shannon", "compute_shannon_multi", diff --git a/src/dynsight/descriptors.py b/src/dynsight/descriptors.py new file mode 100644 index 00000000..71d02aec --- /dev/null +++ b/src/dynsight/descriptors.py @@ -0,0 +1,15 @@ +"""descriptors package.""" + +from dynsight._internal.descriptors.misc import ( + orientational_order_param, + velocity_alignment, +) +from dynsight._internal.descriptors.tica import ( + many_body_tica, +) + +__all__ = [ + "many_body_tica", + "orientational_order_param", + "velocity_alignment", +] diff --git a/src/dynsight/tica.py b/src/dynsight/tica.py deleted file mode 100644 index 3361106a..00000000 --- a/src/dynsight/tica.py +++ /dev/null @@ -1,9 +0,0 @@ -"""tICA package.""" - -from dynsight._internal.tica.tica import ( - many_body_tica, -) - -__all__ = [ - "many_body_tica", -] diff --git a/tests/analysis/rdf/test_rdf.py b/tests/analysis/rdf/test_rdf.py index 62b969d1..95289d11 100644 --- a/tests/analysis/rdf/test_rdf.py +++ b/tests/analysis/rdf/test_rdf.py @@ -29,7 +29,7 @@ def test_compute_rdf(case_data: RDFCaseData) -> None: s2=selection, distances_range=[0.0, 5.0], norm=case_data.norm, - ).dataset + ) if not expected_bins.exists() or not expected_rdf.exists(): np.save(expected_bins, test_bins) diff --git a/tests/trajectory/files/cl_ins_test.json b/tests/trajectory/files/cl_ins_test.json index b53a26fa..f9ab205f 100644 --- a/tests/trajectory/files/cl_ins_test.json +++ b/tests/trajectory/files/cl_ins_test.json @@ -1,39 +1,3 @@ { - "labels": [ - [ - 0, - 0, - 0, - 0, - 0 - ], - [ - 0, - 0, - 0, - 0, - 0 - ], - [ - 0, - 0, - 0, - 0, - 0 - ], - [ - 0, - 0, - 0, - 0, - 0 - ], - [ - 0, - 0, - 0, - 0, - 0 - ] - ] + "labels_file": "cl_ins_test.npy" } \ No newline at end of file diff --git a/tests/trajectory/files/cl_ins_test.npy b/tests/trajectory/files/cl_ins_test.npy new file mode 100644 index 00000000..59f7be6b Binary files /dev/null and b/tests/trajectory/files/cl_ins_test.npy differ diff --git a/tests/trajectory/files/ins_1_test.json b/tests/trajectory/files/ins_1_test.json index 8fd04c08..2977ab6c 100644 --- a/tests/trajectory/files/ins_1_test.json +++ b/tests/trajectory/files/ins_1_test.json @@ -1,103 +1,8 @@ { - "dataset": [ - [ - [ - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ] - ], - [ - [ - 0.0, - 0.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0 - ], - [ - 0.0, - 0.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0 - ] - ] - ], + "dataset_file": "ins_1_test.npy", "meta": { - "r_cut": 10.0 + "r_cut": 10.0, + "delay": 1, + "selection": "all" } } \ No newline at end of file diff --git a/tests/trajectory/files/ins_1_test.npy b/tests/trajectory/files/ins_1_test.npy new file mode 100644 index 00000000..cf456290 Binary files /dev/null and b/tests/trajectory/files/ins_1_test.npy differ diff --git a/tests/trajectory/files/lens.json b/tests/trajectory/files/lens.json new file mode 100644 index 00000000..c207498a --- /dev/null +++ b/tests/trajectory/files/lens.json @@ -0,0 +1,8 @@ +{ + "dataset_file": "lens.npy", + "meta": { + "r_cut": 10.0, + "delay": 1, + "selection": "all" + } +} \ No newline at end of file diff --git a/tests/trajectory/files/lens.npy b/tests/trajectory/files/lens.npy new file mode 100644 index 00000000..ef3e0cb8 Binary files /dev/null and b/tests/trajectory/files/lens.npy differ diff --git a/tests/trajectory/files/n_c.json b/tests/trajectory/files/n_c.json new file mode 100644 index 00000000..eefbbeab --- /dev/null +++ b/tests/trajectory/files/n_c.json @@ -0,0 +1,8 @@ +{ + "dataset_file": "n_c.npy", + "meta": { + "r_cut": 10.0, + "delay": 1, + "selection": "all" + } +} \ No newline at end of file diff --git a/tests/trajectory/files/n_c.npy b/tests/trajectory/files/n_c.npy new file mode 100644 index 00000000..c13e3518 Binary files /dev/null and b/tests/trajectory/files/n_c.npy differ diff --git a/tests/trajectory/files/on_ins_test.json b/tests/trajectory/files/on_ins_test.json index 30249a80..4b6b3acf 100644 --- a/tests/trajectory/files/on_ins_test.json +++ b/tests/trajectory/files/on_ins_test.json @@ -1,77 +1,6 @@ { - "labels": [ - -1, - 0, - 0, - 0, - -1, - 0, - 0, - 0 - ], - "reshaped_data": [ - [ - 0.0, - 0.0, - 1.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 1.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ] - ], - "meta": { - "delta_t": 5, - "bins": "auto", - "number_of_sigmas": 2.0 - }, + "labels_file": "on_ins_test_labels.npy", + "reshaped_data_file": "on_ins_test_reshaped.npy", "state_list": [ { "mean": -5.050528786194419e-10, @@ -89,5 +18,10 @@ 0.0 ] } - ] + ], + "meta": { + "delta_t": 5, + "bins": "auto", + "number_of_sigmas": 2.0 + } } \ No newline at end of file diff --git a/tests/trajectory/files/on_ins_test_labels.npy b/tests/trajectory/files/on_ins_test_labels.npy new file mode 100644 index 00000000..101450be Binary files /dev/null and b/tests/trajectory/files/on_ins_test_labels.npy differ diff --git a/tests/trajectory/files/on_ins_test_reshaped.npy b/tests/trajectory/files/on_ins_test_reshaped.npy new file mode 100644 index 00000000..1ba7198b Binary files /dev/null and b/tests/trajectory/files/on_ins_test_reshaped.npy differ diff --git a/tests/trajectory/files/phi.json b/tests/trajectory/files/phi.json new file mode 100644 index 00000000..60019d94 --- /dev/null +++ b/tests/trajectory/files/phi.json @@ -0,0 +1,7 @@ +{ + "dataset_file": "phi.npy", + "meta": { + "r_cut": 10.0, + "selection": "all" + } +} \ No newline at end of file diff --git a/tests/trajectory/files/phi.npy b/tests/trajectory/files/phi.npy new file mode 100644 index 00000000..9bf995d8 Binary files /dev/null and b/tests/trajectory/files/phi.npy differ diff --git a/tests/trajectory/files/psi.json b/tests/trajectory/files/psi.json new file mode 100644 index 00000000..8b69b79b --- /dev/null +++ b/tests/trajectory/files/psi.json @@ -0,0 +1,8 @@ +{ + "dataset_file": "psi.npy", + "meta": { + "r_cut": 10.0, + "order": 6, + "selection": "all" + } +} \ No newline at end of file diff --git a/tests/trajectory/files/psi.npy b/tests/trajectory/files/psi.npy new file mode 100644 index 00000000..151a395f Binary files /dev/null and b/tests/trajectory/files/psi.npy differ diff --git a/tests/trajectory/files/soap.json b/tests/trajectory/files/soap.json new file mode 100644 index 00000000..6aa89a68 --- /dev/null +++ b/tests/trajectory/files/soap.json @@ -0,0 +1,11 @@ +{ + "dataset_file": "soap.npy", + "meta": { + "r_cut": 10.0, + "n_max": 8, + "l_max": 8, + "respect_pbc": true, + "selection": "all", + "centers": "all" + } +} \ No newline at end of file diff --git a/tests/trajectory/files/soap.npy b/tests/trajectory/files/soap.npy new file mode 100644 index 00000000..dd0d1b35 Binary files /dev/null and b/tests/trajectory/files/soap.npy differ diff --git a/tests/trajectory/files/tica.json b/tests/trajectory/files/tica.json new file mode 100644 index 00000000..04fbe692 --- /dev/null +++ b/tests/trajectory/files/tica.json @@ -0,0 +1,13 @@ +{ + "dataset_file": "tica.npy", + "meta": { + "r_cut": 10.0, + "n_max": 8, + "l_max": 8, + "respect_pbc": true, + "selection": "all", + "centers": "all", + "lag_time": 10, + "tica_dim": 2 + } +} \ No newline at end of file diff --git a/tests/trajectory/files/tica.npy b/tests/trajectory/files/tica.npy new file mode 100644 index 00000000..5dbd333d Binary files /dev/null and b/tests/trajectory/files/tica.npy differ diff --git a/tests/trajectory/files/tsoap.json b/tests/trajectory/files/tsoap.json new file mode 100644 index 00000000..43a373a9 --- /dev/null +++ b/tests/trajectory/files/tsoap.json @@ -0,0 +1,12 @@ +{ + "dataset_file": "tsoap.npy", + "meta": { + "r_cut": 10.0, + "n_max": 8, + "l_max": 8, + "respect_pbc": true, + "selection": "all", + "centers": "all", + "delay": 1 + } +} \ No newline at end of file diff --git a/tests/trajectory/files/tsoap.npy b/tests/trajectory/files/tsoap.npy new file mode 100644 index 00000000..8ac2fe81 Binary files /dev/null and b/tests/trajectory/files/tsoap.npy differ diff --git a/tests/trajectory/test_trj.py b/tests/trajectory/test_trj.py index a845fd7e..6522bb2d 100644 --- a/tests/trajectory/test_trj.py +++ b/tests/trajectory/test_trj.py @@ -7,6 +7,7 @@ import MDAnalysis import numpy as np import pytest +from numpy.testing import assert_allclose from dynsight.logs import logger from dynsight.trajectory import ( @@ -69,12 +70,48 @@ def test_trj_inits( logger.get() +def test_get_descriptors(file_paths: dict[str, Path]) -> None: + """Test computation methods for Trj and Insight classes.""" + trj = Trj.init_from_xtc(file_paths["xtc"], file_paths["gro"]) + + r_cut = 10.0 + neigcounts, n_c = trj.get_coord_number(r_cut=r_cut) + _, lens = trj.get_lens(r_cut=r_cut, neigcounts=neigcounts) + soap = trj.get_soap(r_cut=10.0, n_max=8, l_max=8) + _, phi = trj.get_velocity_alignment(r_cut=r_cut, neigcounts=neigcounts) + _, _, tica = soap.get_tica(lag_time=10, tica_dim=2) + + test_n_c = Insight.load_from_json(file_paths["files_dir"] / "n_c.json") + test_lens = Insight.load_from_json(file_paths["files_dir"] / "lens.json") + test_soap = Insight.load_from_json(file_paths["files_dir"] / "soap.json") + test_phi = Insight.load_from_json(file_paths["files_dir"] / "phi.json") + test_tica = Insight.load_from_json(file_paths["files_dir"] / "tica.json") + + assert_allclose(test_n_c.dataset, n_c.dataset, rtol=1e-4, atol=1e-6) + assert_allclose(test_lens.dataset, lens.dataset, rtol=1e-4, atol=1e-6) + assert_allclose(test_soap.dataset, soap.dataset, rtol=1e-4, atol=1e-6) + assert_allclose(test_phi.dataset, phi.dataset, rtol=1e-4, atol=1e-6) + assert_allclose(test_tica.dataset, tica.dataset, rtol=1e-4, atol=1e-6) + + # Note: for some reason, get_orientational_op() and get_angular_velocity() + # have larger numerical variations that the other descriptors. + _, psi = trj.get_orientational_op(r_cut=r_cut, neigcounts=neigcounts) + test_psi = Insight.load_from_json(file_paths["files_dir"] / "psi.json") + assert_allclose(test_psi.dataset, psi.dataset, rtol=1e-3, atol=1e-6) + + tsoap = soap.get_angular_velocity() + test_tsoap = Insight.load_from_json(file_paths["files_dir"] / "tsoap.json") + assert_allclose(test_tsoap.dataset, tsoap.dataset, rtol=1e-3, atol=1e-6) + + logger.get() + + def test_insight( tmp_path: Path, file_paths: dict[str, Path], universe: MDAnalysis.Universe ) -> None: """Test Insight methods.""" trj = Trj(universe) - _, insight = trj.get_lens(10.0) + _, insight = trj.get_lens(r_cut=10.0) # Insight dump/load json_file = tmp_path / "insight.json" @@ -124,19 +161,19 @@ def test_onion_analysis(universe: MDAnalysis.Universe) -> None: def test_insight_load_errors(file_paths: dict[str, Path]) -> None: """Test insight load errors.""" with pytest.raises( - ValueError, match="'dataset' key not found in JSON file." + ValueError, match="'dataset_file' key not found in JSON file." ): _ = Insight.load_from_json(file_paths["files_dir"] / "empty.json") with pytest.raises( - ValueError, match="'labels' key not found in JSON file." + ValueError, match="'labels_file' key not found in JSON file." ): _ = ClusterInsight.load_from_json( file_paths["files_dir"] / "ins_1_test.json" ) with pytest.raises( - ValueError, match="'state_list' key not found in JSON file." + ValueError, match="'reshaped_data_file' key not found in JSON file." ): _ = OnionInsight.load_from_json( file_paths["files_dir"] / "cl_ins_test.json"