Merged
Changes from all commits (27 commits)
883271c
gitignoring static figure.
matteobecchi Oct 30, 2025
d55408d
numpy<2 needed for vision working on mac.
matteobecchi Oct 30, 2025
22b3297
This figure has something wrong.
matteobecchi Oct 30, 2025
6ff1676
Added general function shannon().
matteobecchi Oct 31, 2025
f4306d3
Added new function info_gain().
matteobecchi Oct 31, 2025
0ab2c67
Pytests updated.
matteobecchi Oct 31, 2025
30e9196
Adding functions to docs.
matteobecchi Oct 31, 2025
f84268c
entropy recipe updated.
matteobecchi Oct 31, 2025
a1be8e8
WIP.
matteobecchi Oct 31, 2025
c813933
Adding citation to infomeasure.
matteobecchi Nov 4, 2025
5f454f6
WIP
matteobecchi Nov 4, 2025
3a163cb
Example on info gain updated.
matteobecchi Nov 5, 2025
2648196
Improving docs folder.
matteobecchi Nov 5, 2025
cf3a55a
Handling case with 2 samples.
matteobecchi Nov 5, 2025
5d5a287
Recipe info gain updated.
matteobecchi Nov 5, 2025
22c4498
Pytests updated.
matteobecchi Nov 5, 2025
47e467a
Added Trj.dump_colored_trj().
matteobecchi Nov 5, 2025
3cd61e4
Solving warnings in doctests.
matteobecchi Nov 5, 2025
287a042
Undoing useless changes.
matteobecchi Nov 5, 2025
d8271d4
Adding netCDF4 dependency for MDA stuff.
matteobecchi Nov 5, 2025
1fb27e2
Solving missing png file.
matteobecchi Nov 5, 2025
5bfcf66
Adding missing parameters.
matteobecchi Nov 6, 2025
14e2ee7
Making args names consistent.
matteobecchi Nov 6, 2025
652cfcd
Making args names consistent.
matteobecchi Nov 6, 2025
615efe7
Adding missing links in docs.
matteobecchi Nov 7, 2025
703399d
Minor revision after review.
matteobecchi Nov 7, 2025
27bbf4b
Removing useless dependency that will not break the tests.
matteobecchi Nov 7, 2025
5 changes: 2 additions & 3 deletions README.rst
@@ -86,9 +86,7 @@ If you use ``dynsight`` please cite

https://github.com/GMPavanLab/dynsight

and

TBD
https://doi.org/10.48550/arXiv.2510.23493

* Most modules also use MDAnalysis, https://www.mdanalysis.org/pages/citations/
* If you use SOAP, please cite https://doi.org/10.1103/PhysRevB.87.184115 and DScribe https://singroup.github.io/dscribe/latest/citing.html
@@ -98,6 +96,7 @@ and
* If you use tICA, please cite ``deeptime`` https://deeptime-ml.github.io/latest/index.html
* If you use ``dynsight.vision``, please cite Ultralytics YOLO https://docs.ultralytics.com/it/models/yolo11/#usage-examples
* If you use ``dynsight.track``, please cite Trackpy https://soft-matter.github.io/trackpy/dev/introduction.html
* Entropy calculations are based on ``infomeasure`` https://doi.org/10.1038/s41598-025-14053-5


Acknowledgements
Binary file added docs/source/_static/Example_0_1D.png
Binary file added docs/source/_static/Example_0_2D.png
Binary file added docs/source/_static/Example_1_1D.png
Binary file added docs/source/_static/Example_1_2D.png
Binary file modified docs/source/_static/Information_gains.png
Binary file added docs/source/_static/info_gain/trj_2.npy
Binary file added docs/source/_static/info_gain/trj_4.npy
Binary file removed docs/source/_static/info_gain_clusters_1d.png
Binary file removed docs/source/_static/info_gain_clusters_1d_y.png
Binary file removed docs/source/_static/info_gain_clusters_2d.png
Binary file removed docs/source/_static/info_gain_clusters_2d_y.png
Binary file modified docs/source/_static/info_plot.png
2 changes: 1 addition & 1 deletion docs/source/_static/recipes/descr_from_trj.py
@@ -21,7 +21,7 @@ def main() -> None:
selection="all", # compute on a selection of particles
centers="all", # compute for a selection of centers
respect_pbc=False, # consider PBC
n_core=1, # use multiprocessing to speed up calculations
n_jobs=1, # use multiprocessing to speed up calculations
)

# Loading an example trajectory
12 changes: 10 additions & 2 deletions docs/source/analysis.rst
@@ -10,17 +10,25 @@ Entropy
The ``analysis`` module offers a variety of functions for entropy- and
information-based calculations.
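
For orientation, here is a minimal sketch of the two main entry points,
based on the calls used in the recipes below; the toy data and the
``labels`` split are invented purely for illustration:

.. code-block:: python

    import numpy as np
    import dynsight

    rng = np.random.default_rng(0)
    data = rng.normal(size=1000)  # toy 1D dataset
    labels = (data > 0).astype(int)  # hypothetical cluster labels

    # Shannon entropy with the Kozachenko-Leonenko estimator, in bits
    entropy = dynsight.analysis.shannon(data=data, method="kl", base=2)

    # Information gain of the clustering; the first returned value
    # is the gain in bits
    gain, *_ = dynsight.analysis.info_gain(
        data=data,
        labels=labels,
        method="kl",
    )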

.. toctree::
:maxdepth: 1

shannon <_autosummary/dynsight.analysis.shannon>
info_gain <_autosummary/dynsight.analysis.info_gain>
compute_negentropy <_autosummary/dynsight.analysis.compute_negentropy>
sample_entropy <_autosummary/dynsight.analysis.sample_entropy>

The following functions are deprecated; the functions listed above should be preferred.

.. toctree::
:maxdepth: 1

compute_shannon <_autosummary/dynsight.analysis.compute_shannon>
compute_kl_entropy <_autosummary/dynsight.analysis.compute_kl_entropy>
compute_negentropy <_autosummary/dynsight.analysis.compute_negentropy>
compute_entropy_gain <_autosummary/dynsight.analysis.compute_entropy_gain>
compute_shannon_multi <_autosummary/dynsight.analysis.compute_shannon_multi>
compute_kl_entropy_multi <_autosummary/dynsight.analysis.compute_kl_entropy_multi>
compute_entropy_gain_multi <_autosummary/dynsight.analysis.compute_entropy_gain_multi>
sample_entropy <_autosummary/dynsight.analysis.sample_entropy>

Other functions
---------------
81 changes: 37 additions & 44 deletions docs/source/example_info_gain.rst
@@ -11,11 +13,13 @@ To start, let's import the packages we will need and create a folder in the cwd

from pathlib import Path
from typing import Callable
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import dynsight

cwd = Path.cwd()
folder_name = "info_gain"
folder_name = "source/_static/info_gain"
folder_path = cwd / folder_name
if not folder_path.exists():
folder_path.mkdir()
@@ -95,12 +97,8 @@ This function simulates, for both energy landscapes, the dynamics of 100 particles

trajectory[t, i] = particles[i]

plt.figure()
plt.plot(trajectory[:, :, 0], trajectory[:, :, 1])
plt.show()

dataset = np.transpose(trajectory, (1, 0, 2))
np.save(filename, dataset)
np.save(file_name, dataset)
return dataset


@@ -144,59 +142,54 @@ To check if the clustering is working in a meaningful way, we also plot the results
y_positions = dataset[:, :, 1]
info_gain_y = np.zeros(delta_t_list.size)

for j, delta_t in enumerate(delta_t_list):
reshaped_data = dynsight.onion.helpers.reshape_from_nt(
y_positions, delta_t
for j, delta_t in enumerate(tqdm(delta_t_list)):
state_list, labels = dynsight.onion.onion_uni_smooth(
y_positions,
delta_t=delta_t,
)
state_list, labels = dynsight.onion.onion_uni(reshaped_data)

if j == example_delta_t:
dynsight.onion.plot.plot_output_uni(
f"Example_{i}_1D.png",
reshaped_data,
n_atoms,
dynsight.onion.plot_smooth.plot_output_uni(
f"source/_static/Example_{i}_1D.png",
y_positions,
state_list,
)

# and compute the information gain:
info_gain_y[j], *_ = dynsight.analysis.compute_entropy_gain(
reshaped_data, labels, n_bins=40
)
results.append(info_gain_y)
info_gain_y[j], *_ = dynsight.analysis.info_gain(
data=y_positions.ravel(),
labels=labels.ravel(),
method="kl",
) # Results are in bit
results[i * 2] = info_gain_y

# Or we can do clustering using both (x, y) variables:
info_gain_xy = np.zeros(delta_t_list.size)
tmp1_dataset = np.transpose(dataset, (2, 0, 1))
for j, delta_t in enumerate(delta_t_list):
reshaped_data = dynsight.onion.helpers.reshape_from_dnt(
tmp1_dataset, delta_t
for j, delta_t in enumerate(tqdm(delta_t_list)):
state_list, labels = dynsight.onion.onion_multi_smooth(
dataset,
delta_t=delta_t,
)
state_list, labels = dynsight.onion.onion_multi(reshaped_data)

if j == example_delta_t:
dynsight.onion.plot.plot_output_multi(
f"Example_{i}_2D.png",
tmp1_dataset,
dynsight.onion.plot_smooth.plot_output_multi(
f"source/_static/Example_{i}_2D.png",
dataset,
state_list,
labels,
delta_t,
)

# and compute the information gain:
# We need an array (n_samples, n_dims), and labels (n_samples,)
n_sequences = int(n_frames / delta_t)
long_labels = np.repeat(labels, delta_t)
tmp = dataset[:, : n_sequences * delta_t, :]
ds_reshaped = tmp.reshape((-1, n_dims))
reshaped_data = dataset.reshape((-1, 2))
reshaped_labels = labels.reshape((-1,))

info_gain_xy[j], *_ = dynsight.analysis.compute_entropy_gain_multi(
ds_reshaped, long_labels, n_bins=[40, 40]
)
# Need to multiply by two because it's 2 dimensional, and the output
# of the info_gain functions is normalized by the log volume of the
# phase space, which in 2D is doubled
info_gain_xy *= 2
results.append(info_gain_xy)
info_gain_xy[j], *_ = dynsight.analysis.info_gain(
reshaped_data,
reshaped_labels,
method="kl",
) # Results are in bit
results[i * 2 + 1] = info_gain_xy


Here are the plots of the two datasets, with the different clusters identified when clustering the full, two-dimensional data, using ∆t = 4 frames:
@@ -205,8 +198,8 @@
:widths: auto
:align: center

* - .. image:: _static/info_gain_clusters_1d.png
- .. image:: _static/info_gain_clusters_2d.png
* - .. image:: _static/Example_0_2D.png
- .. image:: _static/Example_1_2D.png


As can be seen, all the clusters are correctly identified at this time resolution ∆t. When using only the y-coordinate instead, as expected, only two clusters can be identified in both cases (the two plots look the same, but they actually come from the two different systems):
@@ -215,8 +208,8 @@
:widths: auto
:align: center

* - .. image:: _static/info_gain_clusters_1d_y.png
- .. image:: _static/info_gain_clusters_2d_y.png
* - .. image:: _static/Example_0_1D.png
- .. image:: _static/Example_1_1D.png


We can now plot, for every case and for every choice of ∆t, the corresponding information gain.
@@ -246,7 +239,7 @@ We can now plot, for every case and for every choice of ∆t, the corresponding information gain.
ax.set_ylabel(r"Information gain $\Delta H$ [bit]")
ax.set_xscale("log")
ax.legend()
plt.show()
fig.savefig("source/_static/Information_gains.png", dpi=600)

As can be seen in the plot below, clustering both datasets using only the y coordinate gives the same information gain, because only two clusters can be distinguished.

16 changes: 7 additions & 9 deletions docs/source/index.rst
@@ -31,10 +31,10 @@
:maxdepth: 2
:caption: Recipes:

Descriptors from a Trj <descr_from_trj>
Dimensionality reduction methods <soap_dim_red>
Information gain analysis <info_gain>
Entropy calculations <entropy>
Descriptors from a Trj <recipe_descr_from_trj>
Dimensionality reduction methods <recipe_soap_dim_red>
Entropy calculations <recipe_entropy>
Information gain analysis <recipe_info_gain>

.. toctree::
:hidden:
@@ -118,8 +118,8 @@ Examples
There are examples throughout the documentation and available in
the ``examples/`` directory of this repository.

There are also examples available in the ``cpctools`` repository
`here <https://github.com/GMPavanLab/cpctools/tree/main/Examples>`
There are also examples available in the ``cpctools`` repository here:
https://github.com/GMPavanLab/cpctools/tree/main/Examples


How To Cite
@@ -129,9 +129,7 @@ If you use ``dynsight`` please cite

https://github.com/GMPavanLab/dynsight

and

TBD
https://doi.org/10.48550/arXiv.2510.23493

* Most modules also use MDAnalysis, https://www.mdanalysis.org/pages/citations/
* If you use SOAP, please cite https://doi.org/10.1103/PhysRevB.87.184115 and DScribe https://singroup.github.io/dscribe/latest/citing.html
@@ -45,7 +45,7 @@ it's directly calculated by the :class:`.trajectory.Trj.get_soap()` method.
selection="all", # compute on a selection of particles
centers="all", # compute for a selection of centers
respect_pbc=False, # consider PBC
n_core=1, # use multiprocessing to speed up calculations
n_jobs=1, # use multiprocessing to speed up calculations
)

Number of neighbors and LENS
56 changes: 30 additions & 26 deletions docs/source/entropy.rst → docs/source/recipe_entropy.rst
@@ -2,7 +2,7 @@ Entropy calculations
====================

This recipe explains how to compute Shannon entropy for different types of
datasets using the functions in the `dynsight.analysis` module.
datasets using the functions in the ``dynsight.analysis`` module.

First of all, we import all the packages and objects we'll need:

@@ -25,31 +25,29 @@ should be equal to log2(6) bits.
n_sample = 10000
rolls = rng.integers(1, 7, size=n_sample)

dice_entropy = dynsight.analysis.compute_shannon(
data=rolls.astype(float),
data_range=(1,6),
n_bins=6,
units="bit",
dice_entropy = dynsight.analysis.shannon(
data=rolls,
method="histo",
base=2,
)
# dice_entropy = 2.584832195231254 ~ log2(6)
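
For reference, this matches the closed form for a uniform distribution
over six outcomes (a standard identity, not part of this PR):

.. math::

    H = -\sum_{k=1}^{6} \frac{1}{6} \log_2 \frac{1}{6} = \log_2 6 \approx 2.585 \ \text{bits}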


Entropy of a discrete multivariate variable
-------------------------------------------

Let's compute the Shanon entropy of rolling `two` dices ``n_sample`` times,
Let's compute the Shannon entropy of rolling two dice ``n_sample`` times,
which should be equal to log2(36) bits.

.. testcode:: recipe4-test

n_sample = 10000
rolls = rng.integers(1, 7, size=(n_sample, 2))

dices_entropy = dynsight.analysis.compute_shannon_multi(
data=rolls.astype(float),
data_ranges=[(1,6), (1,6)],
n_bins=[6, 6],
units="bit",
dices_entropy = dynsight.analysis.shannon(
data=rolls,
method="histo",
base=2,
)
# dices_entropy = 5.168428344754391 ~ log2(36)
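
Since the two dice are independent, their entropies add (again a standard
identity, not part of this PR):

.. math::

    H = \log_2 36 = 2 \log_2 6 \approx 5.170 \ \text{bits}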

Expand All @@ -58,26 +56,30 @@ Entropy of a continuous variable
---------------------------------

Shannon entropy is not uniquely defined for continuous variables, but the
difference between the entropy of different distribution is. Let's compute the
difference between the Shannon entropy of two Gaussian distributions, with
standard deviations respectively equal to 1 and 2, which should be 1 bit.
difference between the entropies of different distributions is.
For continuous variables, we need to use the Kozachenko-Leonenko (KL)
estimator, passing the argument ``method="kl"``.
Let's compute the difference between the Shannon entropy of two Gaussian
distributions with standard deviations equal to 1 and 2, which should be 1 bit.

.. testcode:: recipe4-test

n_sample = 10000000
n_sample = 100000
data_1 = rng.normal(loc=0.0, scale=1.0, size=n_sample)
data_2 = rng.normal(loc=0.0, scale=2.0, size=n_sample)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy(
gauss_entropy_1 = dynsight.analysis.shannon(
data=data_1,
units="bit",
method="kl",
base=2,
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy(
gauss_entropy_2 = dynsight.analysis.shannon(
data=data_2,
units="bit",
method="kl",
base=2,
)
diff = gauss_entropy_2 - gauss_entropy_1
# diff = 1.0010395631476854
# diff = 0.9994806386420283
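
The expected value follows from the differential entropy of a Gaussian,
a standard result independent of this PR:

.. math::

    h(X) = \frac{1}{2} \log_2 (2 \pi e \sigma^2), \qquad
    h_2 - h_1 = \log_2 \frac{\sigma_2}{\sigma_1} = \log_2 2 = 1 \ \text{bit}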


Entropy of a continuous multivariate variable
@@ -104,16 +106,18 @@ which should be 2 bits.
size=n_sample,
)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy_multi(
gauss_entropy_1 = dynsight.analysis.shannon(
data=data_1,
units="bit",
method="kl",
base=2,
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy_multi(
gauss_entropy_2 = dynsight.analysis.shannon(
data=data_2,
units="bit",
method="kl",
base=2,
)
diff_2d = gauss_entropy_2 - gauss_entropy_1
# diff_2d = 2.0142525628908743
# diff_2d = 2.0101274002195764
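
Assuming the 2D example doubles the standard deviation along both
coordinates, as in the 1D case above, the two independent coordinates
contribute one bit each:

.. math::

    h = \frac{1}{2} \log_2 \left( (2 \pi e)^2 \det \Sigma \right), \qquad
    h_2 - h_1 = 2 \log_2 \frac{\sigma_2}{\sigma_1} = 2 \ \text{bits}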


.. raw:: html