69 changes: 69 additions & 0 deletions docs/source/_static/recipes/entropy.py
@@ -0,0 +1,69 @@
"""Copiable code from Recipe #4."""

import numpy as np

import dynsight

rng = np.random.default_rng(42) # set the random seed

# Entropy of a discrete variable
n_sample = 10000
rolls = rng.integers(1, 7, size=n_sample)
dice_entropy = dynsight.analysis.compute_shannon(
data=rolls.astype(float),
data_range=(1, 6),
n_bins=6,
units="bit",
)

# Entropy of a discrete multivariate variable
n_sample = 10000
rolls = rng.integers(1, 7, size=(n_sample, 2))
dices_entropy = dynsight.analysis.compute_shannon_multi(
data=rolls.astype(float),
data_ranges=[(1, 6), (1, 6)],
n_bins=[6, 6],
units="bit",
)


# Entropy of a continuous variable
n_sample = 10000000
data_1 = rng.normal(loc=0.0, scale=1.0, size=n_sample)
data_2 = rng.normal(loc=0.0, scale=2.0, size=n_sample)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy(
data=data_1,
units="bit",
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy(
data=data_2,
units="bit",
)
diff = gauss_entropy_2 - gauss_entropy_1


# Entropy of a continuous multivariate variable
n_sample = 100000
mean = [1, 1]
cov = np.array([[1, 0], [0, 1]])
data_1 = rng.multivariate_normal(
mean=mean,
cov=cov,
size=n_sample,
)
data_2 = rng.multivariate_normal(
mean=mean,
cov=cov * 4.0,
size=n_sample,
)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy_multi(
data=data_1,
units="bit",
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy_multi(
data=data_2,
units="bit",
)
diff_2d = gauss_entropy_2 - gauss_entropy_1
2 changes: 1 addition & 1 deletion docs/source/_static/recipes/info_gain.py
@@ -26,7 +26,7 @@ def info_gain_with_onion(
float,
]:
"""Performs full information gain analysis with Onion clustering."""
data_range = (np.min(data), np.max(data))
data_range = (float(np.min(data)), float(np.max(data)))

n_clusters = np.zeros(len(delta_t_list), dtype=int)
clusters_frac = []
1 change: 1 addition & 0 deletions docs/source/analysis.rst
@@ -18,6 +18,7 @@ information-based calculations.
compute_negentropy <_autosummary/dynsight.analysis.compute_negentropy>
compute_entropy_gain <_autosummary/dynsight.analysis.compute_entropy_gain>
compute_shannon_multi <_autosummary/dynsight.analysis.compute_shannon_multi>
compute_kl_entropy_multi <_autosummary/dynsight.analysis.compute_kl_entropy_multi>
compute_entropy_gain_multi <_autosummary/dynsight.analysis.compute_entropy_gain_multi>
sample_entropy <_autosummary/dynsight.analysis.sample_entropy>

14 changes: 7 additions & 7 deletions docs/source/descr_from_trj.rst
@@ -1,12 +1,12 @@
Descriptors from a :class:`.trajectory.Trj`
Descriptors from a :class:`.trajectory.Trj`
===========================================

This recipe explains how to compute descriptors directly from a
:class:`.trajectory.Trj` object.
This recipe explains how to compute descriptors directly from a
:class:`.trajectory.Trj` object.

.. warning::

This code works when run from the ``/docs`` directory of the ``dynsight``
This code works when run from the ``/docs`` directory of the ``dynsight``
repo. To use it elsewhere, you have to change the ``Path`` variables
accordingly.

@@ -26,7 +26,7 @@ it's directly calculated by the :class:`.trajectory.Trj.get_soap()` method.
.. warning::

Please consider that the SOAP dataset can be very large, due to the high
dimensionality, thus calculations can be expensive, and saving to/loading
dimensionality, thus calculations can be expensive, and saving to/loading
from file quite slow.

.. testcode:: recipe1-test
@@ -83,7 +83,7 @@ calculation can be sped up significantly.
)

Notice that, differently from SOAP - which is computed for every frame, LENS
is computed for every pair of frames. Thus, the LENS dataset has shape
is computed for every pair of frames. Thus, the LENS dataset has shape
``(n_particles, n_frames - 1)``. Consequently, if you need to match the LENS
values with the particles along the trajectory, you will need to use a sliced
trajectory (removing the last frame). The easiest way to do this is:
@@ -95,7 +95,7 @@ trajectory (removing the last frame). The easiest way to do this is:

.. raw:: html

<a class="btn-download" href="../_static/recipes/descr_from_trj.py" download>⬇️ Download Python Script</a>
<a class="btn-download" href="_static/recipes/descr_from_trj.py" download>⬇️ Download Python Script</a>

.. testcode:: recipe1-test
:hide:
129 changes: 129 additions & 0 deletions docs/source/entropy.rst
@@ -0,0 +1,129 @@
Entropy calculations
====================

This recipe explains how to compute Shannon entropy for different types of
datasets using the functions in the ``dynsight.analysis`` module.

First of all, we import all the packages and objects we'll need:

.. testcode:: recipe4-test

import numpy as np
import dynsight

rng = np.random.default_rng(42) # set the random seed


Entropy of a discrete variable
------------------------------

Let's compute the Shannon entropy of rolling a die ``n_sample`` times, which
should be equal to log2(6) bits.

.. testcode:: recipe4-test

n_sample = 10000
rolls = rng.integers(1, 7, size=n_sample)

dice_entropy = dynsight.analysis.compute_shannon(
data=rolls.astype(float),
data_range=(1,6),
n_bins=6,
units="bit",
)
# dice_entropy = 2.584832195231254 ~ log2(6)
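
For reference, the same number can be reproduced with plain numpy (a minimal
sketch reusing ``rolls`` from the block above, with one histogram bin per
face; this is only an illustration, not what ``compute_shannon`` does
internally):

.. code-block:: python

    counts = np.bincount(rolls, minlength=7)[1:]  # occurrences of faces 1 to 6
    probs = counts / counts.sum()
    entropy_np = -np.sum(probs * np.log2(probs))  # ~ log2(6) ~ 2.585 bits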


Entropy of a discrete multivariate variable
-------------------------------------------

Let's compute the Shannon entropy of rolling *two* dice ``n_sample`` times,
which should be equal to log2(36) bits.

.. testcode:: recipe4-test

n_sample = 10000
rolls = rng.integers(1, 7, size=(n_sample, 2))

dices_entropy = dynsight.analysis.compute_shannon_multi(
data=rolls.astype(float),
data_ranges=[(1,6), (1,6)],
n_bins=[6, 6],
units="bit",
)
# dices_entropy = 5.168428344754391 ~ log2(36)
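
Since the two rolls are independent, the joint entropy is simply the sum of
the marginal entropies, 2 * log2(6) = log2(36). A numpy-only cross-check (a
sketch reusing ``rolls`` from the block above, not part of the recipe):

.. code-block:: python

    # Joint probabilities over the 36 possible outcomes.
    joint_counts, _, _ = np.histogram2d(
        rolls[:, 0],
        rolls[:, 1],
        bins=6,
        range=[[0.5, 6.5], [0.5, 6.5]],
    )
    joint_probs = joint_counts / joint_counts.sum()
    nonzero = joint_probs > 0
    joint_entropy = -np.sum(joint_probs[nonzero] * np.log2(joint_probs[nonzero]))
    # joint_entropy ~ log2(36) ~ 5.17 bits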


Entropy of a continuous variable
---------------------------------

Shannon entropy is not uniquely defined for continuous variables, but the
difference between the entropies of two distributions is. Let's compute the
difference between the Shannon entropies of two Gaussian distributions with
standard deviations of 1 and 2, respectively, which should be 1 bit.

.. testcode:: recipe4-test

n_sample = 10000000
data_1 = rng.normal(loc=0.0, scale=1.0, size=n_sample)
data_2 = rng.normal(loc=0.0, scale=2.0, size=n_sample)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy(
data=data_1,
units="bit",
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy(
data=data_2,
units="bit",
)
diff = gauss_entropy_2 - gauss_entropy_1
# diff = 1.0010395631476854
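
The expected value can be checked analytically: the differential entropy of a
Gaussian with standard deviation sigma is 0.5 * log2(2 * pi * e * sigma^2)
bits, so doubling sigma adds exactly log2(2) = 1 bit. A minimal numpy-only
sketch (the ``gaussian_entropy_bits`` helper below is just for illustration,
it is not part of dynsight):

.. code-block:: python

    def gaussian_entropy_bits(sigma):
        """Analytical differential entropy of a 1D Gaussian, in bits."""
        return 0.5 * np.log2(2.0 * np.pi * np.e * sigma**2)

    analytical_diff = gaussian_entropy_bits(2.0) - gaussian_entropy_bits(1.0)
    # analytical_diff = 1.0, matching the estimated ``diff`` above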


Entropy of a continuous multivariate variable
---------------------------------------------

The same is true for multivariate distributions. Let's compute the difference
between the Shannon entropies of two bivariate Gaussian distributions with
standard deviations of 1 and 2, respectively, which should be 2 bits.

.. testcode:: recipe4-test

n_sample = 100000
mean = [1, 1]
cov = np.array([[1, 0], [0, 1]])
data_1 = rng.multivariate_normal(
mean=mean,
cov=cov,
size=n_sample,
)
data_2 = rng.multivariate_normal(
mean=mean,
cov=cov * 4.0,
size=n_sample,
)

gauss_entropy_1 = dynsight.analysis.compute_kl_entropy_multi(
data=data_1,
units="bit",
)
gauss_entropy_2 = dynsight.analysis.compute_kl_entropy_multi(
data=data_2,
units="bit",
)
diff_2d = gauss_entropy_2 - gauss_entropy_1
# diff_2d = 2.0142525628908743
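
Again, the expected value follows from the analytical formula: for a
d-dimensional Gaussian the differential entropy is
0.5 * log2((2 * pi * e)^d * det(cov)) bits, so scaling the covariance by 4 in
two dimensions adds 0.5 * log2(4^2) = 2 bits. A minimal numpy-only sketch
reusing ``cov`` from the block above (the ``mvn_entropy_bits`` helper is just
for illustration, not part of dynsight):

.. code-block:: python

    def mvn_entropy_bits(cov):
        """Analytical differential entropy of a multivariate Gaussian, in bits."""
        d = cov.shape[0]
        return 0.5 * np.log2((2.0 * np.pi * np.e) ** d * np.linalg.det(cov))

    analytical_diff_2d = mvn_entropy_bits(cov * 4.0) - mvn_entropy_bits(cov)
    # analytical_diff_2d = 2.0, matching the estimated ``diff_2d`` above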


.. raw:: html

<a class="btn-download" href="_static/recipes/entropy.py" download>⬇️ Download Python Script</a>

.. testcode:: recipe4-test
:hide:

assert np.isclose(dice_entropy, np.log2(6), rtol=1e-3)
assert np.isclose(dices_entropy, np.log2(36), rtol=1e-3)
assert np.isclose(diff, 1, rtol=1e-3, atol=1e-4)
assert np.isclose(diff_2d, 2, rtol=1e-2, atol=1e-2)
3 changes: 2 additions & 1 deletion docs/source/index.rst
@@ -34,6 +34,7 @@
Descriptors from a Trj <descr_from_trj>
Dimensionality reduction methods <soap_dim_red>
Information gain analysis <info_gain>
Entropy calculations <entropy>

.. toctree::
:hidden:
@@ -81,7 +82,7 @@ How to get started
------------------

We suggest you give a read to the ``dynsight.trajectory`` module documentation,
which offers a compact and easy way of using most of the ``dynsight`` tools.
which offers a compact and easy way of using most of the ``dynsight`` tools.
Also, the documentation offers some copiable Recipes and Examples for the most
common analyses.

10 changes: 5 additions & 5 deletions docs/source/info_gain.rst
@@ -3,16 +3,16 @@ Information gain analysis

For the theoretical aspects of this work, see https://doi.org/10.48550/arXiv.2504.12990.

This recipe explains how to compute the information gain through clustering
This recipe explains how to compute the information gain through clustering
analysis. We use a synthetic dataset containing a signal that oscillates
between 0 and 1, with Gaussian noise. Onion clustering is run on a broad
range of time resolutions ∆t. The information gain and the Shannon entropy of
the environments is computed for each value of ∆t. The analysis is implemented
the environments is computed for each value of ∆t. The analysis is implemented
using onion 2.0.0 ("onion smooth").

.. warning::

This code works when run from the ``/docs`` directory of the ``dynsight``
This code works when run from the ``/docs`` directory of the ``dynsight``
repo. To use it elsewhere, you have to change the ``Path`` variables
accordingly.

@@ -54,7 +54,7 @@ Let's start by creating a the synthetic dataset:

The following function takes as input the dataset, and a list of values
of time resolutions ∆t, and for each of these it performs Onion clustering, and
computes the information gain achieved through clustering with that ∆t.
computes the information gain achieved through clustering with that ∆t.

.. warning::

@@ -222,7 +222,7 @@ gain goes to 0.

.. raw:: html

<a class="btn-download" href="../_static/recipes/info_gain.py" download>⬇️ Download Python Script</a>
<a class="btn-download" href="_static/recipes/info_gain.py" download>⬇️ Download Python Script</a>

.. testcode:: recipe3-test
:hide:
12 changes: 6 additions & 6 deletions docs/source/soap_dim_red.rst
@@ -1,4 +1,4 @@
Dimensionality reduction methods
Dimensionality reduction methods
================================

This recipe explains how to compute descriptors via dimensionality reduction
@@ -15,12 +15,12 @@ from dynsight.utilities.
.. warning::

Please consider that the SOAP dataset can be very large, due to the high
dimensionality, thus calculations can be expensive, and saving to/loading
dimensionality, thus calculations can be expensive, and saving to/loading
from file quite slow.

.. warning::

This code works when run from the ``/docs`` directory of the ``dynsight``
This code works when run from the ``/docs`` directory of the ``dynsight``
repo. To use it elsewhere, you have to change the ``Path`` variables
accordingly.

@@ -189,7 +189,7 @@ parameters, and performs the TICA of the corresponding SOAP dataset.
)

The output :class:`.trajectory.Insight` stores the SOAP information in its
"meta" attribute, together with the ``lag_time`` parameter and ``rel_times``,
"meta" attribute, together with the ``lag_time`` parameter and ``rel_times``,
the relaxation times of the computed TICs.


@@ -257,7 +257,7 @@ The output :class:`.trajectory.Insight` stores the SOAP information in its
"meta" attribute, together with the ``delay`` parameter.

Notice that, differently from SOAP - which is computed for every frame, tSOAP
is computed for every pair of frames. Thus, the tSOAP dataset has shape
is computed for every pair of frames. Thus, the tSOAP dataset has shape
``(n_particles, n_frames - 1)``. Consequently, if you need to match the tSOAP
values with the particles along the trajectory, you will need to use a sliced
trajectory (removing the last frame). The easiest way to do this is:
@@ -269,7 +269,7 @@ trajectory (removing the last frame). The easiest way to do this is:

.. raw:: html

<a class="btn-download" href="../_static/recipes/soap_dim_red.py" download>⬇️ Download Python Script</a>
<a class="btn-download" href="_static/recipes/soap_dim_red.py" download>⬇️ Download Python Script</a>

.. testcode:: recipe2-test
:hide: