From 48efda2606a25dac30b6fe607f5c0ffbe2bf7146 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 16:37:38 +0200 Subject: [PATCH 01/16] units added to shannon computation. --- src/dynsight/_internal/analysis/entropy.py | 28 ++++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 30790072..5000b611 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: from numpy.typing import NDArray @@ -15,6 +15,7 @@ def compute_shannon( data: NDArray[np.float64], data_range: tuple[float, float], n_bins: int, + units: Literal["bit", "nat", "frac"] = "frac", ) -> float: """Compute the Shannon entropy of a univariate data distribution. @@ -31,6 +32,10 @@ def compute_shannon( n_bins: The number of bins with which the data histogram must be computed. + units: + The units of measure of the output entropy. If "frac", entropy is + normalized between 0 and 1 by dividing by log(n_bins). + Returns: The value of the normalized Shannon entropy of the dataset. @@ -67,11 +72,19 @@ def compute_shannon( ) probs = counts / np.sum(counts) # Data probabilities are needed entropy = -np.sum([p * np.log2(p) for p in probs if p > 0.0]) - entropy /= np.log2(n_bins) - return entropy + + if units == "bit": + return entropy + if units == "nat": + return entropy * np.log(2) + return entropy / np.log2(n_bins) -def compute_kl_entropy(data: NDArray[np.float64], n_neigh: int = 1) -> float: +def compute_kl_entropy( + data: NDArray[np.float64], + n_neigh: int = 1, + units: Literal["bit", "nat"] = "bit", +) -> float: """Estimate Shannon differential entropy using Kozachenko-Leonenko. The Kozachenko-Leonenko k-nearest neighbors method approximates @@ -86,6 +99,9 @@ def compute_kl_entropy(data: NDArray[np.float64], n_neigh: int = 1) -> float: n_neigh: The number of neighbors considered in the KL estimator. + units: + The units of measure of the output entropy. + Returns: The Shannon differential entropy of the dataset, in bits. @@ -112,7 +128,9 @@ def compute_kl_entropy(data: NDArray[np.float64], n_neigh: int = 1) -> float: eps = data[n_neigh:] - data[:-n_neigh] # n_neigh-th neighbor distances eps = np.clip(eps, 1e-10, None) # avoid log(0) const = digamma(n_data) - digamma(n_neigh) + 1 - return const + np.mean(np.log2(eps)) + if units == "bit": + return const + np.mean(np.log2(eps)) + return const + np.mean(np.log2(eps)) * np.log(2) def compute_negentropy(data: NDArray[np.float64]) -> float: From 29cf3b6095b0cf4b58645da85ad5eade34782303 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 16:42:03 +0200 Subject: [PATCH 02/16] units added to negentropy computation. --- src/dynsight/_internal/analysis/entropy.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 5000b611..8a54ef40 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -133,7 +133,10 @@ def compute_kl_entropy( return const + np.mean(np.log2(eps)) * np.log(2) -def compute_negentropy(data: NDArray[np.float64]) -> float: +def compute_negentropy( + data: NDArray[np.float64], + units: Literal["bit", "nat"] = "bit", +) -> float: """Estimate negentropy of a dataset. 
Negentropy is a measure of non-Gaussianity representing the distance @@ -149,8 +152,11 @@ def compute_negentropy(data: NDArray[np.float64]) -> float: data: The dataset for which the entropy is to be computed. + units: + The units of measure of the output negentropy. + Returns: - The negentropy of the dataset, in bits. + The negentropy of the dataset. Example: @@ -176,8 +182,8 @@ def compute_negentropy(data: NDArray[np.float64]) -> float: data_norm = (data - np.mean(data)) / np.std(data, ddof=1) sigma = np.std(data_norm, ddof=1) data_gauss = rng.normal(loc=0.0, scale=sigma, size=data.size) - h_gauss = compute_kl_entropy(data_gauss) - h_data = compute_kl_entropy(data_norm) + h_gauss = compute_kl_entropy(data_gauss, units=units) + h_data = compute_kl_entropy(data_norm, units=units) return h_gauss - h_data From b44e1f20136596b66ac017a25918bd0e35ec1cce Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 16:49:04 +0200 Subject: [PATCH 03/16] units added to shannon_multi computation. --- src/dynsight/_internal/analysis/entropy.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 8a54ef40..586cf6fe 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -191,6 +191,7 @@ def compute_shannon_multi( data: NDArray[np.float64], data_ranges: list[tuple[float, float]], n_bins: list[int], + units: Literal["bit", "nat", "frac"] = "frac", ) -> float: """Compute the Shannon entropy of a multivariate data distribution. @@ -210,6 +211,10 @@ def compute_shannon_multi( A list of integers specifying the number of bins for each dimension. + units: + The units of measure of the output entropy. If "frac", entropy is + normalized between 0 and 1 by dividing by log(n_bins). + Returns: The value of the normalized Shannon entropy of the dataset. @@ -248,9 +253,12 @@ def compute_shannon_multi( counts, _ = np.histogramdd(data, bins=n_bins, range=data_ranges) probs = counts / np.sum(counts) # Probability distribution entropy = -np.sum(probs[probs > 0] * np.log2(probs[probs > 0])) - entropy /= np.log2(np.prod(n_bins)) # Normalization - return entropy + if units == "bit": + return entropy + if units == "nat": + return entropy * np.log(2) + return entropy / np.log2(np.prod(n_bins)) # Normalization def compute_entropy_gain( From 5075169d9070794eeec79056f1f9a14300814ecb Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 17:05:39 +0200 Subject: [PATCH 04/16] Added function compute_kl_entropy_multi(). --- src/dynsight/_internal/analysis/entropy.py | 65 +++++++++++++++++++++- src/dynsight/analysis.py | 2 + 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 586cf6fe..9f7110e1 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -7,8 +7,9 @@ import numpy as np import numpy.typing as npt +from scipy.spatial import cKDTree from scipy.spatial.distance import cdist -from scipy.special import digamma +from scipy.special import digamma, gamma def compute_shannon( @@ -261,6 +262,68 @@ def compute_shannon_multi( return entropy / np.log2(np.prod(n_bins)) # Normalization +def compute_kl_entropy_multi( + data: NDArray[np.float64], + n_neigh: int = 1, + units: Literal["bit", "nat"] = "bit", +) -> float: + """Estimate Shannon differential entropy using Kozachenko-Leonenko. 
+ + This function works for multivariate distribution. + The Kozachenko-Leonenko k-nearest neighbors method approximates + differential entropy based on distances to nearest neighbors + in the sample space. It's main advantage is being parameter-free. + + Parameters: + data: + The dataset for which the entropy is to be computed. + Shape (n_data, n_dims) + + n_neigh: + The number of neighbors considered in the KL estimator. + + units: + The units of measure of the output entropy. + + Returns: + The Shannon differential entropy of the dataset, in bits. + + Example: + + .. testcode:: klm-entropy-test + + import numpy as np + from dynsight.analysis import compute_kl_entropy_multi + + np.random.seed(1234) + data = np.random.rand(10000, 2) + + data_entropy = compute_kl_entropy_multi(data) + + .. testcode:: klm-entropy-test + :hide: + + assert np.isclose(data_entropy, -4.319358938644518) + + """ + n_samples, dim = data.shape + tree = cKDTree(data) + eps, _ = tree.query(data, k=n_neigh + 1, p=2) + eps = eps[:, -1] # distance to the n_neigh-th neighbor + eps = np.clip(eps, 1e-10, None) # avoid log(0) + unit_ball_volume = (np.pi ** (dim / 2)) / gamma(dim / 2 + 1) + entropy = ( + digamma(n_samples) + - digamma(n_neigh) + + np.log2(unit_ball_volume) + + (dim / n_samples) * np.sum(np.log2(eps)) + ) + + if units == "bit": + return entropy + return entropy * np.log(2) + + def compute_entropy_gain( data: npt.NDArray[np.float64], labels: npt.NDArray[np.int64], diff --git a/src/dynsight/analysis.py b/src/dynsight/analysis.py index 3b59bcfa..4f08dc8a 100644 --- a/src/dynsight/analysis.py +++ b/src/dynsight/analysis.py @@ -4,6 +4,7 @@ compute_entropy_gain, compute_entropy_gain_multi, compute_kl_entropy, + compute_kl_entropy_multi, compute_negentropy, compute_shannon, compute_shannon_multi, @@ -22,6 +23,7 @@ "compute_entropy_gain", "compute_entropy_gain_multi", "compute_kl_entropy", + "compute_kl_entropy_multi", "compute_negentropy", "compute_rdf", "compute_shannon", From d2f3c54effeb4ea450d577df36e307a105bd6cc2 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 18:56:47 +0200 Subject: [PATCH 05/16] Solved bug in math formulas. --- src/dynsight/_internal/analysis/entropy.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 9f7110e1..4dd7fcff 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -121,17 +121,19 @@ def compute_kl_entropy( .. testcode:: kl-entropy-test :hide: - assert np.isclose(data_entropy, -3.3437736767342194) + assert np.isclose(data_entropy, -3.650626496174274) """ data = np.sort(data.flatten()) n_data = len(data) eps = data[n_neigh:] - data[:-n_neigh] # n_neigh-th neighbor distances eps = np.clip(eps, 1e-10, None) # avoid log(0) - const = digamma(n_data) - digamma(n_neigh) + 1 + const = digamma(n_data) - digamma(n_neigh) + np.log(2) # 1D volume + h_bits = const + np.mean(np.log2(eps)) if units == "bit": - return const + np.mean(np.log2(eps)) - return const + np.mean(np.log2(eps)) * np.log(2) + return h_bits + # nat + return h_bits * np.log(2) def compute_negentropy( From efec760e7662ec84f23708c15bbc521b47fe8cdc Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 18:57:15 +0200 Subject: [PATCH 06/16] Adding recipe for entropy calculations. 
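The recipe (docs/source/entropy.rst) walks through Shannon entropy for
discrete data with compute_shannon() and differential entropy for
continuous data with the Kozachenko-Leonenko estimator in
compute_kl_entropy(). Condensed, the discrete example amounts to the
sketch below (the exact doctest values live in the .rst file):

    import numpy as np
    import dynsight

    np.random.seed(42)
    rolls = np.random.randint(1, 7, size=10000)

    # With units="bit" the estimate approaches log2(6) ~ 2.585 bits
    # for a fair six-sided die.
    dice_entropy = dynsight.analysis.compute_shannon(
        data=rolls,
        data_range=(1, 6),
        n_bins=6,
        units="bit",
    )

The continuous example checks that the entropy difference between two
Gaussians with standard deviations 1 and 2 comes out close to 1 bit.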
--- docs/source/entropy.rst | 72 +++++++++++++++++++++++++++++++++++++++++ docs/source/index.rst | 3 +- 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 docs/source/entropy.rst diff --git a/docs/source/entropy.rst b/docs/source/entropy.rst new file mode 100644 index 00000000..e109a596 --- /dev/null +++ b/docs/source/entropy.rst @@ -0,0 +1,72 @@ +Entropy calculations +==================== + +This recipe explains how to compute Shannon entropy for different types of +datasets using the functions in the `dynsight.analysis` module. + +First of all, we import all the packages and objects we'll need: + +.. testcode:: recipe4-test + + import numpy as np + import dynsight + import matplotlib.pyplot as plt + + np.random.seed(42) # set the random seed + + +Entropy of a discrete variable +------------------------------ + +Let's compute the Shanon entropy of rolling a dice ``n_sample`` times, which +should be equal to log2(6) bit. + +.. testcode:: recipe4-test + + n_sample = 10000 + rolls = np.random.randint(1, 7, size=n_sample) + + dice_entropy = dynsight.analysis.compute_shannon( + data=rolls, + data_range=(1,6), + n_bins=6, + units="bit", + ) + # dice_entropy = 2.584832195231254 + + +Entropy of a continuous variable +--------------------------------- + +Shannon entropy is not univocally defined for continuous variables, but the +difference between the entropy of different distribution is. Let's compute the +difference between the Shannon entropy of two Gaussian distributions, with +standard deviations respectively equal to 1 and 2, which should be 1 bit. + +.. testcode:: recipe4-test + + n_sample = 10000000 + data_1 = np.random.normal(loc=0.0, scale=1.0, size=n_sample) + data_2 = np.random.normal(loc=0.0, scale=2.0, size=n_sample) + + gauss_entropy_1 = dynsight.analysis.compute_kl_entropy( + data=data_1, + units="bit", + ) + gauss_entropy_2 = dynsight.analysis.compute_kl_entropy( + data=data_2, + units="bit", + ) + diff = gauss_entropy_2 - gauss_entropy_1 + # diff = 1.0010395631476854 + + +%.. raw:: html +% +% ⬇️ Download Python Script + +.. testcode:: recipe4-test + :hide: + + assert np.isclose(dice_entropy, np.log2(6), rtol=1e-3) + assert np.isclose(diff, 1, rtol=1e-3, atol=1e-4) diff --git a/docs/source/index.rst b/docs/source/index.rst index a52ae8ac..f3a61eb7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,6 +33,7 @@ Descriptors from a Trj Dimensionality reduction methods + Entropy calculations Information gain analysis .. toctree:: @@ -81,7 +82,7 @@ How to get started ------------------ We suggest you give a read to the ``dynsight.trajectory`` module documentation, -which offers a compact and easy way of using most of the ``dynsight`` tools. +which offers a compact and easy way of using most of the ``dynsight`` tools. Also, the documentation offers some copiable Recipes and Examples for the most common analyses. From 2f0233a4ee806b27557c7d154f05d281995e1e95 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 19:07:53 +0200 Subject: [PATCH 07/16] Added discrete multivariate case. --- docs/source/entropy.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/source/entropy.rst b/docs/source/entropy.rst index e109a596..641bc513 100644 --- a/docs/source/entropy.rst +++ b/docs/source/entropy.rst @@ -32,7 +32,27 @@ should be equal to log2(6) bit. 
n_bins=6, units="bit", ) - # dice_entropy = 2.584832195231254 + # dice_entropy = 2.584832195231254 ~ log2(6) + + +Entropy of a discrete multivariate variable +------------------------------------------- + +Let's compute the Shanon entropy of rolling `two` dices ``n_sample`` times, +which should be equal to log2(36) bit. + +.. testcode:: recipe4-test + + n_sample = 10000 + rolls = np.random.randint(1, 7, size=(n_sample, 2)) + + dices_entropy = dynsight.analysis.compute_shannon_multi( + data=rolls, + data_ranges=[(1,6), (1,6)], + n_bins=[6, 6], + units="bit", + ) + # dices_entropy = 5.168428344754391 ~ log2(36) Entropy of a continuous variable @@ -61,6 +81,9 @@ standard deviations respectively equal to 1 and 2, which should be 1 bit. # diff = 1.0010395631476854 + + + %.. raw:: html % % ⬇️ Download Python Script @@ -69,4 +92,5 @@ standard deviations respectively equal to 1 and 2, which should be 1 bit. :hide: assert np.isclose(dice_entropy, np.log2(6), rtol=1e-3) + assert np.isclose(dices_entropy, np.log2(36), rtol=1e-3) assert np.isclose(diff, 1, rtol=1e-3, atol=1e-4) From c532cd1a58d5629c5ad22baa7f5fe60e2f8e9d58 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 19:17:59 +0200 Subject: [PATCH 08/16] Added continuous multivariate case. --- docs/source/entropy.rst | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/source/entropy.rst b/docs/source/entropy.rst index 641bc513..125e9bf5 100644 --- a/docs/source/entropy.rst +++ b/docs/source/entropy.rst @@ -19,7 +19,7 @@ Entropy of a discrete variable ------------------------------ Let's compute the Shanon entropy of rolling a dice ``n_sample`` times, which -should be equal to log2(6) bit. +should be equal to log2(6) bits. .. testcode:: recipe4-test @@ -39,7 +39,7 @@ Entropy of a discrete multivariate variable ------------------------------------------- Let's compute the Shanon entropy of rolling `two` dices ``n_sample`` times, -which should be equal to log2(36) bit. +which should be equal to log2(36) bits. .. testcode:: recipe4-test @@ -81,7 +81,40 @@ standard deviations respectively equal to 1 and 2, which should be 1 bit. # diff = 1.0010395631476854 +Entropy of a continuous multivariate variable +--------------------------------------------- +And the same is true for multivariate distributions. Let's compute the +difference between the Shannon entropy of two bivariate Gaussian +distributions, with standard deviations respectively equal to 1 and 2, +which should be 2 bits. + +.. testcode:: recipe4-test + + n_sample = 100000 + mean = [1, 1] + cov = np.array([[1, 0], [0, 1]]) + data_1 = np.random.multivariate_normal( + mean=mean, + cov=cov, + size=n_sample, + ) + data_2 = np.random.multivariate_normal( + mean=mean, + cov=cov * 4.0, + size=n_sample, + ) + + gauss_entropy_1 = dynsight.analysis.compute_kl_entropy_multi( + data=data_1, + units="bit", + ) + gauss_entropy_2 = dynsight.analysis.compute_kl_entropy_multi( + data=data_2, + units="bit", + ) + diff_2d = gauss_entropy_2 - gauss_entropy_1 + # diff_2d = 1.9983384346024948 %.. raw:: html @@ -94,3 +127,4 @@ standard deviations respectively equal to 1 and 2, which should be 1 bit. 
assert np.isclose(dice_entropy, np.log2(6), rtol=1e-3) assert np.isclose(dices_entropy, np.log2(36), rtol=1e-3) assert np.isclose(diff, 1, rtol=1e-3, atol=1e-4) + assert np.isclose(diff_2d, 2, rtol=1e-3, atol=1e-4) From 1b2e2c16f11e06d038c12c0fe84a7b8e65e7f641 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 19:34:34 +0200 Subject: [PATCH 09/16] Added downloadable version. --- docs/source/_static/recipes/entropy.py | 69 ++++++++++++++++++++++++++ docs/source/entropy.rst | 25 +++++----- docs/source/index.rst | 2 +- 3 files changed, 82 insertions(+), 14 deletions(-) create mode 100644 docs/source/_static/recipes/entropy.py diff --git a/docs/source/_static/recipes/entropy.py b/docs/source/_static/recipes/entropy.py new file mode 100644 index 00000000..bc12e793 --- /dev/null +++ b/docs/source/_static/recipes/entropy.py @@ -0,0 +1,69 @@ +"""Copiable code from Recipe #4.""" + +import numpy as np + +import dynsight + +rng = np.random.default_rng(42) # set the random seed + +# Entropy of a discrete variable +n_sample = 10000 +rolls = rng.integers(1, 7, size=n_sample) +dice_entropy = dynsight.analysis.compute_shannon( + data=rolls.astype(float), + data_range=(1, 6), + n_bins=6, + units="bit", +) + +# Entropy of a discrete multivariate variable +n_sample = 10000 +rolls = rng.integers(1, 7, size=(n_sample, 2)) +dices_entropy = dynsight.analysis.compute_shannon_multi( + data=rolls.astype(float), + data_ranges=[(1, 6), (1, 6)], + n_bins=[6, 6], + units="bit", +) + + +# Entropy of a continuous variable +n_sample = 10000000 +data_1 = rng.normal(loc=0.0, scale=1.0, size=n_sample) +data_2 = rng.normal(loc=0.0, scale=2.0, size=n_sample) + +gauss_entropy_1 = dynsight.analysis.compute_kl_entropy( + data=data_1, + units="bit", +) +gauss_entropy_2 = dynsight.analysis.compute_kl_entropy( + data=data_2, + units="bit", +) +diff = gauss_entropy_2 - gauss_entropy_1 + + +# Entropy of a continuous multivariate variable +n_sample = 100000 +mean = [1, 1] +cov = np.array([[1, 0], [0, 1]]) +data_1 = rng.multivariate_normal( + mean=mean, + cov=cov, + size=n_sample, +) +data_2 = rng.multivariate_normal( + mean=mean, + cov=cov * 4.0, + size=n_sample, +) + +gauss_entropy_1 = dynsight.analysis.compute_kl_entropy_multi( + data=data_1, + units="bit", +) +gauss_entropy_2 = dynsight.analysis.compute_kl_entropy_multi( + data=data_2, + units="bit", +) +diff_2d = gauss_entropy_2 - gauss_entropy_1 diff --git a/docs/source/entropy.rst b/docs/source/entropy.rst index 125e9bf5..c1eb58dd 100644 --- a/docs/source/entropy.rst +++ b/docs/source/entropy.rst @@ -10,9 +10,8 @@ First of all, we import all the packages and objects we'll need: import numpy as np import dynsight - import matplotlib.pyplot as plt - np.random.seed(42) # set the random seed + rng = np.random.default_rng(42) # set the random seed Entropy of a discrete variable @@ -24,10 +23,10 @@ should be equal to log2(6) bits. .. testcode:: recipe4-test n_sample = 10000 - rolls = np.random.randint(1, 7, size=n_sample) + rolls = rng.integers(1, 7, size=n_sample) dice_entropy = dynsight.analysis.compute_shannon( - data=rolls, + data=rolls.astype(float), data_range=(1,6), n_bins=6, units="bit", @@ -44,10 +43,10 @@ which should be equal to log2(36) bits. .. 
testcode:: recipe4-test n_sample = 10000 - rolls = np.random.randint(1, 7, size=(n_sample, 2)) + rolls = rng.integers(1, 7, size=(n_sample, 2)) dices_entropy = dynsight.analysis.compute_shannon_multi( - data=rolls, + data=rolls.astype(float), data_ranges=[(1,6), (1,6)], n_bins=[6, 6], units="bit", @@ -66,8 +65,8 @@ standard deviations respectively equal to 1 and 2, which should be 1 bit. .. testcode:: recipe4-test n_sample = 10000000 - data_1 = np.random.normal(loc=0.0, scale=1.0, size=n_sample) - data_2 = np.random.normal(loc=0.0, scale=2.0, size=n_sample) + data_1 = rng.normal(loc=0.0, scale=1.0, size=n_sample) + data_2 = rng.normal(loc=0.0, scale=2.0, size=n_sample) gauss_entropy_1 = dynsight.analysis.compute_kl_entropy( data=data_1, @@ -94,12 +93,12 @@ which should be 2 bits. n_sample = 100000 mean = [1, 1] cov = np.array([[1, 0], [0, 1]]) - data_1 = np.random.multivariate_normal( + data_1 = rng.multivariate_normal( mean=mean, cov=cov, size=n_sample, ) - data_2 = np.random.multivariate_normal( + data_2 = rng.multivariate_normal( mean=mean, cov=cov * 4.0, size=n_sample, @@ -117,9 +116,9 @@ which should be 2 bits. # diff_2d = 1.9983384346024948 -%.. raw:: html -% -% ⬇️ Download Python Script +.. raw:: html + + ⬇️ Download Python Script .. testcode:: recipe4-test :hide: diff --git a/docs/source/index.rst b/docs/source/index.rst index f3a61eb7..3ae663ba 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,8 +33,8 @@ Descriptors from a Trj Dimensionality reduction methods - Entropy calculations Information gain analysis + Entropy calculations .. toctree:: :hidden: From 26c37177eb163ffb95f5206f48cc19b0ae3a9e6c Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 19:44:55 +0200 Subject: [PATCH 10/16] Fixing downloadable recipes. --- docs/source/descr_from_trj.rst | 14 +++++++------- docs/source/entropy.rst | 4 ++-- docs/source/info_gain.rst | 10 +++++----- docs/source/soap_dim_red.rst | 12 ++++++------ 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/source/descr_from_trj.rst b/docs/source/descr_from_trj.rst index b7d21716..213056dc 100644 --- a/docs/source/descr_from_trj.rst +++ b/docs/source/descr_from_trj.rst @@ -1,12 +1,12 @@ -Descriptors from a :class:`.trajectory.Trj` +Descriptors from a :class:`.trajectory.Trj` =========================================== -This recipe explains how to compute descriptors directly from a -:class:`.trajectory.Trj` object. +This recipe explains how to compute descriptors directly from a +:class:`.trajectory.Trj` object. .. warning:: - This code works when run from the ``/docs`` directory of the ``dynsight`` + This code works when run from the ``/docs`` directory of the ``dynsight`` repo. To use it elsewhere, you have to change the ``Path`` variables accordingly. @@ -26,7 +26,7 @@ it's directly calculated by the :class:`.trajectory.Trj.get_soap()` method. .. warning:: Please consider that the SOAP dataset can be very large, due to the high - dimensionality, thus calculations can be expensive, and saving to/loading + dimensionality, thus calculations can be expensive, and saving to/loading from file quite slow. .. testcode:: recipe1-test @@ -83,7 +83,7 @@ calculation can be sped up significantly. ) Notice that, differently from SOAP - which is computed for every frame, LENS -is computed for every pair of frames. Thus, the LENS dataset has shape +is computed for every pair of frames. Thus, the LENS dataset has shape ``(n_particles, n_frames - 1)``. 
Consequently, if you need to match the LENS values with the particles along the trajectory, you will need to use a sliced trajectory (removing the last frame). The easiest way to do this is: @@ -95,7 +95,7 @@ trajectory (removing the last frame). The easiest way to do this is: .. raw:: html - ⬇️ Download Python Script + ⬇️ Download Python Script .. testcode:: recipe1-test :hide: diff --git a/docs/source/entropy.rst b/docs/source/entropy.rst index c1eb58dd..115afadb 100644 --- a/docs/source/entropy.rst +++ b/docs/source/entropy.rst @@ -113,7 +113,7 @@ which should be 2 bits. units="bit", ) diff_2d = gauss_entropy_2 - gauss_entropy_1 - # diff_2d = 1.9983384346024948 + # diff_2d = 2.0142525628908743 .. raw:: html @@ -126,4 +126,4 @@ which should be 2 bits. assert np.isclose(dice_entropy, np.log2(6), rtol=1e-3) assert np.isclose(dices_entropy, np.log2(36), rtol=1e-3) assert np.isclose(diff, 1, rtol=1e-3, atol=1e-4) - assert np.isclose(diff_2d, 2, rtol=1e-3, atol=1e-4) + assert np.isclose(diff_2d, 2, rtol=1e-2, atol=1e-2) diff --git a/docs/source/info_gain.rst b/docs/source/info_gain.rst index 44b1d2f2..5af73631 100644 --- a/docs/source/info_gain.rst +++ b/docs/source/info_gain.rst @@ -3,16 +3,16 @@ Information gain analysis For the theoretical aspects of this work, see https://doi.org/10.48550/arXiv.2504.12990. -This recipe explains how to compute the information gain through clustering +This recipe explains how to compute the information gain through clustering analysis. We use a synthetic dataset containing a signal that oscillates between 0 and 1, with Gaussian noise. Onion clustering is run on a broad range of time resolutions ∆t. The information gain and the Shannon entropy of -the environments is computed for each value of ∆t. The analysis is implemented +the environments is computed for each value of ∆t. The analysis is implemented using onion 2.0.0 ("onion smooth"). .. warning:: - This code works when run from the ``/docs`` directory of the ``dynsight`` + This code works when run from the ``/docs`` directory of the ``dynsight`` repo. To use it elsewhere, you have to change the ``Path`` variables accordingly. @@ -54,7 +54,7 @@ Let's start by creating a the synthetic dataset: The following function takes as input the dataset, and a list of values of time resolutions ∆t, and for each of these it performs Onion clustering, and -computes the information gain achieved through clustering with that ∆t. +computes the information gain achieved through clustering with that ∆t. .. warning:: @@ -222,7 +222,7 @@ gain goes to 0. .. raw:: html - ⬇️ Download Python Script + ⬇️ Download Python Script .. testcode:: recipe3-test :hide: diff --git a/docs/source/soap_dim_red.rst b/docs/source/soap_dim_red.rst index 59740a65..06521f45 100644 --- a/docs/source/soap_dim_red.rst +++ b/docs/source/soap_dim_red.rst @@ -1,4 +1,4 @@ -Dimensionality reduction methods +Dimensionality reduction methods ================================ This recipe explains how to compute descriptors via dimensionality reduction @@ -15,12 +15,12 @@ from dynsight.utilities. .. warning:: Please consider that the SOAP dataset can be very large, due to the high - dimensionality, thus calculations can be expensive, and saving to/loading + dimensionality, thus calculations can be expensive, and saving to/loading from file quite slow. .. warning:: - This code works when run from the ``/docs`` directory of the ``dynsight`` + This code works when run from the ``/docs`` directory of the ``dynsight`` repo. 
To use it elsewhere, you have to change the ``Path`` variables accordingly. @@ -189,7 +189,7 @@ parameters, and performs the TICA of the corresponding SOAP dataset. ) The output :class:`.trajectory.Insight` stores the SOAP information in its -"meta" attribute, together with the ``lag_time`` parameter and ``rel_times``, +"meta" attribute, together with the ``lag_time`` parameter and ``rel_times``, the relaxation times of the computed TICs. @@ -257,7 +257,7 @@ The output :class:`.trajectory.Insight` stores the SOAP information in its "meta" attribute, together with the ``delay`` parameter. Notice that, differently from SOAP - which is computed for every frame, tSOAP -is computed for every pair of frames. Thus, the tSOAP dataset has shape +is computed for every pair of frames. Thus, the tSOAP dataset has shape ``(n_particles, n_frames - 1)``. Consequently, if you need to match the tSOAP values with the particles along the trajectory, you will need to use a sliced trajectory (removing the last frame). The easiest way to do this is: @@ -269,7 +269,7 @@ trajectory (removing the last frame). The easiest way to do this is: .. raw:: html - ⬇️ Download Python Script + ⬇️ Download Python Script .. testcode:: recipe2-test :hide: From 8115f74e4e775a2204026e0b0c7a3afead738a58 Mon Sep 17 00:00:00 2001 From: matteobecchi Date: Wed, 1 Oct 2025 19:50:18 +0200 Subject: [PATCH 11/16] Wrong units arg raises ValueError. --- src/dynsight/_internal/analysis/entropy.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 4dd7fcff..4cc141cb 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -66,6 +66,9 @@ def compute_shannon( if data.size == 0: msg = "data is empty" raise ValueError(msg) + if units not in ["bit", "nat", "frac"]: + msg = "units must be bit, nat or frac." + raise ValueError(msg) counts, _ = np.histogram( data, bins=n_bins, @@ -124,6 +127,9 @@ def compute_kl_entropy( assert np.isclose(data_entropy, -3.650626496174274) """ + if units not in ["bit", "nat"]: + msg = "units must be bit or nat." + raise ValueError(msg) data = np.sort(data.flatten()) n_data = len(data) eps = data[n_neigh:] - data[:-n_neigh] # n_neigh-th neighbor distances @@ -180,6 +186,9 @@ def compute_negentropy( assert np.isclose(negentropy, 0.2609932580146541) """ + if units not in ["bit", "nat"]: + msg = "units must be bit or nat." + raise ValueError(msg) data = data.flatten() rng = np.random.default_rng(seed=1234) data_norm = (data - np.mean(data)) / np.std(data, ddof=1) @@ -252,6 +261,9 @@ def compute_shannon_multi( if n_dims != len(data_ranges) or n_dims != len(n_bins): msg = "Mismatch between data dimensions, data_ranges, and n_bins" raise ValueError(msg) + if units not in ["bit", "nat", "frac"]: + msg = "units must be bit, nat or frac." + raise ValueError(msg) counts, _ = np.histogramdd(data, bins=n_bins, range=data_ranges) probs = counts / np.sum(counts) # Probability distribution @@ -308,6 +320,9 @@ def compute_kl_entropy_multi( assert np.isclose(data_entropy, -4.319358938644518) """ + if units not in ["bit", "nat"]: + msg = "units must be bit or nat." + raise ValueError(msg) n_samples, dim = data.shape tree = cKDTree(data) eps, _ = tree.query(data, k=n_neigh + 1, p=2) From 6578a08e33e2e5f10f8297d3b31131ef8c75d14d Mon Sep 17 00:00:00 2001 From: Matteo Becchi Date: Thu, 2 Oct 2025 09:52:46 +0200 Subject: [PATCH 12/16] Fixing mypy complaint. 
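mypy flags the tuple built from np.min()/np.max() in the info_gain
recipe, presumably because the numpy stubs type those reductions as
numpy scalar types rather than builtin float, so the value no longer
matches a tuple[float, float] annotation downstream (compute_shannon's
data_range, for instance, is typed that way). Casting through float()
satisfies the annotation without a type: ignore. Schematically (the
helper name below is only for illustration):

    import numpy as np
    from numpy.typing import NDArray

    def data_range_of(data: NDArray[np.float64]) -> tuple[float, float]:
        # np.min/np.max return numpy scalars; wrap them in float() so
        # the returned tuple really is tuple[float, float] for mypy.
        return (float(np.min(data)), float(np.max(data)))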
--- docs/source/_static/recipes/info_gain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/_static/recipes/info_gain.py b/docs/source/_static/recipes/info_gain.py index 873839a0..97eccbe1 100644 --- a/docs/source/_static/recipes/info_gain.py +++ b/docs/source/_static/recipes/info_gain.py @@ -26,7 +26,7 @@ def info_gain_with_onion( float, ]: """Performs full information gain analysis with Onion clustering.""" - data_range = (np.min(data), np.max(data)) + data_range = (float(np.min(data)), float(np.max(data))) n_clusters = np.zeros(len(delta_t_list), dtype=int) clusters_frac = [] From ea6dd61fc29867fc12afbc1b1294e2a4baad1a60 Mon Sep 17 00:00:00 2001 From: Matteo Becchi Date: Thu, 2 Oct 2025 10:11:38 +0200 Subject: [PATCH 13/16] Using tuple in value check; explaining units of measure. --- src/dynsight/_internal/analysis/entropy.py | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 4cc141cb..69c507a7 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -35,7 +35,8 @@ def compute_shannon( units: The units of measure of the output entropy. If "frac", entropy is - normalized between 0 and 1 by dividing by log(n_bins). + normalized between 0 and 1 by dividing by log(n_bins). If "bit", + it is computed with log base 2, if "nat" with natural log. Returns: The value of the normalized Shannon entropy of the dataset. @@ -66,7 +67,7 @@ def compute_shannon( if data.size == 0: msg = "data is empty" raise ValueError(msg) - if units not in ["bit", "nat", "frac"]: + if units not in ("bit", "nat", "frac"): msg = "units must be bit, nat or frac." raise ValueError(msg) counts, _ = np.histogram( @@ -104,7 +105,8 @@ def compute_kl_entropy( The number of neighbors considered in the KL estimator. units: - The units of measure of the output entropy. + The units of measure of the output entropy. If "bit", it is + computed with log base 2, if "nat" with natural log. Returns: The Shannon differential entropy of the dataset, in bits. @@ -127,7 +129,7 @@ def compute_kl_entropy( assert np.isclose(data_entropy, -3.650626496174274) """ - if units not in ["bit", "nat"]: + if units not in ("bit", "nat"): msg = "units must be bit or nat." raise ValueError(msg) data = np.sort(data.flatten()) @@ -162,7 +164,8 @@ def compute_negentropy( The dataset for which the entropy is to be computed. units: - The units of measure of the output negentropy. + The units of measure of the output negentropy. If "bit", it is + computed with log base 2, if "nat" with natural log. Returns: The negentropy of the dataset. @@ -186,7 +189,7 @@ def compute_negentropy( assert np.isclose(negentropy, 0.2609932580146541) """ - if units not in ["bit", "nat"]: + if units not in ("bit", "nat"): msg = "units must be bit or nat." raise ValueError(msg) data = data.flatten() @@ -225,7 +228,8 @@ def compute_shannon_multi( units: The units of measure of the output entropy. If "frac", entropy is - normalized between 0 and 1 by dividing by log(n_bins). + normalized between 0 and 1 by dividing by log(n_bins). If "bit", + it is computed with log base 2, if "nat" with natural log. Returns: The value of the normalized Shannon entropy of the dataset. 
@@ -261,7 +265,7 @@ def compute_shannon_multi( if n_dims != len(data_ranges) or n_dims != len(n_bins): msg = "Mismatch between data dimensions, data_ranges, and n_bins" raise ValueError(msg) - if units not in ["bit", "nat", "frac"]: + if units not in ("bit", "nat", "frac"): msg = "units must be bit, nat or frac." raise ValueError(msg) @@ -297,7 +301,8 @@ def compute_kl_entropy_multi( The number of neighbors considered in the KL estimator. units: - The units of measure of the output entropy. + The units of measure of the output entropy. If "bit", it is + computed with log base 2, if "nat" with natural log. Returns: The Shannon differential entropy of the dataset, in bits. @@ -320,7 +325,7 @@ def compute_kl_entropy_multi( assert np.isclose(data_entropy, -4.319358938644518) """ - if units not in ["bit", "nat"]: + if units not in ("bit", "nat"): msg = "units must be bit or nat." raise ValueError(msg) n_samples, dim = data.shape From f0a4d763f01b519e989a96e0b1cdb00a557bbdd8 Mon Sep 17 00:00:00 2001 From: Matteo Becchi Date: Thu, 2 Oct 2025 10:35:13 +0200 Subject: [PATCH 14/16] Info gain with KL estimator. --- src/dynsight/_internal/analysis/entropy.py | 49 +++++++++++++++------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index 69c507a7..d4a1875f 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -349,6 +349,7 @@ def compute_kl_entropy_multi( def compute_entropy_gain( data: npt.NDArray[np.float64], labels: npt.NDArray[np.int64], + method: Literal["histo", "kl"] = "histo", n_bins: int = 20, ) -> tuple[float, float, float, float]: """Compute the relative information gained by the clustering. @@ -364,6 +365,11 @@ def compute_entropy_gain( The number of bins with which the data histogram must be computed. Default is 20. + method: + How the Shannon entropy is computed. You shoud use "histo" for + discrete variables, and "kl" for continuous variables. If "kl" is + chosen, the "n_bins" arg is irrelevant. + Returns: * The absolute information gain :math:`H_0 - H_{clust}` * The relative information gain :math:`(H_0 - H_{clust}) / H_0` @@ -399,28 +405,41 @@ def compute_entropy_gain( "must have same shape[0]" ) raise RuntimeError(msg) + if method not in ("histo", "kl"): + msg = "method must be histo or kl." 
+ raise ValueError(msg) - data_range = (float(np.min(data)), float(np.max(data))) - - # Compute the entropy of the raw data - total_entropy = compute_shannon( - data, - data_range, - n_bins, - ) - - # Compute the fraction and the entropy of the single clusters n_clusters = np.unique(labels).size frac, entr = np.zeros(n_clusters), np.zeros(n_clusters) - for i, label in enumerate(np.unique(labels)): - mask = labels == label - frac[i] = np.sum(mask) / labels.size - entr[i] = compute_shannon( - data[mask], + + if method == "histo": + data_range = (float(np.min(data)), float(np.max(data))) + # Compute the total entropy of the data + total_entropy = compute_shannon( + data, data_range, n_bins, ) + # Compute the fraction and the entropy of the single clusters + for i, label in enumerate(np.unique(labels)): + mask = labels == label + frac[i] = np.sum(mask) / labels.size + entr[i] = compute_shannon( + data[mask], + data_range, + n_bins, + ) + else: # method == "kl" + # Compute the total entropy of the data + total_entropy = compute_kl_entropy(data) + + # Compute the fraction and the entropy of the single clusters + for i, label in enumerate(np.unique(labels)): + mask = labels == label + frac[i] = np.sum(mask) / labels.size + entr[i] = compute_kl_entropy(data[mask]) + # Compute the entropy of the clustered data clustered_entropy = np.dot(frac, entr) info_gain = total_entropy - clustered_entropy From 74d3ab1aad9a0a78831701c1be6e1ab19e60cb01 Mon Sep 17 00:00:00 2001 From: Matteo Becchi Date: Thu, 2 Oct 2025 11:34:40 +0200 Subject: [PATCH 15/16] Debugging. Differences were OK but absolute values were wrong. --- src/dynsight/_internal/analysis/entropy.py | 92 +++++++++++++++------- tests/analysis/test_shannon.py | 21 +++++ 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/dynsight/_internal/analysis/entropy.py b/src/dynsight/_internal/analysis/entropy.py index d4a1875f..3e3d15ba 100644 --- a/src/dynsight/_internal/analysis/entropy.py +++ b/src/dynsight/_internal/analysis/entropy.py @@ -126,7 +126,7 @@ def compute_kl_entropy( .. testcode:: kl-entropy-test :hide: - assert np.isclose(data_entropy, -3.650626496174274) + assert np.isclose(data_entropy, 0.9891067080934253) """ if units not in ("bit", "nat"): @@ -137,11 +137,11 @@ def compute_kl_entropy( eps = data[n_neigh:] - data[:-n_neigh] # n_neigh-th neighbor distances eps = np.clip(eps, 1e-10, None) # avoid log(0) const = digamma(n_data) - digamma(n_neigh) + np.log(2) # 1D volume - h_bits = const + np.mean(np.log2(eps)) - if units == "bit": - return h_bits - # nat - return h_bits * np.log(2) + if units == "nat": + const = digamma(n_data) - digamma(n_neigh) + np.log(2) + return const + np.mean(np.log(eps)) + const = (digamma(n_data) - digamma(n_neigh)) / np.log(2) + 1.0 + return const + np.mean(np.log2(eps)) def compute_negentropy( @@ -322,7 +322,7 @@ def compute_kl_entropy_multi( .. 
testcode:: klm-entropy-test :hide: - assert np.isclose(data_entropy, -4.319358938644518) + assert np.isclose(data_entropy, 0.013521446183128614) """ if units not in ("bit", "nat"): @@ -334,16 +334,18 @@ def compute_kl_entropy_multi( eps = eps[:, -1] # distance to the n_neigh-th neighbor eps = np.clip(eps, 1e-10, None) # avoid log(0) unit_ball_volume = (np.pi ** (dim / 2)) / gamma(dim / 2 + 1) - entropy = ( + # --- Compute in nats --- + entropy_nats = ( digamma(n_samples) - digamma(n_neigh) - + np.log2(unit_ball_volume) - + (dim / n_samples) * np.sum(np.log2(eps)) + + np.log(unit_ball_volume) + + (dim / n_samples) * np.sum(np.log(eps)) ) - if units == "bit": - return entropy - return entropy * np.log(2) + if units == "nat": + return entropy_nats + # bits + return entropy_nats / np.log(2) def compute_entropy_gain( @@ -366,9 +368,11 @@ def compute_entropy_gain( Default is 20. method: - How the Shannon entropy is computed. You shoud use "histo" for + How the Shannon entropy is computed. You should use "histo" for discrete variables, and "kl" for continuous variables. If "kl" is - chosen, the "n_bins" arg is irrelevant. + chosen, the "n_bins" arg is irrelevant. See the documentation of + ``compute_shannon()`` and ``compute_kl_entropy()`` for more + details. Returns: * The absolute information gain :math:`H_0 - H_{clust}` @@ -376,6 +380,10 @@ def compute_entropy_gain( * The Shannon entropy of the initial data :math:`H_0` * The shannon entropy of the clustered data :math:`H_{clust}` + Note: + The output are expressed as fractions if method is "histo", in bit if + method is "kl". + Example: .. testcode:: shannon2-test @@ -456,6 +464,7 @@ def compute_entropy_gain_multi( data: npt.NDArray[np.float64], labels: npt.NDArray[np.int64], n_bins: list[int], + method: Literal["histo", "kl"] = "histo", ) -> tuple[float, float, float, float]: """Compute the relative information gained by the clustering. @@ -472,6 +481,13 @@ def compute_entropy_gain_multi( The number of bins with which the data histogram must be computed, one for each dimension. + method: + How the Shannon entropy is computed. You should use "histo" for + discrete variables, and "kl" for continuous variables. If "kl" is + chosen, the "n_bins" arg is irrelevant. See the documentation of + ``compute_shannon_multi()`` and ``compute_kl_entropy_multi()`` for + more details. + Returns: * The absolute information gain :math:`H_0 - H_{clust}` * The relative information gain :math:`(H_0 - H_{clust}) / H_0` @@ -508,28 +524,44 @@ def compute_entropy_gain_multi( "must have same shape[0]" ) raise RuntimeError(msg) + if method not in ("histo", "kl"): + msg = "method must be histo or kl." 
+ raise ValueError(msg) - data_range = [(float(np.min(tmp)), float(np.max(tmp))) for tmp in data.T] - - # Compute the entropy of the raw data - total_entropy = compute_shannon_multi( - data, - data_range, - n_bins, - ) - - # Compute the fraction and the entropy of the single clusters n_clusters = np.unique(labels).size frac, entr = np.zeros(n_clusters), np.zeros(n_clusters) - for i, label in enumerate(np.unique(labels)): - mask = labels == label - frac[i] = np.sum(mask) / labels.size - entr[i] = compute_shannon_multi( - data[mask], + + if method == "histo": + data_range = [ + (float(np.min(tmp)), float(np.max(tmp))) for tmp in data.T + ] + + # Compute the total entropy of the data + total_entropy = compute_shannon_multi( + data, data_range, n_bins, ) + # Compute the fraction and the entropy of the single clusters + for i, label in enumerate(np.unique(labels)): + mask = labels == label + frac[i] = np.sum(mask) / labels.size + entr[i] = compute_shannon_multi( + data[mask], + data_range, + n_bins, + ) + else: # method == "kl" + # Compute the total entropy of the data + total_entropy = compute_kl_entropy_multi(data) + + # Compute the fraction and the entropy of the single clusters + for i, label in enumerate(np.unique(labels)): + mask = labels == label + frac[i] = np.sum(mask) / labels.size + entr[i] = compute_kl_entropy_multi(data[mask]) + # Compute the entropy of the clustered data clustered_entropy = np.dot(frac, entr) info_gain = total_entropy - clustered_entropy diff --git a/tests/analysis/test_shannon.py b/tests/analysis/test_shannon.py index 2f90d41f..185c10de 100644 --- a/tests/analysis/test_shannon.py +++ b/tests/analysis/test_shannon.py @@ -27,6 +27,14 @@ def data_2d(rng: np.random.Generator) -> NDArray[np.float64]: return rng.random((100, 2)) +@pytest.fixture +def data_gauss(rng: np.random.Generator) -> NDArray[np.float64]: + """Random 2-Gaussians array.""" + data_1 = rng.normal(0.0, 0.1, 10000) + data_2 = rng.normal(1.0, 0.1, 10000) + return np.concatenate((data_1, data_2)) + + @pytest.fixture def labels(rng: np.random.Generator) -> NDArray[np.int64]: """Valid integer labels for 100 samples.""" @@ -89,6 +97,19 @@ def test_gain(data: NDArray[np.float64], labels: NDArray[np.int64]) -> None: assert np.isclose(gain, ref) +def test_kl_gain(data_gauss: NDArray[np.float64]) -> None: + """Check entropy gain value using KL estimator.""" + labels = np.concatenate( + (np.zeros(10000, dtype=int), np.ones(10000, dtype=int)) + ) + gain, *_ = dynsight.analysis.compute_entropy_gain( + data_gauss, + labels, + method="kl", + ) + assert np.isclose(gain, 1.0, rtol=1e-3, atol=1e-3) + + def test_gain_multi( data_2d: NDArray[np.float64], labels: NDArray[np.int64], From 3ba3d3c76aad65921b07f48a0101267de06fd34d Mon Sep 17 00:00:00 2001 From: Matteo Becchi Date: Thu, 2 Oct 2025 11:39:19 +0200 Subject: [PATCH 16/16] Fixing docs. --- docs/source/analysis.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/analysis.rst b/docs/source/analysis.rst index 7f5fd089..7b7a4589 100644 --- a/docs/source/analysis.rst +++ b/docs/source/analysis.rst @@ -18,6 +18,7 @@ information-based calculations. 
compute_negentropy <_autosummary/dynsight.analysis.compute_negentropy> compute_entropy_gain <_autosummary/dynsight.analysis.compute_entropy_gain> compute_shannon_multi <_autosummary/dynsight.analysis.compute_shannon_multi> + compute_kl_entropy_multi <_autosummary/dynsight.analysis.compute_kl_entropy_multi> compute_entropy_gain_multi <_autosummary/dynsight.analysis.compute_entropy_gain_multi> sample_entropy <_autosummary/dynsight.analysis.sample_entropy>