2 changes: 1 addition & 1 deletion .github/workflows/build_and_publish.yml
@@ -24,4 +24,4 @@ jobs:
run: uv build

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
uses: pypa/gh-action-pypi-publish@release/v1
4 changes: 2 additions & 2 deletions .gitignore
@@ -1,8 +1,8 @@
.eggs/*
.tox/*
*/__pycache__/*
__pycache__/
*.egg-info/
build/*
dist/*
*/*/AGENTS.md
**/.DS_Store
**/.DS_Store
1 change: 0 additions & 1 deletion LICENSE
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

85 changes: 81 additions & 4 deletions README.md
@@ -29,6 +29,10 @@ uv pip install sdcpy

## Usage

The main class is `SDCAnalysis`, which takes two time series as input and performs the SDC analysis. If only one time series is provided, it is compared with itself (see the short sketch below the plot).

In this example we generate two synthetic time series with a transient pattern between indices 63 and 169 to showcase the detection of transient correlations.

```python
import numpy as np
import pandas as pd
@@ -46,14 +50,87 @@ ts1 = pd.Series([tc_signal(i) for i in range(250)])
ts2 = pd.Series([tc_signal(i) for i in range(250)])

# Run SDC analysis
sdc = SDCAnalysis(ts1, ts2, fragment_size=50, n_permutations=99)
sdc = SDCAnalysis(
ts1=ts1, # First time series (pd.Series or np.ndarray)
ts2=ts2, # Second time series (pd.Series or np.ndarray)
fragment_size=50, # Size of the sliding window fragment
n_permutations=99, # Number of permutations for significance testing
method="pearson", # Correlation method ("pearson", "spearman" or Callable)
two_tailed=True, # Whether to use two-tailed test
permutations=True, # Whether to compute p-values via permutation
min_lag=-np.inf, # Minimum lag to compute
max_lag=np.inf, # Maximum lag to compute
max_memory_gb=2.0, # Max memory usage for vectorized ops before chunking
)

# Generate 2-way SDC combi plot
fig = sdc.combi_plot(xlabel="TS1", ylabel="TS2")
fig.savefig("sdc_plot.png", dpi=150, bbox_inches="tight")
fig = sdc.combi_plot(
xlabel="$TS_1$", # Label for top axis (TS1)
ylabel="$TS_2$", # Label for left axis (TS2)
title=None, # Plot title (None = auto-generated)
max_r=None, # Max correlation for color scale (None = auto)
date_fmt="%Y-%m-%d", # Date format for axes
align="center", # Alignment of heatmap cells ("center", "left", "right")
min_lag=-np.inf, # Start of lag range to plot
max_lag=np.inf, # End of lag range to plot
fontsize=9, # Base font size
figsize=(7, 7), # Figure size (width, height)
show_colorbar=True, # Whether to show the colorbar
show_ts2=True, # Whether to show TS2 time series panel
dpi=250, # Resolution for saving/displaying
)

```
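`combi_plot` returns a matplotlib `Figure` (as used in `examples/basic_usage.py`), so saving it is the standard call; the file name here is only an example:

```python
fig.savefig("sdc_plot.png", dpi=150, bbox_inches="tight")
```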

<img src="sdc_example.png" width="600"/>
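As noted above, `SDCAnalysis` can also be run on a single time series, which is then compared with itself. A minimal sketch reusing `ts1` from the example, assuming `ts2` may simply be omitted:

```python
# Self-comparison: look for scale-dependent self-similarity in ts1
sdc_self = SDCAnalysis(ts1=ts1, fragment_size=50, n_permutations=99)
```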

### Access Detailed Results

You can access the detailed SDC results via the `sdc.sdc_df` DataFrame. It contains the coordinates of each fragment pair along with the corresponding lag, correlation, and p-value.

```python
sdc.sdc_df.head()
```

<img src="sdc_example.png" width="500" />
```text
start_1 stop_1 start_2 stop_2 lag r p_value
0.0 50.0 0.0 50.0 0.0 0.2267 0.0495
0.0 50.0 1.0 51.0 -1.0 -0.0047 1.0000
0.0 50.0 2.0 52.0 -2.0 0.0579 0.6238
0.0 50.0 3.0 53.0 -3.0 0.0602 0.6337
0.0 50.0 4.0 54.0 -4.0 -0.1660 0.2475
```
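`sdc_df` is a regular `pandas.DataFrame`, so the usual filtering applies. For example, a minimal sketch (using the column names shown above) to keep only the significant positive correlations at alpha = 0.05:

```python
alpha = 0.05  # significance threshold for the permutation p-values
sig = sdc.sdc_df[sdc.sdc_df["p_value"] <= alpha]
sig_pos = sig[sig["r"] > 0]
print(f"{len(sig_pos)} of {len(sdc.sdc_df)} fragment pairs are significantly positively correlated")
```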

### Correlation by Value Range

To check whether synchronies occur in specific value ranges of the time series, use `get_ranges_df()`. It aggregates each fragment's values (`agg_func`), bins the fragments by that value, and computes counts and frequencies of Positive/Negative/Non-significant (NS) correlations per bin, returning a `pandas.DataFrame` with the results.

```python
sdc.get_ranges_df(
ts=1, # Which TS to bin by (1 or 2)
bin_size=0.5, # Size of value bins
agg_func="mean", # Aggregation function for fragment values
alpha=0.05, # Significance threshold
min_bin=None, # Manual lower bound for bins (None = auto)
max_bin=None, # Manual upper bound for bins (None = auto)
min_lag=0, # Minimum lag to include in stats
max_lag=10, # Maximum lag to include in stats
)
```

```text
cat_value direction counts n freq label
(-0.6, 0.0] Positive 504 1232 0.4091 40.9 %
(-0.6, 0.0] Negative 0 1232 0.0000 0.0 %
(-0.6, 0.0] NS 728 1232 0.5909 59.1 %
(0.0, 0.5] Positive 218 883 0.2469 24.7 %
(0.0, 0.5] Negative 10 883 0.0111 1.1 %
(0.0, 0.5] NS 655 883 0.7418 74.2 %
(0.5, 1.0] Positive 1 19 0.0526 5.3 %
(0.5, 1.0] Negative 0 19 0.0000 0.0 %
(0.5, 1.0] NS 18 19 0.9474 94.7 %
(1.0, 1.5] Positive 0 0 0.0000 0.0 %
```
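The returned DataFrame is in long format (one row per bin and direction), so it can be reshaped for a quick overview; a sketch assuming the column names shown above:

```python
ranges_df = sdc.get_ranges_df(ts=1, bin_size=0.5, min_lag=0, max_lag=10)
# One row per value bin, one column per direction (Positive/Negative/NS)
freq_table = ranges_df.pivot(index="cat_value", columns="direction", values="freq")
print(freq_table.round(3))
```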

See [examples/basic_usage.py](examples/basic_usage.py) for a complete example with synthetic data showing transient correlations.

2 changes: 1 addition & 1 deletion examples/basic_usage.py
@@ -31,5 +31,5 @@ def tc_signal(i):

# Generate combination plot
fig = sdc.combi_plot(xlabel="$TS_1$", ylabel="$TS_2$")
fig.savefig("sdc_example.png", dpi=300, bbox_inches="tight")
fig.savefig("sdc_example.png", bbox_inches="tight")
print("Saved: sdc_example.png")
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "sdcpy"
version = "0.5.2"
version = "0.7.0"
description = "Scale Dependent Correlation in Python"
readme = "README.md"
license = "MIT"
@@ -88,7 +88,7 @@ ignore = [
known-first-party = ["sdcpy"]

[tool.bumpversion]
current_version = "0.5.0"
current_version = "0.7.0"
commit = true
tag = true

Binary file modified sdc_example.png
180 changes: 140 additions & 40 deletions sdcpy/core.py
@@ -14,7 +14,51 @@
"spearman": lambda x, y: stats.spearmanr(x, y),
}

CONSTANT_WARNING = {"pearson": stats.ConstantInputWarning, "spearman": stats.ConstantInputWarning}

# Default maximum memory threshold (in GB) for full vectorized computation.
# Above this, chunked processing is used automatically.
DEFAULT_MAX_MEMORY_GB = 2.0
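# For intuition (hypothetical sizes): n1 = n2 = 200 fragments with
# n_permutations = 99 give n_root = 10, i.e. 100 effective permutations, so
# the permuted correlation matrices alone take 100 * 200 * 200 * 8 bytes,
# roughly 0.03 GB, far below this default, and the fully vectorized path is used.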


def _estimate_vectorized_memory(
n1: int, n2: int, n_permutations: int, dtype: np.dtype = np.float64
) -> float:
"""
Estimate peak memory usage (in GB) for the fully vectorized SDC computation.

The dominant memory consumers are:
- Permuted correlation matrices: (n_root^2, n1, n2), where n_root = sqrt(n_permutations)
- Fragment matrices: (n1, fragment_size) + (n2, fragment_size) (small; not counted below)
- Correlation matrix: (n1, n2)
- Grid/lag matrices: 3 * (n1, n2)

Parameters
----------
n1
Number of fragments from first time series
n2
Number of fragments from second time series
n_permutations
Number of permutations for the randomization test
dtype
Data type (default float64 = 8 bytes)

Returns
-------
float
Estimated peak memory usage in gigabytes
"""
bytes_per_element = np.dtype(dtype).itemsize
n_root = int(np.sqrt(n_permutations).round())
n_actual_perms = n_root * n_root

# Main memory consumers
perm_matrices = n_actual_perms * n1 * n2 * bytes_per_element
corr_matrix = n1 * n2 * bytes_per_element
grid_matrices = 3 * n1 * n2 * bytes_per_element # start_1_grid, start_2_grid, lag_matrix

total_bytes = perm_matrices + corr_matrix + grid_matrices
return total_bytes / (1024**3) # Convert to GB


def generate_correlation_map(x: np.ndarray, y: np.ndarray, method: str = "pearson") -> np.ndarray:
@@ -94,6 +138,7 @@ def compute_sdc(
permutations: bool = True,
min_lag: int = -np.inf,
max_lag: int = np.inf,
max_memory_gb: float = DEFAULT_MAX_MEMORY_GB,
) -> pd.DataFrame:
"""
Computes the scale dependent correlation (https://doi.org/10.1007/s00382-005-0106-4) matrix between two time series
@@ -124,6 +169,10 @@
Lower limit of the lags between ts1 and ts2 that will be computed.
max_lag
Upper limit of the lags between ts1 and ts2 that will be computed.
max_memory_gb
Maximum memory (in GB) to use for full vectorized computation. If estimated memory exceeds
this limit, the computation falls back to chunked processing which uses constant memory but
may be slightly slower. Default is 2.0 GB.

Returns
-------
@@ -147,6 +196,7 @@
permutations,
min_lag,
max_lag,
max_memory_gb,
)
else:
# Fall back to original loop-based implementation for custom callables
@@ -173,8 +223,16 @@
permutations: bool,
min_lag: int,
max_lag: int,
max_memory_gb: float = DEFAULT_MAX_MEMORY_GB,
) -> pd.DataFrame:
"""Vectorized implementation for built-in correlation methods."""
"""Vectorized implementation for built-in correlation methods.

Parameters
----------
max_memory_gb
Maximum memory (in GB) to use for full vectorization. If estimated
memory exceeds this, chunked processing is used automatically.
"""
n1 = len(ts1) - fragment_size
n2 = len(ts2) - fragment_size

@@ -210,44 +268,86 @@
n_root = int(np.sqrt(n_permutations).round())
n_actual_perms = n_root * n_root

# OPTIMIZED: Pre-shuffle all fragments and compute full permuted correlation matrices
# This is much faster than shuffling per-pair because we compute n_root^2 full
# correlation matrices instead of n_valid individual permutation tests

# Pre-compute shuffled versions of all fragments
# Shape: (n_root, n_fragments, fragment_size)
shuffled_frags1 = np.array(
[shuffle_along_axis(frags1.copy(), axis=1) for _ in range(n_root)]
)
shuffled_frags2 = np.array(
[shuffle_along_axis(frags2.copy(), axis=1) for _ in range(n_root)]
)

# Compute permuted correlation matrices for all combinations of shuffled fragments
# Shape: (n_root, n_root, n1, n2)
perm_corr_matrices = np.zeros((n_root, n_root, n1, n2))
for i in tqdm(range(n_root), desc="Computing permutations", leave=False):
for j in range(n_root):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
perm_corr_matrices[i, j] = generate_correlation_map(
shuffled_frags1[i], shuffled_frags2[j], method=method
)

# Reshape to (n_actual_perms, n1, n2)
perm_corrs_flat = perm_corr_matrices.reshape(n_actual_perms, n1, n2)

# Compute p-values vectorized
if two_tailed:
# Count how many abs(perm) >= abs(observed) for each position
abs_observed = np.abs(corr_matrix)
abs_perms = np.abs(perm_corrs_flat)
counts = (abs_perms >= abs_observed[np.newaxis, :, :]).sum(axis=0)
# Estimate memory and decide strategy
estimated_memory = _estimate_vectorized_memory(n1, n2, n_permutations)
use_chunked = estimated_memory > max_memory_gb

if use_chunked:
warnings.warn(
f"Estimated memory ({estimated_memory:.2f} GB) exceeds limit "
f"({max_memory_gb:.2f} GB). Using chunked processing.",
UserWarning,
stacklevel=3,
)
# Chunked approach: accumulate counts without storing all permutation matrices
counts = np.zeros((n1, n2), dtype=np.int32)

# Pre-compute shuffled versions of all fragments
shuffled_frags1 = np.array(
[shuffle_along_axis(frags1.copy(), axis=1) for _ in range(n_root)]
)
shuffled_frags2 = np.array(
[shuffle_along_axis(frags2.copy(), axis=1) for _ in range(n_root)]
)

# Process one permutation pair at a time, accumulating counts
abs_observed = np.abs(corr_matrix) if two_tailed else None
with tqdm(
total=n_actual_perms, desc="Computing permutations (chunked)", leave=False
) as pbar:
for i in range(n_root):
for j in range(n_root):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
perm_corr = generate_correlation_map(
shuffled_frags1[i], shuffled_frags2[j], method=method
)
if two_tailed:
counts += (np.abs(perm_corr) >= abs_observed).astype(np.int32)
else:
counts += (perm_corr >= corr_matrix).astype(np.int32)
pbar.update(n_root)

# P-value: (count + 1) / (n_perms + 1) for proper permutation test
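# (The +1 in numerator and denominator counts the observed statistic as one
# of the permutations, guaranteeing p > 0 and a valid permutation test.)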
p_value_matrix = (counts + 1) / (n_actual_perms + 1)
else:
counts = (perm_corrs_flat >= corr_matrix[np.newaxis, :, :]).sum(axis=0)

# P-value: (count + 1) / (n_perms + 1) for proper permutation test
p_value_matrix = (counts + 1) / (n_actual_perms + 1)
# Full vectorized approach: store all permutation matrices
# Pre-compute shuffled versions of all fragments
# Shape: (n_root, n_fragments, fragment_size)
shuffled_frags1 = np.array(
[shuffle_along_axis(frags1.copy(), axis=1) for _ in range(n_root)]
)
shuffled_frags2 = np.array(
[shuffle_along_axis(frags2.copy(), axis=1) for _ in range(n_root)]
)

# Compute permuted correlation matrices for all combinations of shuffled fragments
# Shape: (n_root, n_root, n1, n2)
perm_corr_matrices = np.zeros((n_root, n_root, n1, n2))
with tqdm(total=n_actual_perms, desc="Computing permutations", leave=False) as pbar:
for i in range(n_root):
for j in range(n_root):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
perm_corr_matrices[i, j] = generate_correlation_map(
shuffled_frags1[i], shuffled_frags2[j], method=method
)
pbar.update(n_root)

# Reshape to (n_actual_perms, n1, n2)
perm_corrs_flat = perm_corr_matrices.reshape(n_actual_perms, n1, n2)

# Compute p-values vectorized
if two_tailed:
# Count how many abs(perm) >= abs(observed) for each position
abs_observed = np.abs(corr_matrix)
abs_perms = np.abs(perm_corrs_flat)
counts = (abs_perms >= abs_observed[np.newaxis, :, :]).sum(axis=0)
else:
counts = (perm_corrs_flat >= corr_matrix[np.newaxis, :, :]).sum(axis=0)

# P-value: (count + 1) / (n_perms + 1) for proper permutation test
p_value_matrix = (counts + 1) / (n_actual_perms + 1)

# Extract p-values for valid entries
p_values = p_value_matrix[valid_mask]
@@ -291,7 +391,7 @@ def _compute_sdc_loop(
"""Original loop-based implementation for custom callable methods."""
method_fun = method
n_iterations = (len(ts1) - fragment_size) * (len(ts2) - fragment_size)
int(np.sqrt(n_permutations).round())

sdc_array = np.empty(shape=(n_iterations, 7))
sdc_array[:] = np.nan
i = 0