From aa299539e1a3a0c45fd6288ea0437846c989571b Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Sat, 28 Mar 2026 12:18:27 +0530
Subject: [PATCH 1/9] Add ReservoirComputingBenchmarks library

Initial implementation of the benchmarking library in lib/ as discussed
in #398. Implements three core tasks:

- Memory Capacity (Jaeger 2002)
- NARMA target generation and evaluation (Atiya & Parlos 2000)
- Information Processing Capacity (Dambre et al. 2012)

Model-agnostic API taking raw input/states arrays. Uses Cholesky-based
ridge regression with factorization reuse for performance. Zero external
dependencies (LinearAlgebra + Statistics only).
---
 lib/ReservoirComputingBenchmarks/Project.toml |  18 ++
 .../src/ReservoirComputingBenchmarks.jl       |  16 ++
 lib/ReservoirComputingBenchmarks/src/ipc.jl   | 215 +++++++++++++++
 .../src/memory_capacity.jl                    |  84 ++++++
 lib/ReservoirComputingBenchmarks/src/narma.jl | 190 ++++++++++++++
 lib/ReservoirComputingBenchmarks/src/utils.jl | 116 +++++++++
 .../test/runtests.jl                          | 244 ++++++++++++++++++
 7 files changed, 883 insertions(+)
 create mode 100644 lib/ReservoirComputingBenchmarks/Project.toml
 create mode 100644 lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/ipc.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/narma.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/utils.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/test/runtests.jl

diff --git a/lib/ReservoirComputingBenchmarks/Project.toml b/lib/ReservoirComputingBenchmarks/Project.toml
new file mode 100644
index 000000000..3e91e71c4
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/Project.toml
@@ -0,0 +1,18 @@
+name = "ReservoirComputingBenchmarks"
+uuid = "03eff81b-b261-4f82-8cea-c13df0f346ab"
+authors = ["Francesco Martinuzzi", "Saswat Susmoy"]
+version = "0.1.0"
+
+[deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[targets]
+test = ["Test", "Random"]
+
+[compat]
+julia = "1.10"
diff --git a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
new file mode 100644
index 000000000..4025850d6
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
@@ -0,0 +1,16 @@
+module ReservoirComputingBenchmarks
+
+import LinearAlgebra
+using LinearAlgebra: I, cholesky, Symmetric, mul!
+using Statistics: mean, var, cor
+
+include("utils.jl")
+include("memory_capacity.jl")
+include("narma.jl")
+include("ipc.jl")
+
+export memory_capacity
+export generate_narma, narma
+export ipc
+
+end # module
diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl
new file mode 100644
index 000000000..7308d1044
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl
@@ -0,0 +1,215 @@
+@doc raw"""
+    ipc(input, states; max_delay=10, max_degree=3, max_total_degree=nothing, cross_terms=true, train_ratio=0.8, reg=1.0)
+
+Compute the Information Processing Capacity of a reservoir following
+Dambre et al. (2012).
+
+The IPC decomposes the total computational capacity of a reservoir into
+contributions from orthonormal Legendre polynomial basis functions:
+
+```math
+z_l(t) = \prod_i \tilde{P}_{d_i}\!\bigl(u(t - \tau_i)\bigr)
+```
+
+where ``\tilde{P}_n(x) = P_n(x)\sqrt{(2n+1)/2}`` is the normalized Legendre
+polynomial of degree ``n``.  For each basis function, a ridge regression readout
+is trained, and the capacity is the squared correlation between the target and
+prediction.
+
+The total capacity satisfies ``C_{\text{total}} \leq N`` where ``N`` is the
+number of reservoir nodes.
+
+## Arguments
+
+  - `input::AbstractVector`: input signal of length `T` (should be i.i.d.
+    uniform in ``[-1, 1]``).
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `max_delay::Int=10`: maximum delay ``\tau`` for basis functions.
+  - `max_degree::Int=3`: maximum polynomial degree per variable.
+  - `max_total_degree::Union{Int,Nothing}=nothing`: maximum sum of degrees
+    in cross-terms. Defaults to `max_degree`.
+  - `cross_terms::Bool=true`: include two-variable product basis functions.
+  - `train_ratio::Real=0.8`: fraction of valid data used for training.
+  - `reg::Real=1.0`: ridge regression regularization coefficient.
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `total::Float64`: total information processing capacity.
+  - `linear::Float64`: capacity from degree-1 (linear memory) terms.
+  - `nonlinear::Float64`: capacity from degree ``\geq 2`` terms.
+  - `by_degree::Dict{Int,Float64}`: capacity aggregated by total polynomial
+    degree.
+  - `by_delay::Dict{Int,Float64}`: capacity aggregated by delay (single-variable
+    terms only).
+  - `basis_capacities::Vector{NamedTuple}`: per-basis-function results with
+    fields `terms`, `degree`, `capacity`.
+  - `theoretical_max::Int`: upper bound ``N`` (number of reservoir features).
+
+## References
+
+  - Dambre, J., Verstraeten, D., Schrauwen, B. & Massar, S. (2012).
+    "Information processing capacity of dynamical systems."
+    *Scientific Reports*, 2, 514.
+"""
+function ipc(
+    input::AbstractVector,
+    states::AbstractMatrix;
+    max_delay::Int=10,
+    max_degree::Int=3,
+    max_total_degree::Union{Int,Nothing}=nothing,
+    cross_terms::Bool=true,
+    train_ratio::Real=0.8,
+    reg::Real=1.0,
+)
+    T = length(input)
+    n_features = size(states, 1)
+    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
+    @assert max_degree >= 1 "max_degree must be >= 1, got $max_degree"
+    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+
+    mtd = something(max_total_degree, max_degree)
+
+    # Precompute normalized Legendre polynomial values for all (degree, delay)
+    poly_cache = Dict{Tuple{Int,Int},Vector{Float64}}()
+    for delay in 1:max_delay
+        for degree in 1:max_degree
+            delayed = @view input[(max_delay + 1 - delay):(T - delay)]
+            poly_cache[(delay, degree)] = _normalized_legendre.(degree, delayed)
+        end
+    end
+
+    # Generate basis functions
+    basis_functions = _enumerate_basis_functions(max_delay, max_degree, mtd, cross_terms)
+
+    # Valid time range (after max_delay to avoid edge effects)
+    valid = (max_delay + 1):T
+    T_valid = length(valid)
+    X = collect(states[:, valid]')  # (T_valid, n_features)
+
+    train_idx, test_idx = _train_test_split(T_valid, train_ratio)
+    X_train = X[train_idx, :]
+    X_test = X[test_idx, :]
+
+    # Pre-compute Cholesky factorization — reused across all basis functions
+    rf = _ridge_factor(X_train; reg=reg)
+
+    results = Vector{NamedTuple{(:terms, :degree, :capacity),Tuple{Vector{Tuple{Int,Int}},Int,Float64}}}()
+    by_degree = Dict{Int,Float64}()
+    by_delay = Dict{Int,Float64}()
+
+    # Pre-allocate target buffer — reused via fill!
+    target = Vector{Float64}(undef, T_valid)
+
+    for terms in basis_functions
+        total_degree = sum(d for (_, d) in terms)
+
+        # Compute target: product of normalized Legendre polynomials
+        fill!(target, 1.0)
+        for (delay, degree) in terms
+            target .*= poly_cache[(delay, degree)]
+        end
+
+        y_train = target[train_idx]
+        y_test = target[test_idx]
+
+        w = _ridge_solve(rf, X_train, y_train)
+        y_pred = X_test * w
+
+        cap = _squared_correlation(y_test, y_pred)
+
+        push!(results, (terms=terms, degree=total_degree, capacity=cap))
+        by_degree[total_degree] = get(by_degree, total_degree, 0.0) + cap
+
+        # Track per-delay capacity for single-variable terms
+        if length(terms) == 1
+            delay = terms[1][1]
+            by_delay[delay] = get(by_delay, delay, 0.0) + cap
+        end
+    end
+
+    total_cap = isempty(results) ? 0.0 : sum(r.capacity for r in results)
+    linear_cap = get(by_degree, 1, 0.0)
+    nonlinear_cap = total_cap - linear_cap
+
+    return (
+        total=total_cap,
+        linear=linear_cap,
+        nonlinear=nonlinear_cap,
+        by_degree=by_degree,
+        by_delay=by_delay,
+        basis_capacities=results,
+        theoretical_max=n_features,
+    )
+end
+
+# ── Legendre polynomials ─────────────────────────────────────────────────────
+
+"""
+    _legendre(n, x)
+
+Evaluate the Legendre polynomial ``P_n(x)`` using the three-term recurrence.
+"""
+function _legendre(n::Int, x::Real)
+    @assert n >= 0 "Legendre polynomial degree must be non-negative, got $n"
+    n == 0 && return one(float(x))
+    n == 1 && return float(x)
+    p_prev, p_curr = one(float(x)), float(x)
+    for k in 1:(n - 1)
+        p_next = ((2k + 1) * x * p_curr - k * p_prev) / (k + 1)
+        p_prev, p_curr = p_curr, p_next
+    end
+    return p_curr
+end
+
+"""
+    _normalized_legendre(n, x)
+
+Evaluate the orthonormalized Legendre polynomial
+``\\tilde{P}_n(x) = P_n(x) \\sqrt{(2n+1)/2}`` on ``[-1,1]``.
+"""
+function _normalized_legendre(n::Int, x::Real)
+    return _legendre(n, x) * sqrt((2n + 1) / 2)
+end
+
+# ── Basis function enumeration ───────────────────────────────────────────────
+
+"""
+    _enumerate_basis_functions(max_delay, max_degree, max_total_degree, cross_terms)
+
+Return a vector of basis function descriptors. Each descriptor is a
+`Vector{Tuple{Int,Int}}` of `(delay, degree)` pairs.
+"""
+function _enumerate_basis_functions(
+    max_delay::Int, max_degree::Int, max_total_degree::Int, cross_terms::Bool
+)
+    basis = Vector{Vector{Tuple{Int,Int}}}()
+
+    # Single-variable terms: P_d(u(t - τ))
+    for delay in 1:max_delay
+        for degree in 1:max_degree
+            push!(basis, [(delay, degree)])
+        end
+    end
+
+    # Two-variable cross-terms: P_{d1}(u(t-τ1)) * P_{d2}(u(t-τ2))
+    if cross_terms
+        for d1 in 1:max_delay
+            for d2 in (d1 + 1):max_delay
+                for deg1 in 1:max_degree
+                    for deg2 in 1:max_degree
+                        deg1 + deg2 <= max_total_degree || continue
+                        push!(basis, [(d1, deg1), (d2, deg2)])
+                    end
+                end
+            end
+        end
+    end
+
+    return basis
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
new file mode 100644
index 000000000..36642df9a
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
@@ -0,0 +1,84 @@
+@doc raw"""
+    memory_capacity(input, states; max_delay=30, train_ratio=0.8, reg=1.0)
+
+Compute the linear memory capacity of a reservoir following Jaeger (2002).
+
+For each delay ``k = 1, \ldots, K``, a ridge regression readout is trained to
+reconstruct the input delayed by ``k`` steps. The per-delay capacity is the
+squared Pearson correlation between the true delayed input and the prediction:
+
+```math
+MC_k = \frac{\text{cov}^2(u(t-k),\, \hat{u}_k(t))}
+            {\text{var}(u(t-k))\;\text{var}(\hat{u}_k(t))}
+```
+
+The total memory capacity is ``MC = \sum_{k=1}^{K} MC_k``, bounded above by the
+number of reservoir nodes ``N`` (Jaeger, 2002).
+
+## Arguments
+
+  - `input::AbstractVector`: input signal of length `T` (typically i.i.d.
+    uniform in ``[-1, 1]``).
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `max_delay::Int=30`: maximum delay ``K`` to evaluate.
+  - `train_ratio::Real=0.8`: fraction of valid data used for training.
+  - `reg::Real=1.0`: ridge regression regularization coefficient ``\lambda``.
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `total::Float64`: total memory capacity ``MC``.
+  - `delays::Vector{Float64}`: per-delay capacities ``MC_k`` for
+    ``k = 1, \ldots, K``.
+
+## References
+
+  - Jaeger, H. (2002). "Short term memory in echo state networks."
+    GMD Report 152.
+"""
+function memory_capacity(
+    input::AbstractVector,
+    states::AbstractMatrix;
+    max_delay::Int=30,
+    train_ratio::Real=0.8,
+    reg::Real=1.0,
+)
+    T = length(input)
+    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
+    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+
+    # Discard first max_delay steps to avoid edge effects from delays
+    valid = (max_delay + 1):T
+    T_valid = length(valid)
+    X = collect(states[:, valid]')  # (T_valid, n_features)
+
+    train_idx, test_idx = _train_test_split(T_valid, train_ratio)
+
+    X_train = X[train_idx, :]
+    X_test = X[test_idx, :]
+
+    # Pre-compute Cholesky factorization — reused across all delays
+    rf = _ridge_factor(X_train; reg=reg)
+
+    delay_capacities = zeros(max_delay)
+
+    for k in 1:max_delay
+        # Target: input shifted back by k steps
+        target = input[valid .- k]
+
+        y_train = target[train_idx]
+        y_test = target[test_idx]
+
+        w = _ridge_solve(rf, X_train, y_train)
+        y_pred = X_test * w
+
+        delay_capacities[k] = _squared_correlation(y_test, y_pred)
+    end
+
+    return (total=sum(delay_capacities), delays=delay_capacities)
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
new file mode 100644
index 000000000..4e71988f8
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -0,0 +1,190 @@
+"""
+Standard NARMA coefficients from the literature.
+
+  - NARMA-2:  Atiya & Parlos (2000), Eq. 12
+  - NARMA-10: Atiya & Parlos (2000), standard benchmark
+  - NARMA-30: Schrauwen et al. (2008)
+"""
+const NARMA_COEFFICIENTS = Dict{Int,NamedTuple{(:alpha, :beta, :gamma, :delta),NTuple{4,Float64}}}(
+    2 => (alpha=0.4, beta=0.4, gamma=0.6, delta=0.1),
+    10 => (alpha=0.3, beta=0.05, gamma=1.5, delta=0.1),
+    30 => (alpha=0.2, beta=0.04, gamma=1.5, delta=0.001),
+)
+
+@doc raw"""
+    generate_narma(input; order=10, alpha=nothing, beta=nothing, gamma=nothing, delta=nothing, normalize=true)
+
+Generate a NARMA target signal from the given input.
+
+For `order == 2` (Atiya & Parlos 2000, Eq. 12):
+
+```math
+y(k{+}1) = \alpha\, y(k) + \beta\, y(k)\, y(k{-}1)
+       + \gamma\, u(k)^3 + \delta
+```
+
+For `order >= 3` (Atiya & Parlos 2000, general NARMA-N):
+
+```math
+y(t{+}1) = \alpha\, y(t) + \beta\, y(t) \sum_{i=0}^{N-1} y(t{-}i)
+            + \gamma\, u(t{-}N{+}1)\, u(t) + \delta
+```
+
+## Arguments
+
+  - `input::AbstractVector`: driving input signal of length `T`.
+
+## Keyword Arguments
+
+  - `order::Int=10`: NARMA system order ``N``. Must be ``\geq 2``.
+  - `alpha`, `beta`, `gamma`, `delta`: recurrence coefficients. When `nothing`
+    (default), standard values are used for orders 2, 10, and 30. All four
+    must be provided explicitly for other orders.
+  - `normalize::Bool=true`: normalize input to ``[0, 0.5]`` before computing
+    the recurrence.
+
+## Returns
+
+  - `Vector{Float64}`: NARMA target signal of length `T`.
+
+## References
+
+  - Atiya, A.F. & Parlos, A.G. (2000). "New results on recurrent network
+    training." *IEEE Trans. Neural Networks*, 11(3).
+  - Schrauwen, B. et al. (2008). "Improving reservoirs using intrinsic
+    plasticity." *Neurocomputing*, 71(7–9).
+"""
+function generate_narma(
+    input::AbstractVector;
+    order::Int=10,
+    alpha::Union{Real,Nothing}=nothing,
+    beta::Union{Real,Nothing}=nothing,
+    gamma::Union{Real,Nothing}=nothing,
+    delta::Union{Real,Nothing}=nothing,
+    normalize::Bool=true,
+)
+    @assert order >= 2 "NARMA order must be >= 2, got $order"
+
+    # Resolve coefficients
+    if haskey(NARMA_COEFFICIENTS, order)
+        c = NARMA_COEFFICIENTS[order]
+        alpha = something(alpha, c.alpha)
+        beta = something(beta, c.beta)
+        gamma = something(gamma, c.gamma)
+        delta = something(delta, c.delta)
+    else
+        @assert !isnothing(alpha) && !isnothing(beta) && !isnothing(gamma) && !isnothing(delta) (
+            "No default coefficients for NARMA-$order. " *
+            "Provide alpha, beta, gamma, delta explicitly."
+        )
+    end
+
+    T = length(input)
+    @assert T > order "Input length ($T) must be greater than order ($order)"
+
+    u = normalize ? _normalize(input, 0.0, 0.5) : Float64.(input)
+    y = zeros(Float64, T)
+
+    if order == 2
+        # NARMA-2: y[t] = α y[t-1] + β y[t-1] y[t-2] + γ u[t-1]³ + δ
+        for t in 3:T
+            y[t] = alpha * y[t - 1] +
+                   beta * y[t - 1] * y[t - 2] +
+                   gamma * u[t - 1]^3 +
+                   delta
+        end
+    else
+        # NARMA-N (N >= 3): standard recurrence
+        # y[t] = α y[t-1] + β y[t-1] Σ y[(t-N):(t-1)] + γ u[t-N] u[t-1] + δ
+        for t in (order + 1):T
+            y[t] = alpha * y[t - 1] +
+                   beta * y[t - 1] * sum(@view y[(t - order):(t - 1)]) +
+                   gamma * u[t - order] * u[t - 1] +
+                   delta
+        end
+    end
+
+    if any(!isfinite, y)
+        idx = findfirst(!isfinite, y)
+        @warn "NARMA-$order target diverged at time step $idx (value: $(y[idx])). " *
+              "Consider using normalize=true or adjusting coefficients."
+    end
+
+    return y
+end
+
+@doc raw"""
+    narma(input, states; order=10, metric=:nmse, train_ratio=0.8, reg=1.0, washout=nothing, kwargs...)
+
+Evaluate reservoir performance on the NARMA-N task.
+
+Generates the NARMA target from `input`, trains a ridge regression readout
+from `states` to the target, and computes the error metric on held-out data.
+
+## Arguments
+
+  - `input::AbstractVector`: driving input signal of length `T`.
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `order::Int=10`: NARMA system order.
+  - `metric::Symbol=:nmse`: error metric. One of `:nmse` (Normalized MSE),
+    `:rnmse` (Root NMSE), or `:mse`.
+  - `train_ratio::Real=0.8`: fraction of data used for training.
+  - `reg::Real=1.0`: ridge regression regularization coefficient.
+  - `washout::Union{Int,Nothing}=nothing`: number of initial time steps to
+    discard. Defaults to `order`.
+  - Remaining `kwargs` are forwarded to [`generate_narma`](@ref).
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `score::Float64`: the computed error metric.
+  - `metric::Symbol`: which metric was used.
+  - `target::Vector{Float64}`: the full NARMA target signal.
+"""
+function narma(
+    input::AbstractVector,
+    states::AbstractMatrix;
+    order::Int=10,
+    metric::Symbol=:nmse,
+    train_ratio::Real=0.8,
+    reg::Real=1.0,
+    washout::Union{Int,Nothing}=nothing,
+    kwargs...,
+)
+    T = length(input)
+    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+
+    target = generate_narma(input; order=order, kwargs...)
+
+    # Skip initial transient where y ≈ 0
+    wo = something(washout, order)
+    @assert 0 <= wo < T "washout must be in [0, $T), got $wo"
+    valid = (wo + 1):T
+    T_valid = length(valid)
+
+    X = collect(states[:, valid]')  # (T_valid, n_features)
+    y = target[valid]
+
+    train_idx, test_idx = _train_test_split(T_valid, train_ratio)
+
+    w = _ridge_regression(X[train_idx, :], y[train_idx]; reg=reg)
+    y_pred = X[test_idx, :] * w
+    y_test = y[test_idx]
+
+    score = if metric === :nmse
+        _nmse(y_test, y_pred)
+    elseif metric === :rnmse
+        nmse_val = _nmse(y_test, y_pred)
+        nmse_val < 0 ? 0.0 : sqrt(nmse_val)
+    elseif metric === :mse
+        mean((y_test .- y_pred) .^ 2)
+    else
+        throw(ArgumentError("Unknown metric :$metric. Use :nmse, :rnmse, or :mse."))
+    end
+
+    return (score=score, metric=metric, target=target)
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
new file mode 100644
index 000000000..c6ab2e1c8
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -0,0 +1,116 @@
+"""
+    _RidgeFactor
+
+Pre-computed Cholesky factorization of the Gram matrix `X'X + λI` for
+efficient multi-RHS ridge regression. Construct once, solve many times
+with [`_ridge_solve`](@ref).
+"""
+struct _RidgeFactor{T<:Real}
+    factor::LinearAlgebra.Cholesky{T,Matrix{T}}
+    Xty_buf::Vector{T}   # scratch buffer for X'y products
+end
+
+"""
+    _ridge_factor(X; reg=1.0)
+
+Pre-compute the Cholesky factorization of `X'X + reg * I`.
+
+- `X`: (T, n_features) design matrix
+- `reg`: regularization coefficient (λ ≥ 0)
+
+Returns a `_RidgeFactor` that can be reused across many targets.
+"""
+function _ridge_factor(X::AbstractMatrix; reg::Real=1.0)
+    @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
+    n = size(X, 2)
+    G = Symmetric(X' * X)
+    G_reg = Matrix(G) + reg * Matrix{eltype(X)}(I, n, n)
+    F = cholesky(Symmetric(G_reg))
+    buf = Vector{eltype(X)}(undef, n)
+    return _RidgeFactor(F, buf)
+end
+
+"""
+    _ridge_solve(rf, X, y)
+
+Solve ridge regression using the pre-computed `_RidgeFactor`.
+Returns weight vector `w` such that `X * w ≈ y`.
+"""
+function _ridge_solve(rf::_RidgeFactor, X::AbstractMatrix, y::AbstractVector)
+    mul!(rf.Xty_buf, X', y)
+    return rf.factor \ rf.Xty_buf
+end
+
+"""
+    _ridge_regression(X, y; reg=1.0)
+
+Solve ridge regression `X * w ≈ y` via Cholesky factorization of the
+Gram matrix `X'X + λI`.
+
+- `X`: (T, n_features) design matrix
+- `y`: (T,) target vector
+- `reg`: regularization coefficient (λ ≥ 0)
+
+Returns weight vector `w` of length `n_features`.
+"""
+function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real=1.0)
+    @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
+    rf = _ridge_factor(X; reg=reg)
+    return _ridge_solve(rf, X, y)
+end
+
+"""
+    _squared_correlation(y_true, y_pred)
+
+Squared Pearson correlation coefficient between `y_true` and `y_pred`.
+Returns 0.0 with a warning if either vector has zero variance.
+"""
+function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector)
+    r = cor(y_true, y_pred)
+    if isnan(r)
+        @warn "Squared correlation is NaN (zero-variance input). " *
+              "This may indicate a degenerate reservoir or collapsed prediction."
+        return 0.0
+    end
+    return clamp(r^2, 0.0, 1.0)
+end
+
+"""
+    _nmse(y_true, y_pred)
+
+Normalized Mean Squared Error: `mean((y_true - y_pred).²) / var(y_true)`.
+"""
+function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
+    v = var(y_true)
+    if v < eps(eltype(y_true))
+        @warn "NMSE: target variance is near-zero ($v). " *
+              "NMSE is undefined for constant targets."
+        return NaN
+    end
+    return mean((y_true .- y_pred) .^ 2) / v
+end
+
+"""
+    _train_test_split(n, train_ratio)
+
+Return `(train_range, test_range)` index ranges for a temporal split.
+"""
+function _train_test_split(n::Int, train_ratio::Real)
+    @assert 0 < train_ratio < 1 "train_ratio must be in (0, 1), got $train_ratio"
+    split = floor(Int, n * train_ratio)
+    @assert split >= 1 "Training set is empty (n=$n, train_ratio=$train_ratio). Provide more data."
+    @assert split < n "Test set is empty (n=$n, train_ratio=$train_ratio). Reduce train_ratio or provide more data."
+    return 1:split, (split + 1):n
+end
+
+"""
+    _normalize(x, lo, hi)
+
+Min-max normalize vector `x` to the interval `[lo, hi]`.
+Returns a constant vector of `lo` if `x` is constant.
+"""
+function _normalize(x::AbstractVector, lo::Real, hi::Real)
+    xmin, xmax = extrema(x)
+    xmin == xmax && return fill(convert(eltype(x), lo), length(x))
+    return @. (x - xmin) / (xmax - xmin) * (hi - lo) + lo
+end
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
new file mode 100644
index 000000000..375001100
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -0,0 +1,244 @@
+using ReservoirComputingBenchmarks
+using Test
+using Random
+
+@testset "ReservoirComputingBenchmarks" begin
+    rng = Random.MersenneTwister(42)
+
+    @testset "Memory Capacity" begin
+        T = 2000
+        n_features = 50
+        input = rand(rng, T) .* 2 .- 1  # Uniform[-1, 1]
+
+        # Build a simple linear reservoir (identity + shift) that should
+        # have high memory capacity
+        states = zeros(n_features, T)
+        for t in 2:T
+            states[1, t] = input[t - 1]
+            for i in 2:n_features
+                states[i, t] = states[i - 1, t - 1]
+            end
+        end
+
+        result = memory_capacity(input, states; max_delay=20, reg=1e-6)
+
+        @test haskey(result, :total)
+        @test haskey(result, :delays)
+        @test length(result.delays) == 20
+        @test all(0 .<= result.delays .<= 1.001)
+        @test result.total >= 0
+        # A shift-register reservoir should have near-perfect MC for
+        # delays up to n_features
+        @test result.delays[1] > 0.9
+        @test result.delays[5] > 0.8
+        @test result.total > 10
+    end
+
+    @testset "NARMA Target Generation" begin
+        T = 1000
+        input = rand(rng, T) .* 0.5  # Uniform[0, 0.5]
+
+        # NARMA-2
+        y2 = generate_narma(input; order=2, normalize=false)
+        @test length(y2) == T
+        @test y2[1] == 0.0
+        @test y2[2] == 0.0
+        @test y2[3] != 0.0  # First non-trivial value
+
+        # NARMA-10
+        y10 = generate_narma(input; order=10, normalize=false)
+        @test length(y10) == T
+        @test all(y10[1:10] .== 0.0)
+        @test y10[11] != 0.0
+
+        # NARMA-30
+        y30 = generate_narma(input; order=30, normalize=false)
+        @test length(y30) == T
+        @test all(y30[1:30] .== 0.0)
+        @test y30[31] != 0.0
+
+        # Target should not diverge for standard inputs
+        @test all(isfinite.(y10))
+        @test all(isfinite.(y30))
+
+        # Custom coefficients for non-standard order
+        @test_throws AssertionError generate_narma(input; order=5)
+        y5 = generate_narma(input; order=5, alpha=0.3, beta=0.05, gamma=1.5, delta=0.1, normalize=false)
+        @test length(y5) == T
+
+        # Order must be >= 2
+        @test_throws AssertionError generate_narma(input; order=1)
+    end
+
+    @testset "NARMA Evaluation" begin
+        T = 1000
+        n_features = 100
+        input = rand(rng, T)
+
+        # Use random states (won't be great, but should run)
+        states = randn(rng, n_features, T)
+
+        result = narma(input, states; order=10, metric=:nmse, reg=1.0)
+
+        @test haskey(result, :score)
+        @test haskey(result, :metric)
+        @test haskey(result, :target)
+        @test result.metric === :nmse
+        @test isfinite(result.score)
+        @test result.score >= 0
+        @test length(result.target) == T
+
+        # Test other metrics
+        result_rnmse = narma(input, states; order=10, metric=:rnmse, reg=1.0)
+        @test result_rnmse.metric === :rnmse
+        @test isfinite(result_rnmse.score)
+
+        result_mse = narma(input, states; order=10, metric=:mse, reg=1.0)
+        @test result_mse.metric === :mse
+        @test isfinite(result_mse.score)
+
+        # Invalid metric
+        @test_throws ArgumentError narma(input, states; order=10, metric=:invalid)
+    end
+
+    @testset "IPC" begin
+        T = 1000
+        n_features = 30
+        input = rand(rng, T) .* 2 .- 1  # Uniform[-1, 1]
+
+        # Simple linear reservoir
+        states = zeros(n_features, T)
+        for t in 2:T
+            states[1, t] = input[t - 1]
+            for i in 2:n_features
+                states[i, t] = 0.95 * states[i, t - 1] + 0.05 * randn(rng)
+            end
+        end
+
+        result = ipc(input, states; max_delay=5, max_degree=2, reg=1e-4)
+
+        @test haskey(result, :total)
+        @test haskey(result, :linear)
+        @test haskey(result, :nonlinear)
+        @test haskey(result, :by_degree)
+        @test haskey(result, :by_delay)
+        @test haskey(result, :basis_capacities)
+        @test haskey(result, :theoretical_max)
+
+        @test result.total >= 0
+        @test result.linear >= 0
+        @test result.nonlinear >= 0
+        @test result.theoretical_max == n_features
+        @test abs(result.total - result.linear - result.nonlinear) < 1e-10
+
+        # Degree-1 capacity should be present
+        @test haskey(result.by_degree, 1)
+
+        # Total should not exceed theoretical maximum (with some tolerance)
+        @test result.total <= n_features + 1.0
+
+        # Without cross terms
+        result_no_cross = ipc(input, states; max_delay=5, max_degree=2, cross_terms=false, reg=1e-4)
+        @test length(result_no_cross.basis_capacities) < length(result.basis_capacities)
+    end
+
+    @testset "Legendre Polynomials" begin
+        leg = ReservoirComputingBenchmarks._legendre
+
+        # P_0(x) = 1
+        @test leg(0, 0.5) ≈ 1.0
+        @test leg(0, -0.3) ≈ 1.0
+
+        # P_1(x) = x
+        @test leg(1, 0.5) ≈ 0.5
+        @test leg(1, -0.3) ≈ -0.3
+
+        # P_2(x) = (3x² - 1) / 2
+        @test leg(2, 0.5) ≈ (3 * 0.25 - 1) / 2
+        @test leg(2, 0.0) ≈ -0.5
+
+        # P_3(x) = (5x³ - 3x) / 2
+        @test leg(3, 0.5) ≈ (5 * 0.125 - 1.5) / 2
+
+        # Negative degree should error
+        @test_throws AssertionError leg(-1, 0.5)
+
+        # Orthonormality: ∫₋₁¹ P̃_n(x) P̃_m(x) dx ≈ δ_{nm}
+        nleg = ReservoirComputingBenchmarks._normalized_legendre
+        N_quad = 10000
+        x_quad = range(-1, 1; length=N_quad)
+        dx = 2.0 / (N_quad - 1)
+
+        for n in 0:3
+            vals_n = nleg.(n, x_quad)
+            integral = sum(vals_n .^ 2) * dx
+            @test isapprox(integral, 1.0; atol=0.01)
+        end
+    end
+
+    @testset "Utilities" begin
+        # _normalize
+        x = [1.0, 2.0, 3.0, 4.0, 5.0]
+        xn = ReservoirComputingBenchmarks._normalize(x, 0.0, 0.5)
+        @test xn[1] ≈ 0.0
+        @test xn[end] ≈ 0.5
+        @test all(0 .<= xn .<= 0.5)
+
+        # Constant input
+        xc = ReservoirComputingBenchmarks._normalize([3.0, 3.0, 3.0], 0.0, 0.5)
+        @test all(xc .== 0.0)
+
+        # _squared_correlation
+        a = [1.0, 2.0, 3.0, 4.0, 5.0]
+        @test ReservoirComputingBenchmarks._squared_correlation(a, a) ≈ 1.0
+        @test ReservoirComputingBenchmarks._squared_correlation(a, -a) ≈ 1.0
+
+        # _nmse: perfect prediction → 0
+        @test ReservoirComputingBenchmarks._nmse(a, a) ≈ 0.0
+
+        # _train_test_split: invalid ratios
+        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 0.0)
+        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 1.0)
+        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, -0.5)
+        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 1.5)
+
+        # _ridge_regression: negative reg
+        @test_throws AssertionError ReservoirComputingBenchmarks._ridge_regression(
+            randn(50, 5), randn(50); reg=-1.0
+        )
+    end
+
+    @testset "Validation" begin
+        input = rand(rng, 500) .* 2 .- 1
+        states = randn(rng, 10, 500)
+
+        # max_delay must be >= 1
+        @test_throws AssertionError memory_capacity(input, states; max_delay=0)
+
+        # max_delay must be < T
+        @test_throws AssertionError memory_capacity(input, states; max_delay=500)
+
+        # Mismatched dimensions
+        @test_throws AssertionError memory_capacity(rand(100), randn(10, 200))
+
+        # NARMA order < 2
+        @test_throws AssertionError generate_narma(rand(100); order=1)
+
+        # Missing NARMA coefficients for non-standard order
+        @test_throws AssertionError generate_narma(rand(100); order=7)
+
+        # Input shorter than order
+        @test_throws AssertionError generate_narma(rand(5); order=10)
+
+        # Invalid NARMA metric
+        @test_throws ArgumentError narma(input, states; order=10, metric=:bad)
+
+        # Washout out of range
+        @test_throws AssertionError narma(input, states; order=10, washout=-1)
+        @test_throws AssertionError narma(input, states; order=10, washout=500)
+
+        # IPC: max_degree/max_delay must be >= 1
+        @test_throws AssertionError ipc(input, states; max_delay=0, max_degree=2)
+        @test_throws AssertionError ipc(input, states; max_delay=5, max_degree=0)
+    end
+end

From 6605f4365feda90c666c41881c341b90d33a1d4d Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Sat, 28 Mar 2026 12:26:49 +0530
Subject: [PATCH 2/9] Apply Runic formatting to ReservoirComputingBenchmarks

---
 lib/ReservoirComputingBenchmarks/src/ipc.jl   | 50 +++++++--------
 .../src/memory_capacity.jl                    | 16 ++---
 lib/ReservoirComputingBenchmarks/src/narma.jl | 64 +++++++++----------
 lib/ReservoirComputingBenchmarks/src/utils.jl | 14 ++--
 .../test/runtests.jl                          | 54 ++++++++--------
 5 files changed, 99 insertions(+), 99 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl
index 7308d1044..bf210bc4e 100644
--- a/lib/ReservoirComputingBenchmarks/src/ipc.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl
@@ -57,15 +57,15 @@ A `NamedTuple` with fields:
     *Scientific Reports*, 2, 514.
 """
 function ipc(
-    input::AbstractVector,
-    states::AbstractMatrix;
-    max_delay::Int=10,
-    max_degree::Int=3,
-    max_total_degree::Union{Int,Nothing}=nothing,
-    cross_terms::Bool=true,
-    train_ratio::Real=0.8,
-    reg::Real=1.0,
-)
+        input::AbstractVector,
+        states::AbstractMatrix;
+        max_delay::Int = 10,
+        max_degree::Int = 3,
+        max_total_degree::Union{Int, Nothing} = nothing,
+        cross_terms::Bool = true,
+        train_ratio::Real = 0.8,
+        reg::Real = 1.0,
+    )
     T = length(input)
     n_features = size(states, 1)
     @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
@@ -76,7 +76,7 @@ function ipc(
     mtd = something(max_total_degree, max_degree)
 
     # Precompute normalized Legendre polynomial values for all (degree, delay)
-    poly_cache = Dict{Tuple{Int,Int},Vector{Float64}}()
+    poly_cache = Dict{Tuple{Int, Int}, Vector{Float64}}()
     for delay in 1:max_delay
         for degree in 1:max_degree
             delayed = @view input[(max_delay + 1 - delay):(T - delay)]
@@ -97,11 +97,11 @@ function ipc(
     X_test = X[test_idx, :]
 
     # Pre-compute Cholesky factorization — reused across all basis functions
-    rf = _ridge_factor(X_train; reg=reg)
+    rf = _ridge_factor(X_train; reg = reg)
 
-    results = Vector{NamedTuple{(:terms, :degree, :capacity),Tuple{Vector{Tuple{Int,Int}},Int,Float64}}}()
-    by_degree = Dict{Int,Float64}()
-    by_delay = Dict{Int,Float64}()
+    results = Vector{NamedTuple{(:terms, :degree, :capacity), Tuple{Vector{Tuple{Int, Int}}, Int, Float64}}}()
+    by_degree = Dict{Int, Float64}()
+    by_delay = Dict{Int, Float64}()
 
     # Pre-allocate target buffer — reused via fill!
     target = Vector{Float64}(undef, T_valid)
@@ -123,7 +123,7 @@ function ipc(
 
         cap = _squared_correlation(y_test, y_pred)
 
-        push!(results, (terms=terms, degree=total_degree, capacity=cap))
+        push!(results, (terms = terms, degree = total_degree, capacity = cap))
         by_degree[total_degree] = get(by_degree, total_degree, 0.0) + cap
 
         # Track per-delay capacity for single-variable terms
@@ -138,13 +138,13 @@ function ipc(
     nonlinear_cap = total_cap - linear_cap
 
     return (
-        total=total_cap,
-        linear=linear_cap,
-        nonlinear=nonlinear_cap,
-        by_degree=by_degree,
-        by_delay=by_delay,
-        basis_capacities=results,
-        theoretical_max=n_features,
+        total = total_cap,
+        linear = linear_cap,
+        nonlinear = nonlinear_cap,
+        by_degree = by_degree,
+        by_delay = by_delay,
+        basis_capacities = results,
+        theoretical_max = n_features,
     )
 end
 
@@ -186,9 +186,9 @@ Return a vector of basis function descriptors. Each descriptor is a
 `Vector{Tuple{Int,Int}}` of `(delay, degree)` pairs.
 """
 function _enumerate_basis_functions(
-    max_delay::Int, max_degree::Int, max_total_degree::Int, cross_terms::Bool
-)
-    basis = Vector{Vector{Tuple{Int,Int}}}()
+        max_delay::Int, max_degree::Int, max_total_degree::Int, cross_terms::Bool
+    )
+    basis = Vector{Vector{Tuple{Int, Int}}}()
 
     # Single-variable terms: P_d(u(t - τ))
     for delay in 1:max_delay
diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
index 36642df9a..135a56417 100644
--- a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
+++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
@@ -41,12 +41,12 @@ A `NamedTuple` with fields:
     GMD Report 152.
 """
 function memory_capacity(
-    input::AbstractVector,
-    states::AbstractMatrix;
-    max_delay::Int=30,
-    train_ratio::Real=0.8,
-    reg::Real=1.0,
-)
+        input::AbstractVector,
+        states::AbstractMatrix;
+        max_delay::Int = 30,
+        train_ratio::Real = 0.8,
+        reg::Real = 1.0,
+    )
     T = length(input)
     @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
     @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
@@ -63,7 +63,7 @@ function memory_capacity(
     X_test = X[test_idx, :]
 
     # Pre-compute Cholesky factorization — reused across all delays
-    rf = _ridge_factor(X_train; reg=reg)
+    rf = _ridge_factor(X_train; reg = reg)
 
     delay_capacities = zeros(max_delay)
 
@@ -80,5 +80,5 @@ function memory_capacity(
         delay_capacities[k] = _squared_correlation(y_test, y_pred)
     end
 
-    return (total=sum(delay_capacities), delays=delay_capacities)
+    return (total = sum(delay_capacities), delays = delay_capacities)
 end
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
index 4e71988f8..ad869e827 100644
--- a/lib/ReservoirComputingBenchmarks/src/narma.jl
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -5,10 +5,10 @@ Standard NARMA coefficients from the literature.
   - NARMA-10: Atiya & Parlos (2000), standard benchmark
   - NARMA-30: Schrauwen et al. (2008)
 """
-const NARMA_COEFFICIENTS = Dict{Int,NamedTuple{(:alpha, :beta, :gamma, :delta),NTuple{4,Float64}}}(
-    2 => (alpha=0.4, beta=0.4, gamma=0.6, delta=0.1),
-    10 => (alpha=0.3, beta=0.05, gamma=1.5, delta=0.1),
-    30 => (alpha=0.2, beta=0.04, gamma=1.5, delta=0.001),
+const NARMA_COEFFICIENTS = Dict{Int, NamedTuple{(:alpha, :beta, :gamma, :delta), NTuple{4, Float64}}}(
+    2 => (alpha = 0.4, beta = 0.4, gamma = 0.6, delta = 0.1),
+    10 => (alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1),
+    30 => (alpha = 0.2, beta = 0.04, gamma = 1.5, delta = 0.001),
 )
 
 @doc raw"""
@@ -55,14 +55,14 @@ y(t{+}1) = \alpha\, y(t) + \beta\, y(t) \sum_{i=0}^{N-1} y(t{-}i)
     plasticity." *Neurocomputing*, 71(7–9).
 """
 function generate_narma(
-    input::AbstractVector;
-    order::Int=10,
-    alpha::Union{Real,Nothing}=nothing,
-    beta::Union{Real,Nothing}=nothing,
-    gamma::Union{Real,Nothing}=nothing,
-    delta::Union{Real,Nothing}=nothing,
-    normalize::Bool=true,
-)
+        input::AbstractVector;
+        order::Int = 10,
+        alpha::Union{Real, Nothing} = nothing,
+        beta::Union{Real, Nothing} = nothing,
+        gamma::Union{Real, Nothing} = nothing,
+        delta::Union{Real, Nothing} = nothing,
+        normalize::Bool = true,
+    )
     @assert order >= 2 "NARMA order must be >= 2, got $order"
 
     # Resolve coefficients
@@ -75,7 +75,7 @@ function generate_narma(
     else
         @assert !isnothing(alpha) && !isnothing(beta) && !isnothing(gamma) && !isnothing(delta) (
             "No default coefficients for NARMA-$order. " *
-            "Provide alpha, beta, gamma, delta explicitly."
+                "Provide alpha, beta, gamma, delta explicitly."
         )
     end
 
@@ -89,25 +89,25 @@ function generate_narma(
         # NARMA-2: y[t] = α y[t-1] + β y[t-1] y[t-2] + γ u[t-1]³ + δ
         for t in 3:T
             y[t] = alpha * y[t - 1] +
-                   beta * y[t - 1] * y[t - 2] +
-                   gamma * u[t - 1]^3 +
-                   delta
+                beta * y[t - 1] * y[t - 2] +
+                gamma * u[t - 1]^3 +
+                delta
         end
     else
         # NARMA-N (N >= 3): standard recurrence
         # y[t] = α y[t-1] + β y[t-1] Σ y[(t-N):(t-1)] + γ u[t-N] u[t-1] + δ
         for t in (order + 1):T
             y[t] = alpha * y[t - 1] +
-                   beta * y[t - 1] * sum(@view y[(t - order):(t - 1)]) +
-                   gamma * u[t - order] * u[t - 1] +
-                   delta
+                beta * y[t - 1] * sum(@view y[(t - order):(t - 1)]) +
+                gamma * u[t - order] * u[t - 1] +
+                delta
         end
     end
 
     if any(!isfinite, y)
         idx = findfirst(!isfinite, y)
         @warn "NARMA-$order target diverged at time step $idx (value: $(y[idx])). " *
-              "Consider using normalize=true or adjusting coefficients."
+            "Consider using normalize=true or adjusting coefficients."
     end
 
     return y
@@ -146,19 +146,19 @@ A `NamedTuple` with fields:
   - `target::Vector{Float64}`: the full NARMA target signal.
 """
 function narma(
-    input::AbstractVector,
-    states::AbstractMatrix;
-    order::Int=10,
-    metric::Symbol=:nmse,
-    train_ratio::Real=0.8,
-    reg::Real=1.0,
-    washout::Union{Int,Nothing}=nothing,
-    kwargs...,
-)
+        input::AbstractVector,
+        states::AbstractMatrix;
+        order::Int = 10,
+        metric::Symbol = :nmse,
+        train_ratio::Real = 0.8,
+        reg::Real = 1.0,
+        washout::Union{Int, Nothing} = nothing,
+        kwargs...,
+    )
     T = length(input)
     @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
 
-    target = generate_narma(input; order=order, kwargs...)
+    target = generate_narma(input; order = order, kwargs...)
 
     # Skip initial transient where y ≈ 0
     wo = something(washout, order)
@@ -171,7 +171,7 @@ function narma(
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
 
-    w = _ridge_regression(X[train_idx, :], y[train_idx]; reg=reg)
+    w = _ridge_regression(X[train_idx, :], y[train_idx]; reg = reg)
     y_pred = X[test_idx, :] * w
     y_test = y[test_idx]
 
@@ -186,5 +186,5 @@ function narma(
         throw(ArgumentError("Unknown metric :$metric. Use :nmse, :rnmse, or :mse."))
     end
 
-    return (score=score, metric=metric, target=target)
+    return (score = score, metric = metric, target = target)
 end
diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index c6ab2e1c8..a5f2aade0 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -5,8 +5,8 @@ Pre-computed Cholesky factorization of the Gram matrix `X'X + λI` for
 efficient multi-RHS ridge regression. Construct once, solve many times
 with [`_ridge_solve`](@ref).
 """
-struct _RidgeFactor{T<:Real}
-    factor::LinearAlgebra.Cholesky{T,Matrix{T}}
+struct _RidgeFactor{T <: Real}
+    factor::LinearAlgebra.Cholesky{T, Matrix{T}}
     Xty_buf::Vector{T}   # scratch buffer for X'y products
 end
 
@@ -20,7 +20,7 @@ Pre-compute the Cholesky factorization of `X'X + reg * I`.
 
 Returns a `_RidgeFactor` that can be reused across many targets.
 """
-function _ridge_factor(X::AbstractMatrix; reg::Real=1.0)
+function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0)
     @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
     n = size(X, 2)
     G = Symmetric(X' * X)
@@ -53,9 +53,9 @@ Gram matrix `X'X + λI`.
 
 Returns weight vector `w` of length `n_features`.
 """
-function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real=1.0)
+function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real = 1.0)
     @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
-    rf = _ridge_factor(X; reg=reg)
+    rf = _ridge_factor(X; reg = reg)
     return _ridge_solve(rf, X, y)
 end
 
@@ -69,7 +69,7 @@ function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector)
     r = cor(y_true, y_pred)
     if isnan(r)
         @warn "Squared correlation is NaN (zero-variance input). " *
-              "This may indicate a degenerate reservoir or collapsed prediction."
+            "This may indicate a degenerate reservoir or collapsed prediction."
         return 0.0
     end
     return clamp(r^2, 0.0, 1.0)
@@ -84,7 +84,7 @@ function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
     v = var(y_true)
     if v < eps(eltype(y_true))
         @warn "NMSE: target variance is near-zero ($v). " *
-              "NMSE is undefined for constant targets."
+            "NMSE is undefined for constant targets."
         return NaN
     end
     return mean((y_true .- y_pred) .^ 2) / v
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
index 375001100..3b473769f 100644
--- a/lib/ReservoirComputingBenchmarks/test/runtests.jl
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -20,7 +20,7 @@ using Random
             end
         end
 
-        result = memory_capacity(input, states; max_delay=20, reg=1e-6)
+        result = memory_capacity(input, states; max_delay = 20, reg = 1.0e-6)
 
         @test haskey(result, :total)
         @test haskey(result, :delays)
@@ -39,20 +39,20 @@ using Random
         input = rand(rng, T) .* 0.5  # Uniform[0, 0.5]
 
         # NARMA-2
-        y2 = generate_narma(input; order=2, normalize=false)
+        y2 = generate_narma(input; order = 2, normalize = false)
         @test length(y2) == T
         @test y2[1] == 0.0
         @test y2[2] == 0.0
         @test y2[3] != 0.0  # First non-trivial value
 
         # NARMA-10
-        y10 = generate_narma(input; order=10, normalize=false)
+        y10 = generate_narma(input; order = 10, normalize = false)
         @test length(y10) == T
         @test all(y10[1:10] .== 0.0)
         @test y10[11] != 0.0
 
         # NARMA-30
-        y30 = generate_narma(input; order=30, normalize=false)
+        y30 = generate_narma(input; order = 30, normalize = false)
         @test length(y30) == T
         @test all(y30[1:30] .== 0.0)
         @test y30[31] != 0.0
@@ -62,12 +62,12 @@ using Random
         @test all(isfinite.(y30))
 
         # Custom coefficients for non-standard order
-        @test_throws AssertionError generate_narma(input; order=5)
-        y5 = generate_narma(input; order=5, alpha=0.3, beta=0.05, gamma=1.5, delta=0.1, normalize=false)
+        @test_throws AssertionError generate_narma(input; order = 5)
+        y5 = generate_narma(input; order = 5, alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1, normalize = false)
         @test length(y5) == T
 
         # Order must be >= 2
-        @test_throws AssertionError generate_narma(input; order=1)
+        @test_throws AssertionError generate_narma(input; order = 1)
     end
 
     @testset "NARMA Evaluation" begin
@@ -78,7 +78,7 @@ using Random
         # Use random states (won't be great, but should run)
         states = randn(rng, n_features, T)
 
-        result = narma(input, states; order=10, metric=:nmse, reg=1.0)
+        result = narma(input, states; order = 10, metric = :nmse, reg = 1.0)
 
         @test haskey(result, :score)
         @test haskey(result, :metric)
@@ -89,16 +89,16 @@ using Random
         @test length(result.target) == T
 
         # Test other metrics
-        result_rnmse = narma(input, states; order=10, metric=:rnmse, reg=1.0)
+        result_rnmse = narma(input, states; order = 10, metric = :rnmse, reg = 1.0)
         @test result_rnmse.metric === :rnmse
         @test isfinite(result_rnmse.score)
 
-        result_mse = narma(input, states; order=10, metric=:mse, reg=1.0)
+        result_mse = narma(input, states; order = 10, metric = :mse, reg = 1.0)
         @test result_mse.metric === :mse
         @test isfinite(result_mse.score)
 
         # Invalid metric
-        @test_throws ArgumentError narma(input, states; order=10, metric=:invalid)
+        @test_throws ArgumentError narma(input, states; order = 10, metric = :invalid)
     end
 
     @testset "IPC" begin
@@ -115,7 +115,7 @@ using Random
             end
         end
 
-        result = ipc(input, states; max_delay=5, max_degree=2, reg=1e-4)
+        result = ipc(input, states; max_delay = 5, max_degree = 2, reg = 1.0e-4)
 
         @test haskey(result, :total)
         @test haskey(result, :linear)
@@ -129,7 +129,7 @@ using Random
         @test result.linear >= 0
         @test result.nonlinear >= 0
         @test result.theoretical_max == n_features
-        @test abs(result.total - result.linear - result.nonlinear) < 1e-10
+        @test abs(result.total - result.linear - result.nonlinear) < 1.0e-10
 
         # Degree-1 capacity should be present
         @test haskey(result.by_degree, 1)
@@ -138,7 +138,7 @@ using Random
         @test result.total <= n_features + 1.0
 
         # Without cross terms
-        result_no_cross = ipc(input, states; max_delay=5, max_degree=2, cross_terms=false, reg=1e-4)
+        result_no_cross = ipc(input, states; max_delay = 5, max_degree = 2, cross_terms = false, reg = 1.0e-4)
         @test length(result_no_cross.basis_capacities) < length(result.basis_capacities)
     end
 
@@ -166,13 +166,13 @@ using Random
         # Orthonormality: ∫₋₁¹ P̃_n(x) P̃_m(x) dx ≈ δ_{nm}
         nleg = ReservoirComputingBenchmarks._normalized_legendre
         N_quad = 10000
-        x_quad = range(-1, 1; length=N_quad)
+        x_quad = range(-1, 1; length = N_quad)
         dx = 2.0 / (N_quad - 1)
 
         for n in 0:3
             vals_n = nleg.(n, x_quad)
             integral = sum(vals_n .^ 2) * dx
-            @test isapprox(integral, 1.0; atol=0.01)
+            @test isapprox(integral, 1.0; atol = 0.01)
         end
     end
 
@@ -204,7 +204,7 @@ using Random
 
         # _ridge_regression: negative reg
         @test_throws AssertionError ReservoirComputingBenchmarks._ridge_regression(
-            randn(50, 5), randn(50); reg=-1.0
+            randn(50, 5), randn(50); reg = -1.0
         )
     end
 
@@ -213,32 +213,32 @@ using Random
         states = randn(rng, 10, 500)
 
         # max_delay must be >= 1
-        @test_throws AssertionError memory_capacity(input, states; max_delay=0)
+        @test_throws AssertionError memory_capacity(input, states; max_delay = 0)
 
         # max_delay must be < T
-        @test_throws AssertionError memory_capacity(input, states; max_delay=500)
+        @test_throws AssertionError memory_capacity(input, states; max_delay = 500)
 
         # Mismatched dimensions
         @test_throws AssertionError memory_capacity(rand(100), randn(10, 200))
 
         # NARMA order < 2
-        @test_throws AssertionError generate_narma(rand(100); order=1)
+        @test_throws AssertionError generate_narma(rand(100); order = 1)
 
         # Missing NARMA coefficients for non-standard order
-        @test_throws AssertionError generate_narma(rand(100); order=7)
+        @test_throws AssertionError generate_narma(rand(100); order = 7)
 
         # Input shorter than order
-        @test_throws AssertionError generate_narma(rand(5); order=10)
+        @test_throws AssertionError generate_narma(rand(5); order = 10)
 
         # Invalid NARMA metric
-        @test_throws ArgumentError narma(input, states; order=10, metric=:bad)
+        @test_throws ArgumentError narma(input, states; order = 10, metric = :bad)
 
         # Washout out of range
-        @test_throws AssertionError narma(input, states; order=10, washout=-1)
-        @test_throws AssertionError narma(input, states; order=10, washout=500)
+        @test_throws AssertionError narma(input, states; order = 10, washout = -1)
+        @test_throws AssertionError narma(input, states; order = 10, washout = 500)
 
         # IPC: max_degree/max_delay must be >= 1
-        @test_throws AssertionError ipc(input, states; max_delay=0, max_degree=2)
-        @test_throws AssertionError ipc(input, states; max_delay=5, max_degree=0)
+        @test_throws AssertionError ipc(input, states; max_delay = 0, max_degree = 2)
+        @test_throws AssertionError ipc(input, states; max_delay = 5, max_degree = 0)
     end
 end

From 4db27921e60bf8dc22aeed6457b7a534f8c7bb41 Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Sat, 28 Mar 2026 12:29:04 +0530
Subject: [PATCH 3/9] Address PR review feedback

- Fix type promotion in _ridge_factor: promote eltype(X) and typeof(reg)
  to a common type for Cholesky factor and scratch buffer
- Catch PosDefException in _ridge_factor with clear error message when
  reg is too small for rank-deficient data
- Fix eps check in _nmse to use typeof(v) instead of eltype(y_true)
  so it works for non-float input vectors
- Fix chained broadcast comparisons in tests that silently skipped
  upper bound checks
---
 lib/ReservoirComputingBenchmarks/src/utils.jl | 28 +++++++++++++++----
 .../test/runtests.jl                          |  4 +--
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index a5f2aade0..f1d370b68 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -22,11 +22,29 @@ Returns a `_RidgeFactor` that can be reused across many targets.
 """
 function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0)
     @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
+    T = promote_type(eltype(X), typeof(reg))
     n = size(X, 2)
-    G = Symmetric(X' * X)
-    G_reg = Matrix(G) + reg * Matrix{eltype(X)}(I, n, n)
-    F = cholesky(Symmetric(G_reg))
-    buf = Vector{eltype(X)}(undef, n)
+    X_T = Matrix{T}(X)
+    G_reg = Matrix{T}(Symmetric(X_T' * X_T))
+    reg_T = convert(T, reg)
+    @inbounds for i in 1:n
+        G_reg[i, i] += reg_T
+    end
+    F = try
+        cholesky(Symmetric(G_reg))
+    catch e
+        if e isa LinearAlgebra.PosDefException
+            throw(
+                ArgumentError(
+                    "Cholesky factorization failed: X'X + reg*I is not positive definite. " *
+                        "This can happen when reg is zero or too small for rank-deficient data. " *
+                        "Increase reg or provide full-rank features.",
+                ),
+            )
+        end
+        rethrow(e)
+    end
+    buf = Vector{T}(undef, n)
     return _RidgeFactor(F, buf)
 end
 
@@ -82,7 +100,7 @@ Normalized Mean Squared Error: `mean((y_true - y_pred).²) / var(y_true)`.
 """
 function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
     v = var(y_true)
-    if v < eps(eltype(y_true))
+    if v < eps(typeof(v))
         @warn "NMSE: target variance is near-zero ($v). " *
             "NMSE is undefined for constant targets."
         return NaN
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
index 3b473769f..f1e38c90a 100644
--- a/lib/ReservoirComputingBenchmarks/test/runtests.jl
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -25,7 +25,7 @@ using Random
         @test haskey(result, :total)
         @test haskey(result, :delays)
         @test length(result.delays) == 20
-        @test all(0 .<= result.delays .<= 1.001)
+        @test all(d -> 0 <= d <= 1.001, result.delays)
         @test result.total >= 0
         # A shift-register reservoir should have near-perfect MC for
         # delays up to n_features
@@ -182,7 +182,7 @@ using Random
         xn = ReservoirComputingBenchmarks._normalize(x, 0.0, 0.5)
         @test xn[1] ≈ 0.0
         @test xn[end] ≈ 0.5
-        @test all(0 .<= xn .<= 0.5)
+        @test all(v -> 0 <= v <= 0.5, xn)
 
         # Constant input
         xc = ReservoirComputingBenchmarks._normalize([3.0, 3.0, 3.0], 0.0, 0.5)

From f58437f865fccecd11e3f84e273468799c00b764 Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Wed, 8 Apr 2026 00:10:42 +0530
Subject: [PATCH 4/9] Make narma() metric parameter accept any callable and
 optimize internals

- Change narma() metric from Symbol to callable with (y_true, y_pred) -> score
- Export nmse, rnmse, mse as public convenience functions
- Remove metric field from narma() return value
- Replace NARMA_COEFFICIENTS Dict with compile-time resolved NamedTuples
- Use mul!/copytri!/cholesky! to eliminate redundant matrix allocations in _ridge_factor
- Add _ridge_solve! with pre-allocated w_buf for zero-alloc hot loop solves
- Use copyto! + views instead of collect/array slicing in MC and IPC
- Replace Dict with Matrix for IPC poly_cache (array indexing vs hash lookup)
- Pre-allocate y_pred buffer in IPC to eliminate per-basis allocations
- Fix rnmse to explicitly handle NaN from zero-variance input
---
 .../src/ReservoirComputingBenchmarks.jl       |  5 +-
 lib/ReservoirComputingBenchmarks/src/ipc.jl   | 52 +++++------
 .../src/memory_capacity.jl                    | 19 ++--
 lib/ReservoirComputingBenchmarks/src/narma.jl | 78 +++++++++-------
 lib/ReservoirComputingBenchmarks/src/utils.jl | 91 ++++++++++---------
 .../test/runtests.jl                          | 20 ++--
 6 files changed, 143 insertions(+), 122 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
index 4025850d6..e43d03f01 100644
--- a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
@@ -1,9 +1,11 @@
 module ReservoirComputingBenchmarks
 
 import LinearAlgebra
-using LinearAlgebra: I, cholesky, Symmetric, mul!
+using LinearAlgebra: I, cholesky, cholesky!, Symmetric, mul!, ldiv!, copytri!
 using Statistics: mean, var, cor
 
+const MetricFunction = Function
+
 include("utils.jl")
 include("memory_capacity.jl")
 include("narma.jl")
@@ -12,5 +14,6 @@ include("ipc.jl")
 export memory_capacity
 export generate_narma, narma
 export ipc
+export nmse, rnmse, mse
 
 end # module
diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl
index bf210bc4e..e06939e3f 100644
--- a/lib/ReservoirComputingBenchmarks/src/ipc.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl
@@ -13,7 +13,7 @@ z_l(t) = \prod_i \tilde{P}_{d_i}\!\bigl(u(t - \tau_i)\bigr)
 
 where ``\tilde{P}_n(x) = P_n(x)\sqrt{(2n+1)/2}`` is the normalized Legendre
 polynomial of degree ``n``.  For each basis function, a ridge regression readout
-is trained, and the capacity is the squared correlation between the target and
+is trained, and the capacity is the squared correlation between target and
 prediction.
 
 The total capacity satisfies ``C_{\text{total}} \leq N`` where ``N`` is the
@@ -76,11 +76,11 @@ function ipc(
     mtd = something(max_total_degree, max_degree)
 
     # Precompute normalized Legendre polynomial values for all (degree, delay)
-    poly_cache = Dict{Tuple{Int, Int}, Vector{Float64}}()
+    poly_cache = Matrix{Vector{Float64}}(undef, max_delay, max_degree)
     for delay in 1:max_delay
         for degree in 1:max_degree
             delayed = @view input[(max_delay + 1 - delay):(T - delay)]
-            poly_cache[(delay, degree)] = _normalized_legendre.(degree, delayed)
+            poly_cache[delay, degree] = map(x -> _normalized_legendre(degree, x), delayed)
         end
     end
 
@@ -90,43 +90,43 @@ function ipc(
     # Valid time range (after max_delay to avoid edge effects)
     valid = (max_delay + 1):T
     T_valid = length(valid)
-    X = collect(states[:, valid]')  # (T_valid, n_features)
+    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    copyto!(X, view(states, :, valid)')
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
-    X_train = X[train_idx, :]
-    X_test = X[test_idx, :]
+    X_train = view(X, train_idx, :)
+    X_test = view(X, test_idx, :)
+    n_test = length(test_idx)
 
-    # Pre-compute Cholesky factorization — reused across all basis functions
     rf = _ridge_factor(X_train; reg = reg)
 
     results = Vector{NamedTuple{(:terms, :degree, :capacity), Tuple{Vector{Tuple{Int, Int}}, Int, Float64}}}()
+    sizehint!(results, length(basis_functions))
     by_degree = Dict{Int, Float64}()
     by_delay = Dict{Int, Float64}()
 
-    # Pre-allocate target buffer — reused via fill!
     target = Vector{Float64}(undef, T_valid)
+    y_pred = Vector{Float64}(undef, n_test)
 
-    for terms in basis_functions
+    @inbounds for terms in basis_functions
         total_degree = sum(d for (_, d) in terms)
 
-        # Compute target: product of normalized Legendre polynomials
         fill!(target, 1.0)
         for (delay, degree) in terms
-            target .*= poly_cache[(delay, degree)]
+            target .*= poly_cache[delay, degree]
         end
 
-        y_train = target[train_idx]
-        y_test = target[test_idx]
+        y_train = view(target, train_idx)
+        y_test = view(target, test_idx)
 
-        w = _ridge_solve(rf, X_train, y_train)
-        y_pred = X_test * w
+        w = _ridge_solve!(rf, X_train, y_train)
+        mul!(y_pred, X_test, w)
 
         cap = _squared_correlation(y_test, y_pred)
 
         push!(results, (terms = terms, degree = total_degree, capacity = cap))
         by_degree[total_degree] = get(by_degree, total_degree, 0.0) + cap
 
-        # Track per-delay capacity for single-variable terms
         if length(terms) == 1
             delay = terms[1][1]
             by_delay[delay] = get(by_delay, delay, 0.0) + cap
@@ -153,14 +153,14 @@ end
 """
     _legendre(n, x)
 
-Evaluate the Legendre polynomial ``P_n(x)`` using the three-term recurrence.
+Evaluate Legendre polynomial ``P_n(x)`` using three-term recurrence.
 """
-function _legendre(n::Int, x::Real)
+@inline function _legendre(n::Int, x::Real)
     @assert n >= 0 "Legendre polynomial degree must be non-negative, got $n"
     n == 0 && return one(float(x))
     n == 1 && return float(x)
     p_prev, p_curr = one(float(x)), float(x)
-    for k in 1:(n - 1)
+    @inbounds for k in 1:(n - 1)
         p_next = ((2k + 1) * x * p_curr - k * p_prev) / (k + 1)
         p_prev, p_curr = p_curr, p_next
     end
@@ -173,7 +173,7 @@ end
 Evaluate the orthonormalized Legendre polynomial
 ``\\tilde{P}_n(x) = P_n(x) \\sqrt{(2n+1)/2}`` on ``[-1,1]``.
 """
-function _normalized_legendre(n::Int, x::Real)
+@inline function _normalized_legendre(n::Int, x::Real)
     return _legendre(n, x) * sqrt((2n + 1) / 2)
 end
 
@@ -191,18 +191,18 @@ function _enumerate_basis_functions(
     basis = Vector{Vector{Tuple{Int, Int}}}()
 
     # Single-variable terms: P_d(u(t - τ))
-    for delay in 1:max_delay
-        for degree in 1:max_degree
+    @inbounds for delay in 1:max_delay
+        @inbounds for degree in 1:max_degree
             push!(basis, [(delay, degree)])
         end
     end
 
     # Two-variable cross-terms: P_{d1}(u(t-τ1)) * P_{d2}(u(t-τ2))
     if cross_terms
-        for d1 in 1:max_delay
-            for d2 in (d1 + 1):max_delay
-                for deg1 in 1:max_degree
-                    for deg2 in 1:max_degree
+        @inbounds for d1 in 1:max_delay
+            @inbounds for d2 in (d1 + 1):max_delay
+                @inbounds for deg1 in 1:max_degree
+                    @inbounds for deg2 in 1:max_degree
                         deg1 + deg2 <= max_total_degree || continue
                         push!(basis, [(d1, deg1), (d2, deg2)])
                     end
diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
index 135a56417..bf7c6bdb9 100644
--- a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
+++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
@@ -55,26 +55,25 @@ function memory_capacity(
     # Discard first max_delay steps to avoid edge effects from delays
     valid = (max_delay + 1):T
     T_valid = length(valid)
-    X = collect(states[:, valid]')  # (T_valid, n_features)
+    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    copyto!(X, view(states, :, valid)')
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
 
-    X_train = X[train_idx, :]
-    X_test = X[test_idx, :]
+    X_train = view(X, train_idx, :)
+    X_test = view(X, test_idx, :)
 
-    # Pre-compute Cholesky factorization — reused across all delays
     rf = _ridge_factor(X_train; reg = reg)
 
     delay_capacities = zeros(max_delay)
 
-    for k in 1:max_delay
-        # Target: input shifted back by k steps
-        target = input[valid .- k]
+    @inbounds for k in 1:max_delay
+        target = view(input, valid .- k)
 
-        y_train = target[train_idx]
-        y_test = target[test_idx]
+        y_train = view(target, train_idx)
+        y_test = view(target, test_idx)
 
-        w = _ridge_solve(rf, X_train, y_train)
+        w = _ridge_solve!(rf, X_train, y_train)
         y_pred = X_test * w
 
         delay_capacities[k] = _squared_correlation(y_test, y_pred)
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
index ad869e827..b2bc871e2 100644
--- a/lib/ReservoirComputingBenchmarks/src/narma.jl
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -5,11 +5,17 @@ Standard NARMA coefficients from the literature.
   - NARMA-10: Atiya & Parlos (2000), standard benchmark
   - NARMA-30: Schrauwen et al. (2008)
 """
-const NARMA_COEFFICIENTS = Dict{Int, NamedTuple{(:alpha, :beta, :gamma, :delta), NTuple{4, Float64}}}(
-    2 => (alpha = 0.4, beta = 0.4, gamma = 0.6, delta = 0.1),
-    10 => (alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1),
-    30 => (alpha = 0.2, beta = 0.04, gamma = 1.5, delta = 0.001),
-)
+const _NARMA_2 = (alpha = 0.4, beta = 0.4, gamma = 0.6, delta = 0.1)
+const _NARMA_10 = (alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1)
+const _NARMA_30 = (alpha = 0.2, beta = 0.04, gamma = 1.5, delta = 0.001)
+const _NARMA_PRESETS = (2, 10, 30)
+
+@inline function _narma_coeffs(order::Int)
+    order == 2 && return _NARMA_2
+    order == 10 && return _NARMA_10
+    order == 30 && return _NARMA_30
+    return nothing
+end
 
 @doc raw"""
     generate_narma(input; order=10, alpha=nothing, beta=nothing, gamma=nothing, delta=nothing, normalize=true)
@@ -66,8 +72,8 @@ function generate_narma(
     @assert order >= 2 "NARMA order must be >= 2, got $order"
 
     # Resolve coefficients
-    if haskey(NARMA_COEFFICIENTS, order)
-        c = NARMA_COEFFICIENTS[order]
+    c = _narma_coeffs(order)
+    if c !== nothing
         alpha = something(alpha, c.alpha)
         beta = something(beta, c.beta)
         gamma = something(gamma, c.gamma)
@@ -82,12 +88,12 @@ function generate_narma(
     T = length(input)
     @assert T > order "Input length ($T) must be greater than order ($order)"
 
-    u = normalize ? _normalize(input, 0.0, 0.5) : Float64.(input)
+    u = normalize ? _normalize(input, 0.0, 0.5) : convert(Vector{Float64}, input)
     y = zeros(Float64, T)
 
     if order == 2
         # NARMA-2: y[t] = α y[t-1] + β y[t-1] y[t-2] + γ u[t-1]³ + δ
-        for t in 3:T
+        @inbounds for t in 3:T
             y[t] = alpha * y[t - 1] +
                 beta * y[t - 1] * y[t - 2] +
                 gamma * u[t - 1]^3 +
@@ -96,7 +102,7 @@ function generate_narma(
     else
         # NARMA-N (N >= 3): standard recurrence
         # y[t] = α y[t-1] + β y[t-1] Σ y[(t-N):(t-1)] + γ u[t-N] u[t-1] + δ
-        for t in (order + 1):T
+        @inbounds for t in (order + 1):T
             y[t] = alpha * y[t - 1] +
                 beta * y[t - 1] * sum(@view y[(t - order):(t - 1)]) +
                 gamma * u[t - order] * u[t - 1] +
@@ -114,7 +120,7 @@ function generate_narma(
 end
 
 @doc raw"""
-    narma(input, states; order=10, metric=:nmse, train_ratio=0.8, reg=1.0, washout=nothing, kwargs...)
+    narma(input, states; order=10, metric=nmse, train_ratio=0.8, reg=1.0, washout=nothing, kwargs...)
 
 Evaluate reservoir performance on the NARMA-N task.
 
@@ -129,8 +135,9 @@ from `states` to the target, and computes the error metric on held-out data.
 ## Keyword Arguments
 
   - `order::Int=10`: NARMA system order.
-  - `metric::Symbol=:nmse`: error metric. One of `:nmse` (Normalized MSE),
-    `:rnmse` (Root NMSE), or `:mse`.
+  - `metric=nmse`: error metric function with signature `(y_true, y_pred) -> score`.
+    Built-in options: [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref). Can be any
+    `MetricFunction` (callable) with the same signature.
   - `train_ratio::Real=0.8`: fraction of data used for training.
   - `reg::Real=1.0`: ridge regression regularization coefficient.
   - `washout::Union{Int,Nothing}=nothing`: number of initial time steps to
@@ -142,14 +149,29 @@ from `states` to the target, and computes the error metric on held-out data.
 A `NamedTuple` with fields:
 
   - `score::Float64`: the computed error metric.
-  - `metric::Symbol`: which metric was used.
   - `target::Vector{Float64}`: the full NARMA target signal.
+
+## Examples
+
+```julia
+using ReservoirComputingBenchmarks
+
+input = rand(1000)
+states = randn(50, 1000)
+
+# Using built-in metrics
+result = narma(input, states; order=10, metric=nmse)
+
+# Using custom metric
+custom_metric(y_true, y_pred) = mean(abs.(y_true .- y_pred))
+result = narma(input, states; order=10, metric=custom_metric)
+```
 """
 function narma(
         input::AbstractVector,
         states::AbstractMatrix;
         order::Int = 10,
-        metric::Symbol = :nmse,
+        metric::MetricFunction = nmse,
         train_ratio::Real = 0.8,
         reg::Real = 1.0,
         washout::Union{Int, Nothing} = nothing,
@@ -166,25 +188,17 @@ function narma(
     valid = (wo + 1):T
     T_valid = length(valid)
 
-    X = collect(states[:, valid]')  # (T_valid, n_features)
-    y = target[valid]
+    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    copyto!(X, view(states, :, valid)')
+    y = view(target, valid)
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
 
-    w = _ridge_regression(X[train_idx, :], y[train_idx]; reg = reg)
-    y_pred = X[test_idx, :] * w
-    y_test = y[test_idx]
-
-    score = if metric === :nmse
-        _nmse(y_test, y_pred)
-    elseif metric === :rnmse
-        nmse_val = _nmse(y_test, y_pred)
-        nmse_val < 0 ? 0.0 : sqrt(nmse_val)
-    elseif metric === :mse
-        mean((y_test .- y_pred) .^ 2)
-    else
-        throw(ArgumentError("Unknown metric :$metric. Use :nmse, :rnmse, or :mse."))
-    end
+    w = _ridge_regression(view(X, train_idx, :), view(y, train_idx); reg = reg)
+    y_pred = view(X, test_idx, :) * w
+    y_test = view(y, test_idx)
+
+    score = metric(y_test, y_pred)
 
-    return (score = score, metric = metric, target = target)
+    return (score = score, target = target)
 end
diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index f1d370b68..a63b53164 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -7,31 +7,23 @@ with [`_ridge_solve`](@ref).
 """
 struct _RidgeFactor{T <: Real}
     factor::LinearAlgebra.Cholesky{T, Matrix{T}}
-    Xty_buf::Vector{T}   # scratch buffer for X'y products
+    Xty_buf::Vector{T}
+    w_buf::Vector{T}
 end
 
-"""
-    _ridge_factor(X; reg=1.0)
-
-Pre-compute the Cholesky factorization of `X'X + reg * I`.
-
-- `X`: (T, n_features) design matrix
-- `reg`: regularization coefficient (λ ≥ 0)
-
-Returns a `_RidgeFactor` that can be reused across many targets.
-"""
 function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0)
     @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
     T = promote_type(eltype(X), typeof(reg))
     n = size(X, 2)
-    X_T = Matrix{T}(X)
-    G_reg = Matrix{T}(Symmetric(X_T' * X_T))
     reg_T = convert(T, reg)
+    G_reg = Matrix{T}(undef, n, n)
+    X_col = convert(Matrix{T}, X)
+    LinearAlgebra.copytri!(mul!(G_reg, X_col', X_col), 'U')
     @inbounds for i in 1:n
         G_reg[i, i] += reg_T
     end
     F = try
-        cholesky(Symmetric(G_reg))
+        cholesky!(Symmetric(G_reg))
     catch e
         if e isa LinearAlgebra.PosDefException
             throw(
@@ -44,16 +36,17 @@ function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0)
         end
         rethrow(e)
     end
-    buf = Vector{T}(undef, n)
-    return _RidgeFactor(F, buf)
+    Xty_buf = Vector{T}(undef, n)
+    w_buf = Vector{T}(undef, n)
+    return _RidgeFactor(F, Xty_buf, w_buf)
 end
 
-"""
-    _ridge_solve(rf, X, y)
+function _ridge_solve!(rf::_RidgeFactor, X::AbstractMatrix, y::AbstractVector)
+    mul!(rf.Xty_buf, X', y)
+    ldiv!(rf.w_buf, rf.factor, rf.Xty_buf)
+    return rf.w_buf
+end
 
-Solve ridge regression using the pre-computed `_RidgeFactor`.
-Returns weight vector `w` such that `X * w ≈ y`.
-"""
 function _ridge_solve(rf::_RidgeFactor, X::AbstractMatrix, y::AbstractVector)
     mul!(rf.Xty_buf, X', y)
     return rf.factor \ rf.Xty_buf
@@ -77,13 +70,7 @@ function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real = 1.0
     return _ridge_solve(rf, X, y)
 end
 
-"""
-    _squared_correlation(y_true, y_pred)
-
-Squared Pearson correlation coefficient between `y_true` and `y_pred`.
-Returns 0.0 with a warning if either vector has zero variance.
-"""
-function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector)
+@inline function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector)
     r = cor(y_true, y_pred)
     if isnan(r)
         @warn "Squared correlation is NaN (zero-variance input). " *
@@ -93,12 +80,7 @@ function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector)
     return clamp(r^2, 0.0, 1.0)
 end
 
-"""
-    _nmse(y_true, y_pred)
-
-Normalized Mean Squared Error: `mean((y_true - y_pred).²) / var(y_true)`.
-"""
-function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
+@inline function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
     v = var(y_true)
     if v < eps(typeof(v))
         @warn "NMSE: target variance is near-zero ($v). " *
@@ -121,14 +103,41 @@ function _train_test_split(n::Int, train_ratio::Real)
     return 1:split, (split + 1):n
 end
 
-"""
-    _normalize(x, lo, hi)
-
-Min-max normalize vector `x` to the interval `[lo, hi]`.
-Returns a constant vector of `lo` if `x` is constant.
-"""
-function _normalize(x::AbstractVector, lo::Real, hi::Real)
+@inline function _normalize(x::AbstractVector, lo::Real, hi::Real)
     xmin, xmax = extrema(x)
     xmin == xmax && return fill(convert(eltype(x), lo), length(x))
     return @. (x - xmin) / (xmax - xmin) * (hi - lo) + lo
 end
+
+@doc raw"""
+    nmse(y_true, y_pred)
+
+Normalized Mean Squared Error: ``\\text{mean}((y_\\text{true} - y_\\text{pred})^2) / \\text{var}(y_\\text{true})``.
+
+Returns `NaN` with a warning if `y_true` has zero variance.
+"""
+@inline function nmse(y_true::AbstractVector, y_pred::AbstractVector)
+    _nmse(y_true, y_pred)
+end
+
+@doc raw"""
+    rnmse(y_true, y_pred)
+
+Root Normalized Mean Squared Error: ``\\sqrt{\\text{nmse}(y_\\text{true}, y_\\text{pred})}``.
+
+Returns `0.0` if NMSE is negative (due to numerical issues).
+"""
+@inline function rnmse(y_true::AbstractVector, y_pred::AbstractVector)
+    nmse_val = nmse(y_true, y_pred)
+    isnan(nmse_val) && return NaN
+    nmse_val < 0 ? 0.0 : sqrt(nmse_val)
+end
+
+@doc raw"""
+    mse(y_true, y_pred)
+
+Mean Squared Error: ``\\text{mean}((y_\\text{true} - y_\\text{pred})^2)``.
+"""
+@inline function mse(y_true::AbstractVector, y_pred::AbstractVector)
+    return mean((y_true .- y_pred) .^ 2)
+end
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
index f1e38c90a..89fb2a5ff 100644
--- a/lib/ReservoirComputingBenchmarks/test/runtests.jl
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -1,6 +1,7 @@
 using ReservoirComputingBenchmarks
 using Test
 using Random
+using Statistics
 
 @testset "ReservoirComputingBenchmarks" begin
     rng = Random.MersenneTwister(42)
@@ -78,27 +79,25 @@ using Random
         # Use random states (won't be great, but should run)
         states = randn(rng, n_features, T)
 
-        result = narma(input, states; order = 10, metric = :nmse, reg = 1.0)
+        result = narma(input, states; order = 10, metric = nmse, reg = 1.0)
 
         @test haskey(result, :score)
-        @test haskey(result, :metric)
         @test haskey(result, :target)
-        @test result.metric === :nmse
         @test isfinite(result.score)
         @test result.score >= 0
         @test length(result.target) == T
 
         # Test other metrics
-        result_rnmse = narma(input, states; order = 10, metric = :rnmse, reg = 1.0)
-        @test result_rnmse.metric === :rnmse
+        result_rnmse = narma(input, states; order = 10, metric = rnmse, reg = 1.0)
         @test isfinite(result_rnmse.score)
 
-        result_mse = narma(input, states; order = 10, metric = :mse, reg = 1.0)
-        @test result_mse.metric === :mse
+        result_mse = narma(input, states; order = 10, metric = mse, reg = 1.0)
         @test isfinite(result_mse.score)
 
-        # Invalid metric
-        @test_throws ArgumentError narma(input, states; order = 10, metric = :invalid)
+        # Custom metric
+        custom_metric(y_true, y_pred) = mean(abs.(y_true .- y_pred))
+        result_custom = narma(input, states; order = 10, metric = custom_metric, reg = 1.0)
+        @test isfinite(result_custom.score)
     end
 
     @testset "IPC" begin
@@ -230,9 +229,6 @@ using Random
         # Input shorter than order
         @test_throws AssertionError generate_narma(rand(5); order = 10)
 
-        # Invalid NARMA metric
-        @test_throws ArgumentError narma(input, states; order = 10, metric = :bad)
-
         # Washout out of range
         @test_throws AssertionError narma(input, states; order = 10, washout = -1)
         @test_throws AssertionError narma(input, states; order = 10, washout = 500)

From fad4314b4177816136dff346e22163bb3c6eff36 Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Wed, 8 Apr 2026 00:17:39 +0530
Subject: [PATCH 5/9] Apply Runic formatting to ReservoirComputingBenchmarks

---
 lib/ReservoirComputingBenchmarks/src/utils.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index a63b53164..265d15eb6 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -117,7 +117,7 @@ Normalized Mean Squared Error: ``\\text{mean}((y_\\text{true} - y_\\text{pred})^
 Returns `NaN` with a warning if `y_true` has zero variance.
 """
 @inline function nmse(y_true::AbstractVector, y_pred::AbstractVector)
-    _nmse(y_true, y_pred)
+    return _nmse(y_true, y_pred)
 end
 
 @doc raw"""
@@ -130,7 +130,7 @@ Returns `0.0` if NMSE is negative (due to numerical issues).
 @inline function rnmse(y_true::AbstractVector, y_pred::AbstractVector)
     nmse_val = nmse(y_true, y_pred)
     isnan(nmse_val) && return NaN
-    nmse_val < 0 ? 0.0 : sqrt(nmse_val)
+    return nmse_val < 0 ? 0.0 : sqrt(nmse_val)
 end
 
 @doc raw"""

From 3b84876cd217a992a62023d83e19db0cf08b5435 Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Sun, 26 Apr 2026 01:04:15 +0530
Subject: [PATCH 6/9] Add remaining issue #398 benchmarks and model dispatch
 via package extension

Implements the five missing benchmarks from issue #398 (nonlinear
transformation, nonlinear memory, sin approximation, kernel rank,
generalization rank) alongside a package extension that adds
(model, ps, st) dispatch to all eight benchmarks.

The extension activates when ReservoirComputing is loaded, generating
synthetic uniform[-1, 1] driving signals, collecting reservoir states
via collectstates, and forwarding to the array-based methods. The base
library remains zero-dependency.

The model dispatch validates in_dims == 1 with a friendly ArgumentError
and reuses the user-provided initial state across runs for kernel and
generalization rank so the streams are independent.

Tests cover array form, happy path, validation errors, edge cases,
reproducibility, deterministic generalization-rank invariant
(perturbation == 0 -> rank 1), and cross-model dispatch on ES2N and
EuSN. 180/180 pass.
---
 lib/ReservoirComputingBenchmarks/Project.toml |  20 +-
 .../ext/RCBenchmarksReservoirComputingExt.jl  | 326 ++++++++++++++++
 .../src/ReservoirComputingBenchmarks.jl       |   8 +-
 .../src/kernel_rank.jl                        |  87 +++++
 .../src/nonlinear_memory.jl                   |  84 ++++
 .../src/nonlinear_transformation.jl           |  71 ++++
 .../src/sin_approximation.jl                  |  35 ++
 .../test/runtests.jl                          | 368 ++++++++++++++++++
 8 files changed, 994 insertions(+), 5 deletions(-)
 create mode 100644 lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
 create mode 100644 lib/ReservoirComputingBenchmarks/src/sin_approximation.jl

diff --git a/lib/ReservoirComputingBenchmarks/Project.toml b/lib/ReservoirComputingBenchmarks/Project.toml
index 3e91e71c4..b4e5bd966 100644
--- a/lib/ReservoirComputingBenchmarks/Project.toml
+++ b/lib/ReservoirComputingBenchmarks/Project.toml
@@ -7,12 +7,24 @@ version = "0.1.0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+[weakdeps]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReservoirComputing = "7c2d2b1e-3dd4-11ea-355a-8f6a8116e294"
 
-[targets]
-test = ["Test", "Random"]
+[extensions]
+RCBenchmarksReservoirComputingExt = ["Random", "ReservoirComputing"]
 
 [compat]
+LinearAlgebra = "1.10"
+Random = "1.10"
+ReservoirComputing = "0.12"
+Statistics = "1.10"
 julia = "1.10"
+
+[extras]
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReservoirComputing = "7c2d2b1e-3dd4-11ea-355a-8f6a8116e294"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Random", "ReservoirComputing", "Test"]
diff --git a/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
new file mode 100644
index 000000000..f435a74c8
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
@@ -0,0 +1,326 @@
+module RCBenchmarksReservoirComputingExt
+
+using Random: AbstractRNG, default_rng, randn
+using ReservoirComputing: AbstractReservoirComputer, collectstates
+using ReservoirComputingBenchmarks: ReservoirComputingBenchmarks,
+    memory_capacity, nonlinear_memory,
+    nonlinear_transformation, sin_approximation,
+    narma, ipc,
+    kernel_rank, generalization_rank
+
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+@inline function _uniform_input(rng::AbstractRNG, T::Integer, lo::Real, hi::Real)
+    @assert T >= 2 "Signal length T must be >= 2, got $T"
+    @assert hi > lo "Require hi > lo, got lo=$lo, hi=$hi"
+    return rand(rng, T) .* (hi - lo) .+ lo
+end
+
+@inline function _resolve_input(
+        rng::AbstractRNG, T::Integer, input::Union{Nothing, AbstractVector}
+    )
+    u = input === nothing ? _uniform_input(rng, T, -1.0, 1.0) : input
+    @assert length(u) >= 2 "input must have at least 2 samples, got $(length(u))"
+    return u
+end
+
+# Best-effort check that the model accepts scalar (in_dims == 1) inputs.
+# Walks `rc.reservoir.cell.in_dims` when available; no-op otherwise so that
+# unconventional model layouts still fall through to `collectstates`'s own
+# shape error.
+@inline function _check_scalar_input(rc::AbstractReservoirComputer)
+    res = getfield(rc, :reservoir)
+    cell = hasproperty(res, :cell) ? getproperty(res, :cell) : nothing
+    cell === nothing && return nothing
+    if hasproperty(cell, :in_dims)
+        in_dims = getproperty(cell, :in_dims)
+        in_dims == 1 || throw(
+            ArgumentError(
+                "ReservoirComputingBenchmarks model dispatch requires a reservoir with " *
+                    "scalar input (in_dims == 1), got in_dims == $in_dims. Provide a model " *
+                    "constructed with `in_dims = 1`, or call the array-based method directly " *
+                    "with your own (n_features, T) state matrix.",
+            ),
+        )
+    end
+    return nothing
+end
+
+@inline function _collect_scalar_states(
+        rc::AbstractReservoirComputer, u::AbstractVector, ps, st
+    )
+    _check_scalar_input(rc)
+    data = reshape(u, 1, length(u))
+    states, _ = collectstates(rc, data, ps, st)
+    return states
+end
+
+# Drive the reservoir with `u`, return the **final** state vector. Each call
+# uses the user-provided initial `st` (with carry=nothing) so successive runs
+# are independent. Skips the per-call scalar-input check (assumed already
+# verified once by the caller).
+@inline function _final_state(
+        rc::AbstractReservoirComputer, u::AbstractVector, ps, st
+    )
+    data = reshape(u, 1, length(u))
+    states, _ = collectstates(rc, data, ps, st)
+    return states[:, end]
+end
+
+# ── Memory Capacity ─────────────────────────────────────────────────────────
+
+@doc raw"""
+    memory_capacity(rc::AbstractReservoirComputer, ps, st;
+                    T=3000, rng=Random.default_rng(),
+                    input=nothing, kwargs...)
+
+Linear memory capacity of a reservoir computing model. Generates a uniform
+``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches
+to the array-based [`memory_capacity`](@ref).
+
+Remaining keyword arguments (`max_delay`, `train_ratio`, `reg`) are forwarded.
+The reservoir model must accept scalar inputs (`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.memory_capacity(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return memory_capacity(u, states; kwargs...)
+end
+
+# ── Nonlinear Memory ────────────────────────────────────────────────────────
+
+@doc raw"""
+    nonlinear_memory(rc::AbstractReservoirComputer, ps, st;
+                     T=3000, rng=Random.default_rng(),
+                     input=nothing, kwargs...)
+
+Nonlinear memory capacity of a reservoir computing model. Generates a uniform
+``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches
+to the array-based [`nonlinear_memory`](@ref).
+
+Remaining keyword arguments (`f`, `max_delay`, `train_ratio`, `reg`) are
+forwarded. The reservoir model must accept scalar inputs (`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.nonlinear_memory(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return nonlinear_memory(u, states; kwargs...)
+end
+
+# ── Nonlinear Transformation ────────────────────────────────────────────────
+
+@doc raw"""
+    nonlinear_transformation(rc::AbstractReservoirComputer, ps, st;
+                             T=3000, rng=Random.default_rng(),
+                             input=nothing, kwargs...)
+
+Memoryless nonlinear transformation benchmark on a reservoir computing model.
+Generates a uniform ``[-1, 1]`` input, drives the model via
+[`collectstates`](@ref), and dispatches to the array-based
+[`nonlinear_transformation`](@ref).
+
+Remaining keyword arguments (`f`, `train_ratio`, `reg`, `metric`, `washout`)
+are forwarded. The reservoir model must accept scalar inputs (`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.nonlinear_transformation(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return nonlinear_transformation(u, states; kwargs...)
+end
+
+# ── Sin Approximation ───────────────────────────────────────────────────────
+
+@doc raw"""
+    sin_approximation(rc::AbstractReservoirComputer, ps, st;
+                      T=3000, rng=Random.default_rng(),
+                      input=nothing, kwargs...)
+
+Sin-approximation benchmark on a reservoir computing model. Generates a
+uniform ``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and
+dispatches to the array-based [`sin_approximation`](@ref).
+
+Remaining keyword arguments (`freq`, `train_ratio`, `reg`, `metric`,
+`washout`) are forwarded. The reservoir model must accept scalar inputs
+(`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.sin_approximation(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return sin_approximation(u, states; kwargs...)
+end
+
+# ── NARMA ───────────────────────────────────────────────────────────────────
+
+@doc raw"""
+    narma(rc::AbstractReservoirComputer, ps, st;
+          T=3000, rng=Random.default_rng(),
+          input=nothing, kwargs...)
+
+Evaluate a reservoir computing model on the NARMA-N task. Generates a uniform
+``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches
+to the array-based [`narma`](@ref).
+
+Remaining keyword arguments (`order`, `metric`, `train_ratio`, `reg`,
+`washout`, NARMA coefficients, ...) are forwarded. The reservoir model must
+accept scalar inputs (`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.narma(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return narma(u, states; kwargs...)
+end
+
+# ── IPC ─────────────────────────────────────────────────────────────────────
+
+@doc raw"""
+    ipc(rc::AbstractReservoirComputer, ps, st;
+        T=3000, rng=Random.default_rng(),
+        input=nothing, kwargs...)
+
+Information Processing Capacity of a reservoir computing model. Generates a
+uniform ``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and
+dispatches to the array-based [`ipc`](@ref).
+
+Remaining keyword arguments (`max_delay`, `max_degree`, `max_total_degree`,
+`cross_terms`, `train_ratio`, `reg`) are forwarded. The reservoir model must
+accept scalar inputs (`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.ipc(
+        rc::AbstractReservoirComputer, ps, st;
+        T::Integer = 3000,
+        rng::AbstractRNG = default_rng(),
+        input::Union{Nothing, AbstractVector} = nothing,
+        kwargs...,
+    )
+    u = _resolve_input(rng, T, input)
+    states = _collect_scalar_states(rc, u, ps, st)
+    return ipc(u, states; kwargs...)
+end
+
+# ── Kernel Rank ─────────────────────────────────────────────────────────────
+
+# Drive `n_streams` independent runs and stack the final states column-wise.
+function _collect_final_states(
+        rc::AbstractReservoirComputer, ps, st,
+        streams::Function, n_streams::Integer
+    )
+    @assert n_streams >= 1 "n_streams must be >= 1, got $n_streams"
+    _check_scalar_input(rc)
+    final_states = nothing
+    n_features = -1
+    @inbounds for i in 1:n_streams
+        u = streams(i)
+        x = _final_state(rc, u, ps, st)
+        if final_states === nothing
+            n_features = length(x)
+            final_states = Matrix{eltype(x)}(undef, n_features, n_streams)
+        end
+        @assert length(x) == n_features "Final state size changed across runs ($(length(x)) vs $n_features)"
+        final_states[:, i] .= x
+    end
+    return final_states
+end
+
+@doc raw"""
+    kernel_rank(rc::AbstractReservoirComputer, ps, st;
+                n_streams=500, stream_length=100,
+                rng=Random.default_rng(), threshold=0.01)
+
+Drive the reservoir with `n_streams` independent uniform ``[-1, 1]`` input
+streams of length `stream_length`, collect the final state of each run, and
+return the numerical rank of the resulting `(n_features, n_streams)` matrix.
+
+The user-provided initial `st` is reused for each run, ensuring that every
+stream starts from the same fresh carry (`nothing`), so the runs are
+independent.
+
+Remaining keyword arguments (`threshold`) are forwarded to the array-based
+[`kernel_rank`](@ref). The reservoir model must accept scalar inputs
+(`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.kernel_rank(
+        rc::AbstractReservoirComputer, ps, st;
+        n_streams::Integer = 500,
+        stream_length::Integer = 100,
+        rng::AbstractRNG = default_rng(),
+        threshold::Real = 0.01,
+    )
+    @assert stream_length >= 2 "stream_length must be >= 2, got $stream_length"
+    streams = _ -> _uniform_input(rng, stream_length, -1.0, 1.0)
+    M = _collect_final_states(rc, ps, st, streams, n_streams)
+    return kernel_rank(M; threshold = threshold)
+end
+
+# ── Generalization Rank ─────────────────────────────────────────────────────
+
+@doc raw"""
+    generalization_rank(rc::AbstractReservoirComputer, ps, st;
+                        n_streams=500, stream_length=100,
+                        perturbation=0.01, base_input=nothing,
+                        rng=Random.default_rng(), threshold=0.01)
+
+Drive the reservoir with `n_streams` slightly perturbed copies of a common
+base input stream of length `stream_length`, collect the final state of each
+run, and return the numerical rank of the resulting matrix.
+
+A *lower* generalization rank means the reservoir collapses similar inputs to
+similar states (good generalization). The base stream is sampled uniformly in
+``[-1, 1]`` unless supplied via `base_input`; perturbations are i.i.d. Gaussian
+noise of standard deviation `perturbation`.
+
+Remaining keyword arguments (`threshold`) are forwarded to the array-based
+[`generalization_rank`](@ref). The reservoir model must accept scalar inputs
+(`in_dims == 1`).
+"""
+function ReservoirComputingBenchmarks.generalization_rank(
+        rc::AbstractReservoirComputer, ps, st;
+        n_streams::Integer = 500,
+        stream_length::Integer = 100,
+        perturbation::Real = 0.01,
+        base_input::Union{Nothing, AbstractVector} = nothing,
+        rng::AbstractRNG = default_rng(),
+        threshold::Real = 0.01,
+    )
+    @assert stream_length >= 2 "stream_length must be >= 2, got $stream_length"
+    @assert perturbation >= 0 "perturbation must be >= 0, got $perturbation"
+    base = base_input === nothing ?
+        _uniform_input(rng, stream_length, -1.0, 1.0) : base_input
+    @assert length(base) == stream_length (
+        "base_input length ($(length(base))) must equal stream_length ($stream_length)"
+    )
+    streams = _ -> base .+ perturbation .* randn(rng, stream_length)
+    M = _collect_final_states(rc, ps, st, streams, n_streams)
+    return generalization_rank(M; threshold = threshold)
+end
+
+end # module
diff --git a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
index e43d03f01..af1a33fc2 100644
--- a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
@@ -8,12 +8,18 @@ const MetricFunction = Function
 
 include("utils.jl")
 include("memory_capacity.jl")
+include("nonlinear_memory.jl")
+include("nonlinear_transformation.jl")
+include("sin_approximation.jl")
 include("narma.jl")
 include("ipc.jl")
+include("kernel_rank.jl")
 
-export memory_capacity
+export memory_capacity, nonlinear_memory
+export nonlinear_transformation, sin_approximation
 export generate_narma, narma
 export ipc
+export kernel_rank, generalization_rank
 export nmse, rnmse, mse
 
 end # module
diff --git a/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
new file mode 100644
index 000000000..02dbbf4a7
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
@@ -0,0 +1,87 @@
+@doc raw"""
+    _effective_rank(M; threshold=0.01)
+
+Numerical rank of a matrix `M`: the number of singular values strictly greater
+than `threshold * maximum_singular_value`.
+
+The threshold is relative to the largest singular value, making the rank
+estimate scale-invariant. Returns `0` for empty or all-zero matrices.
+"""
+function _effective_rank(M::AbstractMatrix; threshold::Real = 0.01)
+    @assert threshold > 0 "threshold must be positive, got $threshold"
+    isempty(M) && return 0
+    s = LinearAlgebra.svdvals(M)
+    s_max = isempty(s) ? zero(eltype(s)) : maximum(s)
+    s_max <= 0 && return 0
+    return count(>(threshold * s_max), s)
+end
+
+@doc raw"""
+    kernel_rank(states; threshold=0.01)
+
+Compute the **kernel rank** of a reservoir, defined as the numerical rank of a
+matrix whose columns are reservoir final states reached after driving the
+reservoir with `n` distinct random input streams.
+
+A higher kernel rank indicates a richer separation property: the reservoir
+maps different inputs to linearly independent state vectors, which is a
+prerequisite for high computational power.
+
+## Arguments
+
+  - `states::AbstractMatrix`: matrix of size `(n_features, n)` whose columns
+    are final reservoir states from `n` independent driving runs.
+
+## Keyword Arguments
+
+  - `threshold::Real=0.01`: relative threshold on singular values.
+
+## Returns
+
+  - `Int`: numerical rank, in `[0, min(n_features, n)]`.
+
+## References
+
+  - Legenstein, R. & Maass, W. (2007). "Edge of chaos and prediction of
+    computational performance for neural circuit models." *Neural Networks*,
+    20(3), 323–334.
+"""
+function kernel_rank(states::AbstractMatrix; threshold::Real = 0.01)
+    return _effective_rank(states; threshold = threshold)
+end
+
+@doc raw"""
+    generalization_rank(states; threshold=0.01)
+
+Compute the **generalization rank** of a reservoir, defined as the numerical
+rank of a matrix whose columns are reservoir final states reached after
+driving the reservoir with `n` slightly perturbed copies of a common input
+stream.
+
+A *lower* generalization rank indicates that the reservoir collapses similar
+inputs to similar states (good generalization). The difference
+`kernel_rank - generalization_rank` peaks at the *edge of chaos* and predicts
+computational performance (Legenstein & Maass, 2007).
+
+## Arguments
+
+  - `states::AbstractMatrix`: matrix of size `(n_features, n)` whose columns
+    are final reservoir states from `n` perturbed driving runs.
+
+## Keyword Arguments
+
+  - `threshold::Real=0.01`: relative threshold on singular values.
+
+## Returns
+
+  - `Int`: numerical rank, in `[0, min(n_features, n)]`.
+
+## References
+
+  - Legenstein, R. & Maass, W. (2007). "Edge of chaos and prediction of
+    computational performance for neural circuit models." *Neural Networks*,
+    20(3), 323–334.
+"""
+function generalization_rank(states::AbstractMatrix; threshold::Real = 0.01)
+    return _effective_rank(states; threshold = threshold)
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
new file mode 100644
index 000000000..b21ed52e8
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
@@ -0,0 +1,84 @@
+@doc raw"""
+    nonlinear_memory(input, states; f=x->x^2, max_delay=30, train_ratio=0.8, reg=1.0)
+
+Compute the nonlinear memory capacity of a reservoir following Inubushi &
+Yoshimura (2017). For each delay ``k = 1, \ldots, K``, a ridge regression
+readout is trained to reconstruct ``f(u(t-k))`` from the reservoir state at
+time ``t``. The per-delay capacity is the squared Pearson correlation between
+target and prediction, and the total capacity is the sum.
+
+When ``f(x) = x`` this reduces to the linear [`memory_capacity`](@ref).
+
+```math
+NMC_k = \frac{\text{cov}^2(f(u(t-k)),\, \hat{y}_k(t))}
+              {\text{var}(f(u(t-k)))\;\text{var}(\hat{y}_k(t))}
+```
+
+## Arguments
+
+  - `input::AbstractVector`: input signal of length `T` (typically i.i.d.
+    uniform in ``[-1, 1]``).
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `f`: scalar nonlinearity applied to the delayed input. Default ``x \mapsto x^2``.
+  - `max_delay::Int=30`: maximum delay ``K``.
+  - `train_ratio::Real=0.8`: fraction of valid data used for training.
+  - `reg::Real=1.0`: ridge regression regularization coefficient.
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `total::Float64`: total nonlinear memory capacity.
+  - `delays::Vector{Float64}`: per-delay capacities ``NMC_k``.
+
+## References
+
+  - Inubushi, M. & Yoshimura, K. (2017). "Reservoir computing beyond
+    memory-nonlinearity trade-off." *Scientific Reports*, 7, 10199.
+"""
+function nonlinear_memory(
+        input::AbstractVector,
+        states::AbstractMatrix;
+        f = x -> x^2,
+        max_delay::Int = 30,
+        train_ratio::Real = 0.8,
+        reg::Real = 1.0,
+    )
+    T = length(input)
+    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
+    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+
+    valid = (max_delay + 1):T
+    T_valid = length(valid)
+    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    copyto!(X, view(states, :, valid)')
+
+    train_idx, test_idx = _train_test_split(T_valid, train_ratio)
+    X_train = view(X, train_idx, :)
+    X_test = view(X, test_idx, :)
+
+    rf = _ridge_factor(X_train; reg = reg)
+
+    delay_capacities = zeros(max_delay)
+    target = Vector{Float64}(undef, T_valid)
+
+    @inbounds for k in 1:max_delay
+        for (i, t) in enumerate(valid)
+            target[i] = float(f(input[t - k]))
+        end
+
+        y_train = view(target, train_idx)
+        y_test = view(target, test_idx)
+
+        w = _ridge_solve!(rf, X_train, y_train)
+        y_pred = X_test * w
+
+        delay_capacities[k] = _squared_correlation(y_test, y_pred)
+    end
+
+    return (total = sum(delay_capacities), delays = delay_capacities)
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
new file mode 100644
index 000000000..270c4a73f
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
@@ -0,0 +1,71 @@
+@doc raw"""
+    nonlinear_transformation(input, states; f=x->sin(π*x), train_ratio=0.8,
+                             reg=1.0, metric=nmse, washout::Integer=0)
+
+Evaluate how well a reservoir reproduces a memoryless nonlinear transformation
+``f(u(t))`` of the current input.
+
+A ridge regression readout is trained from `states` to the target `f.(input)`,
+and the metric is reported on a held-out tail of the trajectory.
+
+## Arguments
+
+  - `input::AbstractVector`: driving input signal of length `T`.
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `f`: scalar nonlinearity applied element-wise to `input`. Default ``\sin(\pi x)``.
+  - `train_ratio::Real=0.8`: fraction of valid data used for training.
+  - `reg::Real=1.0`: ridge regression regularization coefficient.
+  - `metric`: error metric `(y_true, y_pred) -> score`. Built-ins:
+    [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref).
+  - `washout::Integer=0`: number of initial time steps discarded before
+    training/testing.
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `score::Float64`: error metric on the test split.
+  - `target::Vector{Float64}`: full target signal `f.(input)`.
+
+## References
+
+  - Lukoševičius, M. (2012). "A practical guide to applying echo state networks."
+    *Neural Networks: Tricks of the Trade*, Springer.
+"""
+function nonlinear_transformation(
+        input::AbstractVector,
+        states::AbstractMatrix;
+        f = x -> sin(π * x),
+        train_ratio::Real = 0.8,
+        reg::Real = 1.0,
+        metric = nmse,
+        washout::Integer = 0,
+    )
+    T = length(input)
+    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+    @assert 0 <= washout < T "washout must be in [0, $T), got $washout"
+
+    valid = (washout + 1):T
+    T_valid = length(valid)
+    @assert T_valid >= 2 "Not enough samples after washout: $T_valid"
+
+    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    copyto!(X, view(states, :, valid)')
+
+    target = Vector{Float64}(undef, T)
+    @inbounds for t in 1:T
+        target[t] = float(f(input[t]))
+    end
+
+    train_idx, test_idx = _train_test_split(T_valid, train_ratio)
+
+    y_valid = view(target, valid)
+    w = _ridge_regression(view(X, train_idx, :), view(y_valid, train_idx); reg = reg)
+    y_pred = view(X, test_idx, :) * w
+    score = metric(view(y_valid, test_idx), y_pred)
+
+    return (score = score, target = target)
+end
diff --git a/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl
new file mode 100644
index 000000000..b607ad645
--- /dev/null
+++ b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl
@@ -0,0 +1,35 @@
+@doc raw"""
+    sin_approximation(input, states; freq=π, train_ratio=0.8, reg=1.0,
+                      metric=nmse, washout::Integer=0)
+
+Evaluate how well a reservoir approximates ``\sin(\omega\, u(t))``.
+
+This is a memoryless nonlinear task. It is a thin wrapper around
+[`nonlinear_transformation`](@ref) with ``f(x) = \sin(\omega x)``.
+
+## Arguments
+
+  - `input::AbstractVector`: driving input signal of length `T`.
+  - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`.
+
+## Keyword Arguments
+
+  - `freq::Real=π`: angular frequency ``\omega``.
+  - Remaining keyword arguments are forwarded to
+    [`nonlinear_transformation`](@ref).
+
+## Returns
+
+A `NamedTuple` with fields:
+
+  - `score::Float64`: error metric on the test split.
+  - `target::Vector{Float64}`: full target signal `sin(freq * input)`.
+"""
+function sin_approximation(
+        input::AbstractVector,
+        states::AbstractMatrix;
+        freq::Real = π,
+        kwargs...,
+    )
+    return nonlinear_transformation(input, states; f = x -> sin(freq * x), kwargs...)
+end
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
index 89fb2a5ff..eaa16afeb 100644
--- a/lib/ReservoirComputingBenchmarks/test/runtests.jl
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -2,6 +2,7 @@ using ReservoirComputingBenchmarks
 using Test
 using Random
 using Statistics
+using ReservoirComputing
 
 @testset "ReservoirComputingBenchmarks" begin
     rng = Random.MersenneTwister(42)
@@ -141,6 +142,124 @@ using Statistics
         @test length(result_no_cross.basis_capacities) < length(result.basis_capacities)
     end
 
+    @testset "Nonlinear Transformation" begin
+        T = 1000
+        n_features = 80
+        input = rand(rng, T) .* 2 .- 1
+
+        # Reservoir whose first feature is u(t) and rest are nonlinear maps of it
+        states = zeros(n_features, T)
+        for t in 1:T
+            states[1, t] = input[t]
+            states[2, t] = input[t]^2
+            states[3, t] = sin(π * input[t])
+            for i in 4:n_features
+                states[i, t] = tanh(0.7 * input[t] + 0.1 * (i - 3))
+            end
+        end
+
+        # sin(π u) target — a feature exactly matches it, so the readout
+        # should fit it well (small NMSE up to ridge regularisation bias).
+        r = nonlinear_transformation(input, states; f = x -> sin(π * x), reg = 1.0e-6)
+        @test haskey(r, :score)
+        @test haskey(r, :target)
+        @test length(r.target) == T
+        @test isfinite(r.score)
+        @test r.score < 1.0e-3
+
+        # x^2 target — exactly available
+        r2 = nonlinear_transformation(input, states; f = x -> x^2, reg = 1.0e-6)
+        @test r2.score < 1.0e-3
+
+        # custom metric
+        r3 = nonlinear_transformation(input, states; f = sin, metric = mse, reg = 1.0e-4)
+        @test isfinite(r3.score)
+
+        # Validation
+        @test_throws AssertionError nonlinear_transformation(input, randn(rng, n_features, T - 1))
+        @test_throws AssertionError nonlinear_transformation(input, states; washout = T)
+        @test_throws AssertionError nonlinear_transformation(input, states; washout = -1)
+    end
+
+    @testset "Nonlinear Memory" begin
+        T = 2000
+        n_features = 50
+        input = rand(rng, T) .* 2 .- 1
+
+        # Build a reservoir whose features are u(t-k) (linear shift register)
+        states = zeros(n_features, T)
+        for t in 2:T
+            states[1, t] = input[t - 1]
+            for i in 2:n_features
+                states[i, t] = states[i - 1, t - 1]
+            end
+        end
+
+        # Linear memory baseline (should reduce to memory_capacity for f=identity)
+        r_lin = nonlinear_memory(input, states; f = identity, max_delay = 20, reg = 1.0e-6)
+        @test r_lin.delays[1] > 0.9
+        @test r_lin.total > 10
+
+        # Squared nonlinear memory: a linear reservoir cannot recover x^2 perfectly
+        r_sq = nonlinear_memory(input, states; f = x -> x^2, max_delay = 20, reg = 1.0e-6)
+        @test length(r_sq.delays) == 20
+        @test all(d -> 0 <= d <= 1.001, r_sq.delays)
+        @test r_sq.total >= 0
+
+        # Validation
+        @test_throws AssertionError nonlinear_memory(input, states; max_delay = 0)
+        @test_throws AssertionError nonlinear_memory(input, states; max_delay = T)
+        @test_throws AssertionError nonlinear_memory(rand(50), randn(rng, 10, 100))
+    end
+
+    @testset "Sin Approximation" begin
+        T = 800
+        n_features = 30
+        input = rand(rng, T) .* 2 .- 1
+
+        # Reservoir already contains sin(π u(t)) — perfect fit possible
+        states = zeros(n_features, T)
+        for t in 1:T
+            states[1, t] = sin(π * input[t])
+            for i in 2:n_features
+                states[i, t] = tanh(0.5 * input[t] + 0.05 * (i - 2))
+            end
+        end
+
+        r = sin_approximation(input, states; freq = π, reg = 1.0e-6)
+        @test haskey(r, :score)
+        @test r.score < 1.0e-3
+
+        # freq = 1 — non-trivial map; just check finite
+        r2 = sin_approximation(input, states; freq = 1.0)
+        @test isfinite(r2.score)
+    end
+
+    @testset "Kernel / Generalization Rank (array form)" begin
+        # n_features × n full-rank random matrix
+        M_full = randn(rng, 30, 50)
+        @test kernel_rank(M_full) == 30
+        @test generalization_rank(M_full) == 30
+
+        # Rank-1 matrix: numerical rank == 1
+        v = randn(rng, 30)
+        M_rank1 = v * randn(rng, 50)'
+        @test kernel_rank(M_rank1) == 1
+        @test generalization_rank(M_rank1) == 1
+
+        # All-zero matrix → rank 0
+        @test kernel_rank(zeros(30, 50)) == 0
+        @test generalization_rank(zeros(30, 50)) == 0
+
+        # threshold extremes
+        M_mixed = randn(rng, 5, 5)
+        @test kernel_rank(M_mixed; threshold = 1.0e-12) >= kernel_rank(M_mixed; threshold = 0.5)
+
+        # Validation
+        @test_throws AssertionError kernel_rank(M_full; threshold = 0.0)
+        @test_throws AssertionError kernel_rank(M_full; threshold = -0.1)
+    end
+
     @testset "Legendre Polynomials" begin
         leg = ReservoirComputingBenchmarks._legendre
 
@@ -237,4 +356,253 @@ using Statistics
         @test_throws AssertionError ipc(input, states; max_delay = 0, max_degree = 2)
         @test_throws AssertionError ipc(input, states; max_delay = 5, max_degree = 0)
     end
+
+    @testset "ReservoirComputing model dispatch" begin
+        rng_model = Random.MersenneTwister(7)
+        T = 600
+        in_dims, res_dims, out_dims = 1, 40, 1
+        model = ESN(in_dims, res_dims, out_dims, tanh)
+        ps, st = setup(rng_model, model)
+
+        @testset "memory_capacity(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(11)
+            r = memory_capacity(
+                model, ps, st;
+                T = T, rng = rng_input, max_delay = 10, reg = 1.0e-4,
+            )
+            @test haskey(r, :total)
+            @test haskey(r, :delays)
+            @test length(r.delays) == 10
+            @test isfinite(r.total)
+            @test r.total >= 0
+            @test all(d -> 0 <= d <= 1.001, r.delays)
+
+            # Reproducibility: same rng + same model → same result
+            r2 = memory_capacity(
+                model, ps, st;
+                T = T, rng = Random.MersenneTwister(11), max_delay = 10, reg = 1.0e-4,
+            )
+            @test r2.total ≈ r.total
+            @test r2.delays ≈ r.delays
+        end
+
+        @testset "narma(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(13)
+            r = narma(
+                model, ps, st;
+                T = T, rng = rng_input, order = 10, metric = nmse, reg = 1.0e-4,
+            )
+            @test haskey(r, :score)
+            @test haskey(r, :target)
+            @test isfinite(r.score)
+            @test r.score >= 0
+            @test length(r.target) == T
+
+            # User-supplied input bypasses rng
+            user_input = rand(Random.MersenneTwister(17), T) .* 2 .- 1
+            r_user = narma(
+                model, ps, st;
+                input = user_input, order = 10, reg = 1.0e-4,
+            )
+            @test isfinite(r_user.score)
+            @test length(r_user.target) == T
+        end
+
+        @testset "ipc(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(19)
+            r = ipc(
+                model, ps, st;
+                T = T, rng = rng_input,
+                max_delay = 4, max_degree = 2, reg = 1.0e-4,
+            )
+            @test haskey(r, :total)
+            @test haskey(r, :linear)
+            @test haskey(r, :nonlinear)
+            @test haskey(r, :basis_capacities)
+            @test r.theoretical_max == res_dims
+            @test r.total >= 0
+            @test abs(r.total - r.linear - r.nonlinear) < 1.0e-10
+        end
+
+        @testset "nonlinear_transformation(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(23)
+            r = nonlinear_transformation(
+                model, ps, st;
+                T = T, rng = rng_input, f = x -> x^2, reg = 1.0e-4,
+            )
+            @test haskey(r, :score)
+            @test haskey(r, :target)
+            @test isfinite(r.score)
+            @test length(r.target) == T
+        end
+
+        @testset "nonlinear_memory(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(29)
+            r = nonlinear_memory(
+                model, ps, st;
+                T = T, rng = rng_input, f = x -> x^2, max_delay = 8, reg = 1.0e-4,
+            )
+            @test haskey(r, :total)
+            @test haskey(r, :delays)
+            @test length(r.delays) == 8
+            @test all(d -> 0 <= d <= 1.001, r.delays)
+            @test isfinite(r.total)
+        end
+
+        @testset "sin_approximation(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(31)
+            r = sin_approximation(
+                model, ps, st;
+                T = T, rng = rng_input, freq = π, reg = 1.0e-4,
+            )
+            @test haskey(r, :score)
+            @test haskey(r, :target)
+            @test isfinite(r.score)
+            @test length(r.target) == T
+        end
+
+        @testset "kernel_rank(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(37)
+            kr = kernel_rank(
+                model, ps, st;
+                n_streams = 80, stream_length = 60,
+                rng = rng_input, threshold = 0.01,
+            )
+            @test kr isa Integer
+            @test 0 <= kr <= res_dims
+
+            # Reproducibility
+            kr2 = kernel_rank(
+                model, ps, st;
+                n_streams = 80, stream_length = 60,
+                rng = Random.MersenneTwister(37), threshold = 0.01,
+            )
+            @test kr2 == kr
+        end
+
+        @testset "generalization_rank(model, ps, st)" begin
+            rng_input = Random.MersenneTwister(41)
+            gr = generalization_rank(
+                model, ps, st;
+                n_streams = 80, stream_length = 60,
+                perturbation = 1.0e-4,
+                rng = rng_input, threshold = 0.01,
+            )
+            @test gr isa Integer
+            @test 0 <= gr <= res_dims
+
+            # Deterministic invariant: with perturbation == 0, all streams are
+            # the same `base_input`, so all final states are identical and the
+            # final-state matrix has numerical rank exactly 1.
+            gr_zero = generalization_rank(
+                model, ps, st;
+                n_streams = 50, stream_length = 60,
+                perturbation = 0.0,
+                rng = Random.MersenneTwister(43), threshold = 0.01,
+            )
+            @test gr_zero == 1
+        end
+
+        @testset "input length validation" begin
+            @test_throws AssertionError memory_capacity(
+                model, ps, st; T = 1, max_delay = 1,
+            )
+            short = [0.1, 0.2]
+            @test_throws AssertionError memory_capacity(
+                model, ps, st; input = [0.1], max_delay = 1,
+            )
+            # input keyword length controls dispatch length, not T
+            @test_throws AssertionError narma(model, ps, st; input = [0.1])
+        end
+
+        @testset "scalar input (in_dims == 1) check" begin
+            # in_dims = 3 → friendly ArgumentError, not an opaque shape error
+            multi_model = ESN(3, 20, 1, tanh)
+            multi_ps, multi_st = setup(Random.MersenneTwister(5), multi_model)
+            @test_throws ArgumentError memory_capacity(
+                multi_model, multi_ps, multi_st; T = 200, max_delay = 4,
+            )
+            @test_throws ArgumentError narma(
+                multi_model, multi_ps, multi_st; T = 200,
+            )
+            @test_throws ArgumentError ipc(
+                multi_model, multi_ps, multi_st; T = 200, max_delay = 2, max_degree = 2,
+            )
+            @test_throws ArgumentError nonlinear_transformation(
+                multi_model, multi_ps, multi_st; T = 200,
+            )
+            @test_throws ArgumentError nonlinear_memory(
+                multi_model, multi_ps, multi_st; T = 200, max_delay = 4,
+            )
+            @test_throws ArgumentError sin_approximation(
+                multi_model, multi_ps, multi_st; T = 200,
+            )
+            @test_throws ArgumentError kernel_rank(
+                multi_model, multi_ps, multi_st; n_streams = 10, stream_length = 30,
+            )
+            @test_throws ArgumentError generalization_rank(
+                multi_model, multi_ps, multi_st;
+                n_streams = 10, stream_length = 30,
+            )
+        end
+    end
+
+    @testset "Cross-model dispatch ($(nameof(M)))" for M in (ES2N, EuSN)
+        rng_model = Random.MersenneTwister(101)
+        T = 400
+        in_dims, res_dims, out_dims = 1, 25, 1
+        model = M(in_dims, res_dims, out_dims, tanh)
+        ps, st = setup(rng_model, model)
+
+        rng_input = Random.MersenneTwister(103)
+        r_mc = memory_capacity(
+            model, ps, st;
+            T = T, rng = rng_input, max_delay = 5, reg = 1.0e-4,
+        )
+        @test isfinite(r_mc.total)
+        @test length(r_mc.delays) == 5
+
+        r_narma = narma(
+            model, ps, st;
+            T = T, rng = Random.MersenneTwister(105), order = 10, reg = 1.0e-4,
+        )
+        @test isfinite(r_narma.score)
+
+        r_ipc = ipc(
+            model, ps, st;
+            T = T, rng = Random.MersenneTwister(107),
+            max_delay = 3, max_degree = 2, reg = 1.0e-4,
+        )
+        @test r_ipc.theoretical_max == res_dims
+        @test isfinite(r_ipc.total)
+
+        r_nlt = nonlinear_transformation(
+            model, ps, st;
+            T = T, rng = Random.MersenneTwister(109),
+            f = x -> x^2, reg = 1.0e-4,
+        )
+        @test isfinite(r_nlt.score)
+
+        r_nlm = nonlinear_memory(
+            model, ps, st;
+            T = T, rng = Random.MersenneTwister(111),
+            f = x -> x^2, max_delay = 4, reg = 1.0e-4,
+        )
+        @test length(r_nlm.delays) == 4
+
+        kr = kernel_rank(
+            model, ps, st;
+            n_streams = 60, stream_length = 50,
+            rng = Random.MersenneTwister(113),
+        )
+        @test 0 <= kr <= res_dims
+
+        gr_zero = generalization_rank(
+            model, ps, st;
+            n_streams = 30, stream_length = 50,
+            perturbation = 0.0,
+            rng = Random.MersenneTwister(115),
+        )
+        @test gr_zero == 1
+    end
 end

From 09985d258abaa33369b1ddc825e3e34c47ece9db Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Mon, 18 May 2026 11:47:46 +0530
Subject: [PATCH 7/9] refactor(benchmarks): replace @assert with
 ArgumentError/DimensionMismatch

`@assert` is intended for internal invariants and can be disabled by
`--check-bounds=no`/optimization passes. User-facing input validation
should raise typed exceptions so callers can dispatch on them and so the
errors are not silently optimized away.

  - Argument-value violations (out-of-range scalars, missing kwargs,
    non-positive sizes) now raise `ArgumentError`.
  - Shape/length mismatches now raise `DimensionMismatch`.
  - Tests updated to match the new exception types.

Addresses review feedback on PR #414 (use specific errors instead of
assertions for clearer user-facing failures).
---
 .../ext/RCBenchmarksReservoirComputingExt.jl  | 31 ++++++----
 lib/ReservoirComputingBenchmarks/src/ipc.jl   | 18 ++++--
 .../src/kernel_rank.jl                        |  2 +-
 .../src/memory_capacity.jl                    | 12 +++-
 lib/ReservoirComputingBenchmarks/src/narma.jl | 24 +++++---
 .../src/nonlinear_memory.jl                   | 12 +++-
 .../src/nonlinear_transformation.jl           | 12 +++-
 lib/ReservoirComputingBenchmarks/src/utils.jl | 22 +++++--
 .../test/runtests.jl                          | 58 +++++++++----------
 9 files changed, 124 insertions(+), 67 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
index f435a74c8..ecf2b5821 100644
--- a/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
+++ b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl
@@ -11,8 +11,8 @@ using ReservoirComputingBenchmarks: ReservoirComputingBenchmarks,
 # ── Helpers ─────────────────────────────────────────────────────────────────
 
 @inline function _uniform_input(rng::AbstractRNG, T::Integer, lo::Real, hi::Real)
-    @assert T >= 2 "Signal length T must be >= 2, got $T"
-    @assert hi > lo "Require hi > lo, got lo=$lo, hi=$hi"
+    T >= 2 || throw(ArgumentError("Signal length T must be >= 2, got $T"))
+    hi > lo || throw(ArgumentError("Require hi > lo, got lo=$lo, hi=$hi"))
     return rand(rng, T) .* (hi - lo) .+ lo
 end
 
@@ -20,7 +20,8 @@ end
         rng::AbstractRNG, T::Integer, input::Union{Nothing, AbstractVector}
     )
     u = input === nothing ? _uniform_input(rng, T, -1.0, 1.0) : input
-    @assert length(u) >= 2 "input must have at least 2 samples, got $(length(u))"
+    length(u) >= 2 ||
+        throw(ArgumentError("input must have at least 2 samples, got $(length(u))"))
     return u
 end
 
@@ -234,7 +235,8 @@ function _collect_final_states(
         rc::AbstractReservoirComputer, ps, st,
         streams::Function, n_streams::Integer
     )
-    @assert n_streams >= 1 "n_streams must be >= 1, got $n_streams"
+    n_streams >= 1 ||
+        throw(ArgumentError("n_streams must be >= 1, got $n_streams"))
     _check_scalar_input(rc)
     final_states = nothing
     n_features = -1
@@ -245,7 +247,11 @@ function _collect_final_states(
             n_features = length(x)
             final_states = Matrix{eltype(x)}(undef, n_features, n_streams)
         end
-        @assert length(x) == n_features "Final state size changed across runs ($(length(x)) vs $n_features)"
+        length(x) == n_features || throw(
+            DimensionMismatch(
+                "Final state size changed across runs ($(length(x)) vs $n_features)",
+            ),
+        )
         final_states[:, i] .= x
     end
     return final_states
@@ -275,7 +281,8 @@ function ReservoirComputingBenchmarks.kernel_rank(
         rng::AbstractRNG = default_rng(),
         threshold::Real = 0.01,
     )
-    @assert stream_length >= 2 "stream_length must be >= 2, got $stream_length"
+    stream_length >= 2 ||
+        throw(ArgumentError("stream_length must be >= 2, got $stream_length"))
     streams = _ -> _uniform_input(rng, stream_length, -1.0, 1.0)
     M = _collect_final_states(rc, ps, st, streams, n_streams)
     return kernel_rank(M; threshold = threshold)
@@ -311,12 +318,16 @@ function ReservoirComputingBenchmarks.generalization_rank(
         rng::AbstractRNG = default_rng(),
         threshold::Real = 0.01,
     )
-    @assert stream_length >= 2 "stream_length must be >= 2, got $stream_length"
-    @assert perturbation >= 0 "perturbation must be >= 0, got $perturbation"
+    stream_length >= 2 ||
+        throw(ArgumentError("stream_length must be >= 2, got $stream_length"))
+    perturbation >= 0 ||
+        throw(ArgumentError("perturbation must be >= 0, got $perturbation"))
     base = base_input === nothing ?
         _uniform_input(rng, stream_length, -1.0, 1.0) : base_input
-    @assert length(base) == stream_length (
-        "base_input length ($(length(base))) must equal stream_length ($stream_length)"
+    length(base) == stream_length || throw(
+        DimensionMismatch(
+            "base_input length ($(length(base))) must equal stream_length ($stream_length)",
+        ),
     )
     streams = _ -> base .+ perturbation .* randn(rng, stream_length)
     M = _collect_final_states(rc, ps, st, streams, n_streams)
diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl
index e06939e3f..98d09e775 100644
--- a/lib/ReservoirComputingBenchmarks/src/ipc.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl
@@ -68,10 +68,17 @@ function ipc(
     )
     T = length(input)
     n_features = size(states, 1)
-    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
-    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
-    @assert max_degree >= 1 "max_degree must be >= 1, got $max_degree"
-    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+    size(states, 2) == T || throw(
+        DimensionMismatch(
+            "states must have $T columns (time steps), got $(size(states, 2))",
+        ),
+    )
+    max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay"))
+    max_degree >= 1 ||
+        throw(ArgumentError("max_degree must be >= 1, got $max_degree"))
+    max_delay < T || throw(
+        ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"),
+    )
 
     mtd = something(max_total_degree, max_degree)
 
@@ -156,7 +163,8 @@ end
 Evaluate Legendre polynomial ``P_n(x)`` using three-term recurrence.
 """
 @inline function _legendre(n::Int, x::Real)
-    @assert n >= 0 "Legendre polynomial degree must be non-negative, got $n"
+    n >= 0 ||
+        throw(ArgumentError("Legendre polynomial degree must be non-negative, got $n"))
     n == 0 && return one(float(x))
     n == 1 && return float(x)
     p_prev, p_curr = one(float(x)), float(x)
diff --git a/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
index 02dbbf4a7..7ed9c8ede 100644
--- a/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
+++ b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl
@@ -8,7 +8,7 @@ The threshold is relative to the largest singular value, making the rank
 estimate scale-invariant. Returns `0` for empty or all-zero matrices.
 """
 function _effective_rank(M::AbstractMatrix; threshold::Real = 0.01)
-    @assert threshold > 0 "threshold must be positive, got $threshold"
+    threshold > 0 || throw(ArgumentError("threshold must be positive, got $threshold"))
     isempty(M) && return 0
     s = LinearAlgebra.svdvals(M)
     s_max = isempty(s) ? zero(eltype(s)) : maximum(s)
diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
index bf7c6bdb9..fe01b3b57 100644
--- a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
+++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
@@ -48,9 +48,15 @@ function memory_capacity(
         reg::Real = 1.0,
     )
     T = length(input)
-    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
-    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
-    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+    size(states, 2) == T || throw(
+        DimensionMismatch(
+            "states must have $T columns (time steps), got $(size(states, 2))",
+        ),
+    )
+    max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay"))
+    max_delay < T || throw(
+        ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"),
+    )
 
     # Discard first max_delay steps to avoid edge effects from delays
     valid = (max_delay + 1):T
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
index b2bc871e2..a306ab51c 100644
--- a/lib/ReservoirComputingBenchmarks/src/narma.jl
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -69,7 +69,7 @@ function generate_narma(
         delta::Union{Real, Nothing} = nothing,
         normalize::Bool = true,
     )
-    @assert order >= 2 "NARMA order must be >= 2, got $order"
+    order >= 2 || throw(ArgumentError("NARMA order must be >= 2, got $order"))
 
     # Resolve coefficients
     c = _narma_coeffs(order)
@@ -78,15 +78,19 @@ function generate_narma(
         beta = something(beta, c.beta)
         gamma = something(gamma, c.gamma)
         delta = something(delta, c.delta)
-    else
-        @assert !isnothing(alpha) && !isnothing(beta) && !isnothing(gamma) && !isnothing(delta) (
-            "No default coefficients for NARMA-$order. " *
-                "Provide alpha, beta, gamma, delta explicitly."
+    elseif isnothing(alpha) || isnothing(beta) || isnothing(gamma) || isnothing(delta)
+        throw(
+            ArgumentError(
+                "No default coefficients for NARMA-$order. " *
+                    "Provide alpha, beta, gamma, delta explicitly.",
+            ),
         )
     end
 
     T = length(input)
-    @assert T > order "Input length ($T) must be greater than order ($order)"
+    T > order || throw(
+        ArgumentError("Input length ($T) must be greater than order ($order)"),
+    )
 
     u = normalize ? _normalize(input, 0.0, 0.5) : convert(Vector{Float64}, input)
     y = zeros(Float64, T)
@@ -178,13 +182,17 @@ function narma(
         kwargs...,
     )
     T = length(input)
-    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
+    size(states, 2) == T || throw(
+        DimensionMismatch(
+            "states must have $T columns (time steps), got $(size(states, 2))",
+        ),
+    )
 
     target = generate_narma(input; order = order, kwargs...)
 
     # Skip initial transient where y ≈ 0
     wo = something(washout, order)
-    @assert 0 <= wo < T "washout must be in [0, $T), got $wo"
+    0 <= wo < T || throw(ArgumentError("washout must be in [0, $T), got $wo"))
     valid = (wo + 1):T
     T_valid = length(valid)
 
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
index b21ed52e8..b0fa2821c 100644
--- a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
@@ -48,9 +48,15 @@ function nonlinear_memory(
         reg::Real = 1.0,
     )
     T = length(input)
-    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
-    @assert max_delay >= 1 "max_delay must be >= 1, got $max_delay"
-    @assert max_delay < T "max_delay ($max_delay) must be less than signal length ($T)"
+    size(states, 2) == T || throw(
+        DimensionMismatch(
+            "states must have $T columns (time steps), got $(size(states, 2))",
+        ),
+    )
+    max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay"))
+    max_delay < T || throw(
+        ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"),
+    )
 
     valid = (max_delay + 1):T
     T_valid = length(valid)
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
index 270c4a73f..a2a8a674d 100644
--- a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
@@ -45,12 +45,18 @@ function nonlinear_transformation(
         washout::Integer = 0,
     )
     T = length(input)
-    @assert size(states, 2) == T "states must have $T columns (time steps), got $(size(states, 2))"
-    @assert 0 <= washout < T "washout must be in [0, $T), got $washout"
+    size(states, 2) == T || throw(
+        DimensionMismatch(
+            "states must have $T columns (time steps), got $(size(states, 2))",
+        ),
+    )
+    0 <= washout < T ||
+        throw(ArgumentError("washout must be in [0, $T), got $washout"))
 
     valid = (washout + 1):T
     T_valid = length(valid)
-    @assert T_valid >= 2 "Not enough samples after washout: $T_valid"
+    T_valid >= 2 ||
+        throw(ArgumentError("Not enough samples after washout: $T_valid"))
 
     X = Matrix{Float64}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index 265d15eb6..bcee9af47 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -12,7 +12,8 @@ struct _RidgeFactor{T <: Real}
 end
 
 function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0)
-    @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
+    reg >= 0 ||
+        throw(ArgumentError("Regularization coefficient must be non-negative, got $reg"))
     T = promote_type(eltype(X), typeof(reg))
     n = size(X, 2)
     reg_T = convert(T, reg)
@@ -65,7 +66,8 @@ Gram matrix `X'X + λI`.
 Returns weight vector `w` of length `n_features`.
 """
 function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real = 1.0)
-    @assert reg >= 0 "Regularization coefficient must be non-negative, got $reg"
+    reg >= 0 ||
+        throw(ArgumentError("Regularization coefficient must be non-negative, got $reg"))
     rf = _ridge_factor(X; reg = reg)
     return _ridge_solve(rf, X, y)
 end
@@ -96,10 +98,20 @@ end
 Return `(train_range, test_range)` index ranges for a temporal split.
 """
 function _train_test_split(n::Int, train_ratio::Real)
-    @assert 0 < train_ratio < 1 "train_ratio must be in (0, 1), got $train_ratio"
+    0 < train_ratio < 1 ||
+        throw(ArgumentError("train_ratio must be in (0, 1), got $train_ratio"))
     split = floor(Int, n * train_ratio)
-    @assert split >= 1 "Training set is empty (n=$n, train_ratio=$train_ratio). Provide more data."
-    @assert split < n "Test set is empty (n=$n, train_ratio=$train_ratio). Reduce train_ratio or provide more data."
+    split >= 1 || throw(
+        ArgumentError(
+            "Training set is empty (n=$n, train_ratio=$train_ratio). Provide more data.",
+        ),
+    )
+    split < n || throw(
+        ArgumentError(
+            "Test set is empty (n=$n, train_ratio=$train_ratio). " *
+                "Reduce train_ratio or provide more data.",
+        ),
+    )
     return 1:split, (split + 1):n
 end
 
diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl
index eaa16afeb..414820090 100644
--- a/lib/ReservoirComputingBenchmarks/test/runtests.jl
+++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl
@@ -64,12 +64,12 @@ using ReservoirComputing
         @test all(isfinite.(y30))
 
         # Custom coefficients for non-standard order
-        @test_throws AssertionError generate_narma(input; order = 5)
+        @test_throws ArgumentError generate_narma(input; order = 5)
         y5 = generate_narma(input; order = 5, alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1, normalize = false)
         @test length(y5) == T
 
         # Order must be >= 2
-        @test_throws AssertionError generate_narma(input; order = 1)
+        @test_throws ArgumentError generate_narma(input; order = 1)
     end
 
     @testset "NARMA Evaluation" begin
@@ -176,9 +176,9 @@ using ReservoirComputing
         @test isfinite(r3.score)
 
         # Validation
-        @test_throws AssertionError nonlinear_transformation(input, randn(rng, n_features, T - 1))
-        @test_throws AssertionError nonlinear_transformation(input, states; washout = T)
-        @test_throws AssertionError nonlinear_transformation(input, states; washout = -1)
+        @test_throws DimensionMismatch nonlinear_transformation(input, randn(rng, n_features, T - 1))
+        @test_throws ArgumentError nonlinear_transformation(input, states; washout = T)
+        @test_throws ArgumentError nonlinear_transformation(input, states; washout = -1)
     end
 
     @testset "Nonlinear Memory" begin
@@ -207,9 +207,9 @@ using ReservoirComputing
         @test r_sq.total >= 0
 
         # Validation
-        @test_throws AssertionError nonlinear_memory(input, states; max_delay = 0)
-        @test_throws AssertionError nonlinear_memory(input, states; max_delay = T)
-        @test_throws AssertionError nonlinear_memory(rand(50), randn(rng, 10, 100))
+        @test_throws ArgumentError nonlinear_memory(input, states; max_delay = 0)
+        @test_throws ArgumentError nonlinear_memory(input, states; max_delay = T)
+        @test_throws DimensionMismatch nonlinear_memory(rand(50), randn(rng, 10, 100))
     end
 
     @testset "Sin Approximation" begin
@@ -256,8 +256,8 @@ using ReservoirComputing
         @test kernel_rank(M_mixed; threshold = 1.0e-12) >= kernel_rank(M_mixed; threshold = 0.5)
 
         # Validation
-        @test_throws AssertionError kernel_rank(M_full; threshold = 0.0)
-        @test_throws AssertionError kernel_rank(M_full; threshold = -0.1)
+        @test_throws ArgumentError kernel_rank(M_full; threshold = 0.0)
+        @test_throws ArgumentError kernel_rank(M_full; threshold = -0.1)
     end
 
     @testset "Legendre Polynomials" begin
@@ -279,7 +279,7 @@ using ReservoirComputing
         @test leg(3, 0.5) ≈ (5 * 0.125 - 1.5) / 2
 
         # Negative degree should error
-        @test_throws AssertionError leg(-1, 0.5)
+        @test_throws ArgumentError leg(-1, 0.5)
 
         # Orthonormality: ∫₋₁¹ P̃_n(x) P̃_m(x) dx ≈ δ_{nm}
         nleg = ReservoirComputingBenchmarks._normalized_legendre
@@ -315,13 +315,13 @@ using ReservoirComputing
         @test ReservoirComputingBenchmarks._nmse(a, a) ≈ 0.0
 
         # _train_test_split: invalid ratios
-        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 0.0)
-        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 1.0)
-        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, -0.5)
-        @test_throws AssertionError ReservoirComputingBenchmarks._train_test_split(100, 1.5)
+        @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 0.0)
+        @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 1.0)
+        @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, -0.5)
+        @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 1.5)
 
         # _ridge_regression: negative reg
-        @test_throws AssertionError ReservoirComputingBenchmarks._ridge_regression(
+        @test_throws ArgumentError ReservoirComputingBenchmarks._ridge_regression(
             randn(50, 5), randn(50); reg = -1.0
         )
     end
@@ -331,30 +331,30 @@ using ReservoirComputing
         states = randn(rng, 10, 500)
 
         # max_delay must be >= 1
-        @test_throws AssertionError memory_capacity(input, states; max_delay = 0)
+        @test_throws ArgumentError memory_capacity(input, states; max_delay = 0)
 
         # max_delay must be < T
-        @test_throws AssertionError memory_capacity(input, states; max_delay = 500)
+        @test_throws ArgumentError memory_capacity(input, states; max_delay = 500)
 
         # Mismatched dimensions
-        @test_throws AssertionError memory_capacity(rand(100), randn(10, 200))
+        @test_throws DimensionMismatch memory_capacity(rand(100), randn(10, 200))
 
         # NARMA order < 2
-        @test_throws AssertionError generate_narma(rand(100); order = 1)
+        @test_throws ArgumentError generate_narma(rand(100); order = 1)
 
         # Missing NARMA coefficients for non-standard order
-        @test_throws AssertionError generate_narma(rand(100); order = 7)
+        @test_throws ArgumentError generate_narma(rand(100); order = 7)
 
         # Input shorter than order
-        @test_throws AssertionError generate_narma(rand(5); order = 10)
+        @test_throws ArgumentError generate_narma(rand(5); order = 10)
 
         # Washout out of range
-        @test_throws AssertionError narma(input, states; order = 10, washout = -1)
-        @test_throws AssertionError narma(input, states; order = 10, washout = 500)
+        @test_throws ArgumentError narma(input, states; order = 10, washout = -1)
+        @test_throws ArgumentError narma(input, states; order = 10, washout = 500)
 
         # IPC: max_degree/max_delay must be >= 1
-        @test_throws AssertionError ipc(input, states; max_delay = 0, max_degree = 2)
-        @test_throws AssertionError ipc(input, states; max_delay = 5, max_degree = 0)
+        @test_throws ArgumentError ipc(input, states; max_delay = 0, max_degree = 2)
+        @test_throws ArgumentError ipc(input, states; max_delay = 5, max_degree = 0)
     end
 
     @testset "ReservoirComputing model dispatch" begin
@@ -504,15 +504,15 @@ using ReservoirComputing
         end
 
         @testset "input length validation" begin
-            @test_throws AssertionError memory_capacity(
+            @test_throws ArgumentError memory_capacity(
                 model, ps, st; T = 1, max_delay = 1,
             )
             short = [0.1, 0.2]
-            @test_throws AssertionError memory_capacity(
+            @test_throws ArgumentError memory_capacity(
                 model, ps, st; input = [0.1], max_delay = 1,
             )
             # input keyword length controls dispatch length, not T
-            @test_throws AssertionError narma(model, ps, st; input = [0.1])
+            @test_throws ArgumentError narma(model, ps, st; input = [0.1])
         end
 
         @testset "scalar input (in_dims == 1) check" begin

From 2def5e2bb0e6353d4d478e2938064ba3c8849078 Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Mon, 18 May 2026 11:57:39 +0530
Subject: [PATCH 8/9] refactor(benchmarks): infer element type from inputs
 instead of forcing Float64

Allocations such as `Matrix{Float64}(undef, ...)`, `Vector{Float64}(...)`
and `zeros(...)` ignored the caller's element type and silently promoted
`Float32` (and other) inputs to `Float64`. Each benchmark now derives a
single computational type

    Telt = promote_type(eltype(input), eltype(states), typeof(reg))

and uses it for every internal buffer (design matrix, target buffers,
capacity vectors, IPC dictionaries). The NARMA target generator promotes
across the input and the four recurrence coefficients instead.

Metric and utility helpers (`_squared_correlation`, `_nmse`, `rnmse`,
`_normalize`) were also updated to be type-preserving so that storing
their results back into the new typed buffers never silently downcasts.

Docstrings now document the element-type rule in place of the previous
explicit `Float64` annotation.

Addresses review feedback on PR #414 (don't force `Float64`; infer from
input/states).
---
 lib/ReservoirComputingBenchmarks/src/ipc.jl   | 48 ++++++++++++-------
 .../src/memory_capacity.jl                    | 13 +++--
 lib/ReservoirComputingBenchmarks/src/narma.jl | 26 +++++++---
 .../src/nonlinear_memory.jl                   | 15 +++---
 .../src/nonlinear_transformation.jl           | 13 +++--
 .../src/sin_approximation.jl                  |  5 +-
 lib/ReservoirComputingBenchmarks/src/utils.jl | 22 ++++++---
 7 files changed, 93 insertions(+), 49 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl
index 98d09e775..e18953667 100644
--- a/lib/ReservoirComputingBenchmarks/src/ipc.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl
@@ -39,17 +39,20 @@ number of reservoir nodes.
 
 A `NamedTuple` with fields:
 
-  - `total::Float64`: total information processing capacity.
-  - `linear::Float64`: capacity from degree-1 (linear memory) terms.
-  - `nonlinear::Float64`: capacity from degree ``\geq 2`` terms.
-  - `by_degree::Dict{Int,Float64}`: capacity aggregated by total polynomial
+  - `total::Real`: total information processing capacity.
+  - `linear::Real`: capacity from degree-1 (linear memory) terms.
+  - `nonlinear::Real`: capacity from degree ``\geq 2`` terms.
+  - `by_degree::Dict{Int,<:Real}`: capacity aggregated by total polynomial
     degree.
-  - `by_delay::Dict{Int,Float64}`: capacity aggregated by delay (single-variable
+  - `by_delay::Dict{Int,<:Real}`: capacity aggregated by delay (single-variable
     terms only).
   - `basis_capacities::Vector{NamedTuple}`: per-basis-function results with
     fields `terms`, `degree`, `capacity`.
   - `theoretical_max::Int`: upper bound ``N`` (number of reservoir features).
 
+Capacity element types are derived from
+`promote_type(eltype(input), eltype(states), typeof(reg))`.
+
 ## References
 
   - Dambre, J., Verstraeten, D., Schrauwen, B. & Massar, S. (2012).
@@ -82,12 +85,16 @@ function ipc(
 
     mtd = something(max_total_degree, max_degree)
 
+    Telt = promote_type(eltype(input), eltype(states), typeof(reg))
+
     # Precompute normalized Legendre polynomial values for all (degree, delay)
-    poly_cache = Matrix{Vector{Float64}}(undef, max_delay, max_degree)
+    poly_cache = Matrix{Vector{Telt}}(undef, max_delay, max_degree)
     for delay in 1:max_delay
         for degree in 1:max_degree
             delayed = @view input[(max_delay + 1 - delay):(T - delay)]
-            poly_cache[delay, degree] = map(x -> _normalized_legendre(degree, x), delayed)
+            poly_cache[delay, degree] = Telt[
+                _normalized_legendre(degree, x) for x in delayed
+            ]
         end
     end
 
@@ -97,7 +104,7 @@ function ipc(
     # Valid time range (after max_delay to avoid edge effects)
     valid = (max_delay + 1):T
     T_valid = length(valid)
-    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    X = Matrix{Telt}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
@@ -107,18 +114,23 @@ function ipc(
 
     rf = _ridge_factor(X_train; reg = reg)
 
-    results = Vector{NamedTuple{(:terms, :degree, :capacity), Tuple{Vector{Tuple{Int, Int}}, Int, Float64}}}()
+    results = Vector{
+        NamedTuple{
+            (:terms, :degree, :capacity),
+            Tuple{Vector{Tuple{Int, Int}}, Int, Telt},
+        },
+    }()
     sizehint!(results, length(basis_functions))
-    by_degree = Dict{Int, Float64}()
-    by_delay = Dict{Int, Float64}()
+    by_degree = Dict{Int, Telt}()
+    by_delay = Dict{Int, Telt}()
 
-    target = Vector{Float64}(undef, T_valid)
-    y_pred = Vector{Float64}(undef, n_test)
+    target = Vector{Telt}(undef, T_valid)
+    y_pred = Vector{Telt}(undef, n_test)
 
     @inbounds for terms in basis_functions
         total_degree = sum(d for (_, d) in terms)
 
-        fill!(target, 1.0)
+        fill!(target, one(Telt))
         for (delay, degree) in terms
             target .*= poly_cache[delay, degree]
         end
@@ -132,16 +144,16 @@ function ipc(
         cap = _squared_correlation(y_test, y_pred)
 
         push!(results, (terms = terms, degree = total_degree, capacity = cap))
-        by_degree[total_degree] = get(by_degree, total_degree, 0.0) + cap
+        by_degree[total_degree] = get(by_degree, total_degree, zero(Telt)) + cap
 
         if length(terms) == 1
             delay = terms[1][1]
-            by_delay[delay] = get(by_delay, delay, 0.0) + cap
+            by_delay[delay] = get(by_delay, delay, zero(Telt)) + cap
         end
     end
 
-    total_cap = isempty(results) ? 0.0 : sum(r.capacity for r in results)
-    linear_cap = get(by_degree, 1, 0.0)
+    total_cap = isempty(results) ? zero(Telt) : sum(r.capacity for r in results)
+    linear_cap = get(by_degree, 1, zero(Telt))
     nonlinear_cap = total_cap - linear_cap
 
     return (
diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
index fe01b3b57..3f8ae5ae7 100644
--- a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
+++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl
@@ -31,9 +31,10 @@ number of reservoir nodes ``N`` (Jaeger, 2002).
 
 A `NamedTuple` with fields:
 
-  - `total::Float64`: total memory capacity ``MC``.
-  - `delays::Vector{Float64}`: per-delay capacities ``MC_k`` for
-    ``k = 1, \ldots, K``.
+  - `total::Real`: total memory capacity ``MC``.
+  - `delays::Vector{<:Real}`: per-delay capacities ``MC_k`` for
+    ``k = 1, \ldots, K``. The element type is derived from
+    `promote_type(eltype(input), eltype(states), typeof(reg))`.
 
 ## References
 
@@ -58,10 +59,12 @@ function memory_capacity(
         ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"),
     )
 
+    Telt = promote_type(eltype(input), eltype(states), typeof(reg))
+
     # Discard first max_delay steps to avoid edge effects from delays
     valid = (max_delay + 1):T
     T_valid = length(valid)
-    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    X = Matrix{Telt}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
@@ -71,7 +74,7 @@ function memory_capacity(
 
     rf = _ridge_factor(X_train; reg = reg)
 
-    delay_capacities = zeros(max_delay)
+    delay_capacities = zeros(Telt, max_delay)
 
     @inbounds for k in 1:max_delay
         target = view(input, valid .- k)
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
index a306ab51c..d3dbb810c 100644
--- a/lib/ReservoirComputingBenchmarks/src/narma.jl
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -51,7 +51,8 @@ y(t{+}1) = \alpha\, y(t) + \beta\, y(t) \sum_{i=0}^{N-1} y(t{-}i)
 
 ## Returns
 
-  - `Vector{Float64}`: NARMA target signal of length `T`.
+  - `Vector{<:Real}`: NARMA target signal of length `T`. The element type is
+    `float(promote_type(eltype(input), typeof.((alpha, beta, gamma, delta))...))`.
 
 ## References
 
@@ -92,8 +93,18 @@ function generate_narma(
         ArgumentError("Input length ($T) must be greater than order ($order)"),
     )
 
-    u = normalize ? _normalize(input, 0.0, 0.5) : convert(Vector{Float64}, input)
-    y = zeros(Float64, T)
+    Telt = float(
+        promote_type(
+            eltype(input), typeof(alpha), typeof(beta), typeof(gamma), typeof(delta),
+        ),
+    )
+
+    u = if normalize
+        _normalize(input, zero(Telt), convert(Telt, 1 // 2))
+    else
+        convert(Vector{Telt}, input)
+    end
+    y = zeros(Telt, T)
 
     if order == 2
         # NARMA-2: y[t] = α y[t-1] + β y[t-1] y[t-2] + γ u[t-1]³ + δ
@@ -152,8 +163,9 @@ from `states` to the target, and computes the error metric on held-out data.
 
 A `NamedTuple` with fields:
 
-  - `score::Float64`: the computed error metric.
-  - `target::Vector{Float64}`: the full NARMA target signal.
+  - `score::Real`: the computed error metric.
+  - `target::Vector{<:Real}`: the full NARMA target signal (see
+    [`generate_narma`](@ref) for its element type).
 
 ## Examples
 
@@ -196,7 +208,9 @@ function narma(
     valid = (wo + 1):T
     T_valid = length(valid)
 
-    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    Telt = promote_type(eltype(target), eltype(states), typeof(reg))
+
+    X = Matrix{Telt}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
     y = view(target, valid)
 
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
index b0fa2821c..e583fc3d2 100644
--- a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl
@@ -31,8 +31,9 @@ NMC_k = \frac{\text{cov}^2(f(u(t-k)),\, \hat{y}_k(t))}
 
 A `NamedTuple` with fields:
 
-  - `total::Float64`: total nonlinear memory capacity.
-  - `delays::Vector{Float64}`: per-delay capacities ``NMC_k``.
+  - `total::Real`: total nonlinear memory capacity.
+  - `delays::Vector{<:Real}`: per-delay capacities ``NMC_k``. The element type
+    is derived from `promote_type(eltype(input), eltype(states), typeof(reg))`.
 
 ## References
 
@@ -58,9 +59,11 @@ function nonlinear_memory(
         ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"),
     )
 
+    Telt = promote_type(eltype(input), eltype(states), typeof(reg))
+
     valid = (max_delay + 1):T
     T_valid = length(valid)
-    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    X = Matrix{Telt}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
@@ -69,12 +72,12 @@ function nonlinear_memory(
 
     rf = _ridge_factor(X_train; reg = reg)
 
-    delay_capacities = zeros(max_delay)
-    target = Vector{Float64}(undef, T_valid)
+    delay_capacities = zeros(Telt, max_delay)
+    target = Vector{Telt}(undef, T_valid)
 
     @inbounds for k in 1:max_delay
         for (i, t) in enumerate(valid)
-            target[i] = float(f(input[t - k]))
+            target[i] = f(input[t - k])
         end
 
         y_train = view(target, train_idx)
diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
index a2a8a674d..12a017ef7 100644
--- a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
+++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl
@@ -27,8 +27,9 @@ and the metric is reported on a held-out tail of the trajectory.
 
 A `NamedTuple` with fields:
 
-  - `score::Float64`: error metric on the test split.
-  - `target::Vector{Float64}`: full target signal `f.(input)`.
+  - `score::Real`: error metric on the test split.
+  - `target::Vector{<:Real}`: full target signal `f.(input)`. The element type
+    is derived from `promote_type(eltype(input), eltype(states), typeof(reg))`.
 
 ## References
 
@@ -58,12 +59,14 @@ function nonlinear_transformation(
     T_valid >= 2 ||
         throw(ArgumentError("Not enough samples after washout: $T_valid"))
 
-    X = Matrix{Float64}(undef, T_valid, size(states, 1))
+    Telt = promote_type(eltype(input), eltype(states), typeof(reg))
+
+    X = Matrix{Telt}(undef, T_valid, size(states, 1))
     copyto!(X, view(states, :, valid)')
 
-    target = Vector{Float64}(undef, T)
+    target = Vector{Telt}(undef, T)
     @inbounds for t in 1:T
-        target[t] = float(f(input[t]))
+        target[t] = f(input[t])
     end
 
     train_idx, test_idx = _train_test_split(T_valid, train_ratio)
diff --git a/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl
index b607ad645..a124e1876 100644
--- a/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl
+++ b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl
@@ -22,8 +22,9 @@ This is a memoryless nonlinear task. It is a thin wrapper around
 
 A `NamedTuple` with fields:
 
-  - `score::Float64`: error metric on the test split.
-  - `target::Vector{Float64}`: full target signal `sin(freq * input)`.
+  - `score::Real`: error metric on the test split.
+  - `target::Vector{<:Real}`: full target signal `sin(freq * input)`. See
+    [`nonlinear_transformation`](@ref) for the element-type rule.
 """
 function sin_approximation(
         input::AbstractVector,
diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl
index bcee9af47..86bbb2074 100644
--- a/lib/ReservoirComputingBenchmarks/src/utils.jl
+++ b/lib/ReservoirComputingBenchmarks/src/utils.jl
@@ -77,9 +77,9 @@ end
     if isnan(r)
         @warn "Squared correlation is NaN (zero-variance input). " *
             "This may indicate a degenerate reservoir or collapsed prediction."
-        return 0.0
+        return zero(r)
     end
-    return clamp(r^2, 0.0, 1.0)
+    return clamp(r^2, zero(r), one(r))
 end
 
 @inline function _nmse(y_true::AbstractVector, y_pred::AbstractVector)
@@ -87,7 +87,7 @@ end
     if v < eps(typeof(v))
         @warn "NMSE: target variance is near-zero ($v). " *
             "NMSE is undefined for constant targets."
-        return NaN
+        return oftype(v, NaN)
     end
     return mean((y_true .- y_pred) .^ 2) / v
 end
@@ -116,9 +116,17 @@ function _train_test_split(n::Int, train_ratio::Real)
 end
 
 @inline function _normalize(x::AbstractVector, lo::Real, hi::Real)
+    Telt = float(promote_type(eltype(x), typeof(lo), typeof(hi)))
     xmin, xmax = extrema(x)
-    xmin == xmax && return fill(convert(eltype(x), lo), length(x))
-    return @. (x - xmin) / (xmax - xmin) * (hi - lo) + lo
+    if xmin == xmax
+        return fill(convert(Telt, lo), length(x))
+    end
+    lo_c = convert(Telt, lo)
+    hi_c = convert(Telt, hi)
+    xmin_c = convert(Telt, xmin)
+    xmax_c = convert(Telt, xmax)
+    scale = (hi_c - lo_c) / (xmax_c - xmin_c)
+    return @. (x - xmin_c) * scale + lo_c
 end
 
 @doc raw"""
@@ -141,8 +149,8 @@ Returns `0.0` if NMSE is negative (due to numerical issues).
 """
 @inline function rnmse(y_true::AbstractVector, y_pred::AbstractVector)
     nmse_val = nmse(y_true, y_pred)
-    isnan(nmse_val) && return NaN
-    return nmse_val < 0 ? 0.0 : sqrt(nmse_val)
+    isnan(nmse_val) && return nmse_val
+    return nmse_val < 0 ? zero(nmse_val) : sqrt(nmse_val)
 end
 
 @doc raw"""

From 4b87049f2bc9195c7266a6c4e61ae93ed65797be Mon Sep 17 00:00:00 2001
From: Saswat Susmoy <tech@saswatsusmoy.in>
Date: Mon, 18 May 2026 12:35:16 +0530
Subject: [PATCH 9/9] refactor(benchmarks): drop MetricFunction alias; accept
 any callable metric

The `metric` keyword previously carried a `::MetricFunction` annotation
(an alias for `Function`). It excluded user-defined callable structs and
provided no real type safety, since `Function` is already the supertype
of every function. Remove the annotation so any object satisfying the
`(y_true, y_pred) -> score` contract is accepted, and delete the now
unused `MetricFunction` constant from the module.

Addresses review feedback on PR #414 (leave the `metric` argument
untyped).
---
 .../src/ReservoirComputingBenchmarks.jl                    | 2 --
 lib/ReservoirComputingBenchmarks/src/narma.jl              | 7 +++----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
index af1a33fc2..236b6af0f 100644
--- a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
+++ b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl
@@ -4,8 +4,6 @@ import LinearAlgebra
 using LinearAlgebra: I, cholesky, cholesky!, Symmetric, mul!, ldiv!, copytri!
 using Statistics: mean, var, cor
 
-const MetricFunction = Function
-
 include("utils.jl")
 include("memory_capacity.jl")
 include("nonlinear_memory.jl")
diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl
index d3dbb810c..b900b2f7a 100644
--- a/lib/ReservoirComputingBenchmarks/src/narma.jl
+++ b/lib/ReservoirComputingBenchmarks/src/narma.jl
@@ -150,9 +150,8 @@ from `states` to the target, and computes the error metric on held-out data.
 ## Keyword Arguments
 
   - `order::Int=10`: NARMA system order.
-  - `metric=nmse`: error metric function with signature `(y_true, y_pred) -> score`.
-    Built-in options: [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref). Can be any
-    `MetricFunction` (callable) with the same signature.
+  - `metric=nmse`: any callable with the signature `(y_true, y_pred) -> score`.
+    Built-in options: [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref).
   - `train_ratio::Real=0.8`: fraction of data used for training.
   - `reg::Real=1.0`: ridge regression regularization coefficient.
   - `washout::Union{Int,Nothing}=nothing`: number of initial time steps to
@@ -187,7 +186,7 @@ function narma(
         input::AbstractVector,
         states::AbstractMatrix;
         order::Int = 10,
-        metric::MetricFunction = nmse,
+        metric = nmse,
         train_ratio::Real = 0.8,
         reg::Real = 1.0,
         washout::Union{Int, Nothing} = nothing,