diff --git a/lib/ReservoirComputingBenchmarks/Project.toml b/lib/ReservoirComputingBenchmarks/Project.toml new file mode 100644 index 000000000..b4e5bd966 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/Project.toml @@ -0,0 +1,30 @@ +name = "ReservoirComputingBenchmarks" +uuid = "03eff81b-b261-4f82-8cea-c13df0f346ab" +authors = ["Francesco Martinuzzi", "Saswat Susmoy"] +version = "0.1.0" + +[deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[weakdeps] +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReservoirComputing = "7c2d2b1e-3dd4-11ea-355a-8f6a8116e294" + +[extensions] +RCBenchmarksReservoirComputingExt = ["Random", "ReservoirComputing"] + +[compat] +LinearAlgebra = "1.10" +Random = "1.10" +ReservoirComputing = "0.12" +Statistics = "1.10" +julia = "1.10" + +[extras] +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReservoirComputing = "7c2d2b1e-3dd4-11ea-355a-8f6a8116e294" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Random", "ReservoirComputing", "Test"] diff --git a/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl new file mode 100644 index 000000000..ecf2b5821 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/ext/RCBenchmarksReservoirComputingExt.jl @@ -0,0 +1,337 @@ +module RCBenchmarksReservoirComputingExt + +using Random: AbstractRNG, default_rng, randn +using ReservoirComputing: AbstractReservoirComputer, collectstates +using ReservoirComputingBenchmarks: ReservoirComputingBenchmarks, + memory_capacity, nonlinear_memory, + nonlinear_transformation, sin_approximation, + narma, ipc, + kernel_rank, generalization_rank + +# ── Helpers ───────────────────────────────────────────────────────────────── + +@inline function _uniform_input(rng::AbstractRNG, T::Integer, lo::Real, hi::Real) + T >= 2 || throw(ArgumentError("Signal length T must be >= 2, got $T")) + hi > lo || throw(ArgumentError("Require hi > lo, got lo=$lo, hi=$hi")) + return rand(rng, T) .* (hi - lo) .+ lo +end + +@inline function _resolve_input( + rng::AbstractRNG, T::Integer, input::Union{Nothing, AbstractVector} + ) + u = input === nothing ? _uniform_input(rng, T, -1.0, 1.0) : input + length(u) >= 2 || + throw(ArgumentError("input must have at least 2 samples, got $(length(u))")) + return u +end + +# Best-effort check that the model accepts scalar (in_dims == 1) inputs. +# Walks `rc.reservoir.cell.in_dims` when available; no-op otherwise so that +# unconventional model layouts still fall through to `collectstates`'s own +# shape error. +@inline function _check_scalar_input(rc::AbstractReservoirComputer) + res = getfield(rc, :reservoir) + cell = hasproperty(res, :cell) ? getproperty(res, :cell) : nothing + cell === nothing && return nothing + if hasproperty(cell, :in_dims) + in_dims = getproperty(cell, :in_dims) + in_dims == 1 || throw( + ArgumentError( + "ReservoirComputingBenchmarks model dispatch requires a reservoir with " * + "scalar input (in_dims == 1), got in_dims == $in_dims. Provide a model " * + "constructed with `in_dims = 1`, or call the array-based method directly " * + "with your own (n_features, T) state matrix.", + ), + ) + end + return nothing +end + +@inline function _collect_scalar_states( + rc::AbstractReservoirComputer, u::AbstractVector, ps, st + ) + _check_scalar_input(rc) + data = reshape(u, 1, length(u)) + states, _ = collectstates(rc, data, ps, st) + return states +end + +# Drive the reservoir with `u`, return the **final** state vector. Each call +# uses the user-provided initial `st` (with carry=nothing) so successive runs +# are independent. Skips the per-call scalar-input check (assumed already +# verified once by the caller). +@inline function _final_state( + rc::AbstractReservoirComputer, u::AbstractVector, ps, st + ) + data = reshape(u, 1, length(u)) + states, _ = collectstates(rc, data, ps, st) + return states[:, end] +end + +# ── Memory Capacity ───────────────────────────────────────────────────────── + +@doc raw""" + memory_capacity(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Linear memory capacity of a reservoir computing model. Generates a uniform +``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches +to the array-based [`memory_capacity`](@ref). + +Remaining keyword arguments (`max_delay`, `train_ratio`, `reg`) are forwarded. +The reservoir model must accept scalar inputs (`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.memory_capacity( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return memory_capacity(u, states; kwargs...) +end + +# ── Nonlinear Memory ──────────────────────────────────────────────────────── + +@doc raw""" + nonlinear_memory(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Nonlinear memory capacity of a reservoir computing model. Generates a uniform +``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches +to the array-based [`nonlinear_memory`](@ref). + +Remaining keyword arguments (`f`, `max_delay`, `train_ratio`, `reg`) are +forwarded. The reservoir model must accept scalar inputs (`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.nonlinear_memory( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return nonlinear_memory(u, states; kwargs...) +end + +# ── Nonlinear Transformation ──────────────────────────────────────────────── + +@doc raw""" + nonlinear_transformation(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Memoryless nonlinear transformation benchmark on a reservoir computing model. +Generates a uniform ``[-1, 1]`` input, drives the model via +[`collectstates`](@ref), and dispatches to the array-based +[`nonlinear_transformation`](@ref). + +Remaining keyword arguments (`f`, `train_ratio`, `reg`, `metric`, `washout`) +are forwarded. The reservoir model must accept scalar inputs (`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.nonlinear_transformation( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return nonlinear_transformation(u, states; kwargs...) +end + +# ── Sin Approximation ─────────────────────────────────────────────────────── + +@doc raw""" + sin_approximation(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Sin-approximation benchmark on a reservoir computing model. Generates a +uniform ``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and +dispatches to the array-based [`sin_approximation`](@ref). + +Remaining keyword arguments (`freq`, `train_ratio`, `reg`, `metric`, +`washout`) are forwarded. The reservoir model must accept scalar inputs +(`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.sin_approximation( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return sin_approximation(u, states; kwargs...) +end + +# ── NARMA ─────────────────────────────────────────────────────────────────── + +@doc raw""" + narma(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Evaluate a reservoir computing model on the NARMA-N task. Generates a uniform +``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and dispatches +to the array-based [`narma`](@ref). + +Remaining keyword arguments (`order`, `metric`, `train_ratio`, `reg`, +`washout`, NARMA coefficients, ...) are forwarded. The reservoir model must +accept scalar inputs (`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.narma( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return narma(u, states; kwargs...) +end + +# ── IPC ───────────────────────────────────────────────────────────────────── + +@doc raw""" + ipc(rc::AbstractReservoirComputer, ps, st; + T=3000, rng=Random.default_rng(), + input=nothing, kwargs...) + +Information Processing Capacity of a reservoir computing model. Generates a +uniform ``[-1, 1]`` input, drives the model via [`collectstates`](@ref), and +dispatches to the array-based [`ipc`](@ref). + +Remaining keyword arguments (`max_delay`, `max_degree`, `max_total_degree`, +`cross_terms`, `train_ratio`, `reg`) are forwarded. The reservoir model must +accept scalar inputs (`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.ipc( + rc::AbstractReservoirComputer, ps, st; + T::Integer = 3000, + rng::AbstractRNG = default_rng(), + input::Union{Nothing, AbstractVector} = nothing, + kwargs..., + ) + u = _resolve_input(rng, T, input) + states = _collect_scalar_states(rc, u, ps, st) + return ipc(u, states; kwargs...) +end + +# ── Kernel Rank ───────────────────────────────────────────────────────────── + +# Drive `n_streams` independent runs and stack the final states column-wise. +function _collect_final_states( + rc::AbstractReservoirComputer, ps, st, + streams::Function, n_streams::Integer + ) + n_streams >= 1 || + throw(ArgumentError("n_streams must be >= 1, got $n_streams")) + _check_scalar_input(rc) + final_states = nothing + n_features = -1 + @inbounds for i in 1:n_streams + u = streams(i) + x = _final_state(rc, u, ps, st) + if final_states === nothing + n_features = length(x) + final_states = Matrix{eltype(x)}(undef, n_features, n_streams) + end + length(x) == n_features || throw( + DimensionMismatch( + "Final state size changed across runs ($(length(x)) vs $n_features)", + ), + ) + final_states[:, i] .= x + end + return final_states +end + +@doc raw""" + kernel_rank(rc::AbstractReservoirComputer, ps, st; + n_streams=500, stream_length=100, + rng=Random.default_rng(), threshold=0.01) + +Drive the reservoir with `n_streams` independent uniform ``[-1, 1]`` input +streams of length `stream_length`, collect the final state of each run, and +return the numerical rank of the resulting `(n_features, n_streams)` matrix. + +The user-provided initial `st` is reused for each run, ensuring that every +stream starts from the same fresh carry (`nothing`), so the runs are +independent. + +Remaining keyword arguments (`threshold`) are forwarded to the array-based +[`kernel_rank`](@ref). The reservoir model must accept scalar inputs +(`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.kernel_rank( + rc::AbstractReservoirComputer, ps, st; + n_streams::Integer = 500, + stream_length::Integer = 100, + rng::AbstractRNG = default_rng(), + threshold::Real = 0.01, + ) + stream_length >= 2 || + throw(ArgumentError("stream_length must be >= 2, got $stream_length")) + streams = _ -> _uniform_input(rng, stream_length, -1.0, 1.0) + M = _collect_final_states(rc, ps, st, streams, n_streams) + return kernel_rank(M; threshold = threshold) +end + +# ── Generalization Rank ───────────────────────────────────────────────────── + +@doc raw""" + generalization_rank(rc::AbstractReservoirComputer, ps, st; + n_streams=500, stream_length=100, + perturbation=0.01, base_input=nothing, + rng=Random.default_rng(), threshold=0.01) + +Drive the reservoir with `n_streams` slightly perturbed copies of a common +base input stream of length `stream_length`, collect the final state of each +run, and return the numerical rank of the resulting matrix. + +A *lower* generalization rank means the reservoir collapses similar inputs to +similar states (good generalization). The base stream is sampled uniformly in +``[-1, 1]`` unless supplied via `base_input`; perturbations are i.i.d. Gaussian +noise of standard deviation `perturbation`. + +Remaining keyword arguments (`threshold`) are forwarded to the array-based +[`generalization_rank`](@ref). The reservoir model must accept scalar inputs +(`in_dims == 1`). +""" +function ReservoirComputingBenchmarks.generalization_rank( + rc::AbstractReservoirComputer, ps, st; + n_streams::Integer = 500, + stream_length::Integer = 100, + perturbation::Real = 0.01, + base_input::Union{Nothing, AbstractVector} = nothing, + rng::AbstractRNG = default_rng(), + threshold::Real = 0.01, + ) + stream_length >= 2 || + throw(ArgumentError("stream_length must be >= 2, got $stream_length")) + perturbation >= 0 || + throw(ArgumentError("perturbation must be >= 0, got $perturbation")) + base = base_input === nothing ? + _uniform_input(rng, stream_length, -1.0, 1.0) : base_input + length(base) == stream_length || throw( + DimensionMismatch( + "base_input length ($(length(base))) must equal stream_length ($stream_length)", + ), + ) + streams = _ -> base .+ perturbation .* randn(rng, stream_length) + M = _collect_final_states(rc, ps, st, streams, n_streams) + return generalization_rank(M; threshold = threshold) +end + +end # module diff --git a/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl new file mode 100644 index 000000000..236b6af0f --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/ReservoirComputingBenchmarks.jl @@ -0,0 +1,23 @@ +module ReservoirComputingBenchmarks + +import LinearAlgebra +using LinearAlgebra: I, cholesky, cholesky!, Symmetric, mul!, ldiv!, copytri! +using Statistics: mean, var, cor + +include("utils.jl") +include("memory_capacity.jl") +include("nonlinear_memory.jl") +include("nonlinear_transformation.jl") +include("sin_approximation.jl") +include("narma.jl") +include("ipc.jl") +include("kernel_rank.jl") + +export memory_capacity, nonlinear_memory +export nonlinear_transformation, sin_approximation +export generate_narma, narma +export ipc +export kernel_rank, generalization_rank +export nmse, rnmse, mse + +end # module diff --git a/lib/ReservoirComputingBenchmarks/src/ipc.jl b/lib/ReservoirComputingBenchmarks/src/ipc.jl new file mode 100644 index 000000000..e18953667 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/ipc.jl @@ -0,0 +1,235 @@ +@doc raw""" + ipc(input, states; max_delay=10, max_degree=3, max_total_degree=nothing, cross_terms=true, train_ratio=0.8, reg=1.0) + +Compute the Information Processing Capacity of a reservoir following +Dambre et al. (2012). + +The IPC decomposes the total computational capacity of a reservoir into +contributions from orthonormal Legendre polynomial basis functions: + +```math +z_l(t) = \prod_i \tilde{P}_{d_i}\!\bigl(u(t - \tau_i)\bigr) +``` + +where ``\tilde{P}_n(x) = P_n(x)\sqrt{(2n+1)/2}`` is the normalized Legendre +polynomial of degree ``n``. For each basis function, a ridge regression readout +is trained, and the capacity is the squared correlation between target and +prediction. + +The total capacity satisfies ``C_{\text{total}} \leq N`` where ``N`` is the +number of reservoir nodes. + +## Arguments + + - `input::AbstractVector`: input signal of length `T` (should be i.i.d. + uniform in ``[-1, 1]``). + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `max_delay::Int=10`: maximum delay ``\tau`` for basis functions. + - `max_degree::Int=3`: maximum polynomial degree per variable. + - `max_total_degree::Union{Int,Nothing}=nothing`: maximum sum of degrees + in cross-terms. Defaults to `max_degree`. + - `cross_terms::Bool=true`: include two-variable product basis functions. + - `train_ratio::Real=0.8`: fraction of valid data used for training. + - `reg::Real=1.0`: ridge regression regularization coefficient. + +## Returns + +A `NamedTuple` with fields: + + - `total::Real`: total information processing capacity. + - `linear::Real`: capacity from degree-1 (linear memory) terms. + - `nonlinear::Real`: capacity from degree ``\geq 2`` terms. + - `by_degree::Dict{Int,<:Real}`: capacity aggregated by total polynomial + degree. + - `by_delay::Dict{Int,<:Real}`: capacity aggregated by delay (single-variable + terms only). + - `basis_capacities::Vector{NamedTuple}`: per-basis-function results with + fields `terms`, `degree`, `capacity`. + - `theoretical_max::Int`: upper bound ``N`` (number of reservoir features). + +Capacity element types are derived from +`promote_type(eltype(input), eltype(states), typeof(reg))`. + +## References + + - Dambre, J., Verstraeten, D., Schrauwen, B. & Massar, S. (2012). + "Information processing capacity of dynamical systems." + *Scientific Reports*, 2, 514. +""" +function ipc( + input::AbstractVector, + states::AbstractMatrix; + max_delay::Int = 10, + max_degree::Int = 3, + max_total_degree::Union{Int, Nothing} = nothing, + cross_terms::Bool = true, + train_ratio::Real = 0.8, + reg::Real = 1.0, + ) + T = length(input) + n_features = size(states, 1) + size(states, 2) == T || throw( + DimensionMismatch( + "states must have $T columns (time steps), got $(size(states, 2))", + ), + ) + max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay")) + max_degree >= 1 || + throw(ArgumentError("max_degree must be >= 1, got $max_degree")) + max_delay < T || throw( + ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"), + ) + + mtd = something(max_total_degree, max_degree) + + Telt = promote_type(eltype(input), eltype(states), typeof(reg)) + + # Precompute normalized Legendre polynomial values for all (degree, delay) + poly_cache = Matrix{Vector{Telt}}(undef, max_delay, max_degree) + for delay in 1:max_delay + for degree in 1:max_degree + delayed = @view input[(max_delay + 1 - delay):(T - delay)] + poly_cache[delay, degree] = Telt[ + _normalized_legendre(degree, x) for x in delayed + ] + end + end + + # Generate basis functions + basis_functions = _enumerate_basis_functions(max_delay, max_degree, mtd, cross_terms) + + # Valid time range (after max_delay to avoid edge effects) + valid = (max_delay + 1):T + T_valid = length(valid) + X = Matrix{Telt}(undef, T_valid, size(states, 1)) + copyto!(X, view(states, :, valid)') + + train_idx, test_idx = _train_test_split(T_valid, train_ratio) + X_train = view(X, train_idx, :) + X_test = view(X, test_idx, :) + n_test = length(test_idx) + + rf = _ridge_factor(X_train; reg = reg) + + results = Vector{ + NamedTuple{ + (:terms, :degree, :capacity), + Tuple{Vector{Tuple{Int, Int}}, Int, Telt}, + }, + }() + sizehint!(results, length(basis_functions)) + by_degree = Dict{Int, Telt}() + by_delay = Dict{Int, Telt}() + + target = Vector{Telt}(undef, T_valid) + y_pred = Vector{Telt}(undef, n_test) + + @inbounds for terms in basis_functions + total_degree = sum(d for (_, d) in terms) + + fill!(target, one(Telt)) + for (delay, degree) in terms + target .*= poly_cache[delay, degree] + end + + y_train = view(target, train_idx) + y_test = view(target, test_idx) + + w = _ridge_solve!(rf, X_train, y_train) + mul!(y_pred, X_test, w) + + cap = _squared_correlation(y_test, y_pred) + + push!(results, (terms = terms, degree = total_degree, capacity = cap)) + by_degree[total_degree] = get(by_degree, total_degree, zero(Telt)) + cap + + if length(terms) == 1 + delay = terms[1][1] + by_delay[delay] = get(by_delay, delay, zero(Telt)) + cap + end + end + + total_cap = isempty(results) ? zero(Telt) : sum(r.capacity for r in results) + linear_cap = get(by_degree, 1, zero(Telt)) + nonlinear_cap = total_cap - linear_cap + + return ( + total = total_cap, + linear = linear_cap, + nonlinear = nonlinear_cap, + by_degree = by_degree, + by_delay = by_delay, + basis_capacities = results, + theoretical_max = n_features, + ) +end + +# ── Legendre polynomials ───────────────────────────────────────────────────── + +""" + _legendre(n, x) + +Evaluate Legendre polynomial ``P_n(x)`` using three-term recurrence. +""" +@inline function _legendre(n::Int, x::Real) + n >= 0 || + throw(ArgumentError("Legendre polynomial degree must be non-negative, got $n")) + n == 0 && return one(float(x)) + n == 1 && return float(x) + p_prev, p_curr = one(float(x)), float(x) + @inbounds for k in 1:(n - 1) + p_next = ((2k + 1) * x * p_curr - k * p_prev) / (k + 1) + p_prev, p_curr = p_curr, p_next + end + return p_curr +end + +""" + _normalized_legendre(n, x) + +Evaluate the orthonormalized Legendre polynomial +``\\tilde{P}_n(x) = P_n(x) \\sqrt{(2n+1)/2}`` on ``[-1,1]``. +""" +@inline function _normalized_legendre(n::Int, x::Real) + return _legendre(n, x) * sqrt((2n + 1) / 2) +end + +# ── Basis function enumeration ─────────────────────────────────────────────── + +""" + _enumerate_basis_functions(max_delay, max_degree, max_total_degree, cross_terms) + +Return a vector of basis function descriptors. Each descriptor is a +`Vector{Tuple{Int,Int}}` of `(delay, degree)` pairs. +""" +function _enumerate_basis_functions( + max_delay::Int, max_degree::Int, max_total_degree::Int, cross_terms::Bool + ) + basis = Vector{Vector{Tuple{Int, Int}}}() + + # Single-variable terms: P_d(u(t - τ)) + @inbounds for delay in 1:max_delay + @inbounds for degree in 1:max_degree + push!(basis, [(delay, degree)]) + end + end + + # Two-variable cross-terms: P_{d1}(u(t-τ1)) * P_{d2}(u(t-τ2)) + if cross_terms + @inbounds for d1 in 1:max_delay + @inbounds for d2 in (d1 + 1):max_delay + @inbounds for deg1 in 1:max_degree + @inbounds for deg2 in 1:max_degree + deg1 + deg2 <= max_total_degree || continue + push!(basis, [(d1, deg1), (d2, deg2)]) + end + end + end + end + end + + return basis +end diff --git a/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl new file mode 100644 index 000000000..7ed9c8ede --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/kernel_rank.jl @@ -0,0 +1,87 @@ +@doc raw""" + _effective_rank(M; threshold=0.01) + +Numerical rank of a matrix `M`: the number of singular values strictly greater +than `threshold * maximum_singular_value`. + +The threshold is relative to the largest singular value, making the rank +estimate scale-invariant. Returns `0` for empty or all-zero matrices. +""" +function _effective_rank(M::AbstractMatrix; threshold::Real = 0.01) + threshold > 0 || throw(ArgumentError("threshold must be positive, got $threshold")) + isempty(M) && return 0 + s = LinearAlgebra.svdvals(M) + s_max = isempty(s) ? zero(eltype(s)) : maximum(s) + s_max <= 0 && return 0 + return count(>(threshold * s_max), s) +end + +@doc raw""" + kernel_rank(states; threshold=0.01) + +Compute the **kernel rank** of a reservoir, defined as the numerical rank of a +matrix whose columns are reservoir final states reached after driving the +reservoir with `n` distinct random input streams. + +A higher kernel rank indicates a richer separation property: the reservoir +maps different inputs to linearly independent state vectors, which is a +prerequisite for high computational power. + +## Arguments + + - `states::AbstractMatrix`: matrix of size `(n_features, n)` whose columns + are final reservoir states from `n` independent driving runs. + +## Keyword Arguments + + - `threshold::Real=0.01`: relative threshold on singular values. + +## Returns + + - `Int`: numerical rank, in `[0, min(n_features, n)]`. + +## References + + - Legenstein, R. & Maass, W. (2007). "Edge of chaos and prediction of + computational performance for neural circuit models." *Neural Networks*, + 20(3), 323–334. +""" +function kernel_rank(states::AbstractMatrix; threshold::Real = 0.01) + return _effective_rank(states; threshold = threshold) +end + +@doc raw""" + generalization_rank(states; threshold=0.01) + +Compute the **generalization rank** of a reservoir, defined as the numerical +rank of a matrix whose columns are reservoir final states reached after +driving the reservoir with `n` slightly perturbed copies of a common input +stream. + +A *lower* generalization rank indicates that the reservoir collapses similar +inputs to similar states (good generalization). The difference +`kernel_rank - generalization_rank` peaks at the *edge of chaos* and predicts +computational performance (Legenstein & Maass, 2007). + +## Arguments + + - `states::AbstractMatrix`: matrix of size `(n_features, n)` whose columns + are final reservoir states from `n` perturbed driving runs. + +## Keyword Arguments + + - `threshold::Real=0.01`: relative threshold on singular values. + +## Returns + + - `Int`: numerical rank, in `[0, min(n_features, n)]`. + +## References + + - Legenstein, R. & Maass, W. (2007). "Edge of chaos and prediction of + computational performance for neural circuit models." *Neural Networks*, + 20(3), 323–334. +""" +function generalization_rank(states::AbstractMatrix; threshold::Real = 0.01) + return _effective_rank(states; threshold = threshold) +end diff --git a/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl new file mode 100644 index 000000000..3f8ae5ae7 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/memory_capacity.jl @@ -0,0 +1,92 @@ +@doc raw""" + memory_capacity(input, states; max_delay=30, train_ratio=0.8, reg=1.0) + +Compute the linear memory capacity of a reservoir following Jaeger (2002). + +For each delay ``k = 1, \ldots, K``, a ridge regression readout is trained to +reconstruct the input delayed by ``k`` steps. The per-delay capacity is the +squared Pearson correlation between the true delayed input and the prediction: + +```math +MC_k = \frac{\text{cov}^2(u(t-k),\, \hat{u}_k(t))} + {\text{var}(u(t-k))\;\text{var}(\hat{u}_k(t))} +``` + +The total memory capacity is ``MC = \sum_{k=1}^{K} MC_k``, bounded above by the +number of reservoir nodes ``N`` (Jaeger, 2002). + +## Arguments + + - `input::AbstractVector`: input signal of length `T` (typically i.i.d. + uniform in ``[-1, 1]``). + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `max_delay::Int=30`: maximum delay ``K`` to evaluate. + - `train_ratio::Real=0.8`: fraction of valid data used for training. + - `reg::Real=1.0`: ridge regression regularization coefficient ``\lambda``. + +## Returns + +A `NamedTuple` with fields: + + - `total::Real`: total memory capacity ``MC``. + - `delays::Vector{<:Real}`: per-delay capacities ``MC_k`` for + ``k = 1, \ldots, K``. The element type is derived from + `promote_type(eltype(input), eltype(states), typeof(reg))`. + +## References + + - Jaeger, H. (2002). "Short term memory in echo state networks." + GMD Report 152. +""" +function memory_capacity( + input::AbstractVector, + states::AbstractMatrix; + max_delay::Int = 30, + train_ratio::Real = 0.8, + reg::Real = 1.0, + ) + T = length(input) + size(states, 2) == T || throw( + DimensionMismatch( + "states must have $T columns (time steps), got $(size(states, 2))", + ), + ) + max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay")) + max_delay < T || throw( + ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"), + ) + + Telt = promote_type(eltype(input), eltype(states), typeof(reg)) + + # Discard first max_delay steps to avoid edge effects from delays + valid = (max_delay + 1):T + T_valid = length(valid) + X = Matrix{Telt}(undef, T_valid, size(states, 1)) + copyto!(X, view(states, :, valid)') + + train_idx, test_idx = _train_test_split(T_valid, train_ratio) + + X_train = view(X, train_idx, :) + X_test = view(X, test_idx, :) + + rf = _ridge_factor(X_train; reg = reg) + + delay_capacities = zeros(Telt, max_delay) + + @inbounds for k in 1:max_delay + target = view(input, valid .- k) + + y_train = view(target, train_idx) + y_test = view(target, test_idx) + + w = _ridge_solve!(rf, X_train, y_train) + y_pred = X_test * w + + delay_capacities[k] = _squared_correlation(y_test, y_pred) + end + + return (total = sum(delay_capacities), delays = delay_capacities) +end diff --git a/lib/ReservoirComputingBenchmarks/src/narma.jl b/lib/ReservoirComputingBenchmarks/src/narma.jl new file mode 100644 index 000000000..b900b2f7a --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/narma.jl @@ -0,0 +1,225 @@ +""" +Standard NARMA coefficients from the literature. + + - NARMA-2: Atiya & Parlos (2000), Eq. 12 + - NARMA-10: Atiya & Parlos (2000), standard benchmark + - NARMA-30: Schrauwen et al. (2008) +""" +const _NARMA_2 = (alpha = 0.4, beta = 0.4, gamma = 0.6, delta = 0.1) +const _NARMA_10 = (alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1) +const _NARMA_30 = (alpha = 0.2, beta = 0.04, gamma = 1.5, delta = 0.001) +const _NARMA_PRESETS = (2, 10, 30) + +@inline function _narma_coeffs(order::Int) + order == 2 && return _NARMA_2 + order == 10 && return _NARMA_10 + order == 30 && return _NARMA_30 + return nothing +end + +@doc raw""" + generate_narma(input; order=10, alpha=nothing, beta=nothing, gamma=nothing, delta=nothing, normalize=true) + +Generate a NARMA target signal from the given input. + +For `order == 2` (Atiya & Parlos 2000, Eq. 12): + +```math +y(k{+}1) = \alpha\, y(k) + \beta\, y(k)\, y(k{-}1) + + \gamma\, u(k)^3 + \delta +``` + +For `order >= 3` (Atiya & Parlos 2000, general NARMA-N): + +```math +y(t{+}1) = \alpha\, y(t) + \beta\, y(t) \sum_{i=0}^{N-1} y(t{-}i) + + \gamma\, u(t{-}N{+}1)\, u(t) + \delta +``` + +## Arguments + + - `input::AbstractVector`: driving input signal of length `T`. + +## Keyword Arguments + + - `order::Int=10`: NARMA system order ``N``. Must be ``\geq 2``. + - `alpha`, `beta`, `gamma`, `delta`: recurrence coefficients. When `nothing` + (default), standard values are used for orders 2, 10, and 30. All four + must be provided explicitly for other orders. + - `normalize::Bool=true`: normalize input to ``[0, 0.5]`` before computing + the recurrence. + +## Returns + + - `Vector{<:Real}`: NARMA target signal of length `T`. The element type is + `float(promote_type(eltype(input), typeof.((alpha, beta, gamma, delta))...))`. + +## References + + - Atiya, A.F. & Parlos, A.G. (2000). "New results on recurrent network + training." *IEEE Trans. Neural Networks*, 11(3). + - Schrauwen, B. et al. (2008). "Improving reservoirs using intrinsic + plasticity." *Neurocomputing*, 71(7–9). +""" +function generate_narma( + input::AbstractVector; + order::Int = 10, + alpha::Union{Real, Nothing} = nothing, + beta::Union{Real, Nothing} = nothing, + gamma::Union{Real, Nothing} = nothing, + delta::Union{Real, Nothing} = nothing, + normalize::Bool = true, + ) + order >= 2 || throw(ArgumentError("NARMA order must be >= 2, got $order")) + + # Resolve coefficients + c = _narma_coeffs(order) + if c !== nothing + alpha = something(alpha, c.alpha) + beta = something(beta, c.beta) + gamma = something(gamma, c.gamma) + delta = something(delta, c.delta) + elseif isnothing(alpha) || isnothing(beta) || isnothing(gamma) || isnothing(delta) + throw( + ArgumentError( + "No default coefficients for NARMA-$order. " * + "Provide alpha, beta, gamma, delta explicitly.", + ), + ) + end + + T = length(input) + T > order || throw( + ArgumentError("Input length ($T) must be greater than order ($order)"), + ) + + Telt = float( + promote_type( + eltype(input), typeof(alpha), typeof(beta), typeof(gamma), typeof(delta), + ), + ) + + u = if normalize + _normalize(input, zero(Telt), convert(Telt, 1 // 2)) + else + convert(Vector{Telt}, input) + end + y = zeros(Telt, T) + + if order == 2 + # NARMA-2: y[t] = α y[t-1] + β y[t-1] y[t-2] + γ u[t-1]³ + δ + @inbounds for t in 3:T + y[t] = alpha * y[t - 1] + + beta * y[t - 1] * y[t - 2] + + gamma * u[t - 1]^3 + + delta + end + else + # NARMA-N (N >= 3): standard recurrence + # y[t] = α y[t-1] + β y[t-1] Σ y[(t-N):(t-1)] + γ u[t-N] u[t-1] + δ + @inbounds for t in (order + 1):T + y[t] = alpha * y[t - 1] + + beta * y[t - 1] * sum(@view y[(t - order):(t - 1)]) + + gamma * u[t - order] * u[t - 1] + + delta + end + end + + if any(!isfinite, y) + idx = findfirst(!isfinite, y) + @warn "NARMA-$order target diverged at time step $idx (value: $(y[idx])). " * + "Consider using normalize=true or adjusting coefficients." + end + + return y +end + +@doc raw""" + narma(input, states; order=10, metric=nmse, train_ratio=0.8, reg=1.0, washout=nothing, kwargs...) + +Evaluate reservoir performance on the NARMA-N task. + +Generates the NARMA target from `input`, trains a ridge regression readout +from `states` to the target, and computes the error metric on held-out data. + +## Arguments + + - `input::AbstractVector`: driving input signal of length `T`. + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `order::Int=10`: NARMA system order. + - `metric=nmse`: any callable with the signature `(y_true, y_pred) -> score`. + Built-in options: [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref). + - `train_ratio::Real=0.8`: fraction of data used for training. + - `reg::Real=1.0`: ridge regression regularization coefficient. + - `washout::Union{Int,Nothing}=nothing`: number of initial time steps to + discard. Defaults to `order`. + - Remaining `kwargs` are forwarded to [`generate_narma`](@ref). + +## Returns + +A `NamedTuple` with fields: + + - `score::Real`: the computed error metric. + - `target::Vector{<:Real}`: the full NARMA target signal (see + [`generate_narma`](@ref) for its element type). + +## Examples + +```julia +using ReservoirComputingBenchmarks + +input = rand(1000) +states = randn(50, 1000) + +# Using built-in metrics +result = narma(input, states; order=10, metric=nmse) + +# Using custom metric +custom_metric(y_true, y_pred) = mean(abs.(y_true .- y_pred)) +result = narma(input, states; order=10, metric=custom_metric) +``` +""" +function narma( + input::AbstractVector, + states::AbstractMatrix; + order::Int = 10, + metric = nmse, + train_ratio::Real = 0.8, + reg::Real = 1.0, + washout::Union{Int, Nothing} = nothing, + kwargs..., + ) + T = length(input) + size(states, 2) == T || throw( + DimensionMismatch( + "states must have $T columns (time steps), got $(size(states, 2))", + ), + ) + + target = generate_narma(input; order = order, kwargs...) + + # Skip initial transient where y ≈ 0 + wo = something(washout, order) + 0 <= wo < T || throw(ArgumentError("washout must be in [0, $T), got $wo")) + valid = (wo + 1):T + T_valid = length(valid) + + Telt = promote_type(eltype(target), eltype(states), typeof(reg)) + + X = Matrix{Telt}(undef, T_valid, size(states, 1)) + copyto!(X, view(states, :, valid)') + y = view(target, valid) + + train_idx, test_idx = _train_test_split(T_valid, train_ratio) + + w = _ridge_regression(view(X, train_idx, :), view(y, train_idx); reg = reg) + y_pred = view(X, test_idx, :) * w + y_test = view(y, test_idx) + + score = metric(y_test, y_pred) + + return (score = score, target = target) +end diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl new file mode 100644 index 000000000..e583fc3d2 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_memory.jl @@ -0,0 +1,93 @@ +@doc raw""" + nonlinear_memory(input, states; f=x->x^2, max_delay=30, train_ratio=0.8, reg=1.0) + +Compute the nonlinear memory capacity of a reservoir following Inubushi & +Yoshimura (2017). For each delay ``k = 1, \ldots, K``, a ridge regression +readout is trained to reconstruct ``f(u(t-k))`` from the reservoir state at +time ``t``. The per-delay capacity is the squared Pearson correlation between +target and prediction, and the total capacity is the sum. + +When ``f(x) = x`` this reduces to the linear [`memory_capacity`](@ref). + +```math +NMC_k = \frac{\text{cov}^2(f(u(t-k)),\, \hat{y}_k(t))} + {\text{var}(f(u(t-k)))\;\text{var}(\hat{y}_k(t))} +``` + +## Arguments + + - `input::AbstractVector`: input signal of length `T` (typically i.i.d. + uniform in ``[-1, 1]``). + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `f`: scalar nonlinearity applied to the delayed input. Default ``x \mapsto x^2``. + - `max_delay::Int=30`: maximum delay ``K``. + - `train_ratio::Real=0.8`: fraction of valid data used for training. + - `reg::Real=1.0`: ridge regression regularization coefficient. + +## Returns + +A `NamedTuple` with fields: + + - `total::Real`: total nonlinear memory capacity. + - `delays::Vector{<:Real}`: per-delay capacities ``NMC_k``. The element type + is derived from `promote_type(eltype(input), eltype(states), typeof(reg))`. + +## References + + - Inubushi, M. & Yoshimura, K. (2017). "Reservoir computing beyond + memory-nonlinearity trade-off." *Scientific Reports*, 7, 10199. +""" +function nonlinear_memory( + input::AbstractVector, + states::AbstractMatrix; + f = x -> x^2, + max_delay::Int = 30, + train_ratio::Real = 0.8, + reg::Real = 1.0, + ) + T = length(input) + size(states, 2) == T || throw( + DimensionMismatch( + "states must have $T columns (time steps), got $(size(states, 2))", + ), + ) + max_delay >= 1 || throw(ArgumentError("max_delay must be >= 1, got $max_delay")) + max_delay < T || throw( + ArgumentError("max_delay ($max_delay) must be less than signal length ($T)"), + ) + + Telt = promote_type(eltype(input), eltype(states), typeof(reg)) + + valid = (max_delay + 1):T + T_valid = length(valid) + X = Matrix{Telt}(undef, T_valid, size(states, 1)) + copyto!(X, view(states, :, valid)') + + train_idx, test_idx = _train_test_split(T_valid, train_ratio) + X_train = view(X, train_idx, :) + X_test = view(X, test_idx, :) + + rf = _ridge_factor(X_train; reg = reg) + + delay_capacities = zeros(Telt, max_delay) + target = Vector{Telt}(undef, T_valid) + + @inbounds for k in 1:max_delay + for (i, t) in enumerate(valid) + target[i] = f(input[t - k]) + end + + y_train = view(target, train_idx) + y_test = view(target, test_idx) + + w = _ridge_solve!(rf, X_train, y_train) + y_pred = X_test * w + + delay_capacities[k] = _squared_correlation(y_test, y_pred) + end + + return (total = sum(delay_capacities), delays = delay_capacities) +end diff --git a/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl new file mode 100644 index 000000000..12a017ef7 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/nonlinear_transformation.jl @@ -0,0 +1,80 @@ +@doc raw""" + nonlinear_transformation(input, states; f=x->sin(π*x), train_ratio=0.8, + reg=1.0, metric=nmse, washout::Integer=0) + +Evaluate how well a reservoir reproduces a memoryless nonlinear transformation +``f(u(t))`` of the current input. + +A ridge regression readout is trained from `states` to the target `f.(input)`, +and the metric is reported on a held-out tail of the trajectory. + +## Arguments + + - `input::AbstractVector`: driving input signal of length `T`. + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `f`: scalar nonlinearity applied element-wise to `input`. Default ``\sin(\pi x)``. + - `train_ratio::Real=0.8`: fraction of valid data used for training. + - `reg::Real=1.0`: ridge regression regularization coefficient. + - `metric`: error metric `(y_true, y_pred) -> score`. Built-ins: + [`nmse`](@ref), [`rnmse`](@ref), [`mse`](@ref). + - `washout::Integer=0`: number of initial time steps discarded before + training/testing. + +## Returns + +A `NamedTuple` with fields: + + - `score::Real`: error metric on the test split. + - `target::Vector{<:Real}`: full target signal `f.(input)`. The element type + is derived from `promote_type(eltype(input), eltype(states), typeof(reg))`. + +## References + + - Lukoševičius, M. (2012). "A practical guide to applying echo state networks." + *Neural Networks: Tricks of the Trade*, Springer. +""" +function nonlinear_transformation( + input::AbstractVector, + states::AbstractMatrix; + f = x -> sin(π * x), + train_ratio::Real = 0.8, + reg::Real = 1.0, + metric = nmse, + washout::Integer = 0, + ) + T = length(input) + size(states, 2) == T || throw( + DimensionMismatch( + "states must have $T columns (time steps), got $(size(states, 2))", + ), + ) + 0 <= washout < T || + throw(ArgumentError("washout must be in [0, $T), got $washout")) + + valid = (washout + 1):T + T_valid = length(valid) + T_valid >= 2 || + throw(ArgumentError("Not enough samples after washout: $T_valid")) + + Telt = promote_type(eltype(input), eltype(states), typeof(reg)) + + X = Matrix{Telt}(undef, T_valid, size(states, 1)) + copyto!(X, view(states, :, valid)') + + target = Vector{Telt}(undef, T) + @inbounds for t in 1:T + target[t] = f(input[t]) + end + + train_idx, test_idx = _train_test_split(T_valid, train_ratio) + + y_valid = view(target, valid) + w = _ridge_regression(view(X, train_idx, :), view(y_valid, train_idx); reg = reg) + y_pred = view(X, test_idx, :) * w + score = metric(view(y_valid, test_idx), y_pred) + + return (score = score, target = target) +end diff --git a/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl new file mode 100644 index 000000000..a124e1876 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/sin_approximation.jl @@ -0,0 +1,36 @@ +@doc raw""" + sin_approximation(input, states; freq=π, train_ratio=0.8, reg=1.0, + metric=nmse, washout::Integer=0) + +Evaluate how well a reservoir approximates ``\sin(\omega\, u(t))``. + +This is a memoryless nonlinear task. It is a thin wrapper around +[`nonlinear_transformation`](@ref) with ``f(x) = \sin(\omega x)``. + +## Arguments + + - `input::AbstractVector`: driving input signal of length `T`. + - `states::AbstractMatrix`: reservoir state matrix of size `(n_features, T)`. + +## Keyword Arguments + + - `freq::Real=π`: angular frequency ``\omega``. + - Remaining keyword arguments are forwarded to + [`nonlinear_transformation`](@ref). + +## Returns + +A `NamedTuple` with fields: + + - `score::Real`: error metric on the test split. + - `target::Vector{<:Real}`: full target signal `sin(freq * input)`. See + [`nonlinear_transformation`](@ref) for the element-type rule. +""" +function sin_approximation( + input::AbstractVector, + states::AbstractMatrix; + freq::Real = π, + kwargs..., + ) + return nonlinear_transformation(input, states; f = x -> sin(freq * x), kwargs...) +end diff --git a/lib/ReservoirComputingBenchmarks/src/utils.jl b/lib/ReservoirComputingBenchmarks/src/utils.jl new file mode 100644 index 000000000..86bbb2074 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/src/utils.jl @@ -0,0 +1,163 @@ +""" + _RidgeFactor + +Pre-computed Cholesky factorization of the Gram matrix `X'X + λI` for +efficient multi-RHS ridge regression. Construct once, solve many times +with [`_ridge_solve`](@ref). +""" +struct _RidgeFactor{T <: Real} + factor::LinearAlgebra.Cholesky{T, Matrix{T}} + Xty_buf::Vector{T} + w_buf::Vector{T} +end + +function _ridge_factor(X::AbstractMatrix; reg::Real = 1.0) + reg >= 0 || + throw(ArgumentError("Regularization coefficient must be non-negative, got $reg")) + T = promote_type(eltype(X), typeof(reg)) + n = size(X, 2) + reg_T = convert(T, reg) + G_reg = Matrix{T}(undef, n, n) + X_col = convert(Matrix{T}, X) + LinearAlgebra.copytri!(mul!(G_reg, X_col', X_col), 'U') + @inbounds for i in 1:n + G_reg[i, i] += reg_T + end + F = try + cholesky!(Symmetric(G_reg)) + catch e + if e isa LinearAlgebra.PosDefException + throw( + ArgumentError( + "Cholesky factorization failed: X'X + reg*I is not positive definite. " * + "This can happen when reg is zero or too small for rank-deficient data. " * + "Increase reg or provide full-rank features.", + ), + ) + end + rethrow(e) + end + Xty_buf = Vector{T}(undef, n) + w_buf = Vector{T}(undef, n) + return _RidgeFactor(F, Xty_buf, w_buf) +end + +function _ridge_solve!(rf::_RidgeFactor, X::AbstractMatrix, y::AbstractVector) + mul!(rf.Xty_buf, X', y) + ldiv!(rf.w_buf, rf.factor, rf.Xty_buf) + return rf.w_buf +end + +function _ridge_solve(rf::_RidgeFactor, X::AbstractMatrix, y::AbstractVector) + mul!(rf.Xty_buf, X', y) + return rf.factor \ rf.Xty_buf +end + +""" + _ridge_regression(X, y; reg=1.0) + +Solve ridge regression `X * w ≈ y` via Cholesky factorization of the +Gram matrix `X'X + λI`. + +- `X`: (T, n_features) design matrix +- `y`: (T,) target vector +- `reg`: regularization coefficient (λ ≥ 0) + +Returns weight vector `w` of length `n_features`. +""" +function _ridge_regression(X::AbstractMatrix, y::AbstractVector; reg::Real = 1.0) + reg >= 0 || + throw(ArgumentError("Regularization coefficient must be non-negative, got $reg")) + rf = _ridge_factor(X; reg = reg) + return _ridge_solve(rf, X, y) +end + +@inline function _squared_correlation(y_true::AbstractVector, y_pred::AbstractVector) + r = cor(y_true, y_pred) + if isnan(r) + @warn "Squared correlation is NaN (zero-variance input). " * + "This may indicate a degenerate reservoir or collapsed prediction." + return zero(r) + end + return clamp(r^2, zero(r), one(r)) +end + +@inline function _nmse(y_true::AbstractVector, y_pred::AbstractVector) + v = var(y_true) + if v < eps(typeof(v)) + @warn "NMSE: target variance is near-zero ($v). " * + "NMSE is undefined for constant targets." + return oftype(v, NaN) + end + return mean((y_true .- y_pred) .^ 2) / v +end + +""" + _train_test_split(n, train_ratio) + +Return `(train_range, test_range)` index ranges for a temporal split. +""" +function _train_test_split(n::Int, train_ratio::Real) + 0 < train_ratio < 1 || + throw(ArgumentError("train_ratio must be in (0, 1), got $train_ratio")) + split = floor(Int, n * train_ratio) + split >= 1 || throw( + ArgumentError( + "Training set is empty (n=$n, train_ratio=$train_ratio). Provide more data.", + ), + ) + split < n || throw( + ArgumentError( + "Test set is empty (n=$n, train_ratio=$train_ratio). " * + "Reduce train_ratio or provide more data.", + ), + ) + return 1:split, (split + 1):n +end + +@inline function _normalize(x::AbstractVector, lo::Real, hi::Real) + Telt = float(promote_type(eltype(x), typeof(lo), typeof(hi))) + xmin, xmax = extrema(x) + if xmin == xmax + return fill(convert(Telt, lo), length(x)) + end + lo_c = convert(Telt, lo) + hi_c = convert(Telt, hi) + xmin_c = convert(Telt, xmin) + xmax_c = convert(Telt, xmax) + scale = (hi_c - lo_c) / (xmax_c - xmin_c) + return @. (x - xmin_c) * scale + lo_c +end + +@doc raw""" + nmse(y_true, y_pred) + +Normalized Mean Squared Error: ``\\text{mean}((y_\\text{true} - y_\\text{pred})^2) / \\text{var}(y_\\text{true})``. + +Returns `NaN` with a warning if `y_true` has zero variance. +""" +@inline function nmse(y_true::AbstractVector, y_pred::AbstractVector) + return _nmse(y_true, y_pred) +end + +@doc raw""" + rnmse(y_true, y_pred) + +Root Normalized Mean Squared Error: ``\\sqrt{\\text{nmse}(y_\\text{true}, y_\\text{pred})}``. + +Returns `0.0` if NMSE is negative (due to numerical issues). +""" +@inline function rnmse(y_true::AbstractVector, y_pred::AbstractVector) + nmse_val = nmse(y_true, y_pred) + isnan(nmse_val) && return nmse_val + return nmse_val < 0 ? zero(nmse_val) : sqrt(nmse_val) +end + +@doc raw""" + mse(y_true, y_pred) + +Mean Squared Error: ``\\text{mean}((y_\\text{true} - y_\\text{pred})^2)``. +""" +@inline function mse(y_true::AbstractVector, y_pred::AbstractVector) + return mean((y_true .- y_pred) .^ 2) +end diff --git a/lib/ReservoirComputingBenchmarks/test/runtests.jl b/lib/ReservoirComputingBenchmarks/test/runtests.jl new file mode 100644 index 000000000..414820090 --- /dev/null +++ b/lib/ReservoirComputingBenchmarks/test/runtests.jl @@ -0,0 +1,608 @@ +using ReservoirComputingBenchmarks +using Test +using Random +using Statistics +using ReservoirComputing + +@testset "ReservoirComputingBenchmarks" begin + rng = Random.MersenneTwister(42) + + @testset "Memory Capacity" begin + T = 2000 + n_features = 50 + input = rand(rng, T) .* 2 .- 1 # Uniform[-1, 1] + + # Build a simple linear reservoir (identity + shift) that should + # have high memory capacity + states = zeros(n_features, T) + for t in 2:T + states[1, t] = input[t - 1] + for i in 2:n_features + states[i, t] = states[i - 1, t - 1] + end + end + + result = memory_capacity(input, states; max_delay = 20, reg = 1.0e-6) + + @test haskey(result, :total) + @test haskey(result, :delays) + @test length(result.delays) == 20 + @test all(d -> 0 <= d <= 1.001, result.delays) + @test result.total >= 0 + # A shift-register reservoir should have near-perfect MC for + # delays up to n_features + @test result.delays[1] > 0.9 + @test result.delays[5] > 0.8 + @test result.total > 10 + end + + @testset "NARMA Target Generation" begin + T = 1000 + input = rand(rng, T) .* 0.5 # Uniform[0, 0.5] + + # NARMA-2 + y2 = generate_narma(input; order = 2, normalize = false) + @test length(y2) == T + @test y2[1] == 0.0 + @test y2[2] == 0.0 + @test y2[3] != 0.0 # First non-trivial value + + # NARMA-10 + y10 = generate_narma(input; order = 10, normalize = false) + @test length(y10) == T + @test all(y10[1:10] .== 0.0) + @test y10[11] != 0.0 + + # NARMA-30 + y30 = generate_narma(input; order = 30, normalize = false) + @test length(y30) == T + @test all(y30[1:30] .== 0.0) + @test y30[31] != 0.0 + + # Target should not diverge for standard inputs + @test all(isfinite.(y10)) + @test all(isfinite.(y30)) + + # Custom coefficients for non-standard order + @test_throws ArgumentError generate_narma(input; order = 5) + y5 = generate_narma(input; order = 5, alpha = 0.3, beta = 0.05, gamma = 1.5, delta = 0.1, normalize = false) + @test length(y5) == T + + # Order must be >= 2 + @test_throws ArgumentError generate_narma(input; order = 1) + end + + @testset "NARMA Evaluation" begin + T = 1000 + n_features = 100 + input = rand(rng, T) + + # Use random states (won't be great, but should run) + states = randn(rng, n_features, T) + + result = narma(input, states; order = 10, metric = nmse, reg = 1.0) + + @test haskey(result, :score) + @test haskey(result, :target) + @test isfinite(result.score) + @test result.score >= 0 + @test length(result.target) == T + + # Test other metrics + result_rnmse = narma(input, states; order = 10, metric = rnmse, reg = 1.0) + @test isfinite(result_rnmse.score) + + result_mse = narma(input, states; order = 10, metric = mse, reg = 1.0) + @test isfinite(result_mse.score) + + # Custom metric + custom_metric(y_true, y_pred) = mean(abs.(y_true .- y_pred)) + result_custom = narma(input, states; order = 10, metric = custom_metric, reg = 1.0) + @test isfinite(result_custom.score) + end + + @testset "IPC" begin + T = 1000 + n_features = 30 + input = rand(rng, T) .* 2 .- 1 # Uniform[-1, 1] + + # Simple linear reservoir + states = zeros(n_features, T) + for t in 2:T + states[1, t] = input[t - 1] + for i in 2:n_features + states[i, t] = 0.95 * states[i, t - 1] + 0.05 * randn(rng) + end + end + + result = ipc(input, states; max_delay = 5, max_degree = 2, reg = 1.0e-4) + + @test haskey(result, :total) + @test haskey(result, :linear) + @test haskey(result, :nonlinear) + @test haskey(result, :by_degree) + @test haskey(result, :by_delay) + @test haskey(result, :basis_capacities) + @test haskey(result, :theoretical_max) + + @test result.total >= 0 + @test result.linear >= 0 + @test result.nonlinear >= 0 + @test result.theoretical_max == n_features + @test abs(result.total - result.linear - result.nonlinear) < 1.0e-10 + + # Degree-1 capacity should be present + @test haskey(result.by_degree, 1) + + # Total should not exceed theoretical maximum (with some tolerance) + @test result.total <= n_features + 1.0 + + # Without cross terms + result_no_cross = ipc(input, states; max_delay = 5, max_degree = 2, cross_terms = false, reg = 1.0e-4) + @test length(result_no_cross.basis_capacities) < length(result.basis_capacities) + end + + @testset "Nonlinear Transformation" begin + T = 1000 + n_features = 80 + input = rand(rng, T) .* 2 .- 1 + + # Reservoir whose first feature is u(t) and rest are nonlinear maps of it + states = zeros(n_features, T) + for t in 1:T + states[1, t] = input[t] + states[2, t] = input[t]^2 + states[3, t] = sin(π * input[t]) + for i in 4:n_features + states[i, t] = tanh(0.7 * input[t] + 0.1 * (i - 3)) + end + end + + # sin(π u) target — a feature exactly matches it, so the readout + # should fit it well (small NMSE up to ridge regularisation bias). + r = nonlinear_transformation(input, states; f = x -> sin(π * x), reg = 1.0e-6) + @test haskey(r, :score) + @test haskey(r, :target) + @test length(r.target) == T + @test isfinite(r.score) + @test r.score < 1.0e-3 + + # x^2 target — exactly available + r2 = nonlinear_transformation(input, states; f = x -> x^2, reg = 1.0e-6) + @test r2.score < 1.0e-3 + + # custom metric + r3 = nonlinear_transformation(input, states; f = sin, metric = mse, reg = 1.0e-4) + @test isfinite(r3.score) + + # Validation + @test_throws DimensionMismatch nonlinear_transformation(input, randn(rng, n_features, T - 1)) + @test_throws ArgumentError nonlinear_transformation(input, states; washout = T) + @test_throws ArgumentError nonlinear_transformation(input, states; washout = -1) + end + + @testset "Nonlinear Memory" begin + T = 2000 + n_features = 50 + input = rand(rng, T) .* 2 .- 1 + + # Build a reservoir whose features are u(t-k) (linear shift register) + states = zeros(n_features, T) + for t in 2:T + states[1, t] = input[t - 1] + for i in 2:n_features + states[i, t] = states[i - 1, t - 1] + end + end + + # Linear memory baseline (should reduce to memory_capacity for f=identity) + r_lin = nonlinear_memory(input, states; f = identity, max_delay = 20, reg = 1.0e-6) + @test r_lin.delays[1] > 0.9 + @test r_lin.total > 10 + + # Squared nonlinear memory: a linear reservoir cannot recover x^2 perfectly + r_sq = nonlinear_memory(input, states; f = x -> x^2, max_delay = 20, reg = 1.0e-6) + @test length(r_sq.delays) == 20 + @test all(d -> 0 <= d <= 1.001, r_sq.delays) + @test r_sq.total >= 0 + + # Validation + @test_throws ArgumentError nonlinear_memory(input, states; max_delay = 0) + @test_throws ArgumentError nonlinear_memory(input, states; max_delay = T) + @test_throws DimensionMismatch nonlinear_memory(rand(50), randn(rng, 10, 100)) + end + + @testset "Sin Approximation" begin + T = 800 + n_features = 30 + input = rand(rng, T) .* 2 .- 1 + + # Reservoir already contains sin(π u(t)) — perfect fit possible + states = zeros(n_features, T) + for t in 1:T + states[1, t] = sin(π * input[t]) + for i in 2:n_features + states[i, t] = tanh(0.5 * input[t] + 0.05 * (i - 2)) + end + end + + r = sin_approximation(input, states; freq = π, reg = 1.0e-6) + @test haskey(r, :score) + @test r.score < 1.0e-3 + + # freq = 1 — non-trivial map; just check finite + r2 = sin_approximation(input, states; freq = 1.0) + @test isfinite(r2.score) + end + + @testset "Kernel / Generalization Rank (array form)" begin + # n_features × n full-rank random matrix + M_full = randn(rng, 30, 50) + @test kernel_rank(M_full) == 30 + @test generalization_rank(M_full) == 30 + + # Rank-1 matrix: numerical rank == 1 + v = randn(rng, 30) + M_rank1 = v * randn(rng, 50)' + @test kernel_rank(M_rank1) == 1 + @test generalization_rank(M_rank1) == 1 + + # All-zero matrix → rank 0 + @test kernel_rank(zeros(30, 50)) == 0 + @test generalization_rank(zeros(30, 50)) == 0 + + # threshold extremes + M_mixed = randn(rng, 5, 5) + @test kernel_rank(M_mixed; threshold = 1.0e-12) >= kernel_rank(M_mixed; threshold = 0.5) + + # Validation + @test_throws ArgumentError kernel_rank(M_full; threshold = 0.0) + @test_throws ArgumentError kernel_rank(M_full; threshold = -0.1) + end + + @testset "Legendre Polynomials" begin + leg = ReservoirComputingBenchmarks._legendre + + # P_0(x) = 1 + @test leg(0, 0.5) ≈ 1.0 + @test leg(0, -0.3) ≈ 1.0 + + # P_1(x) = x + @test leg(1, 0.5) ≈ 0.5 + @test leg(1, -0.3) ≈ -0.3 + + # P_2(x) = (3x² - 1) / 2 + @test leg(2, 0.5) ≈ (3 * 0.25 - 1) / 2 + @test leg(2, 0.0) ≈ -0.5 + + # P_3(x) = (5x³ - 3x) / 2 + @test leg(3, 0.5) ≈ (5 * 0.125 - 1.5) / 2 + + # Negative degree should error + @test_throws ArgumentError leg(-1, 0.5) + + # Orthonormality: ∫₋₁¹ P̃_n(x) P̃_m(x) dx ≈ δ_{nm} + nleg = ReservoirComputingBenchmarks._normalized_legendre + N_quad = 10000 + x_quad = range(-1, 1; length = N_quad) + dx = 2.0 / (N_quad - 1) + + for n in 0:3 + vals_n = nleg.(n, x_quad) + integral = sum(vals_n .^ 2) * dx + @test isapprox(integral, 1.0; atol = 0.01) + end + end + + @testset "Utilities" begin + # _normalize + x = [1.0, 2.0, 3.0, 4.0, 5.0] + xn = ReservoirComputingBenchmarks._normalize(x, 0.0, 0.5) + @test xn[1] ≈ 0.0 + @test xn[end] ≈ 0.5 + @test all(v -> 0 <= v <= 0.5, xn) + + # Constant input + xc = ReservoirComputingBenchmarks._normalize([3.0, 3.0, 3.0], 0.0, 0.5) + @test all(xc .== 0.0) + + # _squared_correlation + a = [1.0, 2.0, 3.0, 4.0, 5.0] + @test ReservoirComputingBenchmarks._squared_correlation(a, a) ≈ 1.0 + @test ReservoirComputingBenchmarks._squared_correlation(a, -a) ≈ 1.0 + + # _nmse: perfect prediction → 0 + @test ReservoirComputingBenchmarks._nmse(a, a) ≈ 0.0 + + # _train_test_split: invalid ratios + @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 0.0) + @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 1.0) + @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, -0.5) + @test_throws ArgumentError ReservoirComputingBenchmarks._train_test_split(100, 1.5) + + # _ridge_regression: negative reg + @test_throws ArgumentError ReservoirComputingBenchmarks._ridge_regression( + randn(50, 5), randn(50); reg = -1.0 + ) + end + + @testset "Validation" begin + input = rand(rng, 500) .* 2 .- 1 + states = randn(rng, 10, 500) + + # max_delay must be >= 1 + @test_throws ArgumentError memory_capacity(input, states; max_delay = 0) + + # max_delay must be < T + @test_throws ArgumentError memory_capacity(input, states; max_delay = 500) + + # Mismatched dimensions + @test_throws DimensionMismatch memory_capacity(rand(100), randn(10, 200)) + + # NARMA order < 2 + @test_throws ArgumentError generate_narma(rand(100); order = 1) + + # Missing NARMA coefficients for non-standard order + @test_throws ArgumentError generate_narma(rand(100); order = 7) + + # Input shorter than order + @test_throws ArgumentError generate_narma(rand(5); order = 10) + + # Washout out of range + @test_throws ArgumentError narma(input, states; order = 10, washout = -1) + @test_throws ArgumentError narma(input, states; order = 10, washout = 500) + + # IPC: max_degree/max_delay must be >= 1 + @test_throws ArgumentError ipc(input, states; max_delay = 0, max_degree = 2) + @test_throws ArgumentError ipc(input, states; max_delay = 5, max_degree = 0) + end + + @testset "ReservoirComputing model dispatch" begin + rng_model = Random.MersenneTwister(7) + T = 600 + in_dims, res_dims, out_dims = 1, 40, 1 + model = ESN(in_dims, res_dims, out_dims, tanh) + ps, st = setup(rng_model, model) + + @testset "memory_capacity(model, ps, st)" begin + rng_input = Random.MersenneTwister(11) + r = memory_capacity( + model, ps, st; + T = T, rng = rng_input, max_delay = 10, reg = 1.0e-4, + ) + @test haskey(r, :total) + @test haskey(r, :delays) + @test length(r.delays) == 10 + @test isfinite(r.total) + @test r.total >= 0 + @test all(d -> 0 <= d <= 1.001, r.delays) + + # Reproducibility: same rng + same model → same result + r2 = memory_capacity( + model, ps, st; + T = T, rng = Random.MersenneTwister(11), max_delay = 10, reg = 1.0e-4, + ) + @test r2.total ≈ r.total + @test r2.delays ≈ r.delays + end + + @testset "narma(model, ps, st)" begin + rng_input = Random.MersenneTwister(13) + r = narma( + model, ps, st; + T = T, rng = rng_input, order = 10, metric = nmse, reg = 1.0e-4, + ) + @test haskey(r, :score) + @test haskey(r, :target) + @test isfinite(r.score) + @test r.score >= 0 + @test length(r.target) == T + + # User-supplied input bypasses rng + user_input = rand(Random.MersenneTwister(17), T) .* 2 .- 1 + r_user = narma( + model, ps, st; + input = user_input, order = 10, reg = 1.0e-4, + ) + @test isfinite(r_user.score) + @test length(r_user.target) == T + end + + @testset "ipc(model, ps, st)" begin + rng_input = Random.MersenneTwister(19) + r = ipc( + model, ps, st; + T = T, rng = rng_input, + max_delay = 4, max_degree = 2, reg = 1.0e-4, + ) + @test haskey(r, :total) + @test haskey(r, :linear) + @test haskey(r, :nonlinear) + @test haskey(r, :basis_capacities) + @test r.theoretical_max == res_dims + @test r.total >= 0 + @test abs(r.total - r.linear - r.nonlinear) < 1.0e-10 + end + + @testset "nonlinear_transformation(model, ps, st)" begin + rng_input = Random.MersenneTwister(23) + r = nonlinear_transformation( + model, ps, st; + T = T, rng = rng_input, f = x -> x^2, reg = 1.0e-4, + ) + @test haskey(r, :score) + @test haskey(r, :target) + @test isfinite(r.score) + @test length(r.target) == T + end + + @testset "nonlinear_memory(model, ps, st)" begin + rng_input = Random.MersenneTwister(29) + r = nonlinear_memory( + model, ps, st; + T = T, rng = rng_input, f = x -> x^2, max_delay = 8, reg = 1.0e-4, + ) + @test haskey(r, :total) + @test haskey(r, :delays) + @test length(r.delays) == 8 + @test all(d -> 0 <= d <= 1.001, r.delays) + @test isfinite(r.total) + end + + @testset "sin_approximation(model, ps, st)" begin + rng_input = Random.MersenneTwister(31) + r = sin_approximation( + model, ps, st; + T = T, rng = rng_input, freq = π, reg = 1.0e-4, + ) + @test haskey(r, :score) + @test haskey(r, :target) + @test isfinite(r.score) + @test length(r.target) == T + end + + @testset "kernel_rank(model, ps, st)" begin + rng_input = Random.MersenneTwister(37) + kr = kernel_rank( + model, ps, st; + n_streams = 80, stream_length = 60, + rng = rng_input, threshold = 0.01, + ) + @test kr isa Integer + @test 0 <= kr <= res_dims + + # Reproducibility + kr2 = kernel_rank( + model, ps, st; + n_streams = 80, stream_length = 60, + rng = Random.MersenneTwister(37), threshold = 0.01, + ) + @test kr2 == kr + end + + @testset "generalization_rank(model, ps, st)" begin + rng_input = Random.MersenneTwister(41) + gr = generalization_rank( + model, ps, st; + n_streams = 80, stream_length = 60, + perturbation = 1.0e-4, + rng = rng_input, threshold = 0.01, + ) + @test gr isa Integer + @test 0 <= gr <= res_dims + + # Deterministic invariant: with perturbation == 0, all streams are + # the same `base_input`, so all final states are identical and the + # final-state matrix has numerical rank exactly 1. + gr_zero = generalization_rank( + model, ps, st; + n_streams = 50, stream_length = 60, + perturbation = 0.0, + rng = Random.MersenneTwister(43), threshold = 0.01, + ) + @test gr_zero == 1 + end + + @testset "input length validation" begin + @test_throws ArgumentError memory_capacity( + model, ps, st; T = 1, max_delay = 1, + ) + short = [0.1, 0.2] + @test_throws ArgumentError memory_capacity( + model, ps, st; input = [0.1], max_delay = 1, + ) + # input keyword length controls dispatch length, not T + @test_throws ArgumentError narma(model, ps, st; input = [0.1]) + end + + @testset "scalar input (in_dims == 1) check" begin + # in_dims = 3 → friendly ArgumentError, not an opaque shape error + multi_model = ESN(3, 20, 1, tanh) + multi_ps, multi_st = setup(Random.MersenneTwister(5), multi_model) + @test_throws ArgumentError memory_capacity( + multi_model, multi_ps, multi_st; T = 200, max_delay = 4, + ) + @test_throws ArgumentError narma( + multi_model, multi_ps, multi_st; T = 200, + ) + @test_throws ArgumentError ipc( + multi_model, multi_ps, multi_st; T = 200, max_delay = 2, max_degree = 2, + ) + @test_throws ArgumentError nonlinear_transformation( + multi_model, multi_ps, multi_st; T = 200, + ) + @test_throws ArgumentError nonlinear_memory( + multi_model, multi_ps, multi_st; T = 200, max_delay = 4, + ) + @test_throws ArgumentError sin_approximation( + multi_model, multi_ps, multi_st; T = 200, + ) + @test_throws ArgumentError kernel_rank( + multi_model, multi_ps, multi_st; n_streams = 10, stream_length = 30, + ) + @test_throws ArgumentError generalization_rank( + multi_model, multi_ps, multi_st; + n_streams = 10, stream_length = 30, + ) + end + end + + @testset "Cross-model dispatch ($(nameof(M)))" for M in (ES2N, EuSN) + rng_model = Random.MersenneTwister(101) + T = 400 + in_dims, res_dims, out_dims = 1, 25, 1 + model = M(in_dims, res_dims, out_dims, tanh) + ps, st = setup(rng_model, model) + + rng_input = Random.MersenneTwister(103) + r_mc = memory_capacity( + model, ps, st; + T = T, rng = rng_input, max_delay = 5, reg = 1.0e-4, + ) + @test isfinite(r_mc.total) + @test length(r_mc.delays) == 5 + + r_narma = narma( + model, ps, st; + T = T, rng = Random.MersenneTwister(105), order = 10, reg = 1.0e-4, + ) + @test isfinite(r_narma.score) + + r_ipc = ipc( + model, ps, st; + T = T, rng = Random.MersenneTwister(107), + max_delay = 3, max_degree = 2, reg = 1.0e-4, + ) + @test r_ipc.theoretical_max == res_dims + @test isfinite(r_ipc.total) + + r_nlt = nonlinear_transformation( + model, ps, st; + T = T, rng = Random.MersenneTwister(109), + f = x -> x^2, reg = 1.0e-4, + ) + @test isfinite(r_nlt.score) + + r_nlm = nonlinear_memory( + model, ps, st; + T = T, rng = Random.MersenneTwister(111), + f = x -> x^2, max_delay = 4, reg = 1.0e-4, + ) + @test length(r_nlm.delays) == 4 + + kr = kernel_rank( + model, ps, st; + n_streams = 60, stream_length = 50, + rng = Random.MersenneTwister(113), + ) + @test 0 <= kr <= res_dims + + gr_zero = generalization_rank( + model, ps, st; + n_streams = 30, stream_length = 50, + perturbation = 0.0, + rng = Random.MersenneTwister(115), + ) + @test gr_zero == 1 + end +end