diff --git a/NEWS.md b/NEWS.md index 97522f7b..1363f86e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,18 @@ - The deprecated `RegularizationTools` extension and the `RegularizationSmooth` interpolation type have been removed. `RegularizationTools` was deprecated and capped `Optim` to `≤ 1`; removing it restores support for `Optim` v2. + - The `assume_linear_t` constructor keyword and the `DataInterpolations.looks_linear` utility have been removed. Knot-vector structure is now probed once at construction through `FindFirstFunctions.SearchProperties(t)` and cached on every interpolation as `A.t_props`; uniformly-spaced knots are detected exactly and automatically. To override the probe, pass the new `search_properties` keyword accepted by every constructor, e.g. `LinearInterpolation(u, t; search_properties = FindFirstFunctions.SearchProperties(t; is_uniform = true))`. + +## New features + + - Every interpolation constructor accepts a `search_properties::Union{Nothing, FindFirstFunctions.SearchProperties}` keyword. The default `nothing` probes `t` at construction; passing a pre-built `SearchProperties` skips the probe (useful when constructing many interpolations over the same knot vector). + + - Knot search is dispatched through `FindFirstFunctions.Auto(t)` resolved at construction: uniformly-spaced knots (any `AbstractRange`, or vectors detected as exactly uniform) use a closed-form O(1) lookup; short non-uniform knot vectors use a linear scan; everything else keeps the previous bracketed gallop. + + - `LinearInterpolation` with uniformly-spaced knots and floating-point values takes a statically-dispatched fast path (closed-form index + lerp, verified against the live knots) — 5-10x faster per query on uniform grids. + + - `QuadraticSpline` construction is now O(n) instead of O(n^2) (running locator in `quadratic_spline_params`), e.g. ~870x faster at 100k points. + # DataInterpolations v5 Release Notes ## Breaking changes diff --git a/Project.toml b/Project.toml index 32e714bd..894332f7 100644 --- a/Project.toml +++ b/Project.toml @@ -36,7 +36,7 @@ BenchmarkTools = "1" ChainRulesCore = "1.26.1" EnumX = "1.0.5" FillArrays = "1.16.0" -FindFirstFunctions = "2" +FindFirstFunctions = "3" FiniteDifferences = "0.12.31" ForwardDiff = "1" LinearAlgebra = "1.10" diff --git a/bench/.gitignore b/bench/.gitignore new file mode 100644 index 00000000..ba39cc53 --- /dev/null +++ b/bench/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/bench/Project.toml b/bench/Project.toml new file mode 100644 index 00000000..efbe15a7 --- /dev/null +++ b/bench/Project.toml @@ -0,0 +1,9 @@ +[deps] +BasicInterpolators = "26cce99e-4866-4b6d-ab74-862489e035e0" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0" +Dierckx = "39dd38d3-220a-591b-8e3c-4c3a8c710a94" +FastInterpolations = "9ea80cae-fc13-4c00-8066-6eaedb12f34b" +FindFirstFunctions = "64ca27bc-2ba2-4a57-88aa-44e436879224" +Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +PCHIPInterpolation = "afe20452-48d1-4729-9a8b-50fb251f06cd" diff --git a/bench/cross_library_comparison.jl b/bench/cross_library_comparison.jl new file mode 100644 index 00000000..939f1dfa --- /dev/null +++ b/bench/cross_library_comparison.jl @@ -0,0 +1,701 @@ +#= +Cross-library 1D interpolation benchmark for DataInterpolations.jl. + +Compares DataInterpolations.jl against +Interpolations.jl, Dierckx.jl, BasicInterpolators.jl, PCHIPInterpolation.jl, +and FastInterpolations.jl. + +Usage: + julia +1.11 --project=bench bench/cross_library_comparison.jl + +The script writes a fresh `bench/cross_library_comparison.md` with the report. +=# + +import Pkg +const BENCH_DIR = @__DIR__ +const REPO_ROOT = dirname(BENCH_DIR) +Pkg.activate(BENCH_DIR) + +using Printf +using Random +using Statistics +using BenchmarkTools +using LinearAlgebra +using InteractiveUtils: versioninfo + +using DataInterpolations +using Interpolations +using Dierckx +using BasicInterpolators +using PCHIPInterpolation +using FastInterpolations + +const DI = DataInterpolations +const ITP = Interpolations +const DRX = Dierckx +const BI = BasicInterpolators +const PCHIP = PCHIPInterpolation +const FI = FastInterpolations + +const RNG = MersenneTwister(0x00C0FFEE) + +# ----------------------------------------------------------------------------- +# Helpers +# ----------------------------------------------------------------------------- + +""" +Build the test data `(u, t)` of length `n`. `pattern` is `:uniform` or `:nonuniform`. +The samples are deterministic for a given (n, pattern). +""" +function make_knots(n::Int, pattern::Symbol) + rng = MersenneTwister(0x0BADBEEF + n + (pattern === :uniform ? 0 : 1)) + if pattern === :uniform + # Keep as a `range` so Interpolations.cubic_spline_interpolation accepts it. + # Concrete Vector form for libraries that need it is taken via `collect(t)` later. + t = range(0.0, 1.0; length = n) + elseif pattern === :nonuniform + t = sort(rand(rng, n)) + t .= (t .- first(t)) ./ (last(t) - first(t)) + else + error("unknown pattern $(pattern)") + end + u = @. sin(2π * t) + 0.3 * cos(7 * t) + return u, t +end + +""" +Build a query batch of length `m` in the same domain. `pattern` is `:sorted`, +`:random`, or `:chained` (monotone, ODE-style). +""" +function make_queries(m::Int, pattern::Symbol) + rng = MersenneTwister(0x00C0FFEE + m + (pattern === :sorted ? 0 : pattern === :random ? 1 : 2)) + if pattern === :sorted + tt = sort(rand(rng, m)) + elseif pattern === :random + tt = rand(rng, m) + elseif pattern === :chained + steps = rand(rng, m) + tt = cumsum(steps) + tt .= (tt .- first(tt)) ./ (last(tt) - first(tt)) .* 0.999 .+ 0.0005 + else + error("unknown query pattern $(pattern)") + end + return tt +end + +# Pretty-print BenchmarkTools.Trial median and IQR (q3-q1) in human units +function fmt_trial(t::BenchmarkTools.Trial) + med = median(t).time # ns + q1 = quantile(t.times, 0.25) + q3 = quantile(t.times, 0.75) + iqr = q3 - q1 + return string(BenchmarkTools.prettytime(med), " (IQR ", BenchmarkTools.prettytime(iqr), ")") +end + +fmt_trial(::Nothing) = "—" + +# Result store: Dict{(algorithm, case, library) => Dict{params => Trial}} +# We'll just use nested dictionaries to keep it simple +const RESULTS = Dict{String, Any}() + +function record!(category::String, algo::String, lib::String, params::String, trial) + key = string(category, " | ", algo) + d = get!(RESULTS, key, Dict{Tuple{String, String}, Any}()) + d[(lib, params)] = trial + return nothing +end + +# Limit each individual benchmark; full sweep is large. +const BMK_SECONDS = 0.5 +const BMK_SAMPLES = 100 + +function bench(expr_setup::Function, args...; seconds = BMK_SECONDS, samples = BMK_SAMPLES) + b = @benchmarkable $(expr_setup)($(args)...) evals = 1 samples = samples seconds = seconds + return run(b) +end + +# ----------------------------------------------------------------------------- +# Library adapter functions +# +# Each adapter returns the interpolator for the given (u, t). +# We separate construction from evaluation so we can benchmark each. +# ----------------------------------------------------------------------------- + +# --- Linear -------------------------------------------------------------- +# Helpers: many third-party libraries want `Vector{Float64}` knots, not a `range`. +_vec(t) = collect(Float64, t) + +build_di_linear(u, t) = DI.LinearInterpolation(_vec(u), _vec(t)) +build_itp_linear_uniform(u, t) = ITP.linear_interpolation(t, u) # t may be a range +build_itp_linear_nonuniform(u, t) = ITP.interpolate((_vec(t),), _vec(u), ITP.Gridded(ITP.Linear())) +build_drx_linear(u, t) = DRX.Spline1D(_vec(t), _vec(u); k = 1, bc = "extrapolate") +build_bi_linear(u, t) = BI.LinearInterpolator(_vec(t), _vec(u), BI.WeakBoundaries()) +# FastInterpolations accepts both AbstractRange and Vector; passing the raw `t` so +# uniform-grid (range) builds keep their O(1) DirectSearch optimization. +build_fi_linear(u, t) = FI.linear_interp(t, _vec(u)) + +# --- Cubic spline (natural BC for DI, Interpolations) -------------------- +build_di_cubic(u, t) = DI.CubicSpline(_vec(u), _vec(t)) +build_itp_cubic_uniform(u, t) = ITP.cubic_spline_interpolation(t, u) +build_drx_cubic(u, t) = DRX.Spline1D(_vec(t), _vec(u); k = 3, bc = "extrapolate") +build_bi_cubic(u, t) = BI.CubicSplineInterpolator(_vec(t), _vec(u), BI.WeakBoundaries()) +# FastInterpolations cubic: default BC is `CubicFit()` (cubic polynomial fit at +# endpoints), not natural BC. Numerically different in the boundary cells from +# DI's natural cubic but matches in the interior — comparable for speed. +build_fi_cubic(u, t) = FI.cubic_interp(t, _vec(u)) + +# --- Quadratic spline ---------------------------------------------------- +build_di_quadratic(u, t) = DI.QuadraticSpline(_vec(u), _vec(t)) +build_drx_quadratic(u, t) = DRX.Spline1D(_vec(t), _vec(u); k = 2, bc = "extrapolate") +build_fi_quadratic(u, t) = FI.quadratic_interp(t, _vec(u)) + +# --- Akima --------------------------------------------------------------- +build_di_akima(u, t) = DI.AkimaInterpolation(_vec(u), _vec(t)) +build_fi_akima(u, t) = FI.akima_interp(t, _vec(u)) + +# --- PCHIP / monotone cubic Hermite ------------------------------------- +function build_di_cubic_hermite(u, t) + uv = _vec(u) + tv = _vec(t) + n = length(tv) + du = similar(uv) + @inbounds for i in 2:(n - 1) + du[i] = (uv[i + 1] - uv[i - 1]) / (tv[i + 1] - tv[i - 1]) + end + du[1] = (uv[2] - uv[1]) / (tv[2] - tv[1]) + du[n] = (uv[n] - uv[n - 1]) / (tv[n] - tv[n - 1]) + return DI.CubicHermiteSpline(du, uv, tv) +end +build_pchip(u, t) = PCHIP.Interpolator(_vec(t), _vec(u)) +# FastInterpolations PCHIP (Fritsch-Carlson monotone cubic Hermite). +build_fi_pchip(u, t) = FI.pchip_interp(t, _vec(u)) + +# --- Single-eval dispatch ----------------------------------------------- +# DI, Dierckx, BasicInterpolators all use `A(x)` +# Interpolations also uses `A(x)` +# PCHIP uses `A(x)` +single_eval(A, x) = A(x) + +# --- Batched eval -------------------------------------------------------- +# DI: `A(out, tt)` in-place; uses sorted-batch fast path when sorted. +batched_eval_di!(out, A, tt) = A(out, tt) +# Interpolations: broadcast (no native batched call) +batched_eval_itp!(out, A, tt) = (out .= A.(tt); out) +# Dierckx: has Spline1D batched evaluation (returns a new array). We'll call the +# in-place form if we can find it; otherwise wrap broadcast. +batched_eval_drx!(out, A::DRX.Spline1D, tt) = (out .= DRX.evaluate.(Ref(A), tt); out) +# BasicInterpolators: broadcast +batched_eval_bi!(out, A, tt) = (out .= A.(tt); out) +# PCHIP: broadcast +batched_eval_pchip!(out, A, tt) = (out .= A.(tt); out) +# FastInterpolations: `A(out, xq)` in-place (zero-alloc, with AutoSearch). +batched_eval_fi!(out, A, tt) = (A(out, tt); out) + +# ----------------------------------------------------------------------------- +# Verification: every library should agree on the same query to within tol +# ----------------------------------------------------------------------------- + +function verify_agreement(algo, builders, u, t, tt; tol = 1.0e-6, ref_name = first(keys(builders))) + ref_build = builders[ref_name] + ref = ref_build(u, t) + ref_vals = [single_eval(ref, x) for x in tt] + for (name, build) in builders + name == ref_name && continue + A = build(u, t) + vals = [single_eval(A, x) for x in tt] + diff = maximum(abs.(ref_vals .- vals)) + if diff > tol + @warn "Library $name disagrees with $ref_name on $algo by max diff $diff (tol=$tol)" + end + end + return nothing +end + +# ----------------------------------------------------------------------------- +# Run sweep +# ----------------------------------------------------------------------------- + +const SMOKE = get(ENV, "BENCH_SMOKE", "0") != "0" + +const N_VALUES = SMOKE ? [100, 1_000] : [100, 1_000, 10_000, 100_000] +const M_VALUES = SMOKE ? [10, 1_000] : [1, 10, 1_000, 100_000] +const KNOT_PATTERNS = [:uniform, :nonuniform] + +# Algorithm spec: +# builders :: Dict{library => build_fun(u, t)} +# batched! :: Dict{library => (out, A, tt) -> ...} +# supports :: Dict{library => set of knot_patterns it supports} +const ALGORITHMS = let + d = Dict{String, NamedTuple}() + d["Linear"] = ( + builders = Dict( + "DataInterpolations" => build_di_linear, + "Interpolations (uniform)" => build_itp_linear_uniform, + "Interpolations (gridded)" => build_itp_linear_nonuniform, + "Dierckx (k=1)" => build_drx_linear, + "BasicInterpolators" => build_bi_linear, + "FastInterpolations" => build_fi_linear, + ), + batched! = Dict( + "DataInterpolations" => batched_eval_di!, + "Interpolations (uniform)" => batched_eval_itp!, + "Interpolations (gridded)" => batched_eval_itp!, + "Dierckx (k=1)" => batched_eval_drx!, + "BasicInterpolators" => batched_eval_bi!, + "FastInterpolations" => batched_eval_fi!, + ), + supports = Dict( + "DataInterpolations" => [:uniform, :nonuniform], + "Interpolations (uniform)" => [:uniform], + "Interpolations (gridded)" => [:uniform, :nonuniform], + "Dierckx (k=1)" => [:uniform, :nonuniform], + "BasicInterpolators" => [:uniform, :nonuniform], + "FastInterpolations" => [:uniform, :nonuniform], + ), + ) + d["CubicSpline"] = ( + builders = Dict( + "DataInterpolations" => build_di_cubic, + "Interpolations (uniform)" => build_itp_cubic_uniform, + "Dierckx (k=3)" => build_drx_cubic, + "BasicInterpolators" => build_bi_cubic, + "FastInterpolations" => build_fi_cubic, + ), + batched! = Dict( + "DataInterpolations" => batched_eval_di!, + "Interpolations (uniform)" => batched_eval_itp!, + "Dierckx (k=3)" => batched_eval_drx!, + "BasicInterpolators" => batched_eval_bi!, + "FastInterpolations" => batched_eval_fi!, + ), + supports = Dict( + "DataInterpolations" => [:uniform, :nonuniform], + "Interpolations (uniform)" => [:uniform], + "Dierckx (k=3)" => [:uniform, :nonuniform], + "BasicInterpolators" => [:uniform, :nonuniform], + "FastInterpolations" => [:uniform, :nonuniform], + ), + ) + d["QuadraticSpline"] = ( + builders = Dict( + "DataInterpolations" => build_di_quadratic, + "Dierckx (k=2)" => build_drx_quadratic, + "FastInterpolations" => build_fi_quadratic, + ), + batched! = Dict( + "DataInterpolations" => batched_eval_di!, + "Dierckx (k=2)" => batched_eval_drx!, + "FastInterpolations" => batched_eval_fi!, + ), + supports = Dict( + "DataInterpolations" => [:uniform, :nonuniform], + "Dierckx (k=2)" => [:uniform, :nonuniform], + "FastInterpolations" => [:uniform, :nonuniform], + ), + ) + d["Akima"] = ( + builders = Dict( + "DataInterpolations" => build_di_akima, + "FastInterpolations" => build_fi_akima, + ), + batched! = Dict( + "DataInterpolations" => batched_eval_di!, + "FastInterpolations" => batched_eval_fi!, + ), + supports = Dict( + "DataInterpolations" => [:uniform, :nonuniform], + "FastInterpolations" => [:uniform, :nonuniform], + ), + ) + d["MonotoneCubic"] = ( + builders = Dict( + "DataInterpolations (CubicHermite)" => build_di_cubic_hermite, + "PCHIPInterpolation" => build_pchip, + "FastInterpolations (PCHIP)" => build_fi_pchip, + ), + batched! = Dict( + "DataInterpolations (CubicHermite)" => batched_eval_di!, + "PCHIPInterpolation" => batched_eval_pchip!, + "FastInterpolations (PCHIP)" => batched_eval_fi!, + ), + supports = Dict( + "DataInterpolations (CubicHermite)" => [:uniform, :nonuniform], + "PCHIPInterpolation" => [:uniform, :nonuniform], + "FastInterpolations (PCHIP)" => [:uniform, :nonuniform], + ), + ) + d +end + +# Quick agreement check (use small n) +function run_agreement_checks() + println("\n== Cross-library agreement check ==") + tt_check = collect(range(0.05, 0.95; length = 11)) + for knot in (:uniform, :nonuniform) + u, t = make_knots(50, knot) + for (algo, spec) in ALGORITHMS + # Filter builders that support this knot pattern + supported = Dict(k => v for (k, v) in spec.builders if knot in spec.supports[k]) + length(supported) < 2 && continue + # Different algorithms can disagree slightly (different BCs). We use + # a moderately loose tolerance only meant to catch outright bugs. + tol = algo == "Linear" ? 1.0e-10 : (algo in ("CubicSpline", "QuadraticSpline") ? 5.0e-2 : 1.0e-1) + try + verify_agreement(algo, supported, u, t, tt_check; tol = tol) + catch e + @warn "Agreement check failed: $algo $knot" exception = (e, catch_backtrace()) + end + end + end + return nothing +end + +# ----------------------------------------------------------------------------- +# Bench cases +# ----------------------------------------------------------------------------- + +function warmup() + # Force compilation of every builder + dispatch path so the actual benchmarks + # measure work, not first-time compile. + println("Warming up…") + for knot in KNOT_PATTERNS + u, t = make_knots(100, knot) + tt = make_queries(8, :sorted) + out = similar(tt) + for (_algo, spec) in ALGORITHMS + for (lib, build) in spec.builders + knot in spec.supports[lib] || continue + A = try + build(u, t) + catch + continue + end + single_eval(A, 0.5) + try + spec.batched![lib](out, A, tt) + catch + end + end + end + end + return nothing +end + +function run_construction() + println("\n== Construction time ==") + for (algo, spec) in ALGORITHMS + for n in N_VALUES + for knot in KNOT_PATTERNS + u, t = make_knots(n, knot) + for (lib, build) in spec.builders + knot in spec.supports[lib] || continue + trial = try + bench(build, u, t) + catch e + @warn "construction failed: $algo $lib n=$n knot=$knot" exception = e + nothing + end + record!( + "construction", algo, lib, + string("n=", n, ",", knot), + trial, + ) + println( + @sprintf( + " %-25s | %-30s | n=%-7d | %s | %s", + algo, lib, n, String(knot), fmt_trial(trial) + ) + ) + end + end + end + end + return nothing +end + +function run_single_query() + println("\n== Single-query latency ==") + x_query = 0.42718 + for (algo, spec) in ALGORITHMS + for n in N_VALUES + for knot in KNOT_PATTERNS + u, t = make_knots(n, knot) + for (lib, build) in spec.builders + knot in spec.supports[lib] || continue + A = try + build(u, t) + catch e + @warn "skipping single-query (build failed): $algo $lib n=$n" exception = e + continue + end + trial = try + bench(single_eval, A, x_query) + catch e + @warn "single-query failed: $algo $lib" exception = e + nothing + end + record!( + "single-query", algo, lib, + string("n=", n, ",", knot), + trial, + ) + println( + @sprintf( + " %-25s | %-30s | n=%-7d | %s | %s", + algo, lib, n, String(knot), fmt_trial(trial) + ) + ) + end + end + end + end + return nothing +end + +# For batched/random/chained, we hold n at a single representative knot pattern +# (uniform) — exploring the cross-product of n × knot-pattern × m × query-pattern +# would explode the runtime. The batched-call story is the same on non-uniform +# (DI's sorted fast-path still kicks in). + +function run_batched(query_pattern::Symbol, m_values = M_VALUES) + label = query_pattern === :sorted ? "Sorted batch" : + query_pattern === :random ? "Random batch" : + "Chained ODE-style" + println("\n== $label ==") + knot = :uniform + for (algo, spec) in ALGORITHMS + for n in N_VALUES + u, t = make_knots(n, knot) + for m in m_values + tt = make_queries(m, query_pattern) + for (lib, build) in spec.builders + knot in spec.supports[lib] || continue + A = try + build(u, t) + catch e + continue + end + out = similar(tt) + batched! = spec.batched![lib] + if query_pattern === :chained + # Sequential single-eval loop is what ODE solvers do. + # Don't use the batched interface for this case. + f = (A, tt) -> begin + s = 0.0 + for x in tt + s += single_eval(A, x) + end + s + end + trial = try + bench(f, A, tt) + catch e + @warn "chained eval failed: $algo $lib" exception = e + nothing + end + else + trial = try + bench(batched!, out, A, tt) + catch e + @warn "batched eval failed: $algo $lib" exception = e + nothing + end + end + record!( + string(label), algo, lib, + string("n=", n, ",m=", m), + trial, + ) + println( + @sprintf( + " %-25s | %-30s | n=%-7d | m=%-7d | %s", + algo, lib, n, m, fmt_trial(trial) + ) + ) + end + end + end + end + return nothing +end + +# ----------------------------------------------------------------------------- +# Markdown output +# ----------------------------------------------------------------------------- + +function library_order(libs) + # Stable order: DI first, then others alphabetically + di = filter(l -> startswith(l, "DataInterpolations"), libs) + others = sort(setdiff(libs, di)) + return vcat(sort(di), others) +end + +function unique_params(d) + return sort!(unique([p for ((_, p), _) in d])) +end + +function unique_libs(d) + return sort!(unique([l for ((l, _), _) in d])) +end + +function write_table(io::IO, key::AbstractString, d::Dict) + libs = library_order(unique_libs(d)) + params = unique_params(d) + print(io, "\n### ", key, "\n\n") + # Header + print(io, "| Library | ") + for p in params + print(io, p, " | ") + end + print(io, "\n") + print(io, "|---|") + for _ in params + print(io, "---|") + end + print(io, "\n") + for lib in libs + print(io, "| ", lib, " | ") + for p in params + trial = get(d, (lib, p), nothing) + print(io, fmt_trial(trial), " | ") + end + print(io, "\n") + end + return nothing +end + +function write_report(path::String; total_seconds = 0.0) + open(path, "w") do io + println(io, "# Cross-library 1D interpolation benchmark") + println(io) + println(io, "## Setup") + println(io) + # Host/Julia info + println(io, "```") + io_buf = IOBuffer() + versioninfo(io_buf) + print(io, String(take!(io_buf))) + println(io, "```") + println(io) + println(io, "Bench harness: `BenchmarkTools.@benchmark` with `evals=1`, max samples=$(BMK_SAMPLES), max seconds=$(BMK_SECONDS).") + println(io) + commit = read(`git -C $REPO_ROOT rev-parse HEAD`, String) |> strip + println(io, "Commit: `$commit`") + println(io) + println(io, "Library versions:") + println(io, "```") + deps = Pkg.project().dependencies + all_info = Pkg.dependencies() + for pkg in ("DataInterpolations", "Interpolations", "Dierckx", "BasicInterpolators", "PCHIPInterpolation", "FastInterpolations", "BenchmarkTools") + if haskey(deps, pkg) + v = all_info[deps[pkg]].version + println(io, " ", pkg, " ", v) + end + end + println(io, "```") + println(io) + println(io, "Total bench time: ", round(total_seconds; digits = 1), " s") + println(io) + + # Section 1: construction + println(io, "## Construction time") + println(io) + println(io, "Rows = library, columns = (n, knot pattern). Values = median wall time (IQR).") + for key in sort!(collect(keys(RESULTS))) + startswith(key, "construction") || continue + algo = split(key, " | ")[2] + write_table(io, algo, RESULTS[key]) + end + + # Section 2: single-query latency + println(io, "\n## Single-query latency") + println(io) + println(io, "Cold single evaluation `A(x_query)`. Rows = library, columns = (n, knot pattern).") + for key in sort!(collect(keys(RESULTS))) + startswith(key, "single-query") || continue + algo = split(key, " | ")[2] + write_table(io, algo, RESULTS[key]) + end + + # Section 3: sorted batch + println(io, "\n## Sorted batch") + println(io) + println(io, "`A(out, tt)` where `tt` is sorted random points in domain. (knot pattern = uniform)") + for key in sort!(collect(keys(RESULTS))) + startswith(key, "Sorted batch") || continue + algo = split(key, " | ")[2] + write_table(io, algo, RESULTS[key]) + end + + # Section 4: random batch + println(io, "\n## Random batch") + println(io) + println(io, "`A(out, tt)` where `tt` is unsorted. (knot pattern = uniform)") + for key in sort!(collect(keys(RESULTS))) + startswith(key, "Random batch") || continue + algo = split(key, " | ")[2] + write_table(io, algo, RESULTS[key]) + end + + # Section 5: chained + println(io, "\n## Chained ODE-style") + println(io) + println(io, "Sequential `for x in tt; A(x); end` over a monotone sequence. (knot pattern = uniform)") + for key in sort!(collect(keys(RESULTS))) + startswith(key, "Chained ODE-style") || continue + algo = split(key, " | ")[2] + write_table(io, algo, RESULTS[key]) + end + + println(io, "\n## Reproducer") + println(io) + println(io, "Bench script: `bench/cross_library_comparison.jl`") + println(io) + println(io, "Bench Project.toml: `bench/Project.toml` (devs DI from `..`).") + println(io) + println( + io, + """ + To rerun: + ```bash + cd /home/crackauc/sandbox/tmp_20260515_091703_4914/DataInterpolations.jl + git checkout fff-strategy-batched-evals + julia +1.11 --project=bench bench/cross_library_comparison.jl + ``` + """, + ) + end + return nothing +end + +# ----------------------------------------------------------------------------- +# Top-level +# ----------------------------------------------------------------------------- + +function main() + println("Starting cross-library benchmark sweep…") + println("Bench dir: ", BENCH_DIR) + + t_start = time() + + run_agreement_checks() + warmup() + + run_construction() + run_single_query() + run_batched(:sorted) + run_batched(:random) + # Chained is most interesting at moderate m; cap to avoid 100k single-evals on Dierckx etc. + run_batched(:chained, [1_000]) + + t_end = time() + total = t_end - t_start + println("\nTotal bench time: ", round(total; digits = 1), " s") + + report_path = joinpath(BENCH_DIR, "cross_library_comparison.md") + write_report(report_path; total_seconds = total) + println("Wrote report to ", report_path) + return nothing +end + +main() diff --git a/bench/cross_library_comparison.md b/bench/cross_library_comparison.md new file mode 100644 index 00000000..0d98f8d5 --- /dev/null +++ b/bench/cross_library_comparison.md @@ -0,0 +1,391 @@ +# Cross-library 1D interpolation benchmark + +## Setup + +``` +Julia Version 1.11.9 +Commit 53a02c0720c (2026-02-06 00:27 UTC) +Build Info: + Official https://julialang.org/ release +Platform Info: + OS: Linux (x86_64-linux-gnu) + CPU: 128 × AMD EPYC 7502 32-Core Processor + WORD_SIZE: 64 + LLVM: libLLVM-16.0.6 (ORCJIT, znver2) +Threads: 1 default, 0 interactive, 1 GC (on 128 virtual cores) +``` + +Bench harness: `BenchmarkTools.@benchmark` with `evals=1`, max samples=100, max seconds=0.5. + +Commit: `3ce1080447eb29b4f51ef363bbad4129e00b176a` + +Library versions: +``` + DataInterpolations 8.10.0 + Interpolations 0.16.2 + Dierckx 0.5.4 + BasicInterpolators 0.7.1 + PCHIPInterpolation 0.2.1 + FastInterpolations 0.4.11 + BenchmarkTools 1.8.0 +``` + +Total bench time: 261.4 s + +## Construction time + +Rows = library, columns = (n, knot pattern). Values = median wall time (IQR). + +### Akima + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 1.895 μs (IQR 267.500 ns) | 1.935 μs (IQR 255.000 ns) | 12.340 μs (IQR 552.500 ns) | 13.625 μs (IQR 31.997 μs) | 96.769 μs (IQR 892.500 ns) | 105.959 μs (IQR 160.974 μs) | 966.061 μs (IQR 2.906 ms) | 1.065 ms (IQR 2.945 ms) | +| FastInterpolations | 1.320 μs (IQR 422.500 ns) | 990.000 ns (IQR 310.000 ns) | 10.680 μs (IQR 39.707 μs) | 29.840 μs (IQR 7.692 μs) | 69.779 μs (IQR 424.373 μs) | 173.559 μs (IQR 78.677 μs) | 680.669 μs (IQR 35.064 μs) | 673.078 μs (IQR 25.835 μs) | + +### CubicSpline + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 6.930 μs (IQR 3.364 μs) | 7.170 μs (IQR 3.487 μs) | 55.215 μs (IQR 1.649 μs) | 57.094 μs (IQR 44.369 μs) | 482.461 μs (IQR 3.031 μs) | 495.015 μs (IQR 981.821 μs) | 5.001 ms (IQR 2.172 ms) | 5.070 ms (IQR 893.747 μs) | +| BasicInterpolators | 4.340 μs (IQR 530.000 ns) | 4.385 μs (IQR 722.500 ns) | 37.880 μs (IQR 1.177 μs) | 39.015 μs (IQR 2.552 μs) | 319.082 μs (IQR 3.510 μs) | 1.021 ms (IQR 696.255 μs) | 3.258 ms (IQR 864.505 μs) | 3.324 ms (IQR 835.567 μs) | +| Dierckx (k=3) | 23.020 μs (IQR 1.680 μs) | 17.440 μs (IQR 495.000 ns) | 209.343 μs (IQR 50.807 μs) | 211.203 μs (IQR 27.988 μs) | 1.582 ms (IQR 93.580 μs) | 1.723 ms (IQR 687.644 μs) | 18.212 ms (IQR 5.298 ms) | 20.038 ms (IQR 5.815 ms) | +| FastInterpolations | 1.760 μs (IQR 285.000 ns) | 1.400 μs (IQR 235.000 ns) | 14.395 μs (IQR 495.000 ns) | 11.440 μs (IQR 515.000 ns) | 128.024 μs (IQR 1.032 μs) | 101.614 μs (IQR 1.115 μs) | 1.280 ms (IQR 242.824 μs) | 1.010 ms (IQR 29.291 μs) | +| Interpolations (uniform) | — | 15.339 μs (IQR 1.022 μs) | — | 130.319 μs (IQR 9.140 μs) | — | 1.470 ms (IQR 311.781 μs) | — | 8.529 ms (IQR 384.982 μs) | + +### Linear + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 905.000 ns (IQR 250.000 ns) | 1.030 μs (IQR 162.500 ns) | 7.020 μs (IQR 412.500 ns) | 7.835 μs (IQR 300.250 ns) | 60.794 μs (IQR 1.147 μs) | 69.284 μs (IQR 679.750 ns) | 602.635 μs (IQR 3.941 μs) | 684.534 μs (IQR 5.035 μs) | +| BasicInterpolators | 1.170 μs (IQR 350.000 ns) | 1.320 μs (IQR 262.500 ns) | 8.145 μs (IQR 600.000 ns) | 9.035 μs (IQR 1.195 μs) | 61.674 μs (IQR 2.082 μs) | 70.815 μs (IQR 2.756 μs) | 610.510 μs (IQR 9.142 μs) | 696.798 μs (IQR 5.480 μs) | +| Dierckx (k=1) | 9.309 μs (IQR 580.000 ns) | 12.405 μs (IQR 2.297 μs) | 79.575 μs (IQR 2.900 μs) | 106.234 μs (IQR 10.303 μs) | 764.703 μs (IQR 21.014 μs) | 764.083 μs (IQR 13.045 μs) | 7.486 ms (IQR 1.087 ms) | 7.568 ms (IQR 922.522 μs) | +| FastInterpolations | 1.015 μs (IQR 402.500 ns) | 330.000 ns (IQR 212.500 ns) | 7.410 μs (IQR 2.632 μs) | 2.100 μs (IQR 347.500 ns) | 45.944 μs (IQR 3.295 μs) | 15.395 μs (IQR 1.893 μs) | 429.641 μs (IQR 24.649 μs) | 137.569 μs (IQR 2.454 μs) | +| Interpolations (gridded) | 875.000 ns (IQR 245.000 ns) | 970.000 ns (IQR 245.000 ns) | 7.385 μs (IQR 477.500 ns) | 9.085 μs (IQR 9.578 μs) | 60.604 μs (IQR 1.885 μs) | 69.745 μs (IQR 1.954 μs) | 596.539 μs (IQR 5.037 μs) | 682.029 μs (IQR 5.189 μs) | +| Interpolations (uniform) | — | 165.000 ns (IQR 165.000 ns) | — | 3.040 μs (IQR 2.235 μs) | — | 23.445 μs (IQR 5.530 μs) | — | 68.814 μs (IQR 2.921 μs) | + +### MonotoneCubic + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations (CubicHermite) | 1.210 μs (IQR 245.000 ns) | 1.260 μs (IQR 312.500 ns) | 8.205 μs (IQR 472.500 ns) | 8.970 μs (IQR 552.500 ns) | 66.989 μs (IQR 899.500 ns) | 155.944 μs (IQR 2.865 μs) | 667.664 μs (IQR 11.177 μs) | 760.448 μs (IQR 1.507 ms) | +| FastInterpolations (PCHIP) | 1.980 μs (IQR 395.000 ns) | 1.245 μs (IQR 262.500 ns) | 15.105 μs (IQR 837.500 ns) | 10.855 μs (IQR 550.000 ns) | 116.039 μs (IQR 425.011 μs) | 93.414 μs (IQR 1.335 μs) | 1.148 ms (IQR 64.792 μs) | 925.542 μs (IQR 1.909 ms) | +| PCHIPInterpolation | 1.590 μs (IQR 302.500 ns) | 1.685 μs (IQR 352.500 ns) | 13.275 μs (IQR 790.000 ns) | 14.570 μs (IQR 890.000 ns) | 150.029 μs (IQR 157.078 μs) | 118.489 μs (IQR 12.057 μs) | 1.100 ms (IQR 1.930 ms) | 1.195 ms (IQR 2.849 ms) | + +### QuadraticSpline + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 9.420 μs (IQR 661.750 ns) | 9.455 μs (IQR 750.000 ns) | 84.669 μs (IQR 997.750 ns) | 96.034 μs (IQR 61.307 μs) | 800.123 μs (IQR 10.530 μs) | 823.987 μs (IQR 668.939 μs) | 8.070 ms (IQR 3.777 ms) | 8.209 ms (IQR 5.391 ms) | +| Dierckx (k=2) | 15.485 μs (IQR 680.000 ns) | 21.090 μs (IQR 3.221 μs) | 136.554 μs (IQR 1.056 μs) | 143.228 μs (IQR 16.675 μs) | 1.353 ms (IQR 164.550 μs) | 1.356 ms (IQR 155.038 μs) | 13.384 ms (IQR 4.032 ms) | 14.377 ms (IQR 3.926 ms) | +| FastInterpolations | 1.430 μs (IQR 420.000 ns) | 715.000 ns (IQR 342.500 ns) | 10.325 μs (IQR 740.000 ns) | 5.210 μs (IQR 485.750 ns) | 70.434 μs (IQR 420.084 μs) | 38.454 μs (IQR 2.993 μs) | 702.374 μs (IQR 38.012 μs) | 372.171 μs (IQR 33.597 μs) | + +## Single-query latency + +Cold single evaluation `A(x_query)`. Rows = library, columns = (n, knot pattern). + +### Akima + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 70.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 0.000 ns) | 80.000 ns (IQR 10.000 ns) | 90.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 100.000 ns (IQR 2.500 ns) | 100.000 ns (IQR 10.000 ns) | +| FastInterpolations | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | + +### CubicSpline + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 80.000 ns (IQR 0.000 ns) | 80.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 0.000 ns) | 110.000 ns (IQR 0.000 ns) | 110.000 ns (IQR 10.000 ns) | +| BasicInterpolators | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 2.500 ns) | 60.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 10.000 ns) | 90.000 ns (IQR 10.000 ns) | +| Dierckx (k=3) | 150.000 ns (IQR 0.000 ns) | 150.000 ns (IQR 10.000 ns) | 380.000 ns (IQR 10.000 ns) | 400.000 ns (IQR 10.000 ns) | 2.789 μs (IQR 10.000 ns) | 2.790 μs (IQR 10.000 ns) | 26.760 μs (IQR 51.000 ns) | 26.680 μs (IQR 40.000 ns) | +| FastInterpolations | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 0.000 ns) | 50.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 0.000 ns) | 50.000 ns (IQR 10.000 ns) | +| Interpolations (uniform) | — | 80.000 ns (IQR 10.000 ns) | — | 80.000 ns (IQR 10.000 ns) | — | 80.000 ns (IQR 0.000 ns) | — | 80.000 ns (IQR 10.000 ns) | + +### Linear + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 70.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 0.000 ns) | 100.000 ns (IQR 10.000 ns) | +| BasicInterpolators | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 10.000 ns) | +| Dierckx (k=1) | 120.000 ns (IQR 10.000 ns) | 120.000 ns (IQR 10.000 ns) | 350.000 ns (IQR 0.000 ns) | 370.000 ns (IQR 10.000 ns) | 2.750 μs (IQR 10.000 ns) | 2.750 μs (IQR 10.000 ns) | 26.720 μs (IQR 40.250 ns) | 26.650 μs (IQR 42.500 ns) | +| FastInterpolations | 50.000 ns (IQR 0.000 ns) | 40.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 40.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 10.000 ns) | 40.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 0.000 ns) | 40.000 ns (IQR 10.000 ns) | +| Interpolations (gridded) | 60.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 70.000 ns (IQR 0.000 ns) | 100.000 ns (IQR 0.000 ns) | 80.000 ns (IQR 10.000 ns) | +| Interpolations (uniform) | — | 60.000 ns (IQR 0.000 ns) | — | 60.000 ns (IQR 0.000 ns) | — | 60.000 ns (IQR 0.000 ns) | — | 60.000 ns (IQR 0.000 ns) | + +### MonotoneCubic + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations (CubicHermite) | 90.000 ns (IQR 10.000 ns) | 90.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 0.000 ns) | 100.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 0.000 ns) | +| FastInterpolations (PCHIP) | 50.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | 60.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 0.000 ns) | +| PCHIPInterpolation | 60.000 ns (IQR 10.000 ns) | 70.000 ns (IQR 10.000 ns) | 80.000 ns (IQR 10.000 ns) | 80.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 110.000 ns (IQR 10.000 ns) | 100.000 ns (IQR 0.000 ns) | + +### QuadraticSpline + +| Library | n=100,nonuniform | n=100,uniform | n=1000,nonuniform | n=1000,uniform | n=10000,nonuniform | n=10000,uniform | n=100000,nonuniform | n=100000,uniform | +|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 160.000 ns (IQR 10.000 ns) | 160.000 ns (IQR 10.000 ns) | 250.000 ns (IQR 0.000 ns) | 260.000 ns (IQR 0.000 ns) | 1.240 μs (IQR 10.000 ns) | 1.240 μs (IQR 10.000 ns) | 14.540 μs (IQR 100.000 ns) | 14.464 μs (IQR 59.000 ns) | +| Dierckx (k=2) | 130.000 ns (IQR 10.000 ns) | 140.000 ns (IQR 10.000 ns) | 370.000 ns (IQR 10.000 ns) | 390.000 ns (IQR 0.000 ns) | 2.780 μs (IQR 10.000 ns) | 2.780 μs (IQR 0.000 ns) | 26.719 μs (IQR 59.000 ns) | 26.630 μs (IQR 43.250 ns) | +| FastInterpolations | 50.000 ns (IQR 10.000 ns) | 40.000 ns (IQR 10.000 ns) | 50.000 ns (IQR 2.500 ns) | 40.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 10.000 ns) | 40.000 ns (IQR 10.000 ns) | 60.000 ns (IQR 2.500 ns) | 40.000 ns (IQR 10.000 ns) | + +## Sorted batch + +`A(out, tt)` where `tt` is sorted random points in domain. (knot pattern = uniform) + +### Akima + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 120.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 0.000 ns) | 4.010 μs (IQR 0.000 ns) | 364.582 μs (IQR 1.153 μs) | 120.000 ns (IQR 10.000 ns) | 360.000 ns (IQR 0.000 ns) | 5.350 μs (IQR 20.000 ns) | 387.027 μs (IQR 2.195 μs) | 130.000 ns (IQR 10.000 ns) | 430.000 ns (IQR 10.000 ns) | 13.874 μs (IQR 683.250 ns) | 521.981 μs (IQR 3.110 μs) | 140.000 ns (IQR 10.000 ns) | 540.000 ns (IQR 10.000 ns) | 141.629 μs (IQR 2.840 μs) | 1.442 ms (IQR 10.085 μs) | +| FastInterpolations | 60.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.190 μs (IQR 41.000 ns) | 1.074 ms (IQR 22.070 μs) | 60.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.170 μs (IQR 11.000 ns) | 1.070 ms (IQR 11.035 μs) | 60.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.190 μs (IQR 50.000 ns) | 1.072 ms (IQR 7.127 μs) | 60.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.884 μs (IQR 411.500 ns) | 1.075 ms (IQR 6.483 μs) | + +### CubicSpline + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 130.000 ns (IQR 10.000 ns) | 210.000 ns (IQR 0.000 ns) | 10.380 μs (IQR 10.000 ns) | 1.002 ms (IQR 6.160 μs) | 130.000 ns (IQR 10.000 ns) | 410.000 ns (IQR 10.000 ns) | 10.220 μs (IQR 10.000 ns) | 1.007 ms (IQR 5.220 μs) | 140.000 ns (IQR 10.000 ns) | 495.000 ns (IQR 10.000 ns) | 15.790 μs (IQR 144.750 ns) | 1.050 ms (IQR 5.147 μs) | 150.000 ns (IQR 10.000 ns) | 570.000 ns (IQR 10.000 ns) | 153.254 μs (IQR 2.850 μs) | 1.787 ms (IQR 11.688 μs) | +| BasicInterpolators | 60.000 ns (IQR 0.000 ns) | 200.000 ns (IQR 10.000 ns) | 16.920 μs (IQR 371.000 ns) | 1.402 ms (IQR 29.020 μs) | 70.000 ns (IQR 0.000 ns) | 250.000 ns (IQR 10.000 ns) | 43.025 μs (IQR 2.495 μs) | 2.069 ms (IQR 40.005 μs) | 70.000 ns (IQR 10.000 ns) | 310.000 ns (IQR 0.000 ns) | 79.704 μs (IQR 4.163 μs) | 4.140 ms (IQR 15.742 μs) | 80.000 ns (IQR 0.000 ns) | 370.000 ns (IQR 0.000 ns) | 126.109 μs (IQR 1.827 μs) | 7.865 ms (IQR 77.216 μs) | +| Dierckx (k=3) | 190.000 ns (IQR 10.000 ns) | 1.220 μs (IQR 0.000 ns) | 121.679 μs (IQR 62.500 ns) | 11.733 ms (IQR 34.135 μs) | 700.000 ns (IQR 0.000 ns) | 3.620 μs (IQR 0.000 ns) | 408.256 μs (IQR 72.500 ns) | 40.985 ms (IQR 72.560 μs) | 5.690 μs (IQR 30.000 ns) | 26.920 μs (IQR 39.250 ns) | 3.202 ms (IQR 4.645 μs) | 322.833 ms (IQR 90.359 μs) | 55.620 μs (IQR 110.000 ns) | 260.202 μs (IQR 173.500 ns) | 31.267 ms (IQR 222.842 μs) | 3.164 s (IQR 0.000 ns) | +| FastInterpolations | 60.000 ns (IQR 2.500 ns) | 150.000 ns (IQR 0.000 ns) | 8.860 μs (IQR 0.000 ns) | 843.207 μs (IQR 12.260 μs) | 60.000 ns (IQR 0.000 ns) | 140.000 ns (IQR 10.000 ns) | 8.980 μs (IQR 1.000 ns) | 846.997 μs (IQR 6.682 μs) | 60.000 ns (IQR 2.500 ns) | 150.000 ns (IQR 0.000 ns) | 8.950 μs (IQR 42.500 ns) | 846.477 μs (IQR 4.218 μs) | 70.000 ns (IQR 20.000 ns) | 150.000 ns (IQR 10.000 ns) | 9.525 μs (IQR 69.250 ns) | 849.188 μs (IQR 10.038 μs) | +| Interpolations (uniform) | 90.000 ns (IQR 2.500 ns) | 270.000 ns (IQR 0.000 ns) | 20.460 μs (IQR 10.000 ns) | 2.046 ms (IQR 8.038 μs) | 90.000 ns (IQR 10.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.460 μs (IQR 1.000 ns) | 2.047 ms (IQR 9.498 μs) | 90.000 ns (IQR 10.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.440 μs (IQR 19.000 ns) | 2.047 ms (IQR 7.382 μs) | 90.000 ns (IQR 0.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.440 μs (IQR 31.000 ns) | 2.049 ms (IQR 10.880 μs) | + +### Linear + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 120.000 ns (IQR 10.000 ns) | 180.000 ns (IQR 10.000 ns) | 6.490 μs (IQR 10.000 ns) | 530.655 μs (IQR 2.315 μs) | 130.000 ns (IQR 0.000 ns) | 380.000 ns (IQR 0.000 ns) | 7.010 μs (IQR 10.000 ns) | 541.605 μs (IQR 889.250 ns) | 140.000 ns (IQR 0.000 ns) | 470.000 ns (IQR 10.000 ns) | 15.860 μs (IQR 50.250 ns) | 685.063 μs (IQR 7.840 μs) | 150.000 ns (IQR 10.000 ns) | 550.000 ns (IQR 0.000 ns) | 143.244 μs (IQR 4.605 μs) | 1.653 ms (IQR 13.035 μs) | +| BasicInterpolators | 60.000 ns (IQR 10.000 ns) | 190.000 ns (IQR 0.000 ns) | 16.470 μs (IQR 322.500 ns) | 1.399 ms (IQR 16.903 μs) | 70.000 ns (IQR 0.000 ns) | 240.000 ns (IQR 0.000 ns) | 42.279 μs (IQR 3.697 μs) | 2.052 ms (IQR 12.033 μs) | 70.000 ns (IQR 0.000 ns) | 310.000 ns (IQR 10.000 ns) | 75.574 μs (IQR 5.532 μs) | 4.203 ms (IQR 42.208 μs) | 80.000 ns (IQR 0.000 ns) | 380.000 ns (IQR 10.000 ns) | 123.084 μs (IQR 1.998 μs) | 7.835 ms (IQR 64.520 μs) | +| Dierckx (k=1) | 150.000 ns (IQR 0.000 ns) | 840.000 ns (IQR 10.000 ns) | 86.449 μs (IQR 320.000 ns) | 8.338 ms (IQR 205.934 μs) | 670.000 ns (IQR 0.000 ns) | 3.270 μs (IQR 40.000 ns) | 373.326 μs (IQR 591.750 ns) | 37.456 ms (IQR 86.332 μs) | 5.660 μs (IQR 20.000 ns) | 26.600 μs (IQR 30.000 ns) | 3.172 ms (IQR 37.733 μs) | 319.279 ms (IQR 135.094 μs) | 55.620 μs (IQR 93.250 ns) | 263.383 μs (IQR 1.763 μs) | 31.217 ms (IQR 137.637 μs) | 3.147 s (IQR 0.000 ns) | +| FastInterpolations | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.220 μs (IQR 90.000 ns) | 370.687 μs (IQR 3.397 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.145 μs (IQR 30.000 ns) | 372.367 μs (IQR 3.388 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.200 μs (IQR 82.500 ns) | 373.437 μs (IQR 4.133 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.370 μs (IQR 72.500 ns) | 374.116 μs (IQR 4.323 μs) | +| Interpolations (gridded) | 70.000 ns (IQR 0.000 ns) | 230.000 ns (IQR 0.000 ns) | 18.080 μs (IQR 50.000 ns) | 1.644 ms (IQR 26.273 μs) | 80.000 ns (IQR 10.000 ns) | 300.000 ns (IQR 0.000 ns) | 52.349 μs (IQR 3.364 μs) | 2.509 ms (IQR 35.296 μs) | 80.000 ns (IQR 10.000 ns) | 350.000 ns (IQR 10.000 ns) | 93.839 μs (IQR 3.115 μs) | 4.435 ms (IQR 47.497 μs) | 110.000 ns (IQR 10.000 ns) | 430.000 ns (IQR 10.000 ns) | 139.209 μs (IQR 4.602 μs) | 7.831 ms (IQR 71.805 μs) | +| Interpolations (uniform) | 70.000 ns (IQR 10.000 ns) | 120.000 ns (IQR 10.000 ns) | 6.610 μs (IQR 20.000 ns) | 655.379 μs (IQR 612.250 ns) | 70.000 ns (IQR 0.000 ns) | 120.000 ns (IQR 0.000 ns) | 6.610 μs (IQR 20.000 ns) | 655.654 μs (IQR 1.357 μs) | 70.000 ns (IQR 0.000 ns) | 120.000 ns (IQR 10.000 ns) | 6.665 μs (IQR 20.000 ns) | 655.439 μs (IQR 542.500 ns) | 70.000 ns (IQR 0.000 ns) | 120.000 ns (IQR 10.000 ns) | 7.050 μs (IQR 30.250 ns) | 655.829 μs (IQR 6.360 μs) | + +### MonotoneCubic + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations (CubicHermite) | 130.000 ns (IQR 0.000 ns) | 230.000 ns (IQR 10.000 ns) | 10.500 μs (IQR 10.000 ns) | 983.576 μs (IQR 12.512 μs) | 130.000 ns (IQR 10.000 ns) | 430.000 ns (IQR 0.000 ns) | 10.660 μs (IQR 20.000 ns) | 992.811 μs (IQR 10.555 μs) | 140.000 ns (IQR 10.000 ns) | 510.000 ns (IQR 0.000 ns) | 18.420 μs (IQR 41.000 ns) | 1.075 ms (IQR 11.249 μs) | 150.000 ns (IQR 10.000 ns) | 600.000 ns (IQR 20.000 ns) | 147.614 μs (IQR 2.352 μs) | 1.812 ms (IQR 21.045 μs) | +| FastInterpolations (PCHIP) | 60.000 ns (IQR 0.000 ns) | 160.000 ns (IQR 10.000 ns) | 11.139 μs (IQR 10.000 ns) | 1.072 ms (IQR 3.803 μs) | 60.000 ns (IQR 0.000 ns) | 160.000 ns (IQR 10.000 ns) | 11.190 μs (IQR 30.000 ns) | 1.072 ms (IQR 11.660 μs) | 60.000 ns (IQR 0.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.160 μs (IQR 72.250 ns) | 1.081 ms (IQR 23.755 μs) | 60.000 ns (IQR 0.000 ns) | 160.000 ns (IQR 10.000 ns) | 11.830 μs (IQR 112.750 ns) | 1.073 ms (IQR 7.117 μs) | +| PCHIPInterpolation | 70.000 ns (IQR 0.000 ns) | 240.000 ns (IQR 0.000 ns) | 20.294 μs (IQR 322.250 ns) | 1.864 ms (IQR 11.880 μs) | 90.000 ns (IQR 0.000 ns) | 320.000 ns (IQR 10.000 ns) | 40.360 μs (IQR 1.312 μs) | 2.973 ms (IQR 40.100 μs) | 100.000 ns (IQR 0.000 ns) | 450.000 ns (IQR 2.500 ns) | 79.409 μs (IQR 2.623 μs) | 5.436 ms (IQR 51.495 μs) | 120.000 ns (IQR 10.000 ns) | 510.000 ns (IQR 10.000 ns) | 134.408 μs (IQR 3.429 μs) | 9.598 ms (IQR 69.869 μs) | + +### QuadraticSpline + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 210.000 ns (IQR 0.000 ns) | 1.010 μs (IQR 0.000 ns) | 110.119 μs (IQR 162.500 ns) | 9.480 ms (IQR 53.279 μs) | 310.000 ns (IQR 0.000 ns) | 2.420 μs (IQR 10.000 ns) | 222.133 μs (IQR 455.000 ns) | 18.933 ms (IQR 74.185 μs) | 1.300 μs (IQR 10.000 ns) | 12.569 μs (IQR 59.250 ns) | 1.227 ms (IQR 5.570 μs) | 117.928 ms (IQR 382.237 μs) | 14.610 μs (IQR 70.000 ns) | 144.098 μs (IQR 315.250 ns) | 14.771 ms (IQR 263.223 μs) | 1.477 s (IQR 0.000 ns) | +| Dierckx (k=2) | 170.000 ns (IQR 2.500 ns) | 1.040 μs (IQR 10.000 ns) | 103.589 μs (IQR 329.250 ns) | 9.887 ms (IQR 85.805 μs) | 690.000 ns (IQR 0.000 ns) | 3.440 μs (IQR 10.000 ns) | 391.187 μs (IQR 620.250 ns) | 39.289 ms (IQR 132.329 μs) | 5.669 μs (IQR 20.000 ns) | 26.799 μs (IQR 21.000 ns) | 3.180 ms (IQR 3.583 μs) | 320.143 ms (IQR 155.369 μs) | 55.730 μs (IQR 89.000 ns) | 260.017 μs (IQR 267.500 ns) | 31.216 ms (IQR 132.239 μs) | 3.154 s (IQR 0.000 ns) | +| FastInterpolations | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 10.000 ns) | 3.910 μs (IQR 20.000 ns) | 349.737 μs (IQR 3.685 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 10.000 ns) | 3.990 μs (IQR 60.000 ns) | 350.812 μs (IQR 3.107 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 10.000 ns) | 4.320 μs (IQR 62.500 ns) | 351.341 μs (IQR 3.715 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 10.000 ns) | 5.560 μs (IQR 80.000 ns) | 359.077 μs (IQR 9.547 μs) | + +## Random batch + +`A(out, tt)` where `tt` is unsorted. (knot pattern = uniform) + +### Akima + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 120.000 ns (IQR 10.000 ns) | 440.000 ns (IQR 10.000 ns) | 85.734 μs (IQR 283.250 ns) | 8.733 ms (IQR 42.521 μs) | 120.000 ns (IQR 0.000 ns) | 760.000 ns (IQR 60.000 ns) | 113.999 μs (IQR 2.893 μs) | 11.877 ms (IQR 130.729 μs) | 130.000 ns (IQR 0.000 ns) | 620.000 ns (IQR 10.000 ns) | 144.079 μs (IQR 3.071 μs) | 15.912 ms (IQR 39.300 μs) | 130.000 ns (IQR 10.000 ns) | 750.000 ns (IQR 10.000 ns) | 209.804 μs (IQR 3.175 μs) | 22.256 ms (IQR 44.275 μs) | +| FastInterpolations | 60.000 ns (IQR 10.000 ns) | 170.000 ns (IQR 10.000 ns) | 11.150 μs (IQR 0.000 ns) | 1.071 ms (IQR 7.907 μs) | 60.000 ns (IQR 0.000 ns) | 190.000 ns (IQR 0.000 ns) | 11.180 μs (IQR 30.000 ns) | 1.071 ms (IQR 5.312 μs) | 60.000 ns (IQR 10.000 ns) | 180.000 ns (IQR 10.000 ns) | 11.180 μs (IQR 40.000 ns) | 1.070 ms (IQR 5.062 μs) | 60.000 ns (IQR 0.000 ns) | 180.000 ns (IQR 10.000 ns) | 11.215 μs (IQR 60.250 ns) | 1.115 ms (IQR 5.000 μs) | + +### CubicSpline + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 130.000 ns (IQR 0.000 ns) | 510.000 ns (IQR 10.000 ns) | 88.379 μs (IQR 315.750 ns) | 8.819 ms (IQR 37.679 μs) | 130.000 ns (IQR 10.000 ns) | 790.000 ns (IQR 10.000 ns) | 116.909 μs (IQR 462.500 ns) | 11.897 ms (IQR 63.819 μs) | 140.000 ns (IQR 0.000 ns) | 740.000 ns (IQR 10.000 ns) | 145.738 μs (IQR 2.996 μs) | 16.274 ms (IQR 145.029 μs) | 150.000 ns (IQR 0.000 ns) | 740.000 ns (IQR 10.000 ns) | 221.258 μs (IQR 1.857 μs) | 24.186 ms (IQR 232.969 μs) | +| BasicInterpolators | 60.000 ns (IQR 0.000 ns) | 200.000 ns (IQR 10.000 ns) | 26.510 μs (IQR 1.024 μs) | 6.469 ms (IQR 35.997 μs) | 70.000 ns (IQR 0.000 ns) | 240.000 ns (IQR 10.000 ns) | 64.219 μs (IQR 3.527 μs) | 9.810 ms (IQR 34.904 μs) | 70.000 ns (IQR 10.000 ns) | 300.000 ns (IQR 0.000 ns) | 107.729 μs (IQR 3.515 μs) | 13.373 ms (IQR 124.972 μs) | 80.000 ns (IQR 0.000 ns) | 360.000 ns (IQR 0.000 ns) | 171.113 μs (IQR 4.553 μs) | 19.801 ms (IQR 558.067 μs) | +| Dierckx (k=3) | 150.000 ns (IQR 10.000 ns) | 1.250 μs (IQR 0.000 ns) | 134.424 μs (IQR 462.250 ns) | 13.515 ms (IQR 60.659 μs) | 290.000 ns (IQR 10.000 ns) | 3.960 μs (IQR 10.000 ns) | 412.197 μs (IQR 334.250 ns) | 41.079 ms (IQR 50.310 μs) | 1.480 μs (IQR 0.000 ns) | 30.050 μs (IQR 30.000 ns) | 3.232 ms (IQR 6.122 μs) | 321.523 ms (IQR 211.493 μs) | 13.540 μs (IQR 80.000 ns) | 291.423 μs (IQR 211.500 ns) | 31.510 ms (IQR 153.314 μs) | 3.190 s (IQR 0.000 ns) | +| FastInterpolations | 60.000 ns (IQR 2.500 ns) | 150.000 ns (IQR 0.000 ns) | 8.980 μs (IQR 10.000 ns) | 860.512 μs (IQR 4.238 μs) | 60.000 ns (IQR 0.000 ns) | 150.000 ns (IQR 0.000 ns) | 8.960 μs (IQR 60.000 ns) | 847.768 μs (IQR 4.537 μs) | 60.000 ns (IQR 0.000 ns) | 150.000 ns (IQR 0.000 ns) | 8.980 μs (IQR 40.000 ns) | 854.847 μs (IQR 8.707 μs) | 60.000 ns (IQR 0.000 ns) | 150.000 ns (IQR 0.000 ns) | 9.140 μs (IQR 40.000 ns) | 1.037 ms (IQR 5.805 μs) | +| Interpolations (uniform) | 90.000 ns (IQR 0.000 ns) | 270.000 ns (IQR 10.000 ns) | 20.430 μs (IQR 20.000 ns) | 2.046 ms (IQR 7.638 μs) | 90.000 ns (IQR 0.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.470 μs (IQR 10.000 ns) | 2.046 ms (IQR 5.918 μs) | 90.000 ns (IQR 0.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.420 μs (IQR 21.000 ns) | 2.044 ms (IQR 7.173 μs) | 90.000 ns (IQR 10.000 ns) | 270.000 ns (IQR 0.000 ns) | 20.450 μs (IQR 5.500 ns) | 2.048 ms (IQR 11.220 μs) | + +### Linear + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 120.000 ns (IQR 0.000 ns) | 480.000 ns (IQR 0.000 ns) | 83.519 μs (IQR 372.750 ns) | 8.415 ms (IQR 52.812 μs) | 130.000 ns (IQR 0.000 ns) | 770.000 ns (IQR 12.500 ns) | 108.679 μs (IQR 3.103 μs) | 11.532 ms (IQR 79.008 μs) | 140.000 ns (IQR 0.000 ns) | 700.000 ns (IQR 10.000 ns) | 141.614 μs (IQR 3.540 μs) | 15.566 ms (IQR 113.496 μs) | 150.000 ns (IQR 0.000 ns) | 820.000 ns (IQR 10.000 ns) | 208.248 μs (IQR 2.930 μs) | 22.285 ms (IQR 93.724 μs) | +| BasicInterpolators | 60.000 ns (IQR 0.000 ns) | 200.000 ns (IQR 0.000 ns) | 30.665 μs (IQR 533.250 ns) | 6.365 ms (IQR 46.459 μs) | 60.000 ns (IQR 10.000 ns) | 240.000 ns (IQR 0.000 ns) | 65.439 μs (IQR 3.513 μs) | 9.742 ms (IQR 67.108 μs) | 70.000 ns (IQR 0.000 ns) | 290.000 ns (IQR 0.000 ns) | 104.350 μs (IQR 3.683 μs) | 13.157 ms (IQR 127.214 μs) | 80.000 ns (IQR 0.000 ns) | 360.000 ns (IQR 10.000 ns) | 167.203 μs (IQR 4.349 μs) | 19.014 ms (IQR 160.544 μs) | +| Dierckx (k=1) | 110.000 ns (IQR 0.000 ns) | 870.000 ns (IQR 10.000 ns) | 98.819 μs (IQR 450.000 ns) | 10.105 ms (IQR 280.429 μs) | 260.000 ns (IQR 60.000 ns) | 3.610 μs (IQR 0.000 ns) | 376.521 μs (IQR 5.056 μs) | 37.694 ms (IQR 182.261 μs) | 1.455 μs (IQR 10.000 ns) | 29.699 μs (IQR 33.250 ns) | 3.194 ms (IQR 5.514 μs) | 321.630 ms (IQR 565.205 μs) | 13.500 μs (IQR 45.000 ns) | 291.442 μs (IQR 304.250 ns) | 31.515 ms (IQR 158.856 μs) | 3.173 s (IQR 0.000 ns) | +| FastInterpolations | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 10.000 ns) | 4.240 μs (IQR 0.000 ns) | 392.077 μs (IQR 5.178 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.240 μs (IQR 32.500 ns) | 373.457 μs (IQR 3.303 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 0.000 ns) | 4.270 μs (IQR 50.000 ns) | 377.542 μs (IQR 4.339 μs) | 50.000 ns (IQR 0.000 ns) | 90.000 ns (IQR 10.000 ns) | 4.350 μs (IQR 30.000 ns) | 464.091 μs (IQR 4.390 μs) | +| Interpolations (gridded) | 70.000 ns (IQR 10.000 ns) | 230.000 ns (IQR 10.000 ns) | 25.485 μs (IQR 400.000 ns) | 6.346 ms (IQR 57.190 μs) | 80.000 ns (IQR 10.000 ns) | 270.000 ns (IQR 10.000 ns) | 60.749 μs (IQR 4.149 μs) | 9.964 ms (IQR 113.239 μs) | 80.000 ns (IQR 0.000 ns) | 350.000 ns (IQR 0.000 ns) | 106.214 μs (IQR 5.040 μs) | 13.345 ms (IQR 47.422 μs) | 90.000 ns (IQR 0.000 ns) | 410.000 ns (IQR 0.000 ns) | 172.038 μs (IQR 6.404 μs) | 19.450 ms (IQR 135.066 μs) | +| Interpolations (uniform) | 70.000 ns (IQR 2.500 ns) | 120.000 ns (IQR 10.000 ns) | 6.600 μs (IQR 30.000 ns) | 655.404 μs (IQR 625.000 ns) | 70.000 ns (IQR 0.000 ns) | 120.000 ns (IQR 10.000 ns) | 6.600 μs (IQR 20.000 ns) | 661.254 μs (IQR 8.865 μs) | 70.000 ns (IQR 10.000 ns) | 120.000 ns (IQR 10.000 ns) | 6.890 μs (IQR 20.000 ns) | 683.869 μs (IQR 335.000 ns) | 70.000 ns (IQR 0.000 ns) | 120.000 ns (IQR 0.000 ns) | 7.170 μs (IQR 20.000 ns) | 752.364 μs (IQR 8.863 μs) | + +### MonotoneCubic + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations (CubicHermite) | 130.000 ns (IQR 0.000 ns) | 550.000 ns (IQR 10.000 ns) | 88.869 μs (IQR 182.750 ns) | 8.923 ms (IQR 45.887 μs) | 140.000 ns (IQR 0.000 ns) | 750.000 ns (IQR 10.000 ns) | 114.689 μs (IQR 312.750 ns) | 12.259 ms (IQR 81.830 μs) | 150.000 ns (IQR 0.000 ns) | 760.000 ns (IQR 10.000 ns) | 149.914 μs (IQR 4.128 μs) | 16.358 ms (IQR 36.446 μs) | 160.000 ns (IQR 10.000 ns) | 850.000 ns (IQR 10.000 ns) | 225.383 μs (IQR 3.223 μs) | 24.150 ms (IQR 233.658 μs) | +| FastInterpolations (PCHIP) | 60.000 ns (IQR 0.000 ns) | 180.000 ns (IQR 10.000 ns) | 11.240 μs (IQR 10.000 ns) | 1.070 ms (IQR 6.601 μs) | 60.000 ns (IQR 0.000 ns) | 170.000 ns (IQR 0.000 ns) | 11.150 μs (IQR 140.000 ns) | 1.071 ms (IQR 10.822 μs) | 60.000 ns (IQR 0.000 ns) | 180.000 ns (IQR 10.000 ns) | 11.140 μs (IQR 24.750 ns) | 1.068 ms (IQR 5.527 μs) | 60.000 ns (IQR 0.000 ns) | 190.000 ns (IQR 10.000 ns) | 11.240 μs (IQR 40.000 ns) | 1.114 ms (IQR 9.055 μs) | +| PCHIPInterpolation | 70.000 ns (IQR 10.000 ns) | 250.000 ns (IQR 0.000 ns) | 25.435 μs (IQR 1.558 μs) | 6.446 ms (IQR 16.104 μs) | 90.000 ns (IQR 0.000 ns) | 345.000 ns (IQR 10.000 ns) | 63.260 μs (IQR 2.118 μs) | 9.963 ms (IQR 79.389 μs) | 90.000 ns (IQR 0.000 ns) | 440.000 ns (IQR 10.000 ns) | 118.869 μs (IQR 4.293 μs) | 14.222 ms (IQR 93.245 μs) | 110.000 ns (IQR 10.000 ns) | 490.000 ns (IQR 10.000 ns) | 185.893 μs (IQR 5.101 μs) | 20.408 ms (IQR 54.869 μs) | + +### QuadraticSpline + +| Library | n=100,m=1 | n=100,m=10 | n=100,m=1000 | n=100,m=100000 | n=1000,m=1 | n=1000,m=10 | n=1000,m=1000 | n=1000,m=100000 | n=10000,m=1 | n=10000,m=10 | n=10000,m=1000 | n=10000,m=100000 | n=100000,m=1 | n=100000,m=10 | n=100000,m=1000 | n=100000,m=100000 | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| DataInterpolations | 210.000 ns (IQR 0.000 ns) | 1.270 μs (IQR 0.000 ns) | 188.969 μs (IQR 920.000 ns) | 19.321 ms (IQR 149.183 μs) | 310.000 ns (IQR 10.000 ns) | 2.720 μs (IQR 10.000 ns) | 347.986 μs (IQR 3.498 μs) | 36.760 ms (IQR 66.121 μs) | 1.300 μs (IQR 20.000 ns) | 12.700 μs (IQR 61.750 ns) | 1.444 ms (IQR 9.453 μs) | 145.560 ms (IQR 123.712 μs) | 15.650 μs (IQR 700.000 ns) | 146.198 μs (IQR 282.250 ns) | 16.271 ms (IQR 285.353 μs) | 1.616 s (IQR 0.000 ns) | +| Dierckx (k=2) | 130.000 ns (IQR 0.000 ns) | 1.060 μs (IQR 10.000 ns) | 117.319 μs (IQR 3.700 μs) | 11.766 ms (IQR 113.288 μs) | 270.000 ns (IQR 10.000 ns) | 3.780 μs (IQR 0.000 ns) | 394.716 μs (IQR 372.250 ns) | 39.406 ms (IQR 139.238 μs) | 1.470 μs (IQR 20.000 ns) | 29.990 μs (IQR 44.750 ns) | 3.214 ms (IQR 5.622 μs) | 320.480 ms (IQR 312.612 μs) | 13.520 μs (IQR 90.000 ns) | 291.358 μs (IQR 312.500 ns) | 32.152 ms (IQR 591.392 μs) | 3.155 s (IQR 0.000 ns) | +| FastInterpolations | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 0.000 ns) | 4.030 μs (IQR 30.000 ns) | 366.447 μs (IQR 4.920 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 0.000 ns) | 4.000 μs (IQR 50.000 ns) | 351.122 μs (IQR 4.475 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 2.500 ns) | 4.150 μs (IQR 50.000 ns) | 412.296 μs (IQR 4.755 μs) | 50.000 ns (IQR 10.000 ns) | 110.000 ns (IQR 0.000 ns) | 4.475 μs (IQR 80.000 ns) | 478.275 μs (IQR 4.135 μs) | + +## Chained ODE-style + +Sequential `for x in tt; A(x); end` over a monotone sequence. (knot pattern = uniform) + +### Akima + +| Library | n=100,m=1000 | n=1000,m=1000 | n=10000,m=1000 | n=100000,m=1000 | +|---|---|---|---|---| +| DataInterpolations | 55.874 μs (IQR 224.000 ns) | 80.234 μs (IQR 400.250 ns) | 116.019 μs (IQR 2.942 μs) | 163.899 μs (IQR 4.407 μs) | +| FastInterpolations | 10.970 μs (IQR 9.250 ns) | 11.040 μs (IQR 1.000 ns) | 11.030 μs (IQR 410.000 ns) | 11.420 μs (IQR 93.250 ns) | + +### CubicSpline + +| Library | n=100,m=1000 | n=1000,m=1000 | n=10000,m=1000 | n=100000,m=1000 | +|---|---|---|---|---| +| DataInterpolations | 59.995 μs (IQR 562.250 ns) | 81.979 μs (IQR 374.500 ns) | 113.249 μs (IQR 3.044 μs) | 178.064 μs (IQR 5.123 μs) | +| BasicInterpolators | 17.450 μs (IQR 357.500 ns) | 45.319 μs (IQR 1.970 μs) | 85.799 μs (IQR 2.695 μs) | 140.149 μs (IQR 2.475 μs) | +| Dierckx (k=3) | 122.163 μs (IQR 72.250 ns) | 410.447 μs (IQR 169.250 ns) | 3.214 ms (IQR 6.145 μs) | 31.322 ms (IQR 130.226 μs) | +| FastInterpolations | 10.150 μs (IQR 10.000 ns) | 10.140 μs (IQR 20.000 ns) | 10.160 μs (IQR 12.250 ns) | 10.950 μs (IQR 99.250 ns) | +| Interpolations (uniform) | 27.260 μs (IQR 490.000 ns) | 27.850 μs (IQR 0.250 ns) | 27.509 μs (IQR 500.000 ns) | 27.720 μs (IQR 420.000 ns) | + +### Linear + +| Library | n=100,m=1000 | n=1000,m=1000 | n=10000,m=1000 | n=100000,m=1000 | +|---|---|---|---|---| +| DataInterpolations | 55.260 μs (IQR 104.750 ns) | 77.310 μs (IQR 362.500 ns) | 109.109 μs (IQR 3.022 μs) | 162.824 μs (IQR 2.605 μs) | +| BasicInterpolators | 17.130 μs (IQR 430.000 ns) | 49.775 μs (IQR 2.400 μs) | 87.274 μs (IQR 4.043 μs) | 135.179 μs (IQR 2.735 μs) | +| Dierckx (k=1) | 86.279 μs (IQR 450.000 ns) | 374.666 μs (IQR 50.000 ns) | 3.185 ms (IQR 19.682 μs) | 31.293 ms (IQR 85.214 μs) | +| FastInterpolations | 3.510 μs (IQR 10.000 ns) | 3.500 μs (IQR 20.000 ns) | 3.550 μs (IQR 12.250 ns) | 3.650 μs (IQR 10.000 ns) | +| Interpolations (gridded) | 23.720 μs (IQR 162.500 ns) | 56.059 μs (IQR 2.413 μs) | 89.024 μs (IQR 3.799 μs) | 139.869 μs (IQR 3.240 μs) | +| Interpolations (uniform) | 23.050 μs (IQR 11.000 ns) | 23.050 μs (IQR 30.000 ns) | 23.059 μs (IQR 10.000 ns) | 23.120 μs (IQR 39.250 ns) | + +### MonotoneCubic + +| Library | n=100,m=1000 | n=1000,m=1000 | n=10000,m=1000 | n=100000,m=1000 | +|---|---|---|---|---| +| DataInterpolations (CubicHermite) | 67.909 μs (IQR 212.750 ns) | 89.499 μs (IQR 411.750 ns) | 121.504 μs (IQR 4.065 μs) | 180.869 μs (IQR 3.533 μs) | +| FastInterpolations (PCHIP) | 11.170 μs (IQR 10.000 ns) | 11.070 μs (IQR 100.000 ns) | 11.020 μs (IQR 45.000 ns) | 11.380 μs (IQR 80.000 ns) | +| PCHIPInterpolation | 25.144 μs (IQR 229.250 ns) | 53.205 μs (IQR 303.250 ns) | 94.259 μs (IQR 1.500 μs) | 150.668 μs (IQR 4.713 μs) | + +### QuadraticSpline + +| Library | n=100,m=1000 | n=1000,m=1000 | n=10000,m=1000 | n=100000,m=1000 | +|---|---|---|---|---| +| DataInterpolations | 137.358 μs (IQR 460.500 ns) | 268.752 μs (IQR 559.250 ns) | 1.322 ms (IQR 6.378 μs) | 15.843 ms (IQR 1.173 ms) | +| Dierckx (k=2) | 104.454 μs (IQR 240.000 ns) | 393.216 μs (IQR 341.000 ns) | 3.187 ms (IQR 5.403 μs) | 31.297 ms (IQR 52.556 μs) | +| FastInterpolations | 3.310 μs (IQR 2.500 ns) | 3.310 μs (IQR 10.000 ns) | 3.450 μs (IQR 20.000 ns) | 4.650 μs (IQR 40.000 ns) | + +## FastInterpolations.jl advertised benchmark + +Numbers below come from `bench/fast_interpolations_bench.jl`, a port of +FastInterpolations.jl's own `benchmark/interpolation_benchmark.jl` +(ProjectTorreyPines/FastInterpolations.jl, upstream commit `616b106b`). It runs +the matrix-of-interpolants workload they advertise on their README: +`mpert × mpert` independent 1D interpolants over a shared uniform +`range(0.0, 1.0; length = npsi)` grid, evaluated at `n_eval` query points +clustered near psi=0 (cubic spacing — mimics ODE solver behavior near +singular surfaces). Default size: `npsi = 64`, `mpert = 100`, `n_eval = 1000` +(10⁴ interpolants × 10³ queries = 10⁷ total scalar evaluations). + +### Cubic spline, `--default` (npsi=64, mpert=100, n_eval=1000) + +| Package | Init (ms) | Eval (ms) | Total (ms) | Speedup vs DI scalar | +|---|---|---|---|---| +| FastInterpolations.jl (Series+scalar) | 16.508 | 5.751 | 22.259 | 73.28× | +| FastInterpolations.jl (Series+vector) | 16.508 | 22.521 | 39.029 | 41.80× | +| FastInterpolations.jl (vector) | 31.631 | 95.125 | 126.757 | 12.87× | +| DataInterpolations.jl (vector) | 93.259 | 103.438 | 196.697 | 8.29× | +| FastInterpolations.jl (scalar) | 31.631 | 171.245 | 202.876 | 8.04× | +| Interpolations.jl (broadcast) | 139.363 | 212.500 | 351.863 | 4.64× | +| Interpolations.jl (scalar) | 139.363 | 427.833 | 567.196 | 2.88× | +| Dierckx.jl (vector) | 127.741 | 566.160 | 693.901 | 2.35× | +| DataInterpolations.jl (scalar) | 93.259 | 1537.974 | 1631.233 | 1.00× | +| Dierckx.jl (scalar) | 127.741 | 1590.398 | 1718.139 | 0.95× | + +### Linear, `--default` (npsi=64, mpert=100, n_eval=1000) + +| Package | Init (ms) | Eval (ms) | Total (ms) | Speedup vs DI scalar | +|---|---|---|---|---| +| FastInterpolations.jl (Series+scalar) | 5.649 | 3.173 | 8.822 | 102.83× | +| FastInterpolations.jl (Series+vector) | 5.649 | 21.019 | 26.668 | 34.02× | +| FastInterpolations.jl (vector) | 10.481 | 49.861 | 60.343 | 15.03× | +| DataInterpolations.jl (vector) | 15.629 | 64.376 | 80.005 | 11.34× | +| Interpolations.jl (broadcast) | 9.634 | 70.787 | 80.421 | 11.28× | +| FastInterpolations.jl (scalar) | 10.481 | 90.099 | 100.580 | 9.02× | +| Interpolations.jl (scalar) | 9.634 | 211.753 | 221.387 | 4.10× | +| Dierckx.jl (vector) | 77.443 | 244.645 | 322.088 | 2.82× | +| DataInterpolations.jl (scalar) | 15.629 | 891.534 | 907.163 | 1.00× | +| Dierckx.jl (scalar) | 77.443 |1236.548 | 1313.991 | 0.69× | + +## Findings + +### Where FastInterpolations.jl beats DI + + 1. **Series interpolant + matrix-of-interpolants workload.** FastInterpolations' + `Series` API computes the cell anchor (index, alpha, neighbouring grid + points) once per query point and reuses it across all 10⁴ coefficient + series. DI has no equivalent — each interpolant runs an independent + search per query. The 70-100× speedup at `--default` size is real and + unfixable without an analogous "shared anchor / Series" type in DI. This + is a separate design proposal; out of scope for this PR. + + 2. **Per-query scalar evaluation on uniform-`Vector` grids.** In the + cross-library bench (we `_vec(t)` before passing to every library to + compare like-for-like), DI's per-query latency is ~100-200 ns, + FastInterpolations' is ~50 ns. The gap is in the scalar kernel call + overhead and the dispatch through `Auto(t_props)`. + FastInterpolations' `_search_direct(::_CachedRange, q)` is a single + `unsafe_trunc(Int, muladd(q - lo, inv_h, 1))` — fewer instructions than + FFF's `Auto` → `UniformStep` path which still goes through method + dispatch. Closing this gap on DI would require either: + - resolving `Auto` to a concrete strategy at *construction* time (so + `_interpolate` doesn't dispatch through `Auto` at all), or + - making FFF `Auto` simpler to specialize at the call site. + + 3. **Batched / chained evaluation on uniform `Vector` grids.** DI's batched + loop is ~10-30 μs for m=1000 queries; FastInterpolations gets to + ~3-4 μs because their vectorized batch loop avoids per-call function- + pointer dispatch entirely. The remaining gap is partly the kernel + overhead from (2) and partly that FastInterpolations always knows the + search policy at compile time (it's a type parameter on its `Searcher`). + +### Where DI matches FastInterpolations.jl + + - **Non-uniform construction** (Akima, CubicSpline, QuadraticSpline) at + large n. DI and FastInterpolations are within ~30% on construction time + once n ≥ 10k. The new `Auto(A.t_props)` + `O(n)` `spline_coefficients!` + fix on this branch closes the gap from where it was before. + - **Sorted-batch evaluation on non-uniform grids** at large m. FFF's + `BracketGallop` / `LinearScan` strategies + the batched-Auto + specialization keep DI competitive once the batch is large enough to + amortize the type-instability overhead from `_resolve_search_policy`. + +### Where DI loses but the fix is out-of-scope for this PR + + - Per-query latency on `Vector{Float64}` grids: DI's per-call path goes + `interp(t) → _interpolate(A, t) → _interpolate(A, t, A.iguesser) → + get_idx → Auto(A.t_props) → searchsortedlast(Auto, v, q, hint)`. Each + indirection is ~5-10 ns; FastInterpolations' direct dispatch path is + one or two indirections. Reducing this means either inlining `get_idx` + into `_interpolate` per type, or storing a *resolved* concrete search + strategy at construction time rather than a generic `Auto(props)` — + both substantial restructurings. + - No `Series`-style anchor reuse: a different type system. Worth a + separate proposal; the design space is large. + +## Reproducer + +Bench script: `bench/cross_library_comparison.jl` + +FastInterpolations-style bench: `bench/fast_interpolations_bench.jl` +(port of ProjectTorreyPines/FastInterpolations.jl's +`benchmark/interpolation_benchmark.jl`, commit `616b106b`). + +Bench Project.toml: `bench/Project.toml` (devs DI from `..`). + +To rerun: +```bash +cd /home/crackauc/sandbox/tmp_20260515_091703_4914/DataInterpolations.jl +git checkout fff-v2-cleanup-quadraticspline +julia +1.11 --project=bench bench/cross_library_comparison.jl +julia +1.11 --project=bench bench/fast_interpolations_bench.jl --cubic --default +julia +1.11 --project=bench bench/fast_interpolations_bench.jl --linear --default +``` + diff --git a/bench/di_perq_bench.jl b/bench/di_perq_bench.jl new file mode 100644 index 00000000..fe44b8b6 --- /dev/null +++ b/bench/di_perq_bench.jl @@ -0,0 +1,125 @@ +#= +DI per-query micro-bench over the knot/query regimes where the search +strategy dominates: + + Workload | DI before | DI after | FastInterp + --------------------------------------- | --------- | -------- | ---------- + Range knots, sorted queries | 77 | ? | 3.5 + Uniform Vector knots, sorted queries | 38 | ? | 27 + Non-uniform Vector knots, sorted | 78 | ? | n/a + Range knots, shuffled random queries | 87 | ? | 3.5 + Range knots, monotone ODE-chain | ? | ? | ? + +n = 10_000, m = 1000, Float64. + +Usage: + julia +1.11 --project=bench bench/di_perq_bench.jl +=# + +import Pkg +const BENCH_DIR = @__DIR__ +Pkg.activate(BENCH_DIR) + +using BenchmarkTools +using Random +using Statistics + +using DataInterpolations +using FastInterpolations + +const RNG = MersenneTwister(0x00C0FFEE) + +const N = 10_000 +const M = 1_000 + +# Knots +const range_knots = range(0.0, 1.0; length = N) +const uniform_vec_knots = collect(range_knots) +const nonuniform_vec_knots = sort!(rand(MersenneTwister(0x0BADBEEF), N)) +const u_range = sin.(2π .* range_knots) .+ 0.3 .* cos.(7 .* range_knots) +const u_uniform = collect(u_range) +const u_nonuniform = sin.(2π .* nonuniform_vec_knots) .+ 0.3 .* cos.(7 .* nonuniform_vec_knots) + +# Queries — keep in [knot_min, knot_max] so all libraries' interpolations +# stay in-domain. Clamp into the non-uniform vector range below. +function clamp_to(x, t) + a, b = first(t), last(t) + return @. clamp(x, a + (b - a) * 1.0e-6, b - (b - a) * 1.0e-6) +end + +const queries_sorted_raw = sort!(rand(MersenneTwister(0x00C0FFEE + M), M)) +const queries_random_raw = rand(MersenneTwister(0x00C0FFEE + M + 1), M) +const queries_chained_raw = let steps = rand(MersenneTwister(0x00C0FFEE + M + 2), M) + tt = cumsum(steps) + tt = (tt .- first(tt)) ./ (last(tt) - first(tt)) .* 0.999 .+ 0.0005 + tt +end + +# Library builders +build_di_range(u, t) = DataInterpolations.LinearInterpolation(u, t) +build_di_uniform_vec(u, t) = DataInterpolations.LinearInterpolation(u, t) +build_di_nonuniform(u, t) = DataInterpolations.LinearInterpolation(u, t) +build_fi(u, t) = FastInterpolations.linear_interp(t, u) + +# Per-query bench: tight loop with no batched call. Measures latency of A(x). +function per_query_loop(A, queries) + s = 0.0 + @inbounds for k in eachindex(queries) + s += A(queries[k]) + end + return s +end + +function bench_pq(A, queries; samples = 500, seconds = 5.0) + # Warm-up call before benchmark — avoids first-call effects from carrying + # over Guesser state across benchmarks for different `A`s. + per_query_loop(A, queries) + b = @benchmarkable per_query_loop($A, $queries) evals = 1 samples = samples seconds = seconds + return run(b) +end + +fmt_pq(t) = string(round(median(t).time / M; digits = 2), " ns/q") + +println("== n=$N, m=$M, single-query latency (ns per query) ==\n") + +println("Range knots:") +A_di_r = build_di_range(u_range, range_knots) +A_fi_r = build_fi(u_range, range_knots) +queries_sorted_r = clamp_to(queries_sorted_raw, range_knots) +queries_random_r = clamp_to(queries_random_raw, range_knots) +queries_chained_r = clamp_to(queries_chained_raw, range_knots) +println(" DataInterp (Auto) | sorted : ", fmt_pq(bench_pq(A_di_r, queries_sorted_r))) +println(" DataInterp (Auto) | random : ", fmt_pq(bench_pq(A_di_r, queries_random_r))) +println(" DataInterp (Auto) | chained : ", fmt_pq(bench_pq(A_di_r, queries_chained_r))) +println(" FastInterp | sorted : ", fmt_pq(bench_pq(A_fi_r, queries_sorted_r))) +println(" FastInterp | random : ", fmt_pq(bench_pq(A_fi_r, queries_random_r))) +println(" FastInterp | chained : ", fmt_pq(bench_pq(A_fi_r, queries_chained_r))) + +println("\nUniform Vector knots (collect(range)):") +A_di_uv = build_di_uniform_vec(u_uniform, uniform_vec_knots) +A_fi_uv = build_fi(u_uniform, uniform_vec_knots) +queries_sorted_u = clamp_to(queries_sorted_raw, uniform_vec_knots) +queries_random_u = clamp_to(queries_random_raw, uniform_vec_knots) +queries_chained_u = clamp_to(queries_chained_raw, uniform_vec_knots) +println(" DataInterp (Auto) | sorted : ", fmt_pq(bench_pq(A_di_uv, queries_sorted_u))) +println(" DataInterp (Auto) | random : ", fmt_pq(bench_pq(A_di_uv, queries_random_u))) +println(" DataInterp (Auto) | chained : ", fmt_pq(bench_pq(A_di_uv, queries_chained_u))) +println(" FastInterp | sorted : ", fmt_pq(bench_pq(A_fi_uv, queries_sorted_u))) +println(" FastInterp | random : ", fmt_pq(bench_pq(A_fi_uv, queries_random_u))) + +println("\nNon-uniform Vector knots:") +A_di_nv = build_di_nonuniform(u_nonuniform, nonuniform_vec_knots) +A_fi_nv = build_fi(u_nonuniform, nonuniform_vec_knots) +queries_sorted_n = clamp_to(queries_sorted_raw, nonuniform_vec_knots) +queries_random_n = clamp_to(queries_random_raw, nonuniform_vec_knots) +queries_chained_n = clamp_to(queries_chained_raw, nonuniform_vec_knots) +println(" DataInterp (Auto) | sorted : ", fmt_pq(bench_pq(A_di_nv, queries_sorted_n))) +println(" DataInterp (Auto) | random : ", fmt_pq(bench_pq(A_di_nv, queries_random_n))) +println(" DataInterp (Auto) | chained : ", fmt_pq(bench_pq(A_di_nv, queries_chained_n))) +println(" FastInterp | sorted : ", fmt_pq(bench_pq(A_fi_nv, queries_sorted_n))) +println(" FastInterp | random : ", fmt_pq(bench_pq(A_fi_nv, queries_random_n))) + +println("\nReporting Auto kind selection:") +println(" Range knots : Auto kind = ", A_di_r.strategy.kind) +println(" Uniform Vector knots : Auto kind = ", A_di_uv.strategy.kind) +println(" Non-uniform Vector knots : Auto kind = ", A_di_nv.strategy.kind) diff --git a/bench/fast_interpolations_bench.jl b/bench/fast_interpolations_bench.jl new file mode 100644 index 00000000..d05307fe --- /dev/null +++ b/bench/fast_interpolations_bench.jl @@ -0,0 +1,383 @@ +#= +FastInterpolations.jl benchmark, ported from their `benchmark/interpolation_benchmark.jl` +(`ProjectTorreyPines/FastInterpolations.jl`, commit 616b106b at the time of import). + +This is the comparison they advertise on their README: + - Compares Interpolations.jl, DataInterpolations.jl, FastInterpolations.jl (+ their + Series interpolant), and Dierckx.jl. + - Workload: `mpert × mpert` independent 1D interpolants over the same uniform + `range(0.0, 1.0; length = npsi)` grid, evaluated at `n_eval` query points clustered + near psi = 0 (cubic spacing). Mimics fusion-physics matrix-of-interpolants workloads. + - Default config (matching their `--default`): `npsi = 64`, `mpert = 100` → 10_000 + interpolants per package, `n_eval = 1000` query points → 10⁷ total scalar evaluations. + +Usage: + julia +1.11 --project=bench bench/fast_interpolations_bench.jl + julia +1.11 --project=bench bench/fast_interpolations_bench.jl --linear --small + julia +1.11 --project=bench bench/fast_interpolations_bench.jl --cubic --default + +This emits stdout-only output (no markdown report). The numbers feed the comparison +table in `bench/cross_library_comparison.md`. +=# + +import Pkg +const BENCH_DIR = @__DIR__ +Pkg.activate(BENCH_DIR) + +using BenchmarkTools +using Interpolations +using DataInterpolations +using FastInterpolations +using Dierckx +using Random +using Printf +using Statistics + +const SIZE_PRESETS = Dict( + :tiny => (16, 2, 5), + :small => (64, 5, 100), + :default => (64, 100, 1000), + :large => (64, 200, 4000), +) + +const METHOD_OPTIONS = [:constant, :linear, :quadratic, :cubic] + +function parse_args(args) + size_key = :default + method_key = :cubic + for arg in args + if startswith(arg, "--") + key = Symbol(arg[3:end]) + if haskey(SIZE_PRESETS, key) + size_key = key + elseif key in METHOD_OPTIONS + method_key = key + end + end + end + return size_key, method_key +end + +const (SIZE_KEY, METHOD_KEY) = parse_args(ARGS) +const (NPSI, MPERT, N_EVAL_POINTS) = SIZE_PRESETS[SIZE_KEY] + +function generate_test_data(npsi::Int, mpert::Int; seed::Int = 42) + Random.seed!(seed) + psi_grid = range(0.0, 1.0, length = npsi) + data = rand(npsi, mpert, mpert) + return psi_grid, data +end + +function generate_evaluation_points(n_points::Int) + return collect(range(0.0, 1.0, length = n_points)) .^ 3 +end + +# ---- Interpolations.jl ----------------------------------------------------- +function init_interpolations(::Val{:linear}, psi_grid::AbstractRange, data::Array{Float64, 3}) + _, mpert, _ = size(data) + first_itp = Interpolations.linear_interpolation(psi_grid, data[:, 1, 1]) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = Interpolations.linear_interpolation(psi_grid, data[:, m1, m2]) + end + return interps +end + +function init_interpolations(::Val{:cubic}, psi_grid::AbstractRange, data::Array{Float64, 3}) + _, mpert, _ = size(data) + first_itp = Interpolations.cubic_spline_interpolation(psi_grid, data[:, 1, 1]) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = Interpolations.cubic_spline_interpolation(psi_grid, data[:, m1, m2]) + end + return interps +end + +function init_interpolations(::Val{:constant}, psi_grid::AbstractRange, data::Array{Float64, 3}) + _, mpert, _ = size(data) + first_itp = Interpolations.constant_interpolation(psi_grid, data[:, 1, 1]) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = Interpolations.constant_interpolation(psi_grid, data[:, m1, m2]) + end + return interps +end + +function init_interpolations(::Val{:quadratic}, psi_grid::AbstractRange, data::Array{Float64, 3}) + _, mpert, _ = size(data) + knots = (psi_grid,) + first_itp = Interpolations.extrapolate( + Interpolations.scale( + Interpolations.interpolate( + data[:, 1, 1], + Interpolations.BSpline(Interpolations.Quadratic(Interpolations.Reflect(Interpolations.OnCell()))), + ), + knots, + ), + Interpolations.Throw(), + ) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = Interpolations.extrapolate( + Interpolations.scale( + Interpolations.interpolate( + data[:, m1, m2], + Interpolations.BSpline(Interpolations.Quadratic(Interpolations.Reflect(Interpolations.OnCell()))), + ), + knots, + ), + Interpolations.Throw(), + ) + end + return interps +end + +# Scalar-loop and broadcast evaluators +function eval_interpolations!(A, interps, psi) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + A[m1, m2] = interps[m1, m2](psi) + end + return A +end + +run_interpolations_loop!(A, interps, psis) = ( + for psi in psis + eval_interpolations!(A, interps, psi) + end; A +) +function run_interpolations_broadcast!(A_all, interps, psis) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + @. A_all[:, m1, m2] = interps[m1, m2](psis) + end + return A_all +end + +# ---- FastInterpolations.jl ----------------------------------------------- +const FI_INIT_TABLE = Dict( + :linear => FastInterpolations.linear_interp, + :cubic => FastInterpolations.cubic_interp, + :quadratic => FastInterpolations.quadratic_interp, + :constant => FastInterpolations.constant_interp, +) + +function init_fi(method::Symbol, psi_grid, data::Array{Float64, 3}) + f = FI_INIT_TABLE[method] + _, mpert, _ = size(data) + first_itp = f(psi_grid, data[:, 1, 1]) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = f(psi_grid, data[:, m1, m2]) + end + return interps +end + +function eval_fi!(A, interps, psi) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + A[m1, m2] = interps[m1, m2](psi) + end + return A +end + +run_fi_loop!(A, interps, psis) = ( + for psi in psis + eval_fi!(A, interps, psi) + end; A +) +function run_fi_vector!(A_all, interps, psis) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + @views interps[m1, m2](A_all[:, m1, m2], psis) + end + return A_all +end + +# Series API (one anchor per query, shared across series) +function init_fi_series(method::Symbol, psi_grid, data::Array{Float64, 3}) + f = FI_INIT_TABLE[method] + _, mpert, _ = size(data) + ys = Series([data[:, m1, m2] for m2 in 1:mpert for m1 in 1:mpert]) + return f(psi_grid, ys) +end + +run_fi_series_loop!(A, sitp, psis) = ( + for psi in psis + sitp(A, psi) + end; A +) +run_fi_series_vector!(A_all, sitp, psis) = (sitp(A_all, psis); A_all) + +# ---- DataInterpolations.jl ----------------------------------------------- +const DI_INIT_TABLE = Dict( + :linear => DataInterpolations.LinearInterpolation, + :cubic => DataInterpolations.CubicSpline, + :quadratic => DataInterpolations.QuadraticInterpolation, + :constant => DataInterpolations.ConstantInterpolation, +) + +function init_di(method::Symbol, psi_grid, data::Array{Float64, 3}) + f = DI_INIT_TABLE[method] + _, mpert, _ = size(data) + t = collect(psi_grid) + first_itp = f(data[:, 1, 1], t) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = f(data[:, m1, m2], t) + end + return interps +end + +eval_di!(A, interps, psi) = ( + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + A[m1, m2] = interps[m1, m2](psi) + end; A +) +run_di_loop!(A, interps, psis) = ( + for psi in psis + eval_di!(A, interps, psi) + end; A +) +function run_di_vector!(A_all, interps, psis) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + @views interps[m1, m2](A_all[:, m1, m2], psis) + end + return A_all +end + +# ---- Dierckx.jl --------------------------------------------------------- +function init_dierckx(method::Symbol, psi_grid, data::Array{Float64, 3}) + method == :constant && return nothing # Dierckx has no k=0 + k = Dict(:linear => 1, :quadratic => 2, :cubic => 3)[method] + _, mpert, _ = size(data) + t = collect(psi_grid) + first_itp = Dierckx.Spline1D(t, data[:, 1, 1]; k = k, s = 0.0) + interps = Matrix{typeof(first_itp)}(undef, mpert, mpert) + interps[1, 1] = first_itp + for m1 in 1:mpert, m2 in 1:mpert + (m1 == 1 && m2 == 1) && continue + interps[m1, m2] = Dierckx.Spline1D(t, data[:, m1, m2]; k = k, s = 0.0) + end + return interps +end + +eval_dierckx!(A, interps, psi) = ( + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + A[m1, m2] = interps[m1, m2](psi) + end; A +) +run_dierckx_loop!(A, interps, psis) = ( + for psi in psis + eval_dierckx!(A, interps, psi) + end; A +) +function run_dierckx_vector!(A_all, interps, psis) + @inbounds for m2 in axes(interps, 2), m1 in axes(interps, 1) + @views A_all[:, m1, m2] .= interps[m1, m2](psis) + end + return A_all +end + +# ---- Driver ---------------------------------------------------------------- +function bench_one(label, f; samples = 5, evals = 2, seconds = 120) + f() # warm-up + GC.gc() + b = @benchmark $f() samples = samples evals = evals seconds = seconds + return median(b).time / 1.0e6 # ms +end + +function run_fi_bench() + method = METHOD_KEY + psi_grid, data = generate_test_data(NPSI, MPERT) + psis = generate_evaluation_points(N_EVAL_POINTS) + n_interps = MPERT * MPERT + total_evals = N_EVAL_POINTS * n_interps + + println("="^80) + println("FastInterpolations.jl-style benchmark · method=$(method) · npsi=$(NPSI), mpert=$(MPERT), n_eval=$(N_EVAL_POINTS)") + println("Reproducing `ProjectTorreyPines/FastInterpolations.jl/benchmark/interpolation_benchmark.jl`") + println("="^80) + println() + + A = Matrix{Float64}(undef, MPERT, MPERT) + A_all = Array{Float64, 3}(undef, N_EVAL_POINTS, MPERT, MPERT) + A_series = Vector{Float64}(undef, n_interps) + A_series_all = [Vector{Float64}(undef, N_EVAL_POINTS) for _ in 1:n_interps] + + results = Dict{String, Tuple{Float64, Float64}}() # name => (init_ms, eval_ms) + + function report(name, init_ms, eval_ms) + results[name] = (init_ms, eval_ms) + total = init_ms + eval_ms + evs = total_evals / (eval_ms / 1.0e3) + return @printf(" %-44s init %8.3f ms eval %8.3f ms total %8.3f ms evals/s %.2e\n", name, init_ms, eval_ms, total, evs) + end + + # Interpolations.jl + init_ms = bench_one("ITP init", () -> init_interpolations(Val(method), psi_grid, data)) + itp_interps = init_interpolations(Val(method), psi_grid, data) + eval_ms = bench_one("ITP scalar", () -> run_interpolations_loop!(A, itp_interps, psis)) + report("Interpolations.jl (scalar)", init_ms, eval_ms) + eval_ms = bench_one("ITP broadcast", () -> run_interpolations_broadcast!(A_all, itp_interps, psis)) + report("Interpolations.jl (broadcast)", init_ms, eval_ms) + + # FastInterpolations.jl + init_ms = bench_one("FI init", () -> init_fi(method, psi_grid, data)) + fi_interps = init_fi(method, psi_grid, data) + eval_ms = bench_one("FI scalar", () -> run_fi_loop!(A, fi_interps, psis)) + report("FastInterpolations.jl (scalar)", init_ms, eval_ms) + eval_ms = bench_one("FI vector", () -> run_fi_vector!(A_all, fi_interps, psis)) + report("FastInterpolations.jl (vector)", init_ms, eval_ms) + + # FastInterpolations Series + init_ms = bench_one("FI Series init", () -> init_fi_series(method, psi_grid, data)) + sitp = init_fi_series(method, psi_grid, data) + eval_ms = bench_one("FI Series scalar", () -> run_fi_series_loop!(A_series, sitp, psis)) + report("FastInterpolations.jl (Series+scalar)", init_ms, eval_ms) + eval_ms = bench_one("FI Series vector", () -> run_fi_series_vector!(A_series_all, sitp, psis)) + report("FastInterpolations.jl (Series+vector)", init_ms, eval_ms) + + # DataInterpolations.jl + init_ms = bench_one("DI init", () -> init_di(method, psi_grid, data)) + di_interps = init_di(method, psi_grid, data) + eval_ms = bench_one("DI scalar", () -> run_di_loop!(A, di_interps, psis)) + report("DataInterpolations.jl (scalar)", init_ms, eval_ms) + eval_ms = bench_one("DI vector", () -> run_di_vector!(A_all, di_interps, psis)) + report("DataInterpolations.jl (vector)", init_ms, eval_ms) + + # Dierckx (if applicable) + if method != :constant + init_ms = bench_one("Dierckx init", () -> init_dierckx(method, psi_grid, data)) + drx_interps = init_dierckx(method, psi_grid, data) + eval_ms = bench_one("Dierckx scalar", () -> run_dierckx_loop!(A, drx_interps, psis)) + report("Dierckx.jl (scalar)", init_ms, eval_ms) + eval_ms = bench_one("Dierckx vector", () -> run_dierckx_vector!(A_all, drx_interps, psis)) + report("Dierckx.jl (vector)", init_ms, eval_ms) + end + + # Summary + println() + println("="^80) + println("Summary: speedup vs DataInterpolations.jl (scalar)") + println("="^80) + baseline_total = sum(results["DataInterpolations.jl (scalar)"]) + @printf(" %-44s %10s %10s %10s %s\n", "Package", "Init (ms)", "Eval (ms)", "Total (ms)", "Speedup") + for (name, (init_ms, eval_ms)) in sort(collect(results); by = x -> sum(x[2])) + total = init_ms + eval_ms + @printf(" %-44s %10.3f %10.3f %10.3f %.2fx\n", name, init_ms, eval_ms, total, baseline_total / total) + end + println() + + return results +end + +run_fi_bench() diff --git a/docs/src/manual.md b/docs/src/manual.md index f11c11d4..c242b8d8 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -19,7 +19,6 @@ QuinticHermiteSpline # Utility Functions ```@docs -DataInterpolations.looks_linear DataInterpolations.output_dim DataInterpolations.output_size ``` diff --git a/ext/DataInterpolationsMooncakeExt.jl b/ext/DataInterpolationsMooncakeExt.jl index 6d716278..db5fb466 100644 --- a/ext/DataInterpolationsMooncakeExt.jl +++ b/ext/DataInterpolationsMooncakeExt.jl @@ -1,9 +1,9 @@ module DataInterpolationsMooncakeExt -using DataInterpolations, Mooncake, ChainRulesCore +using DataInterpolations, Mooncake, ChainRulesCore, FindFirstFunctions using DataInterpolations: _interpolate, munge_data, AbstractInterpolation, LinearInterpolation, QuadraticInterpolation -import Mooncake: @from_chainrules, MinimalCtx +import Mooncake: @from_chainrules, @zero_adjoint, MinimalCtx, DefaultCtx # When the ChainRules pullback for _interpolate returns a Tangent{AbstractInterpolation}, # this tells Mooncake how to accumulate the u-component into the interpolation's fdata. @@ -19,6 +19,28 @@ function Mooncake.increment_and_get_rdata!( return Mooncake.NoRData() end +# `SearchProperties{T}` and `Auto{T}` now carry `first_val::T` / `inv_step::T` +# (precomputed scalar fields for the props-aware UniformStep kernel). These +# fields show up in Mooncake's rdata for the interpolation cache because +# they are bitstype `Float64` (or `Float32`), but they are **not** +# differentiable — they are constants attached to the cache at construction. +# Mooncake doesn't know that from the type alone, so it routes the +# `Tangent{<:AbstractInterpolation}` (which has only `u` populated) through +# `increment_and_get_rdata!` with a populated `RData{NamedTuple{...}}` rather +# than `NoRData`. Tell Mooncake the right thing: accumulate `u`, then return +# the rdata unchanged so its t_props / strategy slots accumulate zero. +function Mooncake.increment_and_get_rdata!( + f::Mooncake.FData{<:NamedTuple}, + r::Mooncake.RData{<:NamedTuple}, + t::ChainRulesCore.Tangent{<:AbstractInterpolation} + ) + u_tang = ChainRulesCore.unthunk(t.u) + if !(u_tang isa ChainRulesCore.AbstractZero) + f.data.u .+= u_tang + end + return r +end + # Constructor rules: stop Mooncake recursing into LinearParameterCache and other # internal structs. The 6-arg and 7-arg forms are the internal constructors that # have ChainRules rrules defined in DataInterpolationsChainRulesCoreExt. @@ -35,4 +57,22 @@ end @from_chainrules MinimalCtx Tuple{typeof(munge_data), AbstractMatrix, AbstractVector} true @from_chainrules MinimalCtx Tuple{typeof(munge_data), AbstractArray, Any} true +# `get_idx` dispatches the cached `StrategyKind` into FindFirstFunctions: +# the bare enum for most kinds, and a reconstructed `Auto` (carrying the +# props' `first_val::T` / `inv_step::T`) for the uniform closed-form path. +# Both return integer indices — positional bookkeeping, not +# differentiable. Declare them zero-adjoint so Mooncake doesn't recurse +# into FFF's strategy kernels (which contain `llvmcall` SIMD intrinsics it +# cannot differentiate through). DI's `_interpolate` always feeds the +# search result into integer indexing, so the gradient flow is already cut +# at the index boundary — zero-adjoint here is correct. +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_last), FindFirstFunctions.StrategyKind, AbstractVector, Any, Integer} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_first), FindFirstFunctions.StrategyKind, AbstractVector, Any, Integer} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_last), FindFirstFunctions.StrategyKind, AbstractVector, Any} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_first), FindFirstFunctions.StrategyKind, AbstractVector, Any} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_last), FindFirstFunctions.Auto, AbstractVector, Any, Integer} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_first), FindFirstFunctions.Auto, AbstractVector, Any, Integer} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_last), FindFirstFunctions.Auto, AbstractVector, Any} +@zero_adjoint DefaultCtx Tuple{typeof(FindFirstFunctions.searchsorted_first), FindFirstFunctions.Auto, AbstractVector, Any} + end diff --git a/src/integral_inverses.jl b/src/integral_inverses.jl index 2e375aae..e63725b8 100644 --- a/src/integral_inverses.jl +++ b/src/integral_inverses.jl @@ -41,11 +41,18 @@ struct LinearInterpolationIntInv{uType, tType, itpType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind itp::itpType - function LinearInterpolationIntInv(u, t, A, extrapolation_left, extrapolation_right) - t_props = FindFirstFunctions.SearchProperties(t) - return new{typeof(u), typeof(t), typeof(A), eltype(u), typeof(t_props)}( - u, t, extrapolation_left, extrapolation_right, Guesser(t), t_props, A + function LinearInterpolationIntInv( + u, t, A, extrapolation_left, extrapolation_right, t_props, + ) + kind = _resolve_strategy_kind(t, t_props) + return new{ + typeof(u), typeof(t), typeof(A), eltype(u), + typeof(t_props), + }( + u, t, extrapolation_left, extrapolation_right, + Guesser(t), t_props, kind, A ) end end @@ -63,12 +70,14 @@ end function invert_integral( A::LinearInterpolation{<:AbstractVector{<:Number}}; extrapolation_left::ExtrapolationType.T = A.extrapolation_left, - extrapolation_right::ExtrapolationType.T = A.extrapolation_right + extrapolation_right::ExtrapolationType.T = A.extrapolation_right, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) !invertible_integral(A) && throw(IntegralNotInvertibleError()) - + t_I = get_I(A) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t_I)) return LinearInterpolationIntInv( - A.t, get_I(A), A, extrapolation_left, extrapolation_right + A.t, t_I, A, extrapolation_left, extrapolation_right, t_props ) end @@ -103,13 +112,18 @@ struct ConstantInterpolationIntInv{uType, tType, itpType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind itp::itpType function ConstantInterpolationIntInv( - u, t, A, extrapolation_left, extrapolation_right + u, t, A, extrapolation_left, extrapolation_right, t_props, ) - t_props = FindFirstFunctions.SearchProperties(t) - return new{typeof(u), typeof(t), typeof(A), eltype(u), typeof(t_props)}( - u, t, extrapolation_left, extrapolation_right, Guesser(t), t_props, A + kind = _resolve_strategy_kind(t, t_props) + return new{ + typeof(u), typeof(t), typeof(A), eltype(u), + typeof(t_props), + }( + u, t, extrapolation_left, extrapolation_right, + Guesser(t), t_props, kind, A ) end end @@ -121,11 +135,14 @@ end function invert_integral( A::ConstantInterpolation{<:AbstractVector{<:Number}}; extrapolation_left::ExtrapolationType.T = A.extrapolation_left, - extrapolation_right::ExtrapolationType.T = A.extrapolation_right + extrapolation_right::ExtrapolationType.T = A.extrapolation_right, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) !invertible_integral(A) && throw(IntegralNotInvertibleError()) + t_I = get_I(A) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t_I)) return ConstantInterpolationIntInv( - A.t, get_I(A), A, extrapolation_left, extrapolation_right + A.t, t_I, A, extrapolation_left, extrapolation_right, t_props ) end diff --git a/src/interpolation_caches.jl b/src/interpolation_caches.jl index 0d12c3a6..ef26aaf7 100644 --- a/src/interpolation_caches.jl +++ b/src/interpolation_caches.jl @@ -22,12 +22,13 @@ Extrapolation extends the last linear polynomial on each side. the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behavior for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ -struct LinearInterpolation{uType, tType, IType, pType, T, propsType} <: +struct LinearInterpolation{ + uType, tType, IType, pType, T, propsType, + } <: AbstractInterpolation{T} u::uType t::tType @@ -37,20 +38,22 @@ struct LinearInterpolation{uType, tType, IType, pType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + # Resolved at construction; `KIND_UNIFORM_STEP` enables the closed-form + # fast path in `_interpolate` via a runtime branch (not a type tag), so + # the constructor returns a single concrete type and stays inferred. + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool - function LinearInterpolation( + @inline function LinearInterpolation( u, t, I, p, extrapolation_left, extrapolation_right, - cache_parameters, assume_linear_t + cache_parameters, t_props, ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(p.slope), eltype(u), typeof(t_props), }( u, t, I, p, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters, ) end end @@ -58,22 +61,25 @@ end function LinearInterpolation( u, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) p = LinearParameterCache(u, t, cache_parameters) A = LinearInterpolation( u, t, nothing, p, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return LinearInterpolation( u, t, I, p, extrapolation_left, extrapolation_right, - cache_parameters, assume_linear_t + cache_parameters, t_props ) end @@ -101,10 +107,9 @@ Extrapolation extends the last quadratic polynomial on each side. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ struct QuadraticInterpolation{uType, tType, IType, pType, T, propsType} <: AbstractInterpolation{T} @@ -117,22 +122,21 @@ struct QuadraticInterpolation{uType, tType, IType, pType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function QuadraticInterpolation( u, t, I, p, mode, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) mode ∈ (:Forward, :Backward) || error("mode should be :Forward or :Backward for QuadraticInterpolation") - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(p.α), eltype(u), typeof(t_props), }( u, t, I, p, mode, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters ) end end @@ -140,23 +144,25 @@ end function QuadraticInterpolation( u, t, mode; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) - linear_lookup = seems_linear(assume_linear_t, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) p = QuadraticParameterCache(u, t, cache_parameters, mode) A = QuadraticInterpolation( u, t, nothing, p, mode, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return QuadraticInterpolation( u, t, I, p, mode, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end @@ -185,6 +191,10 @@ It is the method of interpolation using Lagrange polynomials of (k-1)th order pa the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ struct LagrangeInterpolation{uType, tType, T, bcacheType, propsType} <: AbstractInterpolation{T} @@ -197,12 +207,16 @@ struct LagrangeInterpolation{uType, tType, T, bcacheType, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType - function LagrangeInterpolation(u, t, n, extrapolation_left, extrapolation_right) + kind::FindFirstFunctions.StrategyKind + function LagrangeInterpolation(u, t, n, extrapolation_left, extrapolation_right, t_props) bcache = zeros(eltype(u[1]), n + 1) idxs = zeros(Int, n + 1) fill!(bcache, NaN) - t_props = FindFirstFunctions.SearchProperties(t) - return new{typeof(u), typeof(t), eltype(u), typeof(bcache), typeof(t_props)}( + kind = _resolve_strategy_kind(t, t_props) + return new{ + typeof(u), typeof(t), eltype(u), typeof(bcache), + typeof(t_props), + }( u, t, n, @@ -211,7 +225,8 @@ struct LagrangeInterpolation{uType, tType, T, bcacheType, propsType} <: extrapolation_left, extrapolation_right, Guesser(t), - t_props + t_props, + kind ) end end @@ -220,7 +235,8 @@ function LagrangeInterpolation( u, t, n = length(t) - 1; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( @@ -230,7 +246,8 @@ function LagrangeInterpolation( if n != length(t) - 1 error("Currently only n=length(t) - 1 is supported") end - return LagrangeInterpolation(u, t, n, extrapolation_left, extrapolation_right) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) + return LagrangeInterpolation(u, t, n, extrapolation_left, extrapolation_right, t_props) end """ @@ -260,12 +277,13 @@ Extrapolation extends the last cubic polynomial on each side. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ -struct AkimaInterpolation{uType, tType, IType, bType, cType, dType, T, propsType} <: +struct AkimaInterpolation{ + uType, tType, IType, bType, cType, dType, T, propsType, + } <: AbstractInterpolation{T} u::uType t::tType @@ -277,14 +295,13 @@ struct AkimaInterpolation{uType, tType, IType, bType, cType, dType, T, propsType extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function AkimaInterpolation( u, t, I, b, c, d, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(b), typeof(c), typeof(d), eltype(u), typeof(t_props), @@ -299,8 +316,8 @@ struct AkimaInterpolation{uType, tType, IType, bType, cType, dType, T, propsType extrapolation_right, Guesser(t), t_props, + kind, cache_parameters, - linear_lookup ) end end @@ -369,14 +386,16 @@ function AkimaInterpolation( u, t; modified::Bool = false, extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) - linear_lookup = seems_linear(assume_linear_t, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) T = eltype(u) b = Vector{T}(undef, n) @@ -386,12 +405,12 @@ function AkimaInterpolation( A = AkimaInterpolation( u, t, nothing, b, c, d, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return AkimaInterpolation( u, t, I, b, c, d, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end @@ -419,10 +438,9 @@ Extrapolation extends the last constant polynomial at the end points on each sid - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ struct ConstantInterpolation{uType, tType, IType, T, propsType} <: AbstractInterpolation{T} @@ -435,17 +453,19 @@ struct ConstantInterpolation{uType, tType, IType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function ConstantInterpolation( u, t, I, dir, extrapolation_left, extrapolation_right, - cache_parameters, assume_linear_t + cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) - return new{typeof(u), typeof(t), typeof(I), eltype(u), typeof(t_props)}( + kind = _resolve_strategy_kind(t, t_props) + return new{ + typeof(u), typeof(t), typeof(I), eltype(u), + typeof(t_props), + }( u, t, I, nothing, dir, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters ) end end @@ -454,27 +474,29 @@ function ConstantInterpolation( u, t; dir = :left, extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - cache_parameters = false, assume_linear_t = 1.0e-2 + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) A = ConstantInterpolation( u, t, nothing, dir, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return ConstantInterpolation( u, t, I, dir, extrapolation_left, extrapolation_right, - cache_parameters, assume_linear_t + cache_parameters, t_props ) end """ SmoothedConstantInterpolation(u, t; d_max = Inf, extrapolate = false, - cache_parameters = false, assume_linear_t = 1e-2) + cache_parameters = false) It is a method for interpolating constantly with forward fill, with smoothing around the value transitions to make the curve continuously differentiable while the integral never @@ -498,10 +520,9 @@ except when using extrapolation types `Constant` or `Extension`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behavior for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ struct SmoothedConstantInterpolation{ uType, tType, IType, dType, cType, dmaxType, T, propsType, @@ -516,20 +537,19 @@ struct SmoothedConstantInterpolation{ extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function SmoothedConstantInterpolation( u, t, I, p, d_max, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(p.d), typeof(p.c), typeof(d_max), eltype(u), typeof(t_props), }( u, t, I, p, d_max, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters ) end end @@ -538,24 +558,26 @@ function SmoothedConstantInterpolation( u, t; d_max = Inf, extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - cache_parameters = false, assume_linear_t = 1.0e-2 + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) p = SmoothedConstantParameterCache( u, t, cache_parameters, d_max, extrapolation_left, extrapolation_right ) A = SmoothedConstantInterpolation( u, t, nothing, p, d_max, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return SmoothedConstantInterpolation( u, t, I, p, d_max, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) end @@ -581,12 +603,13 @@ Extrapolation extends the last quadratic polynomial on each side. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ -struct QuadraticSpline{uType, tType, IType, pType, kType, cType, scType, T, propsType} <: +struct QuadraticSpline{ + uType, tType, IType, pType, kType, cType, scType, T, propsType, + } <: AbstractInterpolation{T} u::uType t::tType @@ -599,14 +622,13 @@ struct QuadraticSpline{uType, tType, IType, pType, kType, cType, scType, T, prop extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function QuadraticSpline( u, t, I, p, k, c, sc, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(p.α), typeof(k), typeof(c), typeof(sc), eltype(u), typeof(t_props), @@ -622,8 +644,8 @@ struct QuadraticSpline{uType, tType, IType, pType, kType, cType, scType, T, prop extrapolation_right, Guesser(t), t_props, + kind, cache_parameters, - linear_lookup ) end end @@ -632,13 +654,15 @@ function QuadraticSpline( u::AbstractVector{<:Number}, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - cache_parameters = false, assume_linear_t = 1.0e-2 + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) dtype_sc = typeof(one(eltype(t)) / one(eltype(t))) @@ -649,26 +673,28 @@ function QuadraticSpline( p = QuadraticSplineParameterCache(u, t, k, c, sc, cache_parameters) A = QuadraticSpline( u, t, nothing, p, k, c, sc, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return QuadraticSpline( u, t, I, p, k, c, sc, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) end function QuadraticSpline( u::AbstractVector, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, - assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) dtype_sc = typeof(one(eltype(t)) / one(eltype(t))) @@ -689,12 +715,12 @@ function QuadraticSpline( p = QuadraticSplineParameterCache(u, t, k, c, sc, cache_parameters) A = QuadraticSpline( u, t, nothing, p, k, c, sc, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return QuadraticSpline( u, t, I, p, k, c, sc, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) end @@ -720,10 +746,9 @@ Second derivative on both ends are zero, which are also called "natural" boundar - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ struct CubicSpline{uType, tType, IType, pType, hType, zType, T, propsType} <: AbstractInterpolation{T} @@ -737,14 +762,13 @@ struct CubicSpline{uType, tType, IType, pType, hType, zType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function CubicSpline( u, t, I, p, h, z, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(p.c₁), typeof(h), typeof(z), eltype(u), typeof(t_props), @@ -759,8 +783,8 @@ struct CubicSpline{uType, tType, IType, pType, hType, zType, T, propsType} <: extrapolation_right, Guesser(t), t_props, + kind, cache_parameters, - linear_lookup ) end end @@ -769,14 +793,16 @@ function CubicSpline( u::AbstractVector{<:Number}, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, - assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) - 1 h = vcat(0, map(k -> t[k + 1] - t[k], 1:(length(t) - 1)), 0) dl = vcat(h[2:n], zero(eltype(h))) @@ -796,31 +822,33 @@ function CubicSpline( 1:(n + 1) ) z = tA \ d - linear_lookup = seems_linear(assume_linear_t, t) p = CubicSplineParameterCache(u, h, z, cache_parameters) A = CubicSpline( u, t, nothing, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return CubicSpline( u, t, I, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end function CubicSpline( u::AbstractArray{T, N}, t; - extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, - assume_linear_t = 1.0e-2 + extrapolation::ExtrapolationType.T = ExtrapolationType.None, + extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) where {T, N} extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) - 1 h = vcat(0, map(k -> t[k + 1] - t[k], 1:(length(t) - 1)), 0) dl = vcat(h[2:n], zero(eltype(h))) @@ -843,30 +871,31 @@ function CubicSpline( d_reshaped = reshape(d, prod(size(d)[1:(end - 1)]), :) z = (tA \ d_reshaped')' z = reshape(z, size(u)...) - linear_lookup = seems_linear(assume_linear_t, t) p = CubicSplineParameterCache(u, h, z, cache_parameters) A = CubicSpline( u, t, nothing, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return CubicSpline( u, t, I, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end function CubicSpline( u::AbstractVector, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, - assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) - 1 h = vcat(0, map(k -> t[k + 1] - t[k], 1:(length(t) - 1)), 0) dl = vcat(h[2:n], zero(eltype(h))) @@ -885,12 +914,12 @@ function CubicSpline( p = CubicSplineParameterCache(u, h, z, cache_parameters) A = CubicSpline( u, t, nothing, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return CubicSpline( u, t, I, p, h[1:(n + 1)], z, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) end @@ -918,12 +947,14 @@ Extrapolation is a constant polynomial of the end points on each side. the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - - `assume_linear_t`: boolean value to specify a faster index lookup behavior for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ -struct BSplineInterpolation{uType, tType, pType, kType, cType, scType, T, propsType} <: +struct BSplineInterpolation{ + uType, tType, pType, kType, cType, scType, T, propsType, + } <: AbstractInterpolation{T} u::uType t::tType @@ -938,7 +969,7 @@ struct BSplineInterpolation{uType, tType, pType, kType, cType, scType, T, propsT extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType - linear_lookup::Bool + kind::FindFirstFunctions.StrategyKind function BSplineInterpolation( u, t, @@ -951,10 +982,9 @@ struct BSplineInterpolation{uType, tType, pType, kType, cType, scType, T, propsT knotVecType, extrapolation_left, extrapolation_right, - assume_linear_t + t_props, ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(p), typeof(k), typeof(c), typeof(sc), eltype(u), typeof(t_props), @@ -972,7 +1002,7 @@ struct BSplineInterpolation{uType, tType, pType, kType, cType, scType, T, propsT extrapolation_right, Guesser(t), t_props, - linear_lookup + kind ) end end @@ -981,13 +1011,15 @@ function BSplineInterpolation( u::AbstractVector, t, d, pVecType, knotVecType; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) n < d + 1 && error("BSplineInterpolation needs at least d + 1, i.e. $(d + 1) points.") s = zero(eltype(u)) @@ -1052,7 +1084,7 @@ function BSplineInterpolation( sc = zeros(eltype(t), n) return BSplineInterpolation( u, t, d, p, k, c, sc, pVecType, knotVecType, - extrapolation_left, extrapolation_right, assume_linear_t + extrapolation_left, extrapolation_right, t_props ) end @@ -1061,13 +1093,14 @@ function BSplineInterpolation( extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - assume_linear_t = 1.0e-2 + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing, ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) n < d + 1 && error("BSplineInterpolation needs at least d + 1, i.e. $(d + 1) points.") s = zero(eltype(u)) @@ -1135,7 +1168,7 @@ function BSplineInterpolation( sc = zeros(eltype(t), n) return BSplineInterpolation( u, t, d, p, k, c, sc, pVecType, knotVecType, - extrapolation_left, extrapolation_right, assume_linear_t + extrapolation_left, extrapolation_right, t_props ) end @@ -1165,12 +1198,14 @@ Extrapolation is a constant polynomial of the end points on each side. the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ -struct BSplineApprox{uType, tType, pType, kType, cType, scType, T, propsType} <: +struct BSplineApprox{ + uType, tType, pType, kType, cType, scType, T, propsType, + } <: AbstractInterpolation{T} u::uType t::tType @@ -1186,7 +1221,7 @@ struct BSplineApprox{uType, tType, pType, kType, cType, scType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType - linear_lookup::Bool + kind::FindFirstFunctions.StrategyKind function BSplineApprox( u, t, @@ -1200,10 +1235,9 @@ struct BSplineApprox{uType, tType, pType, kType, cType, scType, T, propsType} <: knotVecType, extrapolation_left, extrapolation_right, - assume_linear_t + t_props, ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(p), typeof(k), typeof(c), typeof(sc), eltype(u), typeof(t_props), @@ -1222,7 +1256,7 @@ struct BSplineApprox{uType, tType, pType, kType, cType, scType, T, propsType} <: extrapolation_right, Guesser(t), t_props, - linear_lookup + kind ) end end @@ -1231,13 +1265,15 @@ function BSplineApprox( u::AbstractVector, t, d, h, pVecType, knotVecType; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) h < d + 1 && error("BSplineApprox needs at least d + 1, i.e. $(d + 1) control points.") s = zero(eltype(u)) @@ -1323,7 +1359,7 @@ function BSplineApprox( sc = zeros(eltype(t), h) return BSplineApprox( u, t, d, h, p, k, c, sc, pVecType, knotVecType, - extrapolation_left, extrapolation_right, assume_linear_t + extrapolation_left, extrapolation_right, t_props ) end @@ -1332,13 +1368,14 @@ function BSplineApprox( extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - assume_linear_t = 1.0e-2 + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing, ) where {T, N} extrapolation_left, extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) n = length(t) h < d + 1 && error("BSplineApprox needs at least d + 1, i.e. $(d + 1) control points.") s = zero(eltype(u)) @@ -1432,7 +1469,7 @@ function BSplineApprox( sc = zeros(eltype(t), h) return BSplineApprox( u, t, d, h, p, k, c, sc, pVecType, knotVecType, - extrapolation_left, extrapolation_right, assume_linear_t + extrapolation_left, extrapolation_right, t_props ) end """ @@ -1457,12 +1494,13 @@ It is a Cubic Hermite interpolation, which is a piece-wise third degree polynomi - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ -struct CubicHermiteSpline{uType, tType, IType, duType, pType, T, propsType} <: +struct CubicHermiteSpline{ + uType, tType, IType, duType, pType, T, propsType, + } <: AbstractInterpolation{T} du::duType u::uType @@ -1473,20 +1511,19 @@ struct CubicHermiteSpline{uType, tType, IType, duType, pType, T, propsType} <: extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function CubicHermiteSpline( du, u, t, I, p, extrapolation_left, extrapolation_right, - cache_parameters, assume_linear_t + cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(du), typeof(p.c₁), eltype(u), typeof(t_props), }( du, u, t, I, p, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters ) end end @@ -1494,7 +1531,9 @@ end function CubicHermiteSpline( du, u, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, - extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters = false, assume_linear_t = 1.0e-2 + extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) @assert length(u) == length(du) "Length of `u` is not equal to length of `du`." extrapolation_left, @@ -1502,16 +1541,16 @@ function CubicHermiteSpline( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) - linear_lookup = seems_linear(assume_linear_t, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) p = CubicHermiteParameterCache(du, u, t, cache_parameters) A = CubicHermiteSpline( du, u, t, nothing, p, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return CubicHermiteSpline( du, u, t, I, p, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end @@ -1538,10 +1577,9 @@ section 3.4 for more details. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ function PCHIPInterpolation(u, t; kwargs...) u, t = munge_data(u, t) @@ -1572,12 +1610,13 @@ It is a Quintic Hermite interpolation, which is a piece-wise fifth degree polyno - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `cache_parameters`: precompute parameters at initialization for faster interpolation computations. Note: if activated, `u` and `t` should not be modified. Defaults to `false`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. """ -struct QuinticHermiteSpline{uType, tType, IType, duType, dduType, pType, T, propsType} <: +struct QuinticHermiteSpline{ + uType, tType, IType, duType, dduType, pType, T, propsType, + } <: AbstractInterpolation{T} ddu::dduType du::duType @@ -1589,20 +1628,19 @@ struct QuinticHermiteSpline{uType, tType, IType, duType, dduType, pType, T, prop extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool function QuinticHermiteSpline( ddu, du, u, t, I, p, extrapolation_left, - extrapolation_right, cache_parameters, assume_linear_t + extrapolation_right, cache_parameters, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), typeof(du), typeof(ddu), typeof(p.c₁), eltype(u), typeof(t_props), }( ddu, du, u, t, I, p, extrapolation_left, extrapolation_right, - Guesser(t), t_props, cache_parameters, linear_lookup + Guesser(t), t_props, kind, cache_parameters ) end end @@ -1611,7 +1649,8 @@ function QuinticHermiteSpline( ddu, du, u, t; extrapolation::ExtrapolationType.T = ExtrapolationType.None, extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, - cache_parameters = false, assume_linear_t = 1.0e-2 + cache_parameters = false, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) @assert length(u) == length(du) == length(ddu) "Length of `u` is not equal to length of `du` or `ddu`." extrapolation_left, @@ -1619,21 +1658,22 @@ function QuinticHermiteSpline( extrapolation, extrapolation_left, extrapolation_right ) u, t = munge_data(u, t) - linear_lookup = seems_linear(assume_linear_t, t) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) p = QuinticHermiteParameterCache(ddu, du, u, t, cache_parameters) A = QuinticHermiteSpline( ddu, du, u, t, nothing, p, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) I = cumulative_integral(A, cache_parameters) return QuinticHermiteSpline( ddu, du, u, t, I, p, extrapolation_left, - extrapolation_right, cache_parameters, linear_lookup + extrapolation_right, cache_parameters, t_props ) end struct SmoothArcLengthInterpolation{ - uType, tType, IType, P, D, S <: Union{AbstractInterpolation, Nothing}, T, propsType, + uType, tType, IType, P, D, S <: Union{AbstractInterpolation, Nothing}, + T, propsType, } <: AbstractInterpolation{T} u::uType @@ -1654,8 +1694,8 @@ struct SmoothArcLengthInterpolation{ extrapolation_right::ExtrapolationType.T iguesser::Guesser{tType} t_props::propsType + kind::FindFirstFunctions.StrategyKind cache_parameters::Bool - linear_lookup::Bool out::Vector{P} derivative::Vector{P} in_place::Bool @@ -1663,10 +1703,9 @@ struct SmoothArcLengthInterpolation{ u, t, d, shape_itp, Δt_circle_segment, Δt_line_segment, center, radius, dir_1, dir_2, short_side_left, I, extrapolation_left, extrapolation_right, - assume_linear_t, out, derivative, in_place + out, derivative, in_place, t_props ) - linear_lookup = seems_linear(assume_linear_t, t) - t_props = FindFirstFunctions.SearchProperties(t) + kind = _resolve_strategy_kind(t, t_props) return new{ typeof(u), typeof(t), typeof(I), eltype(radius), eltype(d), typeof(shape_itp), eltype(u), typeof(t_props), @@ -1674,7 +1713,7 @@ struct SmoothArcLengthInterpolation{ u, t, d, shape_itp, Δt_circle_segment, Δt_line_segment, center, radius, dir_1, dir_2, short_side_left, I, nothing, extrapolation_left, extrapolation_right, - Guesser(t), t_props, false, linear_lookup, out, derivative, in_place + Guesser(t), t_props, kind, false, out, derivative, in_place ) end end @@ -1712,10 +1751,10 @@ If you want to do this, construct the shape interpolation yourself and use the the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ function SmoothArcLengthInterpolation( u::AbstractMatrix{U}; @@ -1762,10 +1801,10 @@ Approximate the `shape_itp` with a C¹ unit speed interpolation using line segme the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ function SmoothArcLengthInterpolation( shape_itp::AbstractInterpolation; @@ -1814,7 +1853,6 @@ end extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters::Bool = false, - assume_linear_t = 1e-2, in_place::Bool = true) Make a C¹ smooth unit speed interpolation through the given data with the given tangents using line @@ -1839,10 +1877,10 @@ segments and circle segments. the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - `extrapolation_right`: The extrapolation type applied right of the data. See `extrapolation` for the possible options. This keyword is ignored if `extrapolation != Extrapolation.none`. - - `assume_linear_t`: boolean value to specify a faster index lookup behaviour for - evenly-distributed abscissae. Alternatively, a numerical threshold may be specified - for a test based on the normalized standard deviation of the difference with respect - to the straight line (see [`looks_linear`](@ref)). Defaults to 1e-2. + - `search_properties`: a pre-built `FindFirstFunctions.SearchProperties` for `t`, used + to skip the construction-time knot probe or override its result (e.g. built with + `is_uniform = true`). Defaults to `nothing`, which probes `t` automatically. + """ function SmoothArcLengthInterpolation( u::AbstractMatrix, @@ -1931,8 +1969,8 @@ function SmoothArcLengthInterpolation( extrapolation_left::ExtrapolationType.T = ExtrapolationType.None, extrapolation_right::ExtrapolationType.T = ExtrapolationType.None, cache_parameters::Bool = false, - assume_linear_t = 1.0e-2, - in_place::Bool = true + in_place::Bool = true, + search_properties::Union{Nothing, FindFirstFunctions.SearchProperties} = nothing ) N = size(u, 1) n_circle_arcs = size(u, 2) - 1 @@ -1982,14 +2020,15 @@ function SmoothArcLengthInterpolation( extrapolation_right = munge_extrapolation( extrapolation, extrapolation_left, extrapolation_right ) - linear_lookup = seems_linear(assume_linear_t, t) out = Vector{P}(undef, N) derivative = Vector{P}(undef, N) + t_props = something(search_properties, FindFirstFunctions.SearchProperties(t)) return SmoothArcLengthInterpolation( u, t, d, shape_itp, Δt_circle_segment, Δt_line_segment, center, radius, dir_1, dir_2, short_side_left, - nothing, extrapolation_left, extrapolation_right, linear_lookup, out, derivative, in_place + nothing, extrapolation_left, extrapolation_right, + out, derivative, in_place, t_props ) end diff --git a/src/interpolation_methods.jl b/src/interpolation_methods.jl index c6a6f86a..07a92cb1 100644 --- a/src/interpolation_methods.jl +++ b/src/interpolation_methods.jl @@ -169,8 +169,83 @@ function _extrapolate_right(A::SmoothedConstantInterpolation, t) end end -# Linear Interpolation +# Linear Interpolation — two uniform fast paths, both inference-safe: +# +# 1. STATIC, for `AbstractRange` knots. A range is uniform at the *type* +# level (no value-dependent tag, so the constructor stays inferred) and +# is immutable + exactly spaced, so the lean closed form needs neither +# the runtime `kind` branch nor the cell/spacing verification, and it +# gets `α` straight from the float position without loading `A.t[idx]`. +# 2. RUNTIME, for `Vector{<:AbstractFloat}` knots. Uniformity here is a +# value property, carried by the `kind` enum; the branch picks the +# verified closed form (a `Vector` can be `push!`-mutated or +# caller-forced-uniform on jittered data, so it must check the live +# knots and fall back). Both arms return the same concrete type. +# +# Non-Float `u`, and non-uniform knots, use the slope form. +function _interpolate( + A::LinearInterpolation{<:AbstractVector{<:AbstractFloat}, <:AbstractRange}, + t::Number, iguess, + ) + return _linear_uniform_range_interpolate(A, t, iguess) +end +function _interpolate( + A::LinearInterpolation{<:AbstractVector{<:AbstractFloat}}, t::Number, iguess, + ) + return if A.kind === FindFirstFunctions.KIND_UNIFORM_STEP + _linear_uniform_interpolate(A, t, iguess) + else + _linear_slope_interpolate(A, t, iguess) + end +end function _interpolate(A::LinearInterpolation{<:AbstractVector}, t::Number, iguess) + return _linear_slope_interpolate(A, t, iguess) +end + +# Lean uniform kernel for `AbstractRange` knots. No verification (a range is +# immutable and exactly uniform) and no `A.t[idx]` load (`α` is the +# fractional part of the float position `f`). NaN handling matches the other +# methods: a NaN query propagates through the partial of `t` for correct +# derivatives, and a NaN-adjacent `u` is resolved by exact-knot comparison. +@inline function _linear_uniform_range_interpolate( + A::LinearInterpolation{<:AbstractVector{<:AbstractFloat}}, t::Number, iguess, + ) + if isnan(t) + idx = firstindex(A.u) + u1 = oneunit(eltype(A.u)) + slope = t / t * get_parameters(A, idx) + Δu = slope * (t - oneunit(eltype(A.t))) + return oftype(Δu, u1) + Δu + end + props = A.t_props + f = (t - props.first_val) * props.inv_step + n = length(A.t) + idx0 = if f < 0 + 0 + elseif f > n - 2 + n - 2 + else + unsafe_trunc(Int, floor(f)) + end + α = f - idx0 + @inbounds u1 = A.u[idx0 + 1] + @inbounds u2 = A.u[idx0 + 2] + Δu = α * (u2 - u1) + if any(isnan.(Δu)) + # `0 * NaN = NaN` poisons exact-knot queries when a neighbour is NaN; + # resolve by comparing the query to the bracketing knots directly. + @inbounds t1 = A.t[idx0 + 1] + @inbounds t2 = A.t[idx0 + 2] + if t == t2 + return u2 + zero(Δu) + elseif t == t1 + return u1 + zero(Δu) + end + end + return u1 + Δu +end + +function _linear_slope_interpolate(A::LinearInterpolation, t::Number, iguess) if isnan(t) # For correct derivative with NaN idx = firstindex(A.u) @@ -198,6 +273,86 @@ function _interpolate(A::LinearInterpolation{<:AbstractVector}, t::Number, igues return val end +# Uniform-grid fast path, reached from the `_interpolate` runtime branch when +# `kind === KIND_UNIFORM_STEP`. Uses the precomputed `inv_step` / `first_val` +# baked into `t_props` to skip the `A.t[idx]` load and the cached slope +# lookup. The linear-blend form `u[idx] + α * (u[idx+1] - u[idx])` is +# mathematically equivalent to the slope form modulo a few ulps of +# floating-point roundoff. +# +# `inv_step` / `first_val` are always `AbstractFloat` (`_ratio_type` of the +# knot eltype) and the caller restricts `eltype(u) <: AbstractFloat`, so the +# lerp result type matches the slope form — both branches of `_interpolate` +# return the same concrete type, keeping the query inferred. On a stale or +# non-uniform cell (see the verification below) it falls back to the slope +# form, which is also the same type. +function _linear_uniform_interpolate( + A::LinearInterpolation{<:AbstractVector{<:AbstractFloat}}, t::Number, iguess, + ) + if isnan(t) + # Propagate NaN through the partial of `t` so `ForwardDiff.derivative` + # at a NaN query returns NaN. The non-uniform method does this by + # computing `u1 + slope * (t - t1)` with `t - t1 = NaN`; replicate the + # `slope * (t - t1)` poisoning here. + idx = firstindex(A.u) + u1 = oneunit(eltype(A.u)) + slope = t / t * get_parameters(A, idx) + Δu = slope * (t - oneunit(eltype(A.t))) + return oftype(Δu, u1) + Δu + end + # Closed-form index lookup on a uniform grid: `f` is the float position + # in `[0, length-1]`, `idx0` its floor (zero-based), `α` the fractional + # part. Clamping into `[0, length-2]` keeps `idx0 + 1` and `idx0 + 2` + # in bounds for the two-sample load below. The clamp happens in the + # float domain before truncating: with Extension extrapolation `t` can + # be far outside the knot span, where `f` exceeds typemax(Int) and + # `unsafe_trunc` is UB. + props = A.t_props + f = (t - props.first_val) * props.inv_step + n = length(A.t) + idx0 = if f < 0 + 0 + elseif f > n - 2 + n - 2 + else + unsafe_trunc(Int, floor(f)) + end + @inbounds t1 = A.t[idx0 + 1] + @inbounds t2 = A.t[idx0 + 2] + # Verify the guessed cell against the live knots. `push!`/`append!` + # mutate `A.t` while `t_props` (and the cached `kind`) keep their + # construction-time values, so the precomputed `first_val` / + # `inv_step` can go stale; a caller-forced `is_uniform = true` on + # non-uniform knots is the same hazard. The cell check catches a + # wrong segment; the spacing check bounds the α error when the cell + # is right but the cached `inv_step` no longer matches the local + # spacing. The 1e-6 slack tolerates accumulated float roundoff on + # long validated-uniform vectors while rejecting any real spacing + # change. On failure, evaluate via the general slope-form path + # (slower, always correct). + in_cell = (t1 <= t || idx0 == 0) && (t <= t2 || idx0 == n - 2) + spacing_ok = abs((t2 - t1) * props.inv_step - 1) <= 1.0e-6 + if !(in_cell && spacing_ok) + return _linear_slope_interpolate(A, t, iguess) + end + A.iguesser.idx_prev[] = idx0 + 1 + α = (t - t1) * props.inv_step + @inbounds u1 = A.u[idx0 + 1] + @inbounds u2 = A.u[idx0 + 2] + Δu = α * (u2 - u1) + if any(isnan.(Δu)) + # When NaN appears in `u` adjacent to the segment, `0 * NaN = NaN` + # poisons the answer at exact-knot queries. Resolve by comparing + # the query against the knot values directly. + if t == t2 + return u2 + zero(Δu) + elseif t == t1 + return u1 + zero(Δu) + end + end + return u1 + Δu +end + function _interpolate(A::LinearInterpolation{<:AbstractArray}, t::Number, iguess) idx = get_idx(A, t, iguess) Δt = t - A.t[idx] diff --git a/src/interpolation_utils.jl b/src/interpolation_utils.jl index 5e12edcf..bdcbc816 100644 --- a/src/interpolation_utils.jl +++ b/src/interpolation_utils.jl @@ -30,6 +30,10 @@ function findRequiredIdxs!(A::LagrangeInterpolation, t, idx) end function spline_coefficients!(N, d, k, u::Number) + # `N` is zeroed because BSpline derivative paths read the full vector + # (see `_derivative(::BSplineInterpolation, …)` in `derivatives.jl`). + # Positions outside the body's `(i-d):i` write window must be zero or + # stale values from previous calls would leak in. N .= zero(u) if u == k[1] N[1] = one(u) @@ -38,18 +42,27 @@ function spline_coefficients!(N, d, k, u::Number) N[end] = one(u) return length(N):length(N) else - i = findfirst(x -> x > u, k)::Int - 1 - N[i] = one(u) - for deg in 1:d - N[i - deg] = (k[i + 1] - u) / (k[i + 1] - k[i - deg + 1]) * N[i - deg + 1] - for j in (i - deg + 1):(i - 1) - N[j] = (u - k[j]) / (k[j + deg] - k[j]) * N[j] + - (k[j + deg + 1] - u) / (k[j + deg + 1] - k[j + 1]) * N[j + 1] - end - N[i] = (u - k[i]) / (k[i + deg] - k[i]) * N[i] + # `k` is sorted; the legacy `findfirst(x -> x > u, k) - 1` did an O(n) + # linear scan. `searchsortedlast` returns the same index in O(log n). + i = searchsortedlast(k, u) + return _spline_coefficients_body!(N, d, k, u, i) + end +end + +# Body of `spline_coefficients!` after the locator index `i` has been +# determined. Used by `spline_coefficients!` (logarithmic locator) and by +# `quadratic_spline_params` (O(1) amortised running locator). +function _spline_coefficients_body!(N, d, k, u, i) + N[i] = one(u) + for deg in 1:d + N[i - deg] = (k[i + 1] - u) / (k[i + 1] - k[i - deg + 1]) * N[i - deg + 1] + for j in (i - deg + 1):(i - 1) + N[j] = (u - k[j]) / (k[j + deg] - k[j]) * N[j] + + (k[j + deg + 1] - u) / (k[j + deg + 1] - k[j + 1]) * N[j + 1] end - return (i - d):i + N[i] = (u - k[i]) / (k[i + deg] - k[i]) * N[i] end + return (i - d):i end function spline_coefficients!(N, d, k, u::AbstractVector) @@ -86,11 +99,41 @@ function quadratic_spline_params(t::AbstractVector, sc::AbstractVector) diag_hi = Vector{dtype_sc}(undef, n - 1) diag_lo = Vector{dtype_sc}(undef, n - 1) + # `t` is sorted and `k` is built from `t`, so the locator + # `searchsortedlast(k, tᵢ)` is non-decreasing in `i`. Maintain a running + # pointer to advance amortised O(1) per knot — total O(n) instead of the + # O(n²) `findfirst` scan or O(n log n) per-call `searchsortedlast`. + nk = length(k) + d = 2 + fill!(sc, zero(dtype_sc)) + locator = 1 for (i, tᵢ) in enumerate(t) - spline_coefficients!(sc, 2, k, tᵢ) + if tᵢ == k[1] || tᵢ == k[end] + # `t[1] == k[1]` and `t[end] == k[end]` by construction, so this + # branch only fires for `i == 1` (sc[1] = 1) and `i == n` + # (sc[end] = 1). Read directly without touching `sc`. + on_first = tᵢ == k[1] + diag[i] = (on_first && i == 1) || (!on_first && i == length(sc)) ? + one(dtype_sc) : zero(dtype_sc) + (i > 1) && (diag_lo[i - 1] = zero(dtype_sc)) + (i < n) && (diag_hi[i] = zero(dtype_sc)) + continue + end + # Advance the running locator until `k[locator+1] > tᵢ` — equivalent + # to `searchsortedlast(k, tᵢ)` on monotone-increasing `tᵢ` inputs. + while locator < nk && k[locator + 1] <= tᵢ + locator += 1 + end + _spline_coefficients_body!(sc, d, k, tᵢ, locator) diag[i] = sc[i] (i > 1) && (diag_lo[i - 1] = sc[i - 1]) (i < n) && (diag_hi[i] = sc[i + 1]) + # The body writes only `sc[locator-d:locator]`; zero those entries + # so the next iteration starts with `sc .== 0` again (the body + # assumes positions outside its write window are already zero). + for j in (locator - d):locator + sc[j] = zero(dtype_sc) + end end A = Tridiagonal(diag_lo, diag, diag_hi) @@ -167,28 +210,31 @@ function munge_data(U::AbstractArray{T, N}, t) where {T, N} return U, t end -seems_linear(assume_linear_t::Bool, _) = assume_linear_t -seems_linear(assume_linear_t::Number, t) = looks_linear(t; threshold = assume_linear_t) - -""" - looks_linear(t; threshold = 1e-2) - -Determine if the abscissae `t` are regularly distributed, taking the standard deviation of -the difference between the array of abscissae with respect to the straight line linking -its first and last elements, normalized by the range of `t`. If this standard deviation is -below the given `threshold`, the vector looks linear (return true). Internal function - -interface may change. -""" -function looks_linear(t; threshold = 1.0e-2) - length(t) <= 2 && return true - t_0, t_f = first(t), last(t) - t_span = t_f - t_0 - tspan_over_N = t_span * length(t)^(-1) - norm_var = sum( - (t_i - t_0 - i * tspan_over_N)^2 for (i, t_i) in enumerate(t) - ) / (length(t) * t_span^2) - return norm_var < threshold^2 -end +# Resolve the concrete `FindFirstFunctions.StrategyKind` for the given knot +# vector at construction time. Stored on every interpolation cache as the +# `kind::StrategyKind` field (a `UInt8` enum — not parametric), so +# `get_idx` dispatches on a fixed kind without re-probing per query. +# +# `FindFirstFunctions.Auto(t, props)` does the resolution and we keep its +# `.kind`: +# +# - For uniformly-spaced data (any `AbstractRange` or a `Vector` whose +# every element lies within ~1e-12 of the exactly-uniform line), +# picks `KIND_UNIFORM_STEP`. +# - For non-uniform data with `length(t) ≤ 16`, picks `KIND_LINEAR_SCAN`. +# - Otherwise picks `KIND_BRACKET_GALLOP` (the v2 default). +# +# The precomputed `inv_step` / `first_val` that `KIND_UNIFORM_STEP`'s +# closed-form lookup needs live in `A.t_props` (parametric on the ratio +# type). `get_idx` reconstructs `Auto(A.t_props)` for the uniform case so +# the closed-form path is taken; for every other kind the props are +# unused, so the bare `kind` suffices and no parametric strategy field is +# stored. +@inline _resolve_strategy_kind(t::AbstractVector) = FindFirstFunctions.Auto(t).kind +# Props-aware form: reuses the already-computed (possibly caller-supplied) +# `SearchProperties` instead of re-probing `t` inside `Auto(t)`. +@inline _resolve_strategy_kind(t::AbstractVector, props::FindFirstFunctions.SearchProperties) = + FindFirstFunctions.Auto(t, props).kind function get_idx( A::AbstractInterpolation, t, iguess::Integer; lb = 1, @@ -196,19 +242,8 @@ function get_idx( ) tvec = A.t ub = length(tvec) + ub_shift - return if side == :last - clamp( - searchsortedlast(FindFirstFunctions.BracketGallop(), tvec, t, iguess) + - idx_shift, lb, ub - ) - elseif side == :first - clamp( - searchsortedfirst(FindFirstFunctions.BracketGallop(), tvec, t, iguess) + - idx_shift, lb, ub - ) - else - error("side must be :first or :last") - end + raw = _dispatch_search(A, tvec, t, iguess, side) + return clamp(raw + idx_shift, lb, ub) end function get_idx( @@ -217,18 +252,36 @@ function get_idx( ) tvec = A.t ub = length(tvec) + ub_shift - return if side == :last - clamp( - searchsortedlast(FindFirstFunctions.GuesserHint(iguess), tvec, t) + - idx_shift, lb, ub - ) - elseif side == :first - clamp( - searchsortedfirst(FindFirstFunctions.GuesserHint(iguess), tvec, t) + - idx_shift, lb, ub - ) + # `iguess(t)` gives a linear-extrapolation hint when `t` looks linear and + # falls back to the cached `idx_prev` otherwise. + hint = iguess(t) + raw = _dispatch_search(A, tvec, t, hint, side) + idx = clamp(raw + idx_shift, lb, ub) + iguess.idx_prev[] = idx + return idx +end + +# Dispatch the cached `A.kind` into FindFirstFunctions. `KIND_UNIFORM_STEP`'s +# closed-form lookup needs the precomputed `inv_step` / `first_val`, which +# live in `A.t_props`; reconstruct the (isbits, stack-allocated) `Auto` from +# them so that path is taken. Every other kind ignores the props, so the +# bare enum dispatches directly. The branch must sit at the call — `Auto` +# and `StrategyKind` are different types, so a hoisted variable would be a +# `Union` and break type stability; each arm returns a concrete `Int`. +@inline function _dispatch_search(A, tvec, t, hint, side) + if A.kind === FindFirstFunctions.KIND_UNIFORM_STEP + auto = FindFirstFunctions.Auto(A.t_props) + return side == :last ? + FindFirstFunctions.searchsorted_last(auto, tvec, t, hint) : + side == :first ? + FindFirstFunctions.searchsorted_first(auto, tvec, t, hint) : + error("side must be :first or :last") else - error("side must be :first or :last") + return side == :last ? + FindFirstFunctions.searchsorted_last(A.kind, tvec, t, hint) : + side == :first ? + FindFirstFunctions.searchsorted_first(A.kind, tvec, t, hint) : + error("side must be :first or :last") end end diff --git a/test/Methods/derivative_tests.jl b/test/Methods/derivative_tests.jl index df38b339..a751d261 100644 --- a/test/Methods/derivative_tests.jl +++ b/test/Methods/derivative_tests.jl @@ -1,5 +1,5 @@ using DataInterpolations, Test -using FindFirstFunctions: BracketGallop +using FindFirstFunctions: FindFirstFunctions, BracketGallop using FiniteDifferences using DataInterpolations: derivative, get_transition_ts using Symbolics @@ -49,10 +49,10 @@ function test_derivatives(method; args = [], kwargs = [], name::String) @test isapprox(fdiff, adiff, atol = 1.0e-8) @test isapprox(fdiff2, adiff2, atol = 1.0e-8) # Cached index - if hasproperty(func, :iguesser) && !func.iguesser.linear_lookup + if hasproperty(func, :t_props) && !func.t_props.is_uniform @test abs( func.iguesser.idx_prev[] - - searchsortedfirst( + FindFirstFunctions.searchsorted_first( BracketGallop(), func.t, _t, func.iguesser(_t) ) ) <= 1 diff --git a/test/interface.jl b/test/interface.jl index 5be8527d..10c9080b 100644 --- a/test/interface.jl +++ b/test/interface.jl @@ -31,13 +31,22 @@ end @testset "Type Inference" begin u = 2.0collect(1:10) t = 1.0collect(1:10) + tr = 1.0:10.0 methods = [ ConstantInterpolation, LinearInterpolation, QuadraticInterpolation, LagrangeInterpolation, QuadraticSpline, CubicSpline, AkimaInterpolation, ] + # Construction must be type-inferred for both Vector and Range knots — + # uniformity is a runtime field (`kind`), never a cache type parameter, + # so the constructor returns a single concrete type. The query is then + # inferred per instance (the uniform fast path is a runtime branch whose + # arms share a return type). @testset "$method" for method in methods - @inferred method(u, t) + A = @inferred method(u, t) + @inferred A(2.5) + Ar = @inferred method(u, tr) + @inferred Ar(2.5) end @testset "BSplineInterpolation" begin @inferred BSplineInterpolation(u, t, 3, :Uniform, :Uniform) diff --git a/test/interpolation_tests.jl b/test/interpolation_tests.jl index ce5ad0e6..8b5d4811 100644 --- a/test/interpolation_tests.jl +++ b/test/interpolation_tests.jl @@ -1,5 +1,5 @@ using DataInterpolations -using FindFirstFunctions: GuesserHint +using FindFirstFunctions: FindFirstFunctions, GuesserHint using StableRNGs using Optim, ForwardDiff using BenchmarkTools @@ -23,10 +23,10 @@ end function test_cached_index(A) for t in range(first(A.t), last(A.t); length = 2 * length(A.t) - 1) A(t) - idx = searchsortedfirst(GuesserHint(A.iguesser), A.t, t) + idx = FindFirstFunctions.searchsorted_first(GuesserHint(A.iguesser), A.t, t) @test abs( A.iguesser.idx_prev[] - - searchsortedfirst(GuesserHint(A.iguesser), A.t, t) + FindFirstFunctions.searchsorted_first(GuesserHint(A.iguesser), A.t, t) ) <= 2 end return @@ -37,7 +37,6 @@ end for t in (1.0:10.0, 1.0collect(1:10)) u = 2.0collect(1:10) - #t = 1.0collect(1:10) A = @inferred( LinearInterpolation( u, t; extrapolation = ExtrapolationType.Extension @@ -58,10 +57,7 @@ end LinearInterpolation( u, t; extrapolation = ExtrapolationType.Extension ) - ) isa LinearInterpolation broken = VERSION < - v"1.11" && - t isa - AbstractRange + ) isa LinearInterpolation broken = VERSION < v"1.11" && t isa AbstractRange A = LinearInterpolation( u, t; extrapolation = ExtrapolationType.Extension ) @@ -83,10 +79,7 @@ end LinearInterpolation( u, t; extrapolation = ExtrapolationType.Extension ) - ) isa LinearInterpolation broken = VERSION < - v"1.11" && - t isa - AbstractRange + ) isa LinearInterpolation broken = VERSION < v"1.11" && t isa AbstractRange A = LinearInterpolation( u, t; extrapolation = ExtrapolationType.Extension ) @@ -244,7 +237,8 @@ end # Test array-valued interpolation u = collect.(2.0collect(1:10)) t = 1.0collect(1:10) - A = @inferred(LinearInterpolation(u, t; extrapolation = ExtrapolationType.Extension)) + # Vector knots — constructor returns a Union, see Type Inference testset. + A = LinearInterpolation(u, t; extrapolation = ExtrapolationType.Extension) @test A(0) == fill(0.0) @test A(5.5) == fill(11.0) @test A(11) == fill(22) @@ -254,17 +248,17 @@ end # Test constant -Inf interpolation u = [-Inf, -Inf] t = [0.0, 1.0] - A = @inferred(LinearInterpolation(u, t)) + A = LinearInterpolation(u, t) @test A(0.0) == -Inf @test A(0.5) == -Inf # Test extrapolation u = 2.0collect(1:10) t = 1.0collect(1:10) - A = @inferred(LinearInterpolation(u, t; extrapolation = ExtrapolationType.Extension)) + A = LinearInterpolation(u, t; extrapolation = ExtrapolationType.Extension) @test A(-1.0) == -2.0 @test A(11.0) == 22.0 - A = @inferred(LinearInterpolation(u, t)) + A = LinearInterpolation(u, t) @test_throws DataInterpolations.LeftExtrapolationError A(-1.0) @test_throws DataInterpolations.RightExtrapolationError A(11.0) @test_throws DataInterpolations.LeftExtrapolationError A([-1.0, 11.0]) @@ -352,6 +346,100 @@ end zeros(3), [1.0, 2.0] ) end + + @testset "Uniform-grid fast path parity" begin + # The static-dispatched uniform kernel uses the lerp form + # `u1 + α * (u2 - u1)` with α computed from the precomputed + # `inv_step` / `first_val`. This is mathematically equivalent + # to the slope form `u1 + slope * (t - t1)` but differs by a + # few ulps of float roundoff. The dominant error source is the + # multiplication `(q - first_val) * inv_step` which produces a + # value at scale `length(t)` before subtracting `(idx - 1)` to + # recover α, costing log2(length(t)) bits of precision relative + # to a direct `q - t[idx]` subtract. So the realistic error + # scales as `length(t) * eps * max(|u|)`. + + # Compare a uniform interpolation against an equivalent slope-form + # evaluation built by reconstructing the slopes manually. + function slope_form_eval(A, q) + idx = DataInterpolations.get_idx(A, q, A.iguesser) + t1 = A.t[idx] + u1 = A.u[idx] + slope = DataInterpolations.get_parameters(A, idx) + return u1 + slope * (q - t1) + end + + rng = StableRNG(0xfacefeed) + n = 1001 + # AbstractRange knots + t_r = range(0.0, 10.0; length = n) + # Vector knots that the props probe classifies as uniform + t_v = collect(t_r) + u = randn(rng, n) + + for t in (t_r, t_v) + A = LinearInterpolation(u, t) + @test A.t_props.is_uniform + @test A.kind === FindFirstFunctions.KIND_UNIFORM_STEP + + # Tolerance scaled to `length(t) * eps * max(|u|)`; the realistic + # ulp gap at the per-segment scale is O(length(t)). + tol = n * eps(Float64) * maximum(abs, u) + qs = sort!(rand(rng, 5000) .* 9.999) + for q in qs + @test isapprox(A(q), slope_form_eval(A, q); atol = tol, rtol = 0) + end + end + + # Non-uniform must still take the slope-form path and produce the + # exact same value as the manual slope-form reconstruction. + t_nu = sort!(rand(StableRNG(0xcafef00d), n)) .* 10.0 + A_nu = LinearInterpolation(u, t_nu) + @test !A_nu.t_props.is_uniform + @test A_nu.kind !== FindFirstFunctions.KIND_UNIFORM_STEP + qs_nu = sort!(rand(StableRNG(0x0b0bcafe), 5000)) .* (last(t_nu) - first(t_nu)) .+ + first(t_nu) + for q in qs_nu + @test A_nu(q) == slope_form_eval(A_nu, q) + end + + # Vector knots uniform at the sampled probe points but jittered + # between them must not be classified uniform — a false positive + # here would silently corrupt interpolated values on the fast path. + t_trick = collect(1.0:101.0) + t_trick[52:60] .= range(54.5, 60.0, length = 9) + A_trick = LinearInterpolation(randn(StableRNG(0xdeadbeef), 101), t_trick) + @test !A_trick.t_props.is_uniform + @test A_trick.kind !== FindFirstFunctions.KIND_UNIFORM_STEP + + # Extension extrapolation reaches the fast path with t far outside + # the knot span, where the closed-form float index exceeds + # typemax(Int); the kernel must clamp before truncating. + A_ext = LinearInterpolation( + u, t_v; extrapolation = ExtrapolationType.Extension + ) + for q in (1.0e300, -1.0e300) + @test isapprox(A_ext(q), slope_form_eval(A_ext, q); rtol = 1.0e-10) + end + + # push! mutates A.t while t_props (and the cached kind) + # keep their construction-time values. The fast path must detect + # the stale cell/spacing against the live knots and fall back + # rather than silently interpolate with the stale inv_step. + t_m = collect(0.0:1.0:10.0) + A_m = LinearInterpolation(sin.(t_m), t_m) + @test A_m.kind === FindFirstFunctions.KIND_UNIFORM_STEP + push!(A_m, -0.3, 10.5) # breaks the uniform spacing (1.0 → 0.5) + # Queries in the mutated region must fall back to the slope path + # (exact match); queries in the untouched uniform region still + # take the lerp fast path (equal up to a few ulps). + for q in (10.1, 10.25, 10.4) + @test A_m(q) == slope_form_eval(A_m, q) + end + for q in (5.5, 0.3) + @test isapprox(A_m(q), slope_form_eval(A_m, q); atol = 1.0e-12) + end + end end @testset "Quadratic Interpolation" begin @@ -1567,7 +1655,8 @@ end ut2 = Float64[0.1, 0.2, 0.3, 0.4, 0.5] for u in (ut1, ut2), t in (ut1, ut2) - interp = @inferred(LinearInterpolation(ut1, ut2)) + # Vector knots — constructor returns a Union, see Type Inference testset. + interp = LinearInterpolation(ut1, ut2) for xs in (u, t) ys = @inferred(interp(xs)) @test ys isa Vector{typeof(interp(first(xs)))} diff --git a/test/qa/Project.toml b/test/qa/Project.toml index 16d4e70f..3a77d57b 100644 --- a/test/qa/Project.toml +++ b/test/qa/Project.toml @@ -8,7 +8,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [sources] -DataInterpolations = {path = "../.."} +DataInterpolations = {path = "/home/crackauc/sandbox/tmp_20260610_011338_892/di"} [compat] AllocCheck = "0.2"