From ea52cfd0e98569dfe1d8b03d87770acfdcb4840b Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Fri, 5 Jun 2026 04:13:30 -0400 Subject: [PATCH 1/3] Make SparseMatrixCSC the native API; SparseMatrixCSR via extension The entire kernel is already CSC: csr_analyze/csr_factor/csr_refactor!/csr_qr now accept a SparseMatrixCSC directly, reading its colptr/rowval/nzval into the pooled workspace with no transpose or intermediate allocation (column norms are computed straight off nzval). The csr_* names are kept for back-compatibility. SparseMatricesCSR is moved from [deps] to [weakdeps]; the SparseMatrixCSR method overloads now live in SparseColumnPivotedQRSparseMatricesCSRExt, which delegates to the CSC core via SparseMatrixCSC(A). The precompile workload exercises the CSC path natively; the CSR extension carries its own workload. Tests: CSC core covers square/over-/under-determined/rank-deficient, Float64 and ComplexF64, and the refactor reuse + zero-alloc steady state, run in a SparseMatricesCSR-free subprocess. The existing CSR suite still exercises the extension path. Refactor steady state on the CSC path is 0 bytes (sparse and adaptive_dense), as is ldiv!. Refs SciML/SparseColumnPivotedQR.jl#33 Co-Authored-By: Chris Rackauckas --- Project.toml | 8 +- ...arseColumnPivotedQRSparseMatricesCSRExt.jl | 67 ++++++ src/SparseColumnPivotedQR.jl | 214 ++++++++++-------- test/csc_core.jl | 117 ++++++++++ test/runtests.jl | 54 +++-- 5 files changed, 347 insertions(+), 113 deletions(-) create mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl create mode 100644 test/csc_core.jl diff --git a/Project.toml b/Project.toml index cd1a353..18f9c2f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,19 +1,20 @@ name = "SparseColumnPivotedQR" uuid = "a57abbd0-fea5-4d57-96be-5e525945e8e4" authors = ["Chris Rackauckas and contributors"] -version = "0.1.0" +version = "0.2.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [weakdeps] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [extensions] SparseColumnPivotedQRAMDExt = "AMD" +SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR" [compat] AMD = "0.5" @@ -27,7 +28,8 @@ julia = "1.10" AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["AMD", "ForwardDiff", "Random", "Test"] +test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"] diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl new file mode 100644 index 0000000..6e643bd --- /dev/null +++ b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl @@ -0,0 +1,67 @@ +module SparseColumnPivotedQRSparseMatricesCSRExt + +using SparseColumnPivotedQR +using LinearAlgebra +using SparseArrays +using SparseMatricesCSR +using PrecompileTools + +import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!, + CSRQRSymbolic, CSRQRFactorization + +# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on. +# This is the same CSR -> CSC conversion the kernel performed internally before +# the CSC-native refactor; it now lives here so the core never depends on +# `SparseMatricesCSR`. +@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A) + +function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default) + return csr_analyze(_to_csc(A); ordering = ordering) +end + +function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...) + return csr_factor(_to_csc(A), sym; kwargs...) +end + +function csr_qr(A::SparseMatrixCSR; kwargs...) + return csr_qr(_to_csc(A); kwargs...) +end + +function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...) + return csr_refactor!(F, _to_csc(A); kwargs...) +end + +# Keep the CSR entry points specialized in the package image. Mirrors the +# core workload, but on `SparseMatrixCSR` inputs through the conversion path. +@setup_workload begin + @compile_workload begin + for T in (Float64, Float32, ComplexF64, ComplexF32) + for Ti in (Int32, Int64) + rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4] + cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5] + vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1] + A = sparsecsr(rows, cols, vals, 6, 6) + b = ones(T, 6) + + F = csr_qr(A; ordering = :natural) + F \ b + rank(F) + + sym = csr_analyze(A; ordering = :natural) + G = csr_factor(A, sym) + csr_refactor!(G, A) + G \ b + + drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6] + dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1] + dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4] + Ad = sparsecsr(drows, dcols, dvals, 6, 6) + Fd = csr_qr(Ad; ordering = :natural) + Fd \ b + rank(Fd) + end + end + end +end + +end # module diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl index fd53456..2b2f5f7 100644 --- a/src/SparseColumnPivotedQR.jl +++ b/src/SparseColumnPivotedQR.jl @@ -2,7 +2,7 @@ module SparseColumnPivotedQR using LinearAlgebra using SparseArrays -using SparseMatricesCSR +using SparseArrays: getcolptr using PrecompileTools import LinearAlgebra: ldiv!, rank @@ -12,11 +12,6 @@ export csr_qr, csr_analyze, csr_factor, csr_refactor!, has_amd_extension, CSRQRSymbolic, CSRQRFactorization -# CSR offset: rowptr stores 1-based if Bi == 1, 0-based if Bi == 0. -@inline function getoffset(::SparseMatrixCSR{Bi}) where {Bi} - return Bi == 1 ? 0 : 1 -end - # The adaptive-dense fallback finishes the trailing dense block with LAPACK # `geqp3!` / `ormqr!`, which are only defined for the four BLAS float types # (Float32/Float64/ComplexF32/ComplexF64). For any other element type (e.g. @@ -415,8 +410,14 @@ mutable struct CSRQRSymbolic rnz::Int rcount::Vector{Int} # exact nnz per column of R (length n) ordering::Symbol + # CSR pattern of the input (internal layout, 1-based) used by the symbolic + # builders and the deferral/repivot `_rebuild_symbolic_for_q` path. pattern_rowptr::Vector{Int} pattern_colval::Vector{Int} + # Native CSC pattern snapshot (colptr, rowval) of the input, used by + # `_pattern_matches` for the zero-allocation fixed-pattern refactor check. + pattern_colptr::Vector{Int} + pattern_rowval::Vector{Int} # Lazily-attached numeric workspace. Type-erased here because Symbolic # is built without knowing the value-element type T. workspace::Union{Nothing, _CSRQRWorkspace} @@ -473,30 +474,67 @@ Base.eltype(::CSRQRFactorization{T}) where {T} = T # CSR <-> CSC conversion (pattern + values) # --------------------------------------------------------------------------- -function _capture_pattern(A::SparseMatrixCSR{Bi}) where {Bi} - off = getoffset(A) - rowptr = Vector{Int}(undef, length(A.rowptr)) - @inbounds for i in eachindex(A.rowptr) - rowptr[i] = Int(A.rowptr[i]) + off +# Transpose a CSC pattern (colptr, rowval) into the CSR pattern (rowptr, colval) +# the symbolic machinery operates on. Pattern-only (integer) counting-sort +# transpose — no values touched. `colval` lists, within each row, the columns +# in ascending order (a stable byproduct of the column-major scan). +function _csc_pattern_to_csr(colptr::Vector{Int}, rowval::Vector{Int}, m::Int, n::Int) + nnz_total = length(rowval) + rowptr = Vector{Int}(undef, m + 1) + rowcounts = zeros(Int, m) + @inbounds for p in 1:nnz_total + rowcounts[rowval[p]] += 1 end - colval = Vector{Int}(undef, length(A.colval)) - @inbounds for i in eachindex(A.colval) - colval[i] = Int(A.colval[i]) + off + rowptr[1] = 1 + @inbounds for i in 1:m + rowptr[i + 1] = rowptr[i] + rowcounts[i] + end + colval = Vector{Int}(undef, nnz_total) + work = Vector{Int}(undef, m) + @inbounds for i in 1:m + work[i] = rowptr[i] + end + @inbounds for j in 1:n + c1 = colptr[j]; c2 = colptr[j + 1] - 1 + for p in c1:c2 + i = rowval[p] + colval[work[i]] = j + work[i] += 1 + end end return rowptr, colval end -function _pattern_matches(S::CSRQRSymbolic, A::SparseMatrixCSR{Bi}) where {Bi} +# Snapshot a `SparseMatrixCSC`'s structural pattern for the symbolic. Returns +# the CSR pattern (rowptr, colval) used by the symbolic builders and the +# rebuild path, plus a copy of the native CSC pattern (colptr, rowval) used by +# `_pattern_matches` for the zero-allocation refactor fast path. +function _capture_pattern(A::SparseMatrixCSC) + m, n = size(A) + # Normalize to `Vector{Int}`: the symbolic machinery is `Int`-indexed, and + # a CSC built from `Int32` arrays would otherwise carry a narrower index + # type through the captured pattern. + colptr = convert(Vector{Int}, getcolptr(A)) + rowval = convert(Vector{Int}, rowvals(A)) + rowptr, colval = _csc_pattern_to_csr(colptr, rowval, m, n) + return rowptr, colval, colptr, rowval +end + +# Fast structural-pattern comparison against the CSC snapshot held on the +# symbolic. Compares `colptr`/`rowval` directly so the common fixed-pattern +# `csr_refactor!` is a cheap O(nnz) integer scan with no allocation. +function _pattern_matches(S::CSRQRSymbolic, A::SparseMatrixCSC) m, n = size(A) (m == S.m && n == S.n) || return false - length(A.rowptr) == length(S.pattern_rowptr) || return false - length(A.colval) == length(S.pattern_colval) || return false - off = getoffset(A) - @inbounds for i in eachindex(A.rowptr) - Int(A.rowptr[i]) + off == S.pattern_rowptr[i] || return false + colptr = getcolptr(A) + rowval = rowvals(A) + length(colptr) == length(S.pattern_colptr) || return false + length(rowval) == length(S.pattern_rowval) || return false + @inbounds for i in eachindex(colptr) + colptr[i] == S.pattern_colptr[i] || return false end - @inbounds for i in eachindex(A.colval) - Int(A.colval[i]) + off == S.pattern_colval[i] || return false + @inbounds for p in eachindex(rowval) + rowval[p] == S.pattern_rowval[p] || return false end return true end @@ -854,13 +892,18 @@ end # --------------------------------------------------------------------------- """ - csr_analyze(A::SparseMatrixCSR; ordering=:default) -> CSRQRSymbolic + csr_analyze(A::SparseMatrixCSC; ordering=:default) -> CSRQRSymbolic Symbolic analysis phase for the sparse column-pivoted Householder QR factorization. Computes the column permutation `q`, row permutation `pinv`, column elimination tree, per-row `leftmost`, and the upper-bound sizes of the V (Householder) and R buffers. +The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its +`colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate +allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted +when that package is loaded, via an extension that converts to CSC. + `ordering` selects the column ordering: * `:default` — `:amd` if the AMD.jl extension is loaded (`using AMD`), `:natural` otherwise. **This is the default.** @@ -878,9 +921,9 @@ the V (Householder) and R buffers. Returns a `CSRQRSymbolic` that can be passed to `csr_factor` and reused via `csr_refactor!` for matrices with identical sparsity patterns. """ -function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where {Bi} +function csr_analyze(A::SparseMatrixCSC; ordering::Symbol = :default) m, n = size(A) - rowptr, colval = _capture_pattern(A) + rowptr, colval, colptr_snap, rowval_snap = _capture_pattern(A) ordering_use = _resolve_ordering(ordering) if ( ordering_use === :amd || ordering_use === :colamd || @@ -910,13 +953,13 @@ function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where return CSRQRSymbolic( m, n, m2_a, q_a, pinv_a, parent_a, leftmost_a, vnz_a, rnz_a, rcount_a, :amd, - rowptr, colval, nothing + rowptr, colval, colptr_snap, rowval_snap, nothing ) else return CSRQRSymbolic( m, n, m2_n, q_n, pinv_n, parent_n, leftmost_n, vnz_n, rnz_n, rcount_n, :natural, - rowptr, colval, nothing + rowptr, colval, colptr_snap, rowval_snap, nothing ) end end @@ -925,12 +968,13 @@ function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where _build_symbolic(rowptr, colval, m, n, ordering_use) return CSRQRSymbolic( m, n, m2, q, pinv, parent, leftmost_perm, - vnz, rnz, rcount, ordering_use, rowptr, colval, nothing + vnz, rnz, rcount, ordering_use, rowptr, colval, + colptr_snap, rowval_snap, nothing ) end """ - csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; tol=nothing, drop_tol=0, + csr_factor(A::SparseMatrixCSC, sym::CSRQRSymbolic; tol=nothing, drop_tol=0, adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization Numeric factorization given a `CSRQRSymbolic`. Implements the Davis @@ -959,12 +1003,12 @@ and finishes with LAPACK `geqp3!`. The composed column permutation is stored in `F.q_eff`. """ function csr_factor( - A::SparseMatrixCSR{Bi, T}, sym::CSRQRSymbolic; + A::SparseMatrixCSC{T}, sym::CSRQRSymbolic; tol::Union{Nothing, Real} = nothing, drop_tol::Real = 0, adaptive_dense::Bool = false, dense_threshold::Real = 0.4 - ) where {Bi, T} + ) where {T} return _factor_kernel( A, sym, tol, nothing, real(T)(drop_tol), adaptive_dense, real(T)(dense_threshold) @@ -972,7 +1016,7 @@ function csr_factor( end """ - csr_qr(A::SparseMatrixCSR; tol=nothing, ordering=:default, drop_tol=0, + csr_qr(A::SparseMatrixCSC; tol=nothing, ordering=:default, drop_tol=0, adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization One-shot convenience: equivalent to `csr_factor(A, csr_analyze(A; ordering); tol)`. @@ -984,13 +1028,13 @@ typical dense-fill matrices that arise from nonlinear solver linsolves, for matrices whose columns are already well-ordered. """ function csr_qr( - A::SparseMatrixCSR{Bi, T}; + A::SparseMatrixCSC; tol::Union{Nothing, Real} = nothing, ordering::Symbol = :default, drop_tol::Real = 0, adaptive_dense::Bool = false, dense_threshold::Real = 0.4 - ) where {Bi, T} + ) sym = csr_analyze(A; ordering = ordering) return csr_factor( A, sym; tol = tol, drop_tol = drop_tol, @@ -999,7 +1043,7 @@ function csr_qr( end """ - csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; tol=nothing, drop_tol=0, + csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSC; tol=nothing, drop_tol=0, adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization Numeric refactorization, mutating `F` in place. If the sparsity pattern of @@ -1019,12 +1063,12 @@ same meaning as in [`csr_factor`](@ref). """ function csr_refactor!( F::CSRQRFactorization{T}, - A::SparseMatrixCSR{Bi}; + A::SparseMatrixCSC; tol::Union{Nothing, Real} = nothing, drop_tol::Real = 0, adaptive_dense::Bool = false, dense_threshold::Real = 0.4 - ) where {T, Bi} + ) where {T} dt = real(T)(drop_tol) dth = real(T)(dense_threshold) if _pattern_matches(F.sym, A) @@ -1064,25 +1108,25 @@ end end function _factor_kernel( - A::SparseMatrixCSR{Bi, T}, sym::CSRQRSymbolic, + A::SparseMatrixCSC{T}, sym::CSRQRSymbolic, tol::Union{Nothing, Real}, F::Union{Nothing, CSRQRFactorization}, drop_tol::Real = zero(real(T)), adaptive_dense::Bool = false, dense_threshold::Real = real(T)(0.4) - ) where {Bi, T} + ) where {T} m, n = size(A) (m == sym.m && n == sym.n) || throw(DimensionMismatch("A is $m x $n but symbolic is $(sym.m) x $(sym.n)")) RT = real(T) - nnz_A = length(A.colval) + nnz_A = length(rowvals(A)) ws = _get_workspace(T, sym, nnz_A)::_CSRQRWorkspace{T, RT} - # Single-pass CSR -> CSC(A) conversion + Frobenius norm + column-norm - # cache (the column norms feed the zero-column check below). All buffers - # come from the workspace. - fro2 = _csr_to_csc_with_norms!(ws, A) + # Copy A's CSC arrays into the workspace + compute the Frobenius norm and + # per-column squared norms (the column norms feed the zero-column check + # below). All buffers come from the workspace; no transpose needed. + fro2 = _csc_copy_with_norms!(ws, A) fro = sqrt(fro2) tol_use = tol === nothing ? RT(eps(RT) * max(m, n)) * fro : RT(max(tol, 0)) tol2 = tol_use * tol_use @@ -1229,7 +1273,8 @@ function _factor_kernel( sym_next = CSRQRSymbolic( sym_cur.m, sym_cur.n, m2n, qn, pinvn, parentn, leftmostn, vnzn, rnzn, rcountn, sym_cur.ordering, sym_cur.pattern_rowptr, - sym_cur.pattern_colval, sym_cur.workspace + sym_cur.pattern_colval, sym_cur.pattern_colptr, + sym_cur.pattern_rowval, sym_cur.workspace ) ws_next = _get_workspace(T, sym_next, nnz_A)::_CSRQRWorkspace{T, RT} _permute_pq!(ws_next, sym_next.pinv, sym_next.q, m, n) @@ -1288,61 +1333,41 @@ function _factor_kernel( return F_out end -# In-place CSR -> CSC of A, plus per-column squared norms and ||A||_F^2. -# Writes into ws.colptr_A, ws.rowval_A, ws.nzval_A, ws.col_nrm2. Returns -# the Frobenius-norm-squared. ws.work_perm doubles as a temporary copy of -# colptr used during scatter. -function _csr_to_csc_with_norms!( +# Copy A's CSC arrays into the workspace, plus per-column squared norms and +# ||A||_F^2. Writes into ws.colptr_A, ws.rowval_A, ws.nzval_A, ws.col_nrm2. +# Returns the Frobenius-norm-squared. The input is already CSC, so this is a +# straight copy with the column norms read directly off `nzval` per column — +# no transpose, no scatter. +function _csc_copy_with_norms!( ws::_CSRQRWorkspace{T, RT}, - A::SparseMatrixCSR{Bi, T} - ) where {Bi, T, RT} + A::SparseMatrixCSC{T} + ) where {T, RT} m, n = size(A) - off = getoffset(A) - rowptr = A.rowptr - colval = A.colval - nzval_in = A.nzval - nnz_total = length(colval) + colptr_in = getcolptr(A) + rowval_in = rowvals(A) + nzval_in = nonzeros(A) colptr = ws.colptr_A rowval = ws.rowval_A nzval = ws.nzval_A col_nrm2 = ws.col_nrm2 - work = ws.work_perm - # Reset accumulators. - @inbounds for j in 1:n - col_nrm2[j] = zero(RT) - end - # Use colptr as a count buffer first (we'll convert in place). + fro2 = zero(RT) @inbounds for j in 1:(n + 1) - colptr[j] = 0 - end - @inbounds for p in 1:nnz_total - colptr[Int(colval[p]) + off + 1] += 1 + colptr[j] = colptr_in[j] end - # Cumsum: colptr[j+1] = colptr[j] + count[j]. - colptr[1] = 1 @inbounds for j in 1:n - colptr[j + 1] += colptr[j] - end - @inbounds for j in 1:(n + 1) - work[j] = colptr[j] - end - fro2 = zero(RT) - @inbounds for i in 1:m - r1 = rowptr[i] + off - r2 = rowptr[i + 1] + off - 1 - for p in r1:r2 - j = Int(colval[p]) + off - slot = work[j] + c1 = colptr_in[j]; c2 = colptr_in[j + 1] - 1 + cn = zero(RT) + for p in c1:c2 v = nzval_in[p] - rowval[slot] = i - nzval[slot] = v - work[j] = slot + 1 + rowval[p] = rowval_in[p] + nzval[p] = v v2 = abs2(v) - col_nrm2[j] += v2 - fro2 += v2 + cn += v2 end + col_nrm2[j] = cn + fro2 += cn end return fro2 end @@ -1450,7 +1475,8 @@ function _maybe_repivot_zero_cols_from_norms( return CSRQRSymbolic( sym.m, sym.n, m2_2, q2, pinv2, parent2, leftmost2, vnz2, rnz2, rcount2, sym.ordering, sym.pattern_rowptr, - sym.pattern_colval, sym.workspace + sym.pattern_colval, sym.pattern_colptr, sym.pattern_rowval, + sym.workspace ) end @@ -2395,10 +2421,12 @@ end # --------------------------------------------------------------------------- # # Exercise the hot paths (analyze / factor / refactor / solve / rank / size) -# for the four standard BLAS element types and both index types, on tiny -# well-conditioned and rank-deficient inputs, so the specialized methods land -# in the package image. Only the `:natural` ordering is exercised: `:amd` -# lives in a weak-dep extension and cannot be loaded from here. +# for the four standard BLAS element types, on tiny well-conditioned and +# rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized +# methods land in the package image. Only the `:natural` ordering is exercised: +# `:amd` lives in a weak-dep extension and cannot be loaded from here. The +# `SparseMatrixCSR` overloads live in their own extension, which carries its +# own workload. @setup_workload begin @compile_workload begin @@ -2408,7 +2436,7 @@ end rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4] cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5] vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1] - A = sparsecsr(rows, cols, vals, 6, 6) + A = sparse(rows, cols, vals, 6, 6) b = ones(T, 6) F = csr_qr(A; ordering = :natural) @@ -2428,7 +2456,7 @@ end drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6] dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1] dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4] - Ad = sparsecsr(drows, dcols, dvals, 6, 6) + Ad = sparse(drows, dcols, dvals, 6, 6) Fd = csr_qr(Ad; ordering = :natural) Fd \ b rank(Fd) diff --git a/test/csc_core.jl b/test/csc_core.jl new file mode 100644 index 0000000..2cdf193 --- /dev/null +++ b/test/csc_core.jl @@ -0,0 +1,117 @@ +# CSC-native core tests. Run in a SEPARATE process that does NOT load +# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the +# native path with the CSR extension absent. (Driven from `runtests.jl`.) +using Test +using LinearAlgebra +using SparseArrays +using Random +using SparseColumnPivotedQR +using AMD # AMD extension is independent of the CSR extension + +@assert Base.get_extension( + SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt +) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process" + +@testset "CSC-native core (no SparseMatricesCSR loaded)" begin + for T in (Float64, ComplexF64) + cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M) + + @testset "square full-rank ($T)" begin + Random.seed!(1) + n = 40 + base = sprand(Float64, n, n, 0.2) + 5I + A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base))) + b = ones(T, n) + F = csr_qr(A) + x = F \ b + @test norm(A * x - b) / norm(b) < 1.0e-9 + @test rank(F) == n + end + + @testset "tall overdetermined least-squares ($T)" begin + Random.seed!(2) + m, n = 60, 25 + base = sprand(Float64, m, n, 0.3) + base = base + sparse(1:n, 1:n, ones(n), m, n) + A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base))) + b = randn(T, m) + F = csr_qr(A) + x = F \ b + # Full-column-rank tall LS: the dense `\` is the unique LS solution. + @test x ≈ Matrix(A) \ b rtol = 1.0e-8 + @test rank(F) == n + end + + @testset "wide underdetermined ($T)" begin + Random.seed!(3) + m, n = 20, 45 + base = sprand(Float64, m, n, 0.3) + base = base + sparse(1:m, 1:m, ones(m), m, n) + A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base))) + b = randn(T, m) + F = csr_qr(A) + x = F \ b + @test norm(A * x - b) / norm(b) < 1.0e-8 # consistent system + @test rank(F) == m + end + + @testset "rank-deficient ($T)" begin + Random.seed!(4) + m, n = 40, 30 + base = sprand(Float64, m, n, 0.25) + base = base + sparse(1:n, 1:n, ones(n), m, n) + M = Matrix(base) + M[:, n] = M[:, 1] # duplicate column -> rank n-1 + A = convert(SparseMatrixCSC{T, Int}, cv(M)) + b = randn(T, m) + F = csr_qr(A) + @test rank(F) == n - 1 + x = F \ b + # Minimum-residual solve: the residual must match the true + # least-squares residual (computed via the dense pseudoinverse, + # which is well-defined for a rank-deficient overdetermined system). + r_csc = norm(A * x - b) + r_min = norm(A * (pinv(Matrix(A)) * b) - b) + @test r_csc ≈ r_min rtol = 1.0e-6 + end + + @testset "csr_refactor! reuse path ($T)" begin + Random.seed!(5) + n = 30 + sp = sprand(Float64, n, n, 0.2) + rows, cols, _ = findnz(sp) + mk(v) = convert( + SparseMatrixCSC{T, Int}, + cv(Matrix(sparse(rows, cols, v, n, n) + 4I)), + ) + A1 = mk(randn(length(rows))) + A2 = mk(randn(length(rows))) + b = randn(T, n) + F = csr_qr(A1; ordering = :amd) + csr_refactor!(F, A2) + x2 = F \ b + @test norm(A2 * x2 - b) / norm(b) < 1.0e-9 + csr_refactor!(F, A1) + x1 = F \ b + @test norm(A1 * x1 - b) / norm(b) < 1.0e-9 + end + + @testset "csr_refactor! zero-alloc steady state ($T)" begin + Random.seed!(6) + n = 30 + sp = sprand(Float64, n, n, 0.2) + rows, cols, _ = findnz(sp) + mk(v) = convert( + SparseMatrixCSC{T, Int}, + cv(Matrix(sparse(rows, cols, v, n, n) + 4I)), + ) + A1 = mk(randn(length(rows))) + A2 = mk(randn(length(rows))) + F = csr_qr(A1; ordering = :amd) + csr_refactor!(F, A2) # warm + csr_refactor!(F, A1) + @test (@allocated csr_refactor!(F, A2)) == 0 + @test (@allocated csr_refactor!(F, A1)) == 0 + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index dec76ce..4cff47c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,8 +19,8 @@ end # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one # `ldiv!`, measured inside a function barrier so the `@allocated` is not # perturbed by boxed captures in the calling (testset) scope. -function _refactor_alloc(F, Acsr) - return @allocated csr_refactor!(F, Acsr; adaptive_dense = true) +function _refactor_alloc(F, A) + return @allocated csr_refactor!(F, A; adaptive_dense = true) end function _ldiv_alloc(x, F, b) return @allocated ldiv!(x, F, b) @@ -39,17 +39,17 @@ function _dense_zeroalloc_case(::Type{T}, A64, n) where {T} SparseMatrixCSC{T, Int}, T <: Complex ? (T.(A64) .+ T(0.1im) .* (A64 .!= 0)) : T.(A64), ) - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + # Native CSC path: the zero-alloc contract applies to `SparseMatrixCSC`. b = ones(T, size(A, 1)) x = zeros(T, n) - sym = csr_analyze(Acsr; ordering = :amd) - F = csr_factor(Acsr, sym; adaptive_dense = true) - csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x, F, b) # warm - csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x, F, b) - ra = _refactor_alloc(F, Acsr) + sym = csr_analyze(A; ordering = :amd) + F = csr_factor(A, sym; adaptive_dense = true) + csr_refactor!(F, A; adaptive_dense = true); ldiv!(x, F, b) # warm + csr_refactor!(F, A; adaptive_dense = true); ldiv!(x, F, b) + ra = _refactor_alloc(F, A) la = _ldiv_alloc(x, F, b) - x1 = zeros(T, n); csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x1, F, b) - x2 = zeros(T, n); csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x2, F, b) + x1 = zeros(T, n); csr_refactor!(F, A; adaptive_dense = true); ldiv!(x1, F, b) + x2 = zeros(T, n); csr_refactor!(F, A; adaptive_dense = true); ldiv!(x2, F, b) return F.k_dense, ra, la, x1 == x2 end @@ -605,19 +605,22 @@ end v1 = randn(length(rows)); v2 = randn(length(rows)) A1 = sparse(rows, cols, v1, n, n) + 4 * sparse(I, n, n) A2 = sparse(rows, cols, v2, n, n) + 4 * sparse(I, n, n) - Acsr1 = build_csr(Matrix(A1)) - Acsr2 = build_csr(Matrix(A2)) b = randn(n) - F = csr_qr(Acsr1; ordering = :amd) + # The zero-allocation contract is on the native `SparseMatrixCSC` path: + # its `colptr`/`rowval`/`nzval` are read straight into the pooled + # workspace with no transpose or intermediate allocation. (The CSR + # extension necessarily allocates one `SparseMatrixCSC` per call to + # convert, so it is not — and is not expected to be — zero-alloc.) + F = csr_qr(A1; ordering = :amd) # Warm up. - csr_refactor!(F, Acsr2) - csr_refactor!(F, Acsr1) + csr_refactor!(F, A2) + csr_refactor!(F, A1) # Steady-state: refactor with the cached workspace must allocate 0 bytes. - nbytes = @allocated csr_refactor!(F, Acsr2) + nbytes = @allocated csr_refactor!(F, A2) @test nbytes == 0 - nbytes = @allocated csr_refactor!(F, Acsr1) + nbytes = @allocated csr_refactor!(F, A1) @test nbytes == 0 # Sanity check the solution. @@ -1044,3 +1047,20 @@ end end end + +# The CSC-native core must work with the `SparseMatricesCSR` extension absent. +# This process has `using SparseMatricesCSR` loaded (so the extension is +# active), so run the CSC-only checks in a fresh subprocess that never loads +# `SparseMatricesCSR`. +@testset "CSC-native core in a SparseMatricesCSR-free process" begin + code = """ + push!(LOAD_PATH, "@") + include($(repr(joinpath(@__DIR__, "csc_core.jl")))) + """ + p = run( + ignorestatus( + `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code` + ), + ) + @test p.exitcode == 0 +end From 1fd83baaa3c902ede4c4429c1479f9a6d5c23993 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Fri, 5 Jun 2026 05:41:43 -0400 Subject: [PATCH 2/3] Drop SparseMatricesCSR entirely; SparseMatrixCSC is the sole native API Remove the optional SparseMatricesCSR extension and all SparseMatricesCSR references so the package has no dependency on it. The csr_* public names are kept (they accept SparseMatrixCSC). CSR (SparseMatrixCSR) input support is carved into a separate, optional PR stacked on this one. - Delete ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl. - Remove SparseMatricesCSR from [weakdeps]/[extensions]/[compat]/[extras]/[targets]. - Make the tests CSC-only (helpers now return SparseMatrixCSC); fold the CSC-core checks into a plain include (no subprocess isolation needed once CSR can never be loaded). - Scrub CSR mentions from src docstrings, the precompile comment, README, and docs. Co-Authored-By: Chris Rackauckas --- Project.toml | 6 +- README.md | 14 ++-- docs/Project.toml | 6 +- docs/src/index.md | 17 +++-- ...arseColumnPivotedQRSparseMatricesCSRExt.jl | 67 ------------------- src/SparseColumnPivotedQR.jl | 9 +-- test/csc_core.jl | 14 ++-- test/runtests.jl | 46 ++++--------- 8 files changed, 40 insertions(+), 139 deletions(-) delete mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl diff --git a/Project.toml b/Project.toml index 18f9c2f..94c6a18 100644 --- a/Project.toml +++ b/Project.toml @@ -10,26 +10,22 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" -SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [extensions] SparseColumnPivotedQRAMDExt = "AMD" -SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR" [compat] AMD = "0.5" LinearAlgebra = "1" PrecompileTools = "1" SparseArrays = "1" -SparseMatricesCSR = "0.6" julia = "1.10" [extras] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"] +test = ["AMD", "ForwardDiff", "Random", "Test"] diff --git a/README.md b/README.md index 10b22af..203f3cc 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ A pure-Julia, rank-revealing, column-pivoted Householder QR factorization that operates directly on -[`SparseMatrixCSR`](https://github.com/JuliaSmoothOptimizers/SparseMatricesCSR.jl) +[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/) sparse matrices. Targets the same "small-to-medium sparse" niche as KLU does for LU — low symbolic-phase overhead, no BLAS-3 / multifrontal machinery — while preserving the rank-revealing guarantees of LAPACK's column-pivoted QR. @@ -29,13 +29,13 @@ version of the documentation, which contains the unreleased features. using Pkg Pkg.add("SparseColumnPivotedQR") -using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR +using SparseArrays, SparseColumnPivotedQR -A = SparseMatrixCSR(sparse([1.0 0 2 0 0; - 0 3 0 0 1; - 4 0 5 0 0; - 0 0 0 6 0; - 0 7 0 0 8])) +A = sparse([1.0 0 2 0 0; + 0 3 0 0 1; + 4 0 5 0 0; + 0 0 0 6 0; + 0 7 0 0 8]) b = [1.0, 2.0, 3.0, 4.0, 5.0] F = csr_qr(A) diff --git a/docs/Project.toml b/docs/Project.toml index 3414357..dc79fc5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,11 +1,11 @@ [deps] +AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseColumnPivotedQR = "a57abbd0-fea5-4d57-96be-5e525945e8e4" -SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [compat] +AMD = "0.5" Documenter = "1" SparseArrays = "1" -SparseColumnPivotedQR = "0.1" -SparseMatricesCSR = "0.6" +SparseColumnPivotedQR = "0.2" diff --git a/docs/src/index.md b/docs/src/index.md index bdca4f7..ebefc5e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,7 +4,7 @@ SparseColumnPivotedQR.jl is a component of the [SciML](https://sciml.ai/) ecosystem providing a pure-Julia, rank-revealing, column-pivoted Householder QR factorization that operates directly on -[`SparseMatrixCSR`](https://github.com/gridap/SparseMatricesCSR.jl) +[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/) sparse matrices. The package targets the same "small-to-medium sparse" niche as KLU does for @@ -23,16 +23,15 @@ Pkg.add("SparseColumnPivotedQR") ## Quick start ```julia -using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR +using SparseArrays, SparseColumnPivotedQR using AMD # enables the recommended AMD column ordering # A 5×5 sparse matrix and a right-hand side. -A_csc = sparse([1.0 0 2 0 0; - 0 3 0 0 1; - 4 0 5 0 0; - 0 0 0 6 0; - 0 7 0 0 8]) -A = SparseMatrixCSR(A_csc) +A = sparse([1.0 0 2 0 0; + 0 3 0 0 1; + 4 0 5 0 0; + 0 0 0 6 0; + 0 7 0 0 8]) b = [1.0, 2.0, 3.0, 4.0, 5.0] # One-shot factor + solve. @@ -93,7 +92,7 @@ least-squares solution whose residual matches the SVD pseudoinverse minimum to floating-point precision. ```julia -A_singular = SparseMatrixCSR(sparse([1.0 2.0; 0.5 1.0; 2.0 4.0])) # rank 1 +A_singular = sparse([1.0 2.0; 0.5 1.0; 2.0 4.0]) # rank 1 b = [1.0, 1.0, 1.0] F = csr_qr(A_singular) rank(F) # → 1 diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl deleted file mode 100644 index 6e643bd..0000000 --- a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl +++ /dev/null @@ -1,67 +0,0 @@ -module SparseColumnPivotedQRSparseMatricesCSRExt - -using SparseColumnPivotedQR -using LinearAlgebra -using SparseArrays -using SparseMatricesCSR -using PrecompileTools - -import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!, - CSRQRSymbolic, CSRQRFactorization - -# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on. -# This is the same CSR -> CSC conversion the kernel performed internally before -# the CSC-native refactor; it now lives here so the core never depends on -# `SparseMatricesCSR`. -@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A) - -function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default) - return csr_analyze(_to_csc(A); ordering = ordering) -end - -function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...) - return csr_factor(_to_csc(A), sym; kwargs...) -end - -function csr_qr(A::SparseMatrixCSR; kwargs...) - return csr_qr(_to_csc(A); kwargs...) -end - -function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...) - return csr_refactor!(F, _to_csc(A); kwargs...) -end - -# Keep the CSR entry points specialized in the package image. Mirrors the -# core workload, but on `SparseMatrixCSR` inputs through the conversion path. -@setup_workload begin - @compile_workload begin - for T in (Float64, Float32, ComplexF64, ComplexF32) - for Ti in (Int32, Int64) - rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4] - cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5] - vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1] - A = sparsecsr(rows, cols, vals, 6, 6) - b = ones(T, 6) - - F = csr_qr(A; ordering = :natural) - F \ b - rank(F) - - sym = csr_analyze(A; ordering = :natural) - G = csr_factor(A, sym) - csr_refactor!(G, A) - G \ b - - drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6] - dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1] - dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4] - Ad = sparsecsr(drows, dcols, dvals, 6, 6) - Fd = csr_qr(Ad; ordering = :natural) - Fd \ b - rank(Fd) - end - end - end -end - -end # module diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl index 2b2f5f7..246f6a8 100644 --- a/src/SparseColumnPivotedQR.jl +++ b/src/SparseColumnPivotedQR.jl @@ -899,10 +899,9 @@ factorization. Computes the column permutation `q`, row permutation `pinv`, column elimination tree, per-row `leftmost`, and the upper-bound sizes of the V (Householder) and R buffers. -The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its +The input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its `colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate -allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted -when that package is loaded, via an extension that converts to CSC. +allocation. `ordering` selects the column ordering: * `:default` — `:amd` if the AMD.jl extension is loaded (`using AMD`), @@ -2424,9 +2423,7 @@ end # for the four standard BLAS element types, on tiny well-conditioned and # rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized # methods land in the package image. Only the `:natural` ordering is exercised: -# `:amd` lives in a weak-dep extension and cannot be loaded from here. The -# `SparseMatrixCSR` overloads live in their own extension, which carries its -# own workload. +# `:amd` lives in a weak-dep extension and cannot be loaded from here. @setup_workload begin @compile_workload begin diff --git a/test/csc_core.jl b/test/csc_core.jl index 2cdf193..72ee679 100644 --- a/test/csc_core.jl +++ b/test/csc_core.jl @@ -1,18 +1,14 @@ -# CSC-native core tests. Run in a SEPARATE process that does NOT load -# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the -# native path with the CSR extension absent. (Driven from `runtests.jl`.) +# CSC-native core tests: the `SparseMatrixCSC` API is the native path, read +# directly with no transpose or intermediate allocation. (Driven from +# `runtests.jl`.) using Test using LinearAlgebra using SparseArrays using Random using SparseColumnPivotedQR -using AMD # AMD extension is independent of the CSR extension +using AMD -@assert Base.get_extension( - SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt -) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process" - -@testset "CSC-native core (no SparseMatricesCSR loaded)" begin +@testset "CSC-native core" begin for T in (Float64, ComplexF64) cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M) diff --git a/test/runtests.jl b/test/runtests.jl index 4cff47c..43b7d14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,20 +1,15 @@ using Test using LinearAlgebra using SparseArrays -using SparseMatricesCSR using Random using SparseColumnPivotedQR using AMD # trigger AMD extension using ForwardDiff -# helper: convert CSC -> CSR -to_csr(A::SparseMatrixCSC) = SparseMatrixCSR(transpose(sparse(transpose(A)))) - -# Slightly nicer helper: build a CSR from a Julia matrix -function build_csr(A::AbstractMatrix{T}) where {T} - Acsc = sparse(A) - return SparseMatrixCSR(transpose(sparse(transpose(Acsc)))) -end +# The native API is `SparseMatrixCSC`; these helpers just normalize an input to +# a CSC matrix. +to_csr(A::SparseMatrixCSC) = A +build_csr(A::AbstractMatrix) = sparse(A) # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one # `ldiv!`, measured inside a function barrier so the `@allocated` is not @@ -285,7 +280,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + Acsr = sparse(A) F = csr_qr(Acsr) Fspqr = qr(A) xspqr = Fspqr \ b @@ -457,7 +452,7 @@ end text = read(f, String) lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + Acsr = sparse(A) sym_default = csr_analyze(Acsr) sym_amd = csr_analyze(Acsr; ordering = :amd) # :default == :amd here. @@ -545,7 +540,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + Acsr = sparse(A) sym_adapt = csr_analyze(Acsr; ordering = :adaptive) @test sym_adapt.ordering === :amd F = csr_factor(Acsr, sym_adapt) @@ -609,9 +604,7 @@ end # The zero-allocation contract is on the native `SparseMatrixCSC` path: # its `colptr`/`rowval`/`nzval` are read straight into the pooled - # workspace with no transpose or intermediate allocation. (The CSR - # extension necessarily allocates one `SparseMatrixCSC` per call to - # convert, so it is not — and is not expected to be — zero-alloc.) + # workspace with no transpose or intermediate allocation. F = csr_qr(A1; ordering = :amd) # Warm up. csr_refactor!(F, A2) @@ -660,7 +653,7 @@ end A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) all(isfinite, b) || continue - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + Acsr = sparse(A) x_sparse = csr_qr(Acsr; ordering = :amd) \ b Fd = csr_qr(Acsr; ordering = :amd, adaptive_dense = true) x_dense = Fd \ b @@ -915,7 +908,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) + Acsr = sparse(A) F = csr_qr(Acsr; adaptive_dense = true) Fspqr = qr(A) xspqr = Fspqr \ b @@ -1048,19 +1041,6 @@ end end -# The CSC-native core must work with the `SparseMatricesCSR` extension absent. -# This process has `using SparseMatricesCSR` loaded (so the extension is -# active), so run the CSC-only checks in a fresh subprocess that never loads -# `SparseMatricesCSR`. -@testset "CSC-native core in a SparseMatricesCSR-free process" begin - code = """ - push!(LOAD_PATH, "@") - include($(repr(joinpath(@__DIR__, "csc_core.jl")))) - """ - p = run( - ignorestatus( - `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code` - ), - ) - @test p.exitcode == 0 -end +# Dedicated CSC-native core checks (the `SparseMatrixCSC` API is the native, +# allocation-free path). +include(joinpath(@__DIR__, "csc_core.jl")) From 9f1f800b5a40045b222ca82dc04f1d4525911dc6 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Fri, 5 Jun 2026 05:46:03 -0400 Subject: [PATCH 3/3] Re-add optional SparseMatrixCSR support via SparseMatricesCSR extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on the CSC-native core (#35). Re-adds SparseMatrixCSR input support behind the optional SparseMatricesCSR weak-dep extension, which converts CSR to CSC and dispatches into the unchanged CSC kernel. The package's core stays pure CSC + SparseArrays; CSR is only pulled in when `using SparseMatricesCSR`. - Restore ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl (csr_qr/csr_analyze/ csr_factor/csr_refactor! on SparseMatrixCSR, plus its own precompile workload). - Restore SparseMatricesCSR in [weakdeps]/[extensions]/[compat]/[extras]/[targets]. - Restore the CSR tests (including the subprocess check that the CSC core works with the CSR extension absent) and the CSR docstring/precompile notes, README, and docs. Note: the current CSR path reconverts CSR -> CSC on every csr_refactor! call (~17 KB/call, ~1.8x slower; see the #35 benchmark comment). A future revision should cache a CSC buffer and reuse it, and ideally add a zero-copy CSR fast path (SparseMatrixCSR(A) == SparseMatrixCSC(Aᵀ), so the CSR arrays can be reinterpreted as the transpose's CSC with no copy). Not implemented here. Co-Authored-By: Chris Rackauckas --- Project.toml | 6 +- README.md | 14 ++-- docs/Project.toml | 6 +- docs/src/index.md | 17 ++--- ...arseColumnPivotedQRSparseMatricesCSRExt.jl | 67 +++++++++++++++++++ src/SparseColumnPivotedQR.jl | 9 ++- test/csc_core.jl | 14 ++-- test/runtests.jl | 46 +++++++++---- 8 files changed, 139 insertions(+), 40 deletions(-) create mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl diff --git a/Project.toml b/Project.toml index 94c6a18..18f9c2f 100644 --- a/Project.toml +++ b/Project.toml @@ -10,22 +10,26 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [extensions] SparseColumnPivotedQRAMDExt = "AMD" +SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR" [compat] AMD = "0.5" LinearAlgebra = "1" PrecompileTools = "1" SparseArrays = "1" +SparseMatricesCSR = "0.6" julia = "1.10" [extras] AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["AMD", "ForwardDiff", "Random", "Test"] +test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"] diff --git a/README.md b/README.md index 203f3cc..10b22af 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ A pure-Julia, rank-revealing, column-pivoted Householder QR factorization that operates directly on -[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/) +[`SparseMatrixCSR`](https://github.com/JuliaSmoothOptimizers/SparseMatricesCSR.jl) sparse matrices. Targets the same "small-to-medium sparse" niche as KLU does for LU — low symbolic-phase overhead, no BLAS-3 / multifrontal machinery — while preserving the rank-revealing guarantees of LAPACK's column-pivoted QR. @@ -29,13 +29,13 @@ version of the documentation, which contains the unreleased features. using Pkg Pkg.add("SparseColumnPivotedQR") -using SparseArrays, SparseColumnPivotedQR +using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR -A = sparse([1.0 0 2 0 0; - 0 3 0 0 1; - 4 0 5 0 0; - 0 0 0 6 0; - 0 7 0 0 8]) +A = SparseMatrixCSR(sparse([1.0 0 2 0 0; + 0 3 0 0 1; + 4 0 5 0 0; + 0 0 0 6 0; + 0 7 0 0 8])) b = [1.0, 2.0, 3.0, 4.0, 5.0] F = csr_qr(A) diff --git a/docs/Project.toml b/docs/Project.toml index dc79fc5..3414357 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,11 +1,11 @@ [deps] -AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseColumnPivotedQR = "a57abbd0-fea5-4d57-96be-5e525945e8e4" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" [compat] -AMD = "0.5" Documenter = "1" SparseArrays = "1" -SparseColumnPivotedQR = "0.2" +SparseColumnPivotedQR = "0.1" +SparseMatricesCSR = "0.6" diff --git a/docs/src/index.md b/docs/src/index.md index ebefc5e..bdca4f7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,7 +4,7 @@ SparseColumnPivotedQR.jl is a component of the [SciML](https://sciml.ai/) ecosystem providing a pure-Julia, rank-revealing, column-pivoted Householder QR factorization that operates directly on -[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/) +[`SparseMatrixCSR`](https://github.com/gridap/SparseMatricesCSR.jl) sparse matrices. The package targets the same "small-to-medium sparse" niche as KLU does for @@ -23,15 +23,16 @@ Pkg.add("SparseColumnPivotedQR") ## Quick start ```julia -using SparseArrays, SparseColumnPivotedQR +using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR using AMD # enables the recommended AMD column ordering # A 5×5 sparse matrix and a right-hand side. -A = sparse([1.0 0 2 0 0; - 0 3 0 0 1; - 4 0 5 0 0; - 0 0 0 6 0; - 0 7 0 0 8]) +A_csc = sparse([1.0 0 2 0 0; + 0 3 0 0 1; + 4 0 5 0 0; + 0 0 0 6 0; + 0 7 0 0 8]) +A = SparseMatrixCSR(A_csc) b = [1.0, 2.0, 3.0, 4.0, 5.0] # One-shot factor + solve. @@ -92,7 +93,7 @@ least-squares solution whose residual matches the SVD pseudoinverse minimum to floating-point precision. ```julia -A_singular = sparse([1.0 2.0; 0.5 1.0; 2.0 4.0]) # rank 1 +A_singular = SparseMatrixCSR(sparse([1.0 2.0; 0.5 1.0; 2.0 4.0])) # rank 1 b = [1.0, 1.0, 1.0] F = csr_qr(A_singular) rank(F) # → 1 diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl new file mode 100644 index 0000000..6e643bd --- /dev/null +++ b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl @@ -0,0 +1,67 @@ +module SparseColumnPivotedQRSparseMatricesCSRExt + +using SparseColumnPivotedQR +using LinearAlgebra +using SparseArrays +using SparseMatricesCSR +using PrecompileTools + +import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!, + CSRQRSymbolic, CSRQRFactorization + +# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on. +# This is the same CSR -> CSC conversion the kernel performed internally before +# the CSC-native refactor; it now lives here so the core never depends on +# `SparseMatricesCSR`. +@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A) + +function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default) + return csr_analyze(_to_csc(A); ordering = ordering) +end + +function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...) + return csr_factor(_to_csc(A), sym; kwargs...) +end + +function csr_qr(A::SparseMatrixCSR; kwargs...) + return csr_qr(_to_csc(A); kwargs...) +end + +function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...) + return csr_refactor!(F, _to_csc(A); kwargs...) +end + +# Keep the CSR entry points specialized in the package image. Mirrors the +# core workload, but on `SparseMatrixCSR` inputs through the conversion path. +@setup_workload begin + @compile_workload begin + for T in (Float64, Float32, ComplexF64, ComplexF32) + for Ti in (Int32, Int64) + rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4] + cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5] + vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1] + A = sparsecsr(rows, cols, vals, 6, 6) + b = ones(T, 6) + + F = csr_qr(A; ordering = :natural) + F \ b + rank(F) + + sym = csr_analyze(A; ordering = :natural) + G = csr_factor(A, sym) + csr_refactor!(G, A) + G \ b + + drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6] + dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1] + dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4] + Ad = sparsecsr(drows, dcols, dvals, 6, 6) + Fd = csr_qr(Ad; ordering = :natural) + Fd \ b + rank(Fd) + end + end + end +end + +end # module diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl index 246f6a8..2b2f5f7 100644 --- a/src/SparseColumnPivotedQR.jl +++ b/src/SparseColumnPivotedQR.jl @@ -899,9 +899,10 @@ factorization. Computes the column permutation `q`, row permutation `pinv`, column elimination tree, per-row `leftmost`, and the upper-bound sizes of the V (Householder) and R buffers. -The input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its +The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its `colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate -allocation. +allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted +when that package is loaded, via an extension that converts to CSC. `ordering` selects the column ordering: * `:default` — `:amd` if the AMD.jl extension is loaded (`using AMD`), @@ -2423,7 +2424,9 @@ end # for the four standard BLAS element types, on tiny well-conditioned and # rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized # methods land in the package image. Only the `:natural` ordering is exercised: -# `:amd` lives in a weak-dep extension and cannot be loaded from here. +# `:amd` lives in a weak-dep extension and cannot be loaded from here. The +# `SparseMatrixCSR` overloads live in their own extension, which carries its +# own workload. @setup_workload begin @compile_workload begin diff --git a/test/csc_core.jl b/test/csc_core.jl index 72ee679..2cdf193 100644 --- a/test/csc_core.jl +++ b/test/csc_core.jl @@ -1,14 +1,18 @@ -# CSC-native core tests: the `SparseMatrixCSC` API is the native path, read -# directly with no transpose or intermediate allocation. (Driven from -# `runtests.jl`.) +# CSC-native core tests. Run in a SEPARATE process that does NOT load +# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the +# native path with the CSR extension absent. (Driven from `runtests.jl`.) using Test using LinearAlgebra using SparseArrays using Random using SparseColumnPivotedQR -using AMD +using AMD # AMD extension is independent of the CSR extension -@testset "CSC-native core" begin +@assert Base.get_extension( + SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt +) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process" + +@testset "CSC-native core (no SparseMatricesCSR loaded)" begin for T in (Float64, ComplexF64) cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M) diff --git a/test/runtests.jl b/test/runtests.jl index 43b7d14..4cff47c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,15 +1,20 @@ using Test using LinearAlgebra using SparseArrays +using SparseMatricesCSR using Random using SparseColumnPivotedQR using AMD # trigger AMD extension using ForwardDiff -# The native API is `SparseMatrixCSC`; these helpers just normalize an input to -# a CSC matrix. -to_csr(A::SparseMatrixCSC) = A -build_csr(A::AbstractMatrix) = sparse(A) +# helper: convert CSC -> CSR +to_csr(A::SparseMatrixCSC) = SparseMatrixCSR(transpose(sparse(transpose(A)))) + +# Slightly nicer helper: build a CSR from a Julia matrix +function build_csr(A::AbstractMatrix{T}) where {T} + Acsc = sparse(A) + return SparseMatrixCSR(transpose(sparse(transpose(Acsc)))) +end # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one # `ldiv!`, measured inside a function barrier so the `@allocated` is not @@ -280,7 +285,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = sparse(A) + Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) F = csr_qr(Acsr) Fspqr = qr(A) xspqr = Fspqr \ b @@ -452,7 +457,7 @@ end text = read(f, String) lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) - Acsr = sparse(A) + Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) sym_default = csr_analyze(Acsr) sym_amd = csr_analyze(Acsr; ordering = :amd) # :default == :amd here. @@ -540,7 +545,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = sparse(A) + Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) sym_adapt = csr_analyze(Acsr; ordering = :adaptive) @test sym_adapt.ordering === :amd F = csr_factor(Acsr, sym_adapt) @@ -604,7 +609,9 @@ end # The zero-allocation contract is on the native `SparseMatrixCSC` path: # its `colptr`/`rowval`/`nzval` are read straight into the pooled - # workspace with no transpose or intermediate allocation. + # workspace with no transpose or intermediate allocation. (The CSR + # extension necessarily allocates one `SparseMatrixCSC` per call to + # convert, so it is not — and is not expected to be — zero-alloc.) F = csr_qr(A1; ordering = :amd) # Warm up. csr_refactor!(F, A2) @@ -653,7 +660,7 @@ end A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) all(isfinite, b) || continue - Acsr = sparse(A) + Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) x_sparse = csr_qr(Acsr; ordering = :amd) \ b Fd = csr_qr(Acsr; ordering = :amd, adaptive_dense = true) x_dense = Fd \ b @@ -908,7 +915,7 @@ end lines = split(text, '\n'; keepempty = false) A = eval(Meta.parse(strip(lines[1]))) b = eval(Meta.parse(strip(lines[2]))) - Acsr = sparse(A) + Acsr = SparseMatrixCSR(transpose(sparse(transpose(A)))) F = csr_qr(Acsr; adaptive_dense = true) Fspqr = qr(A) xspqr = Fspqr \ b @@ -1041,6 +1048,19 @@ end end -# Dedicated CSC-native core checks (the `SparseMatrixCSC` API is the native, -# allocation-free path). -include(joinpath(@__DIR__, "csc_core.jl")) +# The CSC-native core must work with the `SparseMatricesCSR` extension absent. +# This process has `using SparseMatricesCSR` loaded (so the extension is +# active), so run the CSC-only checks in a fresh subprocess that never loads +# `SparseMatricesCSR`. +@testset "CSC-native core in a SparseMatricesCSR-free process" begin + code = """ + push!(LOAD_PATH, "@") + include($(repr(joinpath(@__DIR__, "csc_core.jl")))) + """ + p = run( + ignorestatus( + `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code` + ), + ) + @test p.exitcode == 0 +end