From ea52cfd0e98569dfe1d8b03d87770acfdcb4840b Mon Sep 17 00:00:00 2001
From: ChrisRackauckas-Claude <accounts@chrisrackauckas.com>
Date: Fri, 5 Jun 2026 04:13:30 -0400
Subject: [PATCH 1/3] Make SparseMatrixCSC the native API; SparseMatrixCSR via
 extension

The entire kernel is already CSC: csr_analyze/csr_factor/csr_refactor!/csr_qr
now accept a SparseMatrixCSC directly, reading its colptr/rowval/nzval into
the pooled workspace with no transpose or intermediate allocation (column
norms are computed straight off nzval). The csr_* names are kept for
back-compatibility.

SparseMatricesCSR is moved from [deps] to [weakdeps]; the SparseMatrixCSR
method overloads now live in SparseColumnPivotedQRSparseMatricesCSRExt, which
delegates to the CSC core via SparseMatrixCSC(A). The precompile workload
exercises the CSC path natively; the CSR extension carries its own workload.

Tests: CSC core covers square/over-/under-determined/rank-deficient, Float64
and ComplexF64, and the refactor reuse + zero-alloc steady state, run in a
SparseMatricesCSR-free subprocess. The existing CSR suite still exercises the
extension path. Refactor steady state on the CSC path is 0 bytes (sparse and
adaptive_dense), as is ldiv!.

Refs SciML/SparseColumnPivotedQR.jl#33

Co-Authored-By: Chris Rackauckas <accounts@chrisrackauckas.com>
---
 Project.toml                                  |   8 +-
 ...arseColumnPivotedQRSparseMatricesCSRExt.jl |  67 ++++++
 src/SparseColumnPivotedQR.jl                  | 214 ++++++++++--------
 test/csc_core.jl                              | 117 ++++++++++
 test/runtests.jl                              |  54 +++--
 5 files changed, 347 insertions(+), 113 deletions(-)
 create mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
 create mode 100644 test/csc_core.jl

diff --git a/Project.toml b/Project.toml
index cd1a353..18f9c2f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,19 +1,20 @@
 name = "SparseColumnPivotedQR"
 uuid = "a57abbd0-fea5-4d57-96be-5e525945e8e4"
 authors = ["Chris Rackauckas <accounts@chrisrackauckas.com> and contributors"]
-version = "0.1.0"
+version = "0.2.0"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [weakdeps]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
+SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [extensions]
 SparseColumnPivotedQRAMDExt = "AMD"
+SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR"
 
 [compat]
 AMD = "0.5"
@@ -27,7 +28,8 @@ julia = "1.10"
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["AMD", "ForwardDiff", "Random", "Test"]
+test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"]
diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
new file mode 100644
index 0000000..6e643bd
--- /dev/null
+++ b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
@@ -0,0 +1,67 @@
+module SparseColumnPivotedQRSparseMatricesCSRExt
+
+using SparseColumnPivotedQR
+using LinearAlgebra
+using SparseArrays
+using SparseMatricesCSR
+using PrecompileTools
+
+import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!,
+    CSRQRSymbolic, CSRQRFactorization
+
+# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on.
+# This is the same CSR -> CSC conversion the kernel performed internally before
+# the CSC-native refactor; it now lives here so the core never depends on
+# `SparseMatricesCSR`.
+@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A)
+
+function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default)
+    return csr_analyze(_to_csc(A); ordering = ordering)
+end
+
+function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...)
+    return csr_factor(_to_csc(A), sym; kwargs...)
+end
+
+function csr_qr(A::SparseMatrixCSR; kwargs...)
+    return csr_qr(_to_csc(A); kwargs...)
+end
+
+function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...)
+    return csr_refactor!(F, _to_csc(A); kwargs...)
+end
+
+# Keep the CSR entry points specialized in the package image. Mirrors the
+# core workload, but on `SparseMatrixCSR` inputs through the conversion path.
+@setup_workload begin
+    @compile_workload begin
+        for T in (Float64, Float32, ComplexF64, ComplexF32)
+            for Ti in (Int32, Int64)
+                rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4]
+                cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5]
+                vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1]
+                A = sparsecsr(rows, cols, vals, 6, 6)
+                b = ones(T, 6)
+
+                F = csr_qr(A; ordering = :natural)
+                F \ b
+                rank(F)
+
+                sym = csr_analyze(A; ordering = :natural)
+                G = csr_factor(A, sym)
+                csr_refactor!(G, A)
+                G \ b
+
+                drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6]
+                dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1]
+                dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4]
+                Ad = sparsecsr(drows, dcols, dvals, 6, 6)
+                Fd = csr_qr(Ad; ordering = :natural)
+                Fd \ b
+                rank(Fd)
+            end
+        end
+    end
+end
+
+end # module
diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl
index fd53456..2b2f5f7 100644
--- a/src/SparseColumnPivotedQR.jl
+++ b/src/SparseColumnPivotedQR.jl
@@ -2,7 +2,7 @@ module SparseColumnPivotedQR
 
 using LinearAlgebra
 using SparseArrays
-using SparseMatricesCSR
+using SparseArrays: getcolptr
 using PrecompileTools
 
 import LinearAlgebra: ldiv!, rank
@@ -12,11 +12,6 @@ export csr_qr, csr_analyze, csr_factor, csr_refactor!,
     has_amd_extension,
     CSRQRSymbolic, CSRQRFactorization
 
-# CSR offset: rowptr stores 1-based if Bi == 1, 0-based if Bi == 0.
-@inline function getoffset(::SparseMatrixCSR{Bi}) where {Bi}
-    return Bi == 1 ? 0 : 1
-end
-
 # The adaptive-dense fallback finishes the trailing dense block with LAPACK
 # `geqp3!` / `ormqr!`, which are only defined for the four BLAS float types
 # (Float32/Float64/ComplexF32/ComplexF64). For any other element type (e.g.
@@ -415,8 +410,14 @@ mutable struct CSRQRSymbolic
     rnz::Int
     rcount::Vector{Int}     # exact nnz per column of R (length n)
     ordering::Symbol
+    # CSR pattern of the input (internal layout, 1-based) used by the symbolic
+    # builders and the deferral/repivot `_rebuild_symbolic_for_q` path.
     pattern_rowptr::Vector{Int}
     pattern_colval::Vector{Int}
+    # Native CSC pattern snapshot (colptr, rowval) of the input, used by
+    # `_pattern_matches` for the zero-allocation fixed-pattern refactor check.
+    pattern_colptr::Vector{Int}
+    pattern_rowval::Vector{Int}
     # Lazily-attached numeric workspace. Type-erased here because Symbolic
     # is built without knowing the value-element type T.
     workspace::Union{Nothing, _CSRQRWorkspace}
@@ -473,30 +474,67 @@ Base.eltype(::CSRQRFactorization{T}) where {T} = T
 # CSR <-> CSC conversion (pattern + values)
 # ---------------------------------------------------------------------------
 
-function _capture_pattern(A::SparseMatrixCSR{Bi}) where {Bi}
-    off = getoffset(A)
-    rowptr = Vector{Int}(undef, length(A.rowptr))
-    @inbounds for i in eachindex(A.rowptr)
-        rowptr[i] = Int(A.rowptr[i]) + off
+# Transpose a CSC pattern (colptr, rowval) into the CSR pattern (rowptr, colval)
+# the symbolic machinery operates on. Pattern-only (integer) counting-sort
+# transpose — no values touched. `colval` lists, within each row, the columns
+# in ascending order (a stable byproduct of the column-major scan).
+function _csc_pattern_to_csr(colptr::Vector{Int}, rowval::Vector{Int}, m::Int, n::Int)
+    nnz_total = length(rowval)
+    rowptr = Vector{Int}(undef, m + 1)
+    rowcounts = zeros(Int, m)
+    @inbounds for p in 1:nnz_total
+        rowcounts[rowval[p]] += 1
     end
-    colval = Vector{Int}(undef, length(A.colval))
-    @inbounds for i in eachindex(A.colval)
-        colval[i] = Int(A.colval[i]) + off
+    rowptr[1] = 1
+    @inbounds for i in 1:m
+        rowptr[i + 1] = rowptr[i] + rowcounts[i]
+    end
+    colval = Vector{Int}(undef, nnz_total)
+    work = Vector{Int}(undef, m)
+    @inbounds for i in 1:m
+        work[i] = rowptr[i]
+    end
+    @inbounds for j in 1:n
+        c1 = colptr[j]; c2 = colptr[j + 1] - 1
+        for p in c1:c2
+            i = rowval[p]
+            colval[work[i]] = j
+            work[i] += 1
+        end
     end
     return rowptr, colval
 end
 
-function _pattern_matches(S::CSRQRSymbolic, A::SparseMatrixCSR{Bi}) where {Bi}
+# Snapshot a `SparseMatrixCSC`'s structural pattern for the symbolic. Returns
+# the CSR pattern (rowptr, colval) used by the symbolic builders and the
+# rebuild path, plus a copy of the native CSC pattern (colptr, rowval) used by
+# `_pattern_matches` for the zero-allocation refactor fast path.
+function _capture_pattern(A::SparseMatrixCSC)
+    m, n = size(A)
+    # Normalize to `Vector{Int}`: the symbolic machinery is `Int`-indexed, and
+    # a CSC built from `Int32` arrays would otherwise carry a narrower index
+    # type through the captured pattern.
+    colptr = convert(Vector{Int}, getcolptr(A))
+    rowval = convert(Vector{Int}, rowvals(A))
+    rowptr, colval = _csc_pattern_to_csr(colptr, rowval, m, n)
+    return rowptr, colval, colptr, rowval
+end
+
+# Fast structural-pattern comparison against the CSC snapshot held on the
+# symbolic. Compares `colptr`/`rowval` directly so the common fixed-pattern
+# `csr_refactor!` is a cheap O(nnz) integer scan with no allocation.
+function _pattern_matches(S::CSRQRSymbolic, A::SparseMatrixCSC)
     m, n = size(A)
     (m == S.m && n == S.n) || return false
-    length(A.rowptr) == length(S.pattern_rowptr) || return false
-    length(A.colval) == length(S.pattern_colval) || return false
-    off = getoffset(A)
-    @inbounds for i in eachindex(A.rowptr)
-        Int(A.rowptr[i]) + off == S.pattern_rowptr[i] || return false
+    colptr = getcolptr(A)
+    rowval = rowvals(A)
+    length(colptr) == length(S.pattern_colptr) || return false
+    length(rowval) == length(S.pattern_rowval) || return false
+    @inbounds for i in eachindex(colptr)
+        colptr[i] == S.pattern_colptr[i] || return false
     end
-    @inbounds for i in eachindex(A.colval)
-        Int(A.colval[i]) + off == S.pattern_colval[i] || return false
+    @inbounds for p in eachindex(rowval)
+        rowval[p] == S.pattern_rowval[p] || return false
     end
     return true
 end
@@ -854,13 +892,18 @@ end
 # ---------------------------------------------------------------------------
 
 """
-    csr_analyze(A::SparseMatrixCSR; ordering=:default) -> CSRQRSymbolic
+    csr_analyze(A::SparseMatrixCSC; ordering=:default) -> CSRQRSymbolic
 
 Symbolic analysis phase for the sparse column-pivoted Householder QR
 factorization. Computes the column permutation `q`, row permutation `pinv`,
 column elimination tree, per-row `leftmost`, and the upper-bound sizes of
 the V (Householder) and R buffers.
 
+The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its
+`colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate
+allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted
+when that package is loaded, via an extension that converts to CSC.
+
 `ordering` selects the column ordering:
   * `:default`  — `:amd` if the AMD.jl extension is loaded (`using AMD`),
                   `:natural` otherwise. **This is the default.**
@@ -878,9 +921,9 @@ the V (Householder) and R buffers.
 Returns a `CSRQRSymbolic` that can be passed to `csr_factor` and reused via
 `csr_refactor!` for matrices with identical sparsity patterns.
 """
-function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where {Bi}
+function csr_analyze(A::SparseMatrixCSC; ordering::Symbol = :default)
     m, n = size(A)
-    rowptr, colval = _capture_pattern(A)
+    rowptr, colval, colptr_snap, rowval_snap = _capture_pattern(A)
     ordering_use = _resolve_ordering(ordering)
     if (
             ordering_use === :amd || ordering_use === :colamd ||
@@ -910,13 +953,13 @@ function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where
             return CSRQRSymbolic(
                 m, n, m2_a, q_a, pinv_a, parent_a,
                 leftmost_a, vnz_a, rnz_a, rcount_a, :amd,
-                rowptr, colval, nothing
+                rowptr, colval, colptr_snap, rowval_snap, nothing
             )
         else
             return CSRQRSymbolic(
                 m, n, m2_n, q_n, pinv_n, parent_n,
                 leftmost_n, vnz_n, rnz_n, rcount_n, :natural,
-                rowptr, colval, nothing
+                rowptr, colval, colptr_snap, rowval_snap, nothing
             )
         end
     end
@@ -925,12 +968,13 @@ function csr_analyze(A::SparseMatrixCSR{Bi}; ordering::Symbol = :default) where
         _build_symbolic(rowptr, colval, m, n, ordering_use)
     return CSRQRSymbolic(
         m, n, m2, q, pinv, parent, leftmost_perm,
-        vnz, rnz, rcount, ordering_use, rowptr, colval, nothing
+        vnz, rnz, rcount, ordering_use, rowptr, colval,
+        colptr_snap, rowval_snap, nothing
     )
 end
 
 """
-    csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; tol=nothing, drop_tol=0,
+    csr_factor(A::SparseMatrixCSC, sym::CSRQRSymbolic; tol=nothing, drop_tol=0,
                adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization
 
 Numeric factorization given a `CSRQRSymbolic`. Implements the Davis
@@ -959,12 +1003,12 @@ and finishes with LAPACK `geqp3!`. The composed column permutation is
 stored in `F.q_eff`.
 """
 function csr_factor(
-        A::SparseMatrixCSR{Bi, T}, sym::CSRQRSymbolic;
+        A::SparseMatrixCSC{T}, sym::CSRQRSymbolic;
         tol::Union{Nothing, Real} = nothing,
         drop_tol::Real = 0,
         adaptive_dense::Bool = false,
         dense_threshold::Real = 0.4
-    ) where {Bi, T}
+    ) where {T}
     return _factor_kernel(
         A, sym, tol, nothing, real(T)(drop_tol),
         adaptive_dense, real(T)(dense_threshold)
@@ -972,7 +1016,7 @@ function csr_factor(
 end
 
 """
-    csr_qr(A::SparseMatrixCSR; tol=nothing, ordering=:default, drop_tol=0,
+    csr_qr(A::SparseMatrixCSC; tol=nothing, ordering=:default, drop_tol=0,
            adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization
 
 One-shot convenience: equivalent to `csr_factor(A, csr_analyze(A; ordering); tol)`.
@@ -984,13 +1028,13 @@ typical dense-fill matrices that arise from nonlinear solver linsolves,
 for matrices whose columns are already well-ordered.
 """
 function csr_qr(
-        A::SparseMatrixCSR{Bi, T};
+        A::SparseMatrixCSC;
         tol::Union{Nothing, Real} = nothing,
         ordering::Symbol = :default,
         drop_tol::Real = 0,
         adaptive_dense::Bool = false,
         dense_threshold::Real = 0.4
-    ) where {Bi, T}
+    )
     sym = csr_analyze(A; ordering = ordering)
     return csr_factor(
         A, sym; tol = tol, drop_tol = drop_tol,
@@ -999,7 +1043,7 @@ function csr_qr(
 end
 
 """
-    csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; tol=nothing, drop_tol=0,
+    csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSC; tol=nothing, drop_tol=0,
                   adaptive_dense=false, dense_threshold=0.4) -> CSRQRFactorization
 
 Numeric refactorization, mutating `F` in place. If the sparsity pattern of
@@ -1019,12 +1063,12 @@ same meaning as in [`csr_factor`](@ref).
 """
 function csr_refactor!(
         F::CSRQRFactorization{T},
-        A::SparseMatrixCSR{Bi};
+        A::SparseMatrixCSC;
         tol::Union{Nothing, Real} = nothing,
         drop_tol::Real = 0,
         adaptive_dense::Bool = false,
         dense_threshold::Real = 0.4
-    ) where {T, Bi}
+    ) where {T}
     dt = real(T)(drop_tol)
     dth = real(T)(dense_threshold)
     if _pattern_matches(F.sym, A)
@@ -1064,25 +1108,25 @@ end
 end
 
 function _factor_kernel(
-        A::SparseMatrixCSR{Bi, T}, sym::CSRQRSymbolic,
+        A::SparseMatrixCSC{T}, sym::CSRQRSymbolic,
         tol::Union{Nothing, Real},
         F::Union{Nothing, CSRQRFactorization},
         drop_tol::Real = zero(real(T)),
         adaptive_dense::Bool = false,
         dense_threshold::Real = real(T)(0.4)
-    ) where {Bi, T}
+    ) where {T}
     m, n = size(A)
     (m == sym.m && n == sym.n) ||
         throw(DimensionMismatch("A is $m x $n but symbolic is $(sym.m) x $(sym.n)"))
 
     RT = real(T)
-    nnz_A = length(A.colval)
+    nnz_A = length(rowvals(A))
     ws = _get_workspace(T, sym, nnz_A)::_CSRQRWorkspace{T, RT}
 
-    # Single-pass CSR -> CSC(A) conversion + Frobenius norm + column-norm
-    # cache (the column norms feed the zero-column check below). All buffers
-    # come from the workspace.
-    fro2 = _csr_to_csc_with_norms!(ws, A)
+    # Copy A's CSC arrays into the workspace + compute the Frobenius norm and
+    # per-column squared norms (the column norms feed the zero-column check
+    # below). All buffers come from the workspace; no transpose needed.
+    fro2 = _csc_copy_with_norms!(ws, A)
     fro = sqrt(fro2)
     tol_use = tol === nothing ? RT(eps(RT) * max(m, n)) * fro : RT(max(tol, 0))
     tol2 = tol_use * tol_use
@@ -1229,7 +1273,8 @@ function _factor_kernel(
         sym_next = CSRQRSymbolic(
             sym_cur.m, sym_cur.n, m2n, qn, pinvn, parentn, leftmostn,
             vnzn, rnzn, rcountn, sym_cur.ordering, sym_cur.pattern_rowptr,
-            sym_cur.pattern_colval, sym_cur.workspace
+            sym_cur.pattern_colval, sym_cur.pattern_colptr,
+            sym_cur.pattern_rowval, sym_cur.workspace
         )
         ws_next = _get_workspace(T, sym_next, nnz_A)::_CSRQRWorkspace{T, RT}
         _permute_pq!(ws_next, sym_next.pinv, sym_next.q, m, n)
@@ -1288,61 +1333,41 @@ function _factor_kernel(
     return F_out
 end
 
-# In-place CSR -> CSC of A, plus per-column squared norms and ||A||_F^2.
-# Writes into ws.colptr_A, ws.rowval_A, ws.nzval_A, ws.col_nrm2. Returns
-# the Frobenius-norm-squared. ws.work_perm doubles as a temporary copy of
-# colptr used during scatter.
-function _csr_to_csc_with_norms!(
+# Copy A's CSC arrays into the workspace, plus per-column squared norms and
+# ||A||_F^2. Writes into ws.colptr_A, ws.rowval_A, ws.nzval_A, ws.col_nrm2.
+# Returns the Frobenius-norm-squared. The input is already CSC, so this is a
+# straight copy with the column norms read directly off `nzval` per column —
+# no transpose, no scatter.
+function _csc_copy_with_norms!(
         ws::_CSRQRWorkspace{T, RT},
-        A::SparseMatrixCSR{Bi, T}
-    ) where {Bi, T, RT}
+        A::SparseMatrixCSC{T}
+    ) where {T, RT}
     m, n = size(A)
-    off = getoffset(A)
-    rowptr = A.rowptr
-    colval = A.colval
-    nzval_in = A.nzval
-    nnz_total = length(colval)
+    colptr_in = getcolptr(A)
+    rowval_in = rowvals(A)
+    nzval_in = nonzeros(A)
 
     colptr = ws.colptr_A
     rowval = ws.rowval_A
     nzval = ws.nzval_A
     col_nrm2 = ws.col_nrm2
-    work = ws.work_perm
 
-    # Reset accumulators.
-    @inbounds for j in 1:n
-        col_nrm2[j] = zero(RT)
-    end
-    # Use colptr as a count buffer first (we'll convert in place).
+    fro2 = zero(RT)
     @inbounds for j in 1:(n + 1)
-        colptr[j] = 0
-    end
-    @inbounds for p in 1:nnz_total
-        colptr[Int(colval[p]) + off + 1] += 1
+        colptr[j] = colptr_in[j]
     end
-    # Cumsum: colptr[j+1] = colptr[j] + count[j].
-    colptr[1] = 1
     @inbounds for j in 1:n
-        colptr[j + 1] += colptr[j]
-    end
-    @inbounds for j in 1:(n + 1)
-        work[j] = colptr[j]
-    end
-    fro2 = zero(RT)
-    @inbounds for i in 1:m
-        r1 = rowptr[i] + off
-        r2 = rowptr[i + 1] + off - 1
-        for p in r1:r2
-            j = Int(colval[p]) + off
-            slot = work[j]
+        c1 = colptr_in[j]; c2 = colptr_in[j + 1] - 1
+        cn = zero(RT)
+        for p in c1:c2
             v = nzval_in[p]
-            rowval[slot] = i
-            nzval[slot] = v
-            work[j] = slot + 1
+            rowval[p] = rowval_in[p]
+            nzval[p] = v
             v2 = abs2(v)
-            col_nrm2[j] += v2
-            fro2 += v2
+            cn += v2
         end
+        col_nrm2[j] = cn
+        fro2 += cn
     end
     return fro2
 end
@@ -1450,7 +1475,8 @@ function _maybe_repivot_zero_cols_from_norms(
     return CSRQRSymbolic(
         sym.m, sym.n, m2_2, q2, pinv2, parent2, leftmost2,
         vnz2, rnz2, rcount2, sym.ordering, sym.pattern_rowptr,
-        sym.pattern_colval, sym.workspace
+        sym.pattern_colval, sym.pattern_colptr, sym.pattern_rowval,
+        sym.workspace
     )
 end
 
@@ -2395,10 +2421,12 @@ end
 # ---------------------------------------------------------------------------
 #
 # Exercise the hot paths (analyze / factor / refactor / solve / rank / size)
-# for the four standard BLAS element types and both index types, on tiny
-# well-conditioned and rank-deficient inputs, so the specialized methods land
-# in the package image. Only the `:natural` ordering is exercised: `:amd`
-# lives in a weak-dep extension and cannot be loaded from here.
+# for the four standard BLAS element types, on tiny well-conditioned and
+# rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized
+# methods land in the package image. Only the `:natural` ordering is exercised:
+# `:amd` lives in a weak-dep extension and cannot be loaded from here. The
+# `SparseMatrixCSR` overloads live in their own extension, which carries its
+# own workload.
 
 @setup_workload begin
     @compile_workload begin
@@ -2408,7 +2436,7 @@ end
                 rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4]
                 cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5]
                 vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1]
-                A = sparsecsr(rows, cols, vals, 6, 6)
+                A = sparse(rows, cols, vals, 6, 6)
                 b = ones(T, 6)
 
                 F = csr_qr(A; ordering = :natural)
@@ -2428,7 +2456,7 @@ end
                 drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6]
                 dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1]
                 dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4]
-                Ad = sparsecsr(drows, dcols, dvals, 6, 6)
+                Ad = sparse(drows, dcols, dvals, 6, 6)
                 Fd = csr_qr(Ad; ordering = :natural)
                 Fd \ b
                 rank(Fd)
diff --git a/test/csc_core.jl b/test/csc_core.jl
new file mode 100644
index 0000000..2cdf193
--- /dev/null
+++ b/test/csc_core.jl
@@ -0,0 +1,117 @@
+# CSC-native core tests. Run in a SEPARATE process that does NOT load
+# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the
+# native path with the CSR extension absent. (Driven from `runtests.jl`.)
+using Test
+using LinearAlgebra
+using SparseArrays
+using Random
+using SparseColumnPivotedQR
+using AMD  # AMD extension is independent of the CSR extension
+
+@assert Base.get_extension(
+    SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt
+) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process"
+
+@testset "CSC-native core (no SparseMatricesCSR loaded)" begin
+    for T in (Float64, ComplexF64)
+        cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M)
+
+        @testset "square full-rank ($T)" begin
+            Random.seed!(1)
+            n = 40
+            base = sprand(Float64, n, n, 0.2) + 5I
+            A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base)))
+            b = ones(T, n)
+            F = csr_qr(A)
+            x = F \ b
+            @test norm(A * x - b) / norm(b) < 1.0e-9
+            @test rank(F) == n
+        end
+
+        @testset "tall overdetermined least-squares ($T)" begin
+            Random.seed!(2)
+            m, n = 60, 25
+            base = sprand(Float64, m, n, 0.3)
+            base = base + sparse(1:n, 1:n, ones(n), m, n)
+            A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base)))
+            b = randn(T, m)
+            F = csr_qr(A)
+            x = F \ b
+            # Full-column-rank tall LS: the dense `\` is the unique LS solution.
+            @test x ≈ Matrix(A) \ b rtol = 1.0e-8
+            @test rank(F) == n
+        end
+
+        @testset "wide underdetermined ($T)" begin
+            Random.seed!(3)
+            m, n = 20, 45
+            base = sprand(Float64, m, n, 0.3)
+            base = base + sparse(1:m, 1:m, ones(m), m, n)
+            A = convert(SparseMatrixCSC{T, Int}, cv(Matrix(base)))
+            b = randn(T, m)
+            F = csr_qr(A)
+            x = F \ b
+            @test norm(A * x - b) / norm(b) < 1.0e-8   # consistent system
+            @test rank(F) == m
+        end
+
+        @testset "rank-deficient ($T)" begin
+            Random.seed!(4)
+            m, n = 40, 30
+            base = sprand(Float64, m, n, 0.25)
+            base = base + sparse(1:n, 1:n, ones(n), m, n)
+            M = Matrix(base)
+            M[:, n] = M[:, 1]          # duplicate column -> rank n-1
+            A = convert(SparseMatrixCSC{T, Int}, cv(M))
+            b = randn(T, m)
+            F = csr_qr(A)
+            @test rank(F) == n - 1
+            x = F \ b
+            # Minimum-residual solve: the residual must match the true
+            # least-squares residual (computed via the dense pseudoinverse,
+            # which is well-defined for a rank-deficient overdetermined system).
+            r_csc = norm(A * x - b)
+            r_min = norm(A * (pinv(Matrix(A)) * b) - b)
+            @test r_csc ≈ r_min rtol = 1.0e-6
+        end
+
+        @testset "csr_refactor! reuse path ($T)" begin
+            Random.seed!(5)
+            n = 30
+            sp = sprand(Float64, n, n, 0.2)
+            rows, cols, _ = findnz(sp)
+            mk(v) = convert(
+                SparseMatrixCSC{T, Int},
+                cv(Matrix(sparse(rows, cols, v, n, n) + 4I)),
+            )
+            A1 = mk(randn(length(rows)))
+            A2 = mk(randn(length(rows)))
+            b = randn(T, n)
+            F = csr_qr(A1; ordering = :amd)
+            csr_refactor!(F, A2)
+            x2 = F \ b
+            @test norm(A2 * x2 - b) / norm(b) < 1.0e-9
+            csr_refactor!(F, A1)
+            x1 = F \ b
+            @test norm(A1 * x1 - b) / norm(b) < 1.0e-9
+        end
+
+        @testset "csr_refactor! zero-alloc steady state ($T)" begin
+            Random.seed!(6)
+            n = 30
+            sp = sprand(Float64, n, n, 0.2)
+            rows, cols, _ = findnz(sp)
+            mk(v) = convert(
+                SparseMatrixCSC{T, Int},
+                cv(Matrix(sparse(rows, cols, v, n, n) + 4I)),
+            )
+            A1 = mk(randn(length(rows)))
+            A2 = mk(randn(length(rows)))
+            F = csr_qr(A1; ordering = :amd)
+            csr_refactor!(F, A2)   # warm
+            csr_refactor!(F, A1)
+            @test (@allocated csr_refactor!(F, A2)) == 0
+            @test (@allocated csr_refactor!(F, A1)) == 0
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index dec76ce..4cff47c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -19,8 +19,8 @@ end
 # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one
 # `ldiv!`, measured inside a function barrier so the `@allocated` is not
 # perturbed by boxed captures in the calling (testset) scope.
-function _refactor_alloc(F, Acsr)
-    return @allocated csr_refactor!(F, Acsr; adaptive_dense = true)
+function _refactor_alloc(F, A)
+    return @allocated csr_refactor!(F, A; adaptive_dense = true)
 end
 function _ldiv_alloc(x, F, b)
     return @allocated ldiv!(x, F, b)
@@ -39,17 +39,17 @@ function _dense_zeroalloc_case(::Type{T}, A64, n) where {T}
         SparseMatrixCSC{T, Int},
         T <: Complex ? (T.(A64) .+ T(0.1im) .* (A64 .!= 0)) : T.(A64),
     )
-    Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+    # Native CSC path: the zero-alloc contract applies to `SparseMatrixCSC`.
     b = ones(T, size(A, 1))
     x = zeros(T, n)
-    sym = csr_analyze(Acsr; ordering = :amd)
-    F = csr_factor(Acsr, sym; adaptive_dense = true)
-    csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x, F, b)   # warm
-    csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x, F, b)
-    ra = _refactor_alloc(F, Acsr)
+    sym = csr_analyze(A; ordering = :amd)
+    F = csr_factor(A, sym; adaptive_dense = true)
+    csr_refactor!(F, A; adaptive_dense = true); ldiv!(x, F, b)   # warm
+    csr_refactor!(F, A; adaptive_dense = true); ldiv!(x, F, b)
+    ra = _refactor_alloc(F, A)
     la = _ldiv_alloc(x, F, b)
-    x1 = zeros(T, n); csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x1, F, b)
-    x2 = zeros(T, n); csr_refactor!(F, Acsr; adaptive_dense = true); ldiv!(x2, F, b)
+    x1 = zeros(T, n); csr_refactor!(F, A; adaptive_dense = true); ldiv!(x1, F, b)
+    x2 = zeros(T, n); csr_refactor!(F, A; adaptive_dense = true); ldiv!(x2, F, b)
     return F.k_dense, ra, la, x1 == x2
 end
 
@@ -605,19 +605,22 @@ end
         v1 = randn(length(rows)); v2 = randn(length(rows))
         A1 = sparse(rows, cols, v1, n, n) + 4 * sparse(I, n, n)
         A2 = sparse(rows, cols, v2, n, n) + 4 * sparse(I, n, n)
-        Acsr1 = build_csr(Matrix(A1))
-        Acsr2 = build_csr(Matrix(A2))
         b = randn(n)
 
-        F = csr_qr(Acsr1; ordering = :amd)
+        # The zero-allocation contract is on the native `SparseMatrixCSC` path:
+        # its `colptr`/`rowval`/`nzval` are read straight into the pooled
+        # workspace with no transpose or intermediate allocation. (The CSR
+        # extension necessarily allocates one `SparseMatrixCSC` per call to
+        # convert, so it is not — and is not expected to be — zero-alloc.)
+        F = csr_qr(A1; ordering = :amd)
         # Warm up.
-        csr_refactor!(F, Acsr2)
-        csr_refactor!(F, Acsr1)
+        csr_refactor!(F, A2)
+        csr_refactor!(F, A1)
 
         # Steady-state: refactor with the cached workspace must allocate 0 bytes.
-        nbytes = @allocated csr_refactor!(F, Acsr2)
+        nbytes = @allocated csr_refactor!(F, A2)
         @test nbytes == 0
-        nbytes = @allocated csr_refactor!(F, Acsr1)
+        nbytes = @allocated csr_refactor!(F, A1)
         @test nbytes == 0
 
         # Sanity check the solution.
@@ -1044,3 +1047,20 @@ end
     end
 
 end
+
+# The CSC-native core must work with the `SparseMatricesCSR` extension absent.
+# This process has `using SparseMatricesCSR` loaded (so the extension is
+# active), so run the CSC-only checks in a fresh subprocess that never loads
+# `SparseMatricesCSR`.
+@testset "CSC-native core in a SparseMatricesCSR-free process" begin
+    code = """
+    push!(LOAD_PATH, "@")
+    include($(repr(joinpath(@__DIR__, "csc_core.jl"))))
+    """
+    p = run(
+        ignorestatus(
+            `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code`
+        ),
+    )
+    @test p.exitcode == 0
+end

From 1fd83baaa3c902ede4c4429c1479f9a6d5c23993 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas-Claude <accounts@chrisrackauckas.com>
Date: Fri, 5 Jun 2026 05:41:43 -0400
Subject: [PATCH 2/3] Drop SparseMatricesCSR entirely; SparseMatrixCSC is the
 sole native API

Remove the optional SparseMatricesCSR extension and all SparseMatricesCSR
references so the package has no dependency on it. The csr_* public names are
kept (they accept SparseMatrixCSC). CSR (SparseMatrixCSR) input support is
carved into a separate, optional PR stacked on this one.

- Delete ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl.
- Remove SparseMatricesCSR from [weakdeps]/[extensions]/[compat]/[extras]/[targets].
- Make the tests CSC-only (helpers now return SparseMatrixCSC); fold the
  CSC-core checks into a plain include (no subprocess isolation needed once
  CSR can never be loaded).
- Scrub CSR mentions from src docstrings, the precompile comment, README, and docs.

Co-Authored-By: Chris Rackauckas <accounts@chrisrackauckas.com>
---
 Project.toml                                  |  6 +-
 README.md                                     | 14 ++--
 docs/Project.toml                             |  6 +-
 docs/src/index.md                             | 17 +++--
 ...arseColumnPivotedQRSparseMatricesCSRExt.jl | 67 -------------------
 src/SparseColumnPivotedQR.jl                  |  9 +--
 test/csc_core.jl                              | 14 ++--
 test/runtests.jl                              | 46 ++++---------
 8 files changed, 40 insertions(+), 139 deletions(-)
 delete mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl

diff --git a/Project.toml b/Project.toml
index 18f9c2f..94c6a18 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,26 +10,22 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [weakdeps]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
-SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [extensions]
 SparseColumnPivotedQRAMDExt = "AMD"
-SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR"
 
 [compat]
 AMD = "0.5"
 LinearAlgebra = "1"
 PrecompileTools = "1"
 SparseArrays = "1"
-SparseMatricesCSR = "0.6"
 julia = "1.10"
 
 [extras]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"]
+test = ["AMD", "ForwardDiff", "Random", "Test"]
diff --git a/README.md b/README.md
index 10b22af..203f3cc 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 A pure-Julia, rank-revealing, column-pivoted Householder QR factorization that
 operates directly on
-[`SparseMatrixCSR`](https://github.com/JuliaSmoothOptimizers/SparseMatricesCSR.jl)
+[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/)
 sparse matrices. Targets the same "small-to-medium sparse" niche as KLU does for LU
 — low symbolic-phase overhead, no BLAS-3 / multifrontal machinery — while preserving
 the rank-revealing guarantees of LAPACK's column-pivoted QR.
@@ -29,13 +29,13 @@ version of the documentation, which contains the unreleased features.
 using Pkg
 Pkg.add("SparseColumnPivotedQR")
 
-using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR
+using SparseArrays, SparseColumnPivotedQR
 
-A = SparseMatrixCSR(sparse([1.0  0   2   0   0;
-                            0    3   0   0   1;
-                            4    0   5   0   0;
-                            0    0   0   6   0;
-                            0    7   0   0   8]))
+A = sparse([1.0  0   2   0   0;
+            0    3   0   0   1;
+            4    0   5   0   0;
+            0    0   0   6   0;
+            0    7   0   0   8])
 b = [1.0, 2.0, 3.0, 4.0, 5.0]
 
 F = csr_qr(A)
diff --git a/docs/Project.toml b/docs/Project.toml
index 3414357..dc79fc5 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,11 +1,11 @@
 [deps]
+AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseColumnPivotedQR = "a57abbd0-fea5-4d57-96be-5e525945e8e4"
-SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [compat]
+AMD = "0.5"
 Documenter = "1"
 SparseArrays = "1"
-SparseColumnPivotedQR = "0.1"
-SparseMatricesCSR = "0.6"
+SparseColumnPivotedQR = "0.2"
diff --git a/docs/src/index.md b/docs/src/index.md
index bdca4f7..ebefc5e 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -4,7 +4,7 @@ SparseColumnPivotedQR.jl is a component of the
 [SciML](https://sciml.ai/) ecosystem providing a pure-Julia,
 rank-revealing, column-pivoted Householder QR factorization that operates
 directly on
-[`SparseMatrixCSR`](https://github.com/gridap/SparseMatricesCSR.jl)
+[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/)
 sparse matrices.
 
 The package targets the same "small-to-medium sparse" niche as KLU does for
@@ -23,16 +23,15 @@ Pkg.add("SparseColumnPivotedQR")
 ## Quick start
 
 ```julia
-using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR
+using SparseArrays, SparseColumnPivotedQR
 using AMD  # enables the recommended AMD column ordering
 
 # A 5×5 sparse matrix and a right-hand side.
-A_csc = sparse([1.0  0   2   0   0;
-                0    3   0   0   1;
-                4    0   5   0   0;
-                0    0   0   6   0;
-                0    7   0   0   8])
-A = SparseMatrixCSR(A_csc)
+A = sparse([1.0  0   2   0   0;
+            0    3   0   0   1;
+            4    0   5   0   0;
+            0    0   0   6   0;
+            0    7   0   0   8])
 b = [1.0, 2.0, 3.0, 4.0, 5.0]
 
 # One-shot factor + solve.
@@ -93,7 +92,7 @@ least-squares solution whose residual matches the SVD pseudoinverse minimum
 to floating-point precision.
 
 ```julia
-A_singular = SparseMatrixCSR(sparse([1.0 2.0; 0.5 1.0; 2.0 4.0]))  # rank 1
+A_singular = sparse([1.0 2.0; 0.5 1.0; 2.0 4.0])  # rank 1
 b = [1.0, 1.0, 1.0]
 F = csr_qr(A_singular)
 rank(F)              # → 1
diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
deleted file mode 100644
index 6e643bd..0000000
--- a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
+++ /dev/null
@@ -1,67 +0,0 @@
-module SparseColumnPivotedQRSparseMatricesCSRExt
-
-using SparseColumnPivotedQR
-using LinearAlgebra
-using SparseArrays
-using SparseMatricesCSR
-using PrecompileTools
-
-import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!,
-    CSRQRSymbolic, CSRQRFactorization
-
-# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on.
-# This is the same CSR -> CSC conversion the kernel performed internally before
-# the CSC-native refactor; it now lives here so the core never depends on
-# `SparseMatricesCSR`.
-@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A)
-
-function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default)
-    return csr_analyze(_to_csc(A); ordering = ordering)
-end
-
-function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...)
-    return csr_factor(_to_csc(A), sym; kwargs...)
-end
-
-function csr_qr(A::SparseMatrixCSR; kwargs...)
-    return csr_qr(_to_csc(A); kwargs...)
-end
-
-function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...)
-    return csr_refactor!(F, _to_csc(A); kwargs...)
-end
-
-# Keep the CSR entry points specialized in the package image. Mirrors the
-# core workload, but on `SparseMatrixCSR` inputs through the conversion path.
-@setup_workload begin
-    @compile_workload begin
-        for T in (Float64, Float32, ComplexF64, ComplexF32)
-            for Ti in (Int32, Int64)
-                rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4]
-                cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5]
-                vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1]
-                A = sparsecsr(rows, cols, vals, 6, 6)
-                b = ones(T, 6)
-
-                F = csr_qr(A; ordering = :natural)
-                F \ b
-                rank(F)
-
-                sym = csr_analyze(A; ordering = :natural)
-                G = csr_factor(A, sym)
-                csr_refactor!(G, A)
-                G \ b
-
-                drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6]
-                dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1]
-                dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4]
-                Ad = sparsecsr(drows, dcols, dvals, 6, 6)
-                Fd = csr_qr(Ad; ordering = :natural)
-                Fd \ b
-                rank(Fd)
-            end
-        end
-    end
-end
-
-end # module
diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl
index 2b2f5f7..246f6a8 100644
--- a/src/SparseColumnPivotedQR.jl
+++ b/src/SparseColumnPivotedQR.jl
@@ -899,10 +899,9 @@ factorization. Computes the column permutation `q`, row permutation `pinv`,
 column elimination tree, per-row `leftmost`, and the upper-bound sizes of
 the V (Householder) and R buffers.
 
-The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its
+The input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its
 `colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate
-allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted
-when that package is loaded, via an extension that converts to CSC.
+allocation.
 
 `ordering` selects the column ordering:
   * `:default`  — `:amd` if the AMD.jl extension is loaded (`using AMD`),
@@ -2424,9 +2423,7 @@ end
 # for the four standard BLAS element types, on tiny well-conditioned and
 # rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized
 # methods land in the package image. Only the `:natural` ordering is exercised:
-# `:amd` lives in a weak-dep extension and cannot be loaded from here. The
-# `SparseMatrixCSR` overloads live in their own extension, which carries its
-# own workload.
+# `:amd` lives in a weak-dep extension and cannot be loaded from here.
 
 @setup_workload begin
     @compile_workload begin
diff --git a/test/csc_core.jl b/test/csc_core.jl
index 2cdf193..72ee679 100644
--- a/test/csc_core.jl
+++ b/test/csc_core.jl
@@ -1,18 +1,14 @@
-# CSC-native core tests. Run in a SEPARATE process that does NOT load
-# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the
-# native path with the CSR extension absent. (Driven from `runtests.jl`.)
+# CSC-native core tests: the `SparseMatrixCSC` API is the native path, read
+# directly with no transpose or intermediate allocation. (Driven from
+# `runtests.jl`.)
 using Test
 using LinearAlgebra
 using SparseArrays
 using Random
 using SparseColumnPivotedQR
-using AMD  # AMD extension is independent of the CSR extension
+using AMD
 
-@assert Base.get_extension(
-    SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt
-) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process"
-
-@testset "CSC-native core (no SparseMatricesCSR loaded)" begin
+@testset "CSC-native core" begin
     for T in (Float64, ComplexF64)
         cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M)
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 4cff47c..43b7d14 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,20 +1,15 @@
 using Test
 using LinearAlgebra
 using SparseArrays
-using SparseMatricesCSR
 using Random
 using SparseColumnPivotedQR
 using AMD  # trigger AMD extension
 using ForwardDiff
 
-# helper: convert CSC -> CSR
-to_csr(A::SparseMatrixCSC) = SparseMatrixCSR(transpose(sparse(transpose(A))))
-
-# Slightly nicer helper: build a CSR from a Julia matrix
-function build_csr(A::AbstractMatrix{T}) where {T}
-    Acsc = sparse(A)
-    return SparseMatrixCSR(transpose(sparse(transpose(Acsc))))
-end
+# The native API is `SparseMatrixCSC`; these helpers just normalize an input to
+# a CSC matrix.
+to_csr(A::SparseMatrixCSC) = A
+build_csr(A::AbstractMatrix) = sparse(A)
 
 # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one
 # `ldiv!`, measured inside a function barrier so the `@allocated` is not
@@ -285,7 +280,7 @@ end
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
-            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+            Acsr = sparse(A)
             F = csr_qr(Acsr)
             Fspqr = qr(A)
             xspqr = Fspqr \ b
@@ -457,7 +452,7 @@ end
             text = read(f, String)
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
-            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+            Acsr = sparse(A)
             sym_default = csr_analyze(Acsr)
             sym_amd = csr_analyze(Acsr; ordering = :amd)
             # :default == :amd here.
@@ -545,7 +540,7 @@ end
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
-            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+            Acsr = sparse(A)
             sym_adapt = csr_analyze(Acsr; ordering = :adaptive)
             @test sym_adapt.ordering === :amd
             F = csr_factor(Acsr, sym_adapt)
@@ -609,9 +604,7 @@ end
 
         # The zero-allocation contract is on the native `SparseMatrixCSC` path:
         # its `colptr`/`rowval`/`nzval` are read straight into the pooled
-        # workspace with no transpose or intermediate allocation. (The CSR
-        # extension necessarily allocates one `SparseMatrixCSC` per call to
-        # convert, so it is not — and is not expected to be — zero-alloc.)
+        # workspace with no transpose or intermediate allocation.
         F = csr_qr(A1; ordering = :amd)
         # Warm up.
         csr_refactor!(F, A2)
@@ -660,7 +653,7 @@ end
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
             all(isfinite, b) || continue
-            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+            Acsr = sparse(A)
             x_sparse = csr_qr(Acsr; ordering = :amd) \ b
             Fd = csr_qr(Acsr; ordering = :amd, adaptive_dense = true)
             x_dense = Fd \ b
@@ -915,7 +908,7 @@ end
                 lines = split(text, '\n'; keepempty = false)
                 A = eval(Meta.parse(strip(lines[1])))
                 b = eval(Meta.parse(strip(lines[2])))
-                Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
+                Acsr = sparse(A)
                 F = csr_qr(Acsr; adaptive_dense = true)
                 Fspqr = qr(A)
                 xspqr = Fspqr \ b
@@ -1048,19 +1041,6 @@ end
 
 end
 
-# The CSC-native core must work with the `SparseMatricesCSR` extension absent.
-# This process has `using SparseMatricesCSR` loaded (so the extension is
-# active), so run the CSC-only checks in a fresh subprocess that never loads
-# `SparseMatricesCSR`.
-@testset "CSC-native core in a SparseMatricesCSR-free process" begin
-    code = """
-    push!(LOAD_PATH, "@")
-    include($(repr(joinpath(@__DIR__, "csc_core.jl"))))
-    """
-    p = run(
-        ignorestatus(
-            `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code`
-        ),
-    )
-    @test p.exitcode == 0
-end
+# Dedicated CSC-native core checks (the `SparseMatrixCSC` API is the native,
+# allocation-free path).
+include(joinpath(@__DIR__, "csc_core.jl"))

From 9f1f800b5a40045b222ca82dc04f1d4525911dc6 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas-Claude <accounts@chrisrackauckas.com>
Date: Fri, 5 Jun 2026 05:46:03 -0400
Subject: [PATCH 3/3] Re-add optional SparseMatrixCSR support via
 SparseMatricesCSR extension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stacked on the CSC-native core (#35). Re-adds SparseMatrixCSR input support
behind the optional SparseMatricesCSR weak-dep extension, which converts CSR
to CSC and dispatches into the unchanged CSC kernel. The package's core stays
pure CSC + SparseArrays; CSR is only pulled in when `using SparseMatricesCSR`.

- Restore ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl (csr_qr/csr_analyze/
  csr_factor/csr_refactor! on SparseMatrixCSR, plus its own precompile workload).
- Restore SparseMatricesCSR in [weakdeps]/[extensions]/[compat]/[extras]/[targets].
- Restore the CSR tests (including the subprocess check that the CSC core works
  with the CSR extension absent) and the CSR docstring/precompile notes, README,
  and docs.

Note: the current CSR path reconverts CSR -> CSC on every csr_refactor! call
(~17 KB/call, ~1.8x slower; see the #35 benchmark comment). A future revision
should cache a CSC buffer and reuse it, and ideally add a zero-copy CSR fast
path (SparseMatrixCSR(A) == SparseMatrixCSC(Aᵀ), so the CSR arrays can be
reinterpreted as the transpose's CSC with no copy). Not implemented here.

Co-Authored-By: Chris Rackauckas <accounts@chrisrackauckas.com>
---
 Project.toml                                  |  6 +-
 README.md                                     | 14 ++--
 docs/Project.toml                             |  6 +-
 docs/src/index.md                             | 17 ++---
 ...arseColumnPivotedQRSparseMatricesCSRExt.jl | 67 +++++++++++++++++++
 src/SparseColumnPivotedQR.jl                  |  9 ++-
 test/csc_core.jl                              | 14 ++--
 test/runtests.jl                              | 46 +++++++++----
 8 files changed, 139 insertions(+), 40 deletions(-)
 create mode 100644 ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl

diff --git a/Project.toml b/Project.toml
index 94c6a18..18f9c2f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,22 +10,26 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [weakdeps]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
+SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [extensions]
 SparseColumnPivotedQRAMDExt = "AMD"
+SparseColumnPivotedQRSparseMatricesCSRExt = "SparseMatricesCSR"
 
 [compat]
 AMD = "0.5"
 LinearAlgebra = "1"
 PrecompileTools = "1"
 SparseArrays = "1"
+SparseMatricesCSR = "0.6"
 julia = "1.10"
 
 [extras]
 AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["AMD", "ForwardDiff", "Random", "Test"]
+test = ["AMD", "ForwardDiff", "Random", "SparseMatricesCSR", "Test"]
diff --git a/README.md b/README.md
index 203f3cc..10b22af 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 A pure-Julia, rank-revealing, column-pivoted Householder QR factorization that
 operates directly on
-[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/)
+[`SparseMatrixCSR`](https://github.com/JuliaSmoothOptimizers/SparseMatricesCSR.jl)
 sparse matrices. Targets the same "small-to-medium sparse" niche as KLU does for LU
 — low symbolic-phase overhead, no BLAS-3 / multifrontal machinery — while preserving
 the rank-revealing guarantees of LAPACK's column-pivoted QR.
@@ -29,13 +29,13 @@ version of the documentation, which contains the unreleased features.
 using Pkg
 Pkg.add("SparseColumnPivotedQR")
 
-using SparseArrays, SparseColumnPivotedQR
+using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR
 
-A = sparse([1.0  0   2   0   0;
-            0    3   0   0   1;
-            4    0   5   0   0;
-            0    0   0   6   0;
-            0    7   0   0   8])
+A = SparseMatrixCSR(sparse([1.0  0   2   0   0;
+                            0    3   0   0   1;
+                            4    0   5   0   0;
+                            0    0   0   6   0;
+                            0    7   0   0   8]))
 b = [1.0, 2.0, 3.0, 4.0, 5.0]
 
 F = csr_qr(A)
diff --git a/docs/Project.toml b/docs/Project.toml
index dc79fc5..3414357 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,11 +1,11 @@
 [deps]
-AMD = "14f7f29c-3bd6-536c-9a0b-7339e30b5a3e"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseColumnPivotedQR = "a57abbd0-fea5-4d57-96be-5e525945e8e4"
+SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
 
 [compat]
-AMD = "0.5"
 Documenter = "1"
 SparseArrays = "1"
-SparseColumnPivotedQR = "0.2"
+SparseColumnPivotedQR = "0.1"
+SparseMatricesCSR = "0.6"
diff --git a/docs/src/index.md b/docs/src/index.md
index ebefc5e..bdca4f7 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -4,7 +4,7 @@ SparseColumnPivotedQR.jl is a component of the
 [SciML](https://sciml.ai/) ecosystem providing a pure-Julia,
 rank-revealing, column-pivoted Householder QR factorization that operates
 directly on
-[`SparseMatrixCSC`](https://docs.julialang.org/en/v1/stdlib/SparseArrays/)
+[`SparseMatrixCSR`](https://github.com/gridap/SparseMatricesCSR.jl)
 sparse matrices.
 
 The package targets the same "small-to-medium sparse" niche as KLU does for
@@ -23,15 +23,16 @@ Pkg.add("SparseColumnPivotedQR")
 ## Quick start
 
 ```julia
-using SparseArrays, SparseColumnPivotedQR
+using SparseArrays, SparseMatricesCSR, SparseColumnPivotedQR
 using AMD  # enables the recommended AMD column ordering
 
 # A 5×5 sparse matrix and a right-hand side.
-A = sparse([1.0  0   2   0   0;
-            0    3   0   0   1;
-            4    0   5   0   0;
-            0    0   0   6   0;
-            0    7   0   0   8])
+A_csc = sparse([1.0  0   2   0   0;
+                0    3   0   0   1;
+                4    0   5   0   0;
+                0    0   0   6   0;
+                0    7   0   0   8])
+A = SparseMatrixCSR(A_csc)
 b = [1.0, 2.0, 3.0, 4.0, 5.0]
 
 # One-shot factor + solve.
@@ -92,7 +93,7 @@ least-squares solution whose residual matches the SVD pseudoinverse minimum
 to floating-point precision.
 
 ```julia
-A_singular = sparse([1.0 2.0; 0.5 1.0; 2.0 4.0])  # rank 1
+A_singular = SparseMatrixCSR(sparse([1.0 2.0; 0.5 1.0; 2.0 4.0]))  # rank 1
 b = [1.0, 1.0, 1.0]
 F = csr_qr(A_singular)
 rank(F)              # → 1
diff --git a/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
new file mode 100644
index 0000000..6e643bd
--- /dev/null
+++ b/ext/SparseColumnPivotedQRSparseMatricesCSRExt.jl
@@ -0,0 +1,67 @@
+module SparseColumnPivotedQRSparseMatricesCSRExt
+
+using SparseColumnPivotedQR
+using LinearAlgebra
+using SparseArrays
+using SparseMatricesCSR
+using PrecompileTools
+
+import SparseColumnPivotedQR: csr_qr, csr_analyze, csr_factor, csr_refactor!,
+    CSRQRSymbolic, CSRQRFactorization
+
+# Convert a `SparseMatrixCSR` to the `SparseMatrixCSC` the core operates on.
+# This is the same CSR -> CSC conversion the kernel performed internally before
+# the CSC-native refactor; it now lives here so the core never depends on
+# `SparseMatricesCSR`.
+@inline _to_csc(A::SparseMatrixCSR) = SparseMatrixCSC(A)
+
+function csr_analyze(A::SparseMatrixCSR; ordering::Symbol = :default)
+    return csr_analyze(_to_csc(A); ordering = ordering)
+end
+
+function csr_factor(A::SparseMatrixCSR, sym::CSRQRSymbolic; kwargs...)
+    return csr_factor(_to_csc(A), sym; kwargs...)
+end
+
+function csr_qr(A::SparseMatrixCSR; kwargs...)
+    return csr_qr(_to_csc(A); kwargs...)
+end
+
+function csr_refactor!(F::CSRQRFactorization, A::SparseMatrixCSR; kwargs...)
+    return csr_refactor!(F, _to_csc(A); kwargs...)
+end
+
+# Keep the CSR entry points specialized in the package image. Mirrors the
+# core workload, but on `SparseMatrixCSR` inputs through the conversion path.
+@setup_workload begin
+    @compile_workload begin
+        for T in (Float64, Float32, ComplexF64, ComplexF32)
+            for Ti in (Int32, Int64)
+                rows = Ti[1, 2, 3, 4, 5, 6, 1, 2, 3, 4]
+                cols = Ti[1, 2, 3, 4, 5, 6, 2, 3, 4, 5]
+                vals = T[4, 4, 4, 4, 4, 4, 1, 1, 1, 1]
+                A = sparsecsr(rows, cols, vals, 6, 6)
+                b = ones(T, 6)
+
+                F = csr_qr(A; ordering = :natural)
+                F \ b
+                rank(F)
+
+                sym = csr_analyze(A; ordering = :natural)
+                G = csr_factor(A, sym)
+                csr_refactor!(G, A)
+                G \ b
+
+                drows = Ti[1, 2, 3, 4, 5, 1, 2, 3, 4, 6]
+                dcols = Ti[1, 2, 3, 4, 5, 2, 3, 4, 5, 1]
+                dvals = T[4, 4, 4, 4, 4, 1, 1, 1, 1, 4]
+                Ad = sparsecsr(drows, dcols, dvals, 6, 6)
+                Fd = csr_qr(Ad; ordering = :natural)
+                Fd \ b
+                rank(Fd)
+            end
+        end
+    end
+end
+
+end # module
diff --git a/src/SparseColumnPivotedQR.jl b/src/SparseColumnPivotedQR.jl
index 246f6a8..2b2f5f7 100644
--- a/src/SparseColumnPivotedQR.jl
+++ b/src/SparseColumnPivotedQR.jl
@@ -899,9 +899,10 @@ factorization. Computes the column permutation `q`, row permutation `pinv`,
 column elimination tree, per-row `leftmost`, and the upper-bound sizes of
 the V (Householder) and R buffers.
 
-The input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its
+The native input is a `SparseMatrixCSC` (the SparseArrays stdlib format); its
 `colptr`/`rowval`/`nzval` are read directly with no transpose or intermediate
-allocation.
+allocation. A `SparseMatrixCSR` (from `SparseMatricesCSR.jl`) is also accepted
+when that package is loaded, via an extension that converts to CSC.
 
 `ordering` selects the column ordering:
   * `:default`  — `:amd` if the AMD.jl extension is loaded (`using AMD`),
@@ -2423,7 +2424,9 @@ end
 # for the four standard BLAS element types, on tiny well-conditioned and
 # rank-deficient `SparseMatrixCSC` inputs (the native API), so the specialized
 # methods land in the package image. Only the `:natural` ordering is exercised:
-# `:amd` lives in a weak-dep extension and cannot be loaded from here.
+# `:amd` lives in a weak-dep extension and cannot be loaded from here. The
+# `SparseMatrixCSR` overloads live in their own extension, which carries its
+# own workload.
 
 @setup_workload begin
     @compile_workload begin
diff --git a/test/csc_core.jl b/test/csc_core.jl
index 72ee679..2cdf193 100644
--- a/test/csc_core.jl
+++ b/test/csc_core.jl
@@ -1,14 +1,18 @@
-# CSC-native core tests: the `SparseMatrixCSC` API is the native path, read
-# directly with no transpose or intermediate allocation. (Driven from
-# `runtests.jl`.)
+# CSC-native core tests. Run in a SEPARATE process that does NOT load
+# `SparseMatricesCSR`, so this proves the `SparseMatrixCSC` API works as the
+# native path with the CSR extension absent. (Driven from `runtests.jl`.)
 using Test
 using LinearAlgebra
 using SparseArrays
 using Random
 using SparseColumnPivotedQR
-using AMD
+using AMD  # AMD extension is independent of the CSR extension
 
-@testset "CSC-native core" begin
+@assert Base.get_extension(
+    SparseColumnPivotedQR, :SparseColumnPivotedQRSparseMatricesCSRExt
+) === nothing "SparseMatricesCSR extension must NOT be loaded in the CSC-core test process"
+
+@testset "CSC-native core (no SparseMatricesCSR loaded)" begin
     for T in (Float64, ComplexF64)
         cv(M) = T <: Complex ? (T.(M) .+ T(0.3im) .* (M .!= 0)) : T.(M)
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 43b7d14..4cff47c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,15 +1,20 @@
 using Test
 using LinearAlgebra
 using SparseArrays
+using SparseMatricesCSR
 using Random
 using SparseColumnPivotedQR
 using AMD  # trigger AMD extension
 using ForwardDiff
 
-# The native API is `SparseMatrixCSC`; these helpers just normalize an input to
-# a CSC matrix.
-to_csr(A::SparseMatrixCSC) = A
-build_csr(A::AbstractMatrix) = sparse(A)
+# helper: convert CSC -> CSR
+to_csr(A::SparseMatrixCSC) = SparseMatrixCSR(transpose(sparse(transpose(A))))
+
+# Slightly nicer helper: build a CSR from a Julia matrix
+function build_csr(A::AbstractMatrix{T}) where {T}
+    Acsc = sparse(A)
+    return SparseMatrixCSR(transpose(sparse(transpose(Acsc))))
+end
 
 # Steady-state allocation of one `csr_refactor!(adaptive_dense=true)` and one
 # `ldiv!`, measured inside a function barrier so the `@allocated` is not
@@ -280,7 +285,7 @@ end
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
-            Acsr = sparse(A)
+            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
             F = csr_qr(Acsr)
             Fspqr = qr(A)
             xspqr = Fspqr \ b
@@ -452,7 +457,7 @@ end
             text = read(f, String)
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
-            Acsr = sparse(A)
+            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
             sym_default = csr_analyze(Acsr)
             sym_amd = csr_analyze(Acsr; ordering = :amd)
             # :default == :amd here.
@@ -540,7 +545,7 @@ end
             lines = split(text, '\n'; keepempty = false)
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
-            Acsr = sparse(A)
+            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
             sym_adapt = csr_analyze(Acsr; ordering = :adaptive)
             @test sym_adapt.ordering === :amd
             F = csr_factor(Acsr, sym_adapt)
@@ -604,7 +609,9 @@ end
 
         # The zero-allocation contract is on the native `SparseMatrixCSC` path:
         # its `colptr`/`rowval`/`nzval` are read straight into the pooled
-        # workspace with no transpose or intermediate allocation.
+        # workspace with no transpose or intermediate allocation. (The CSR
+        # extension necessarily allocates one `SparseMatrixCSC` per call to
+        # convert, so it is not — and is not expected to be — zero-alloc.)
         F = csr_qr(A1; ordering = :amd)
         # Warm up.
         csr_refactor!(F, A2)
@@ -653,7 +660,7 @@ end
             A = eval(Meta.parse(strip(lines[1])))
             b = eval(Meta.parse(strip(lines[2])))
             all(isfinite, b) || continue
-            Acsr = sparse(A)
+            Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
             x_sparse = csr_qr(Acsr; ordering = :amd) \ b
             Fd = csr_qr(Acsr; ordering = :amd, adaptive_dense = true)
             x_dense = Fd \ b
@@ -908,7 +915,7 @@ end
                 lines = split(text, '\n'; keepempty = false)
                 A = eval(Meta.parse(strip(lines[1])))
                 b = eval(Meta.parse(strip(lines[2])))
-                Acsr = sparse(A)
+                Acsr = SparseMatrixCSR(transpose(sparse(transpose(A))))
                 F = csr_qr(Acsr; adaptive_dense = true)
                 Fspqr = qr(A)
                 xspqr = Fspqr \ b
@@ -1041,6 +1048,19 @@ end
 
 end
 
-# Dedicated CSC-native core checks (the `SparseMatrixCSC` API is the native,
-# allocation-free path).
-include(joinpath(@__DIR__, "csc_core.jl"))
+# The CSC-native core must work with the `SparseMatricesCSR` extension absent.
+# This process has `using SparseMatricesCSR` loaded (so the extension is
+# active), so run the CSC-only checks in a fresh subprocess that never loads
+# `SparseMatricesCSR`.
+@testset "CSC-native core in a SparseMatricesCSR-free process" begin
+    code = """
+    push!(LOAD_PATH, "@")
+    include($(repr(joinpath(@__DIR__, "csc_core.jl"))))
+    """
+    p = run(
+        ignorestatus(
+            `$(Base.julia_cmd()) --project=$(Base.active_project()) -e $code`
+        ),
+    )
+    @test p.exitcode == 0
+end