From 3a0e4c52063824b54f6bad16a4480fa8b9caf664 Mon Sep 17 00:00:00 2001
From: Ben Corbett <corbett5@llnl.gov>
Date: Sat, 25 Apr 2026 23:45:11 -0600
Subject: [PATCH] Refactored large-graph-mpo.jl

---
 src/ITensorMPOConstruction.jl                 |   4 +-
 ...graph-mpo.jl => large-graph-mpo-common.jl} | 374 ------------------
 src/large-graph-mpo-qr.jl                     | 211 ++++++++++
 src/large-graph-mpo-vertex-cover.jl           | 161 ++++++++
 4 files changed, 375 insertions(+), 375 deletions(-)
 rename src/{large-graph-mpo.jl => large-graph-mpo-common.jl} (57%)
 create mode 100644 src/large-graph-mpo-qr.jl
 create mode 100644 src/large-graph-mpo-vertex-cover.jl

diff --git a/src/ITensorMPOConstruction.jl b/src/ITensorMPOConstruction.jl
index 0fdc75b..9f4d46f 100644
--- a/src/ITensorMPOConstruction.jl
+++ b/src/ITensorMPOConstruction.jl
@@ -21,7 +21,9 @@ include("ops.jl")
 include("BipartiteGraph.jl")
 include("connected-components.jl")
 include("minimum-vertex-cover.jl")
-include("large-graph-mpo.jl")
+include("large-graph-mpo-common.jl")
+include("large-graph-mpo-qr.jl")
+include("large-graph-mpo-vertex-cover.jl")
 include("sparse_tensor_construction.jl")
 include("MPOConstruction.jl")
 
diff --git a/src/large-graph-mpo.jl b/src/large-graph-mpo-common.jl
similarity index 57%
rename from src/large-graph-mpo.jl
rename to src/large-graph-mpo-common.jl
index 8fb4970..aec8010 100644
--- a/src/large-graph-mpo.jl
+++ b/src/large-graph-mpo-common.jl
@@ -1,5 +1,3 @@
-# TODO: Split into multiple files
-
 """
     BlockSparseMatrix{C}
 
@@ -270,88 +268,6 @@ site 1, while right vertices carry the remaining operator tuples.
   return g
 end
 
-"""
-    sparse_qr(A::SparseMatrixCSC, tol::Real, absolute_tol::Bool)
-        -> Tuple{Q,R,prow,pcol,rank}
-
-Compute a sparse QR factorization of `A` using SuiteSparse SPQR.
-
-If `absolute_tol` is `false`, `tol` is interpreted relative to SPQR's default
-tolerance scale. The return values are the orthogonal factor `Q`, upper-triangular
-factor `R`, row and column permutations, and the numerical rank. SPQR uses the
-current BLAS thread count for its internal thread setting.
-"""
-function sparse_qr(
-  A::SparseMatrixCSC, tol::Real, absolute_tol::Bool
-)::Tuple{SparseArrays.SPQR.QRSparseQ,SparseMatrixCSC,Vector{Int},Vector{Int},Int}
-  ret = nothing
-
-  ## The tolerance is specified in Section 2.3
-  ## https://fossies.org/linux/SuiteSparse/SPQR/Doc/spqr_user_guide.pdf
-  if !absolute_tol
-    tol *= SparseArrays.SPQR._default_tol(A)
-  end
-
-  SparseArrays.CHOLMOD.@cholmod_param SPQR_nthreads = BLAS.get_num_threads() begin
-    ret = qr(A; tol)
-  end
-
-  return ret.Q, ret.R, ret.prow, ret.pcol, rank(ret)
-end
-
-"""
-    for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int) -> Nothing
-
-Iterate over the nonzero entries of the first `max_col` columns of `A`, calling
-`f(values, rows, col)` once per nonempty column.
-
-`values` and `rows` are views into the CSC storage of `A` for that column, so
-the callback must not retain them beyond the call.
-"""
-function for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int)::Nothing
-  @assert max_col <= size(A, 2) "$max_col, $(size(A, 2))"
-
-  rows = rowvals(A)
-  vals = nonzeros(A)
-  for col in 1:max_col
-    range = nzrange(A, col)
-    isempty(range) && continue
-    f((@view vals[range]), (@view rows[range]), col)
-  end
-end
-
-"""
-    for_non_zeros_batch(f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int) -> Nothing
-
-Iterate over the first `max_col` columns of a sparse SPQR `Q` factor, forming
-each column explicitly and calling `f(column, col)`. The dense column workspace
-is reused between calls, so the callback must copy it if it needs to keep it.
-"""
-function for_non_zeros_batch(
-  f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int
-)::Nothing
-  @assert max_col <= size(Q, 2) "$max_col, $(size(Q, 2))"
-
-  function get_column!(Q::SparseArrays.SPQR.QRSparseQ, col::Int, res::Vector)
-    res .= 0
-    res[col] = 1
-
-    for l in size(Q.factors, 2):-1:1
-      τl = -Q.τ[l]
-      h = view(Q.factors, :, l)
-      axpy!(τl*dot(h, res), h, res)
-    end
-
-    return res
-  end
-
-  res = zeros(eltype(Q), size(Q, 1))
-  for col in 1:max_col
-    get_column!(Q, col, res)
-    f(res, col)
-  end
-end
-
 """
     add_to_local_matrix!(a, weight, local_op, needs_JW_string) -> Nothing
 
@@ -480,296 +396,6 @@ function process_single_left_vertex_cc!(
   return nothing
 end
 
-"""
-    process_vertex_cover!(
-      matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, op_cache_vec
-    ) -> Nothing
-
-Process every connected component using the minimum-vertex-cover specialization.
-
-For each component, the cover columns are laid out as `[left_cover; right_cover]`
-in the local bond dimension. Left-cover columns emit single local operators with
-unit graph weight; right-cover columns accumulate uncovered edges with their
-stored edge weights. On the final site, only the local tensor blocks are needed;
-otherwise this also builds `next_edges_of_cc` for the graph at site `n + 1`.
-"""
-@timeit function process_vertex_cover!(
-  matrix_of_cc::Vector{BlockSparseMatrix{ValType}},
-  rank_of_cc::Vector{Int},
-  next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}},
-  g::MPOGraph{N,C,Ti},
-  ccs::BipartiteGraphConnectedComponents,
-  n::Int,
-  sites::Vector{<:Index},
-  op_cache_vec::OpCacheVec,
-)::Nothing where {ValType<:Number,N,C,Ti}
-  site_dim = dim(sites[n])
-  op_cache = op_cache_vec[n]
-
-  next_op_of_rv_id = Ti[]
-  if n != length(sites)
-    resize!(next_op_of_rv_id, right_size(g))
-    Threads.@threads for rv_id in 1:right_size(g)
-      next_op_of_rv_id[rv_id] = get_onsite_op(right_vertex(g, rv_id), n + 1)
-    end
-  end
-
-  # TODO: Consider nested multithreading
-  Threads.@threads for cc in 1:num_connected_components(ccs)
-    matrix = matrix_of_cc[cc]
-    lvs_of_component::Vector{Int} = ccs.lvs_of_component[cc]
-    position_of_rvs_in_component = ccs.position_of_rvs_in_component
-    rv_size_of_component = ccs.rv_size_of_component[cc]
-
-    ## No idea why, but these need to be typed or allocations go nuts.
-    left_cover::Vector{Int}, right_cover::Vector{Int} = minimum_vertex_cover(g, ccs, cc)
-
-    rank = length(left_cover) + length(right_cover)
-    rank_of_cc[cc] = rank
-
-    ## Construct the tensor from the left cover.
-    @inbounds for m in eachindex(left_cover)
-      lv_id = lvs_of_component[left_cover[m]]
-      lv = left_vertex(g, lv_id)
-      local_op = op_cache[lv.op_id].matrix
-
-      matrix_element = zeros(ValType, site_dim, site_dim)
-      add_to_local_matrix!(matrix_element, one(ValType), local_op, lv.needs_JW_string)
-      matrix[lv.link, m] = matrix_element
-    end
-
-    ## Construct the tensor from the right cover.
-    # TODO: Merge this loop with the one above, using `in_left_cover`
-    let
-      in_left_cover = falses(length(lvs_of_component))
-      @inbounds for local_id in left_cover
-        in_left_cover[local_id] = true
-      end
-
-      uncovered_left_ids = Vector{Int}(undef, length(lvs_of_component) - length(left_cover))
-      next_uncovered = 1
-      @inbounds for local_id in eachindex(lvs_of_component)
-        if !in_left_cover[local_id]
-          uncovered_left_ids[next_uncovered] = lvs_of_component[local_id]
-          next_uncovered += 1
-        end
-      end
-
-      right_cover_m = Vector{Int}(undef, rv_size_of_component)
-      @inbounds for (m, local_rv) in enumerate(right_cover)
-        right_cover_m[local_rv] = length(left_cover) + m
-      end
-
-      @inbounds for lv_id in uncovered_left_ids
-        lv = left_vertex(g, lv_id)
-        local_op = op_cache[lv.op_id].matrix
-
-        for (rv_id, weight) in weighted_edge_iterator(g, lv_id)
-          m = right_cover_m[position_of_rvs_in_component[rv_id]]
-
-          matrix_element = get!(matrix, (lv.link, m)) do
-            zeros(ValType, site_dim, site_dim)
-          end
-
-          add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string)
-        end
-      end
-    end
-
-    n == length(sites) && continue
-
-    ## Preallocate space for next_edges
-    next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}(
-      undef, rank, length(op_cache_vec[n + 1])
-    )
-    let
-      next_edge_sizes = zeros(Int, rank, length(op_cache_vec[n + 1]))
-      @inbounds for m in eachindex(left_cover)
-        lv_id = lvs_of_component[left_cover[m]]
-        for rv_id in g.right_vertex_ids_from_left[lv_id]
-          op_id = next_op_of_rv_id[rv_id]
-          next_edge_sizes[m, op_id] += 1
-        end
-      end
-
-      @inbounds for i in eachindex(next_edges)
-        n_edges = next_edge_sizes[i]
-        next_edges[i] = sizehint!(Int[], n_edges), sizehint!(C[], n_edges)
-      end
-    end
-
-    ## Construct next_edges for the left_cover
-    @inbounds for m in eachindex(left_cover)
-      lv_id = lvs_of_component[left_cover[m]]
-      for (rv_id, weight) in weighted_edge_iterator(g, lv_id)
-        op_id = next_op_of_rv_id[rv_id]
-
-        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
-        push!(next_right_vertex_ids, rv_id)
-        push!(next_edge_weights, weight)
-      end
-    end
-
-    ## Construct next_edges for the right_cover
-    let
-      rvs_of_component = Vector{Int}(undef, rv_size_of_component)
-      @inbounds for lv_id in lvs_of_component
-        right_vertex_ids = g.right_vertex_ids_from_left[lv_id]
-        for edge_id in eachindex(right_vertex_ids)
-          rv_id = right_vertex_ids[edge_id]
-          rvs_of_component[position_of_rvs_in_component[rv_id]] = rv_id
-        end
-      end
-
-      @inbounds for m in eachindex(right_cover)
-        rv_id = rvs_of_component[right_cover[m]]
-        m += length(left_cover)
-
-        op_id = next_op_of_rv_id[rv_id]
-        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
-        resize!(next_right_vertex_ids, 1)
-        resize!(next_edge_weights, 1)
-
-        next_right_vertex_ids[1] = rv_id
-        next_edge_weights[1] = one(C)
-      end
-    end
-
-    next_edges_of_cc[cc] = next_edges
-  end
-
-  return nothing
-end
-
-"""
-    process_qr(
-      matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, tol, absolute_tol, op_cache_vec
-    ) -> Nothing
-
-Process every connected component using the sparse-QR path.
-
-Single-left-vertex components use `process_single_left_vertex_cc!`; larger
-components are extracted as sparse weighted adjacency matrices, decomposed with
-`sparse_qr`, and translated into local tensor blocks plus outgoing graph edges.
-On the final site, the scalar `R` factor is applied directly to the local
-blocks.
-"""
-@timeit function process_qr(
-  matrix_of_cc::Vector{BlockSparseMatrix{ValType}},
-  rank_of_cc::Vector{Int},
-  next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}},
-  g::MPOGraph{N,C,Ti},
-  ccs::BipartiteGraphConnectedComponents,
-  n::Int,
-  sites::Vector{<:Index},
-  tol::Real,
-  absolute_tol::Bool,
-  op_cache_vec::OpCacheVec,
-)::Nothing where {ValType<:Number,N,C,Ti}
-  Threads.@threads for cc in 1:num_connected_components(ccs)
-    ## A specialization for when there is only one vertex on the left. This is
-    ## a very common case that can be sped up significantly.
-    if left_size(ccs, cc) == 1
-      process_single_left_vertex_cc!(
-        matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, cc, n, sites, op_cache_vec
-      )
-      continue
-    end
-
-    matrix = matrix_of_cc[cc]
-    W, left_map, right_map = get_cc_matrix(g, ccs, cc; clear_edges=true)
-
-    ## Compute the decomposition and then free W
-    Q, R, prow, pcol, rank = sparse_qr(W, tol, absolute_tol)
-    W = nothing
-
-    rank_of_cc[cc] = rank
-
-    ## Form the local transformation tensor.
-    for_non_zeros_batch(Q, rank) do weights::AbstractVector{C}, m::Int
-      for (i, weight) in enumerate(weights)
-        weight == 0 && continue
-
-        ## TODO: This allocates for some reason
-        lv = left_vertex(g, left_map[prow[i]])
-        local_op = op_cache_vec[n][lv.op_id].matrix
-
-        matrix_element = get!(matrix, (lv.link, m)) do
-          return zeros(ValType, dim(sites[n]), dim(sites[n]))
-        end
-
-        add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string)
-      end
-    end
-
-    ## Q and prow are no longer needed.
-    Q = nothing
-    prow = nothing
-
-    ## If we are at the last site, then R will be a 1x1 matrix containing an overall scaling.
-    if n == length(sites)
-      scaling = only(R)
-      for block in values(matrix)
-        block .*= scaling
-      end
-
-      continue
-    end
-
-    next_edges_size = zeros(Int, rank, length(op_cache_vec[n + 1]))
-    rv_id_lookup = Vector{Int}(undef, size(R, 2))
-    op_id_lookup = Vector{Ti}(undef, size(R, 2))
-
-    for_non_zeros_batch(
-      R, length(right_map)
-    ) do _::AbstractVector{C}, ms::AbstractVector{Int}, j::Int
-      ## Convert j, which has been permuted first by the connected components
-      ## and then again by SPQR into a right vertex Id.
-      rv_id = right_map[pcol[j]]
-
-      ## Get the operator acting on site (n + 1) of this right vertex.
-      op_id = get_onsite_op(right_vertex(g, rv_id), n + 1)
-
-      rv_id_lookup[j] = rv_id
-      op_id_lookup[j] = op_id
-
-      for m in ms
-        next_edges_size[m, op_id] += 1
-      end
-    end
-
-    ## Build the graph for the next site out of this component.
-    next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}(
-      undef, rank, length(op_cache_vec[n + 1])
-    )
-    for i in eachindex(next_edges)
-      right_vertex_ids = Int[]
-      edge_weights = C[]
-      sizehint!(right_vertex_ids, next_edges_size[i])
-      sizehint!(edge_weights, next_edges_size[i])
-      next_edges[i] = (right_vertex_ids, edge_weights)
-    end
-
-    for_non_zeros_batch(
-      R, length(right_map)
-    ) do weights::AbstractVector{C}, ms::AbstractVector{Int}, j::Int
-      rv_id = rv_id_lookup[j]
-      op_id = op_id_lookup[j]
-
-      ## Add the edges.
-      for (m, weight) in zip(ms, weights)
-        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
-        push!(next_right_vertex_ids, rv_id)
-        push!(next_edge_weights, weight)
-      end
-    end
-
-    next_edges_of_cc[cc] = next_edges
-  end
-
-  return nothing
-end
-
 """
     at_site!(ValType, g, n, sites, tol, absolute_tol, op_cache_vec, alg;
              combine_qn_sectors, output_level=0)
diff --git a/src/large-graph-mpo-qr.jl b/src/large-graph-mpo-qr.jl
new file mode 100644
index 0000000..d632b17
--- /dev/null
+++ b/src/large-graph-mpo-qr.jl
@@ -0,0 +1,211 @@
+"""
+    sparse_qr(A::SparseMatrixCSC, tol::Real, absolute_tol::Bool)
+        -> Tuple{Q,R,prow,pcol,rank}
+
+Compute a sparse QR factorization of `A` using SuiteSparse SPQR.
+
+If `absolute_tol` is `false`, `tol` is interpreted relative to SPQR's default
+tolerance scale. The return values are the orthogonal factor `Q`, upper-triangular
+factor `R`, row and column permutations, and the numerical rank. SPQR uses the
+current BLAS thread count for its internal thread setting.
+"""
+function sparse_qr(
+  A::SparseMatrixCSC, tol::Real, absolute_tol::Bool
+)::Tuple{SparseArrays.SPQR.QRSparseQ,SparseMatrixCSC,Vector{Int},Vector{Int},Int}
+  ret = nothing
+
+  ## The tolerance is specified in Section 2.3
+  ## https://fossies.org/linux/SuiteSparse/SPQR/Doc/spqr_user_guide.pdf
+  if !absolute_tol
+    tol *= SparseArrays.SPQR._default_tol(A)
+  end
+
+  SparseArrays.CHOLMOD.@cholmod_param SPQR_nthreads = BLAS.get_num_threads() begin
+    ret = qr(A; tol)
+  end
+
+  return ret.Q, ret.R, ret.prow, ret.pcol, rank(ret)
+end
+
+"""
+    for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int) -> Nothing
+
+Iterate over the nonzero entries of the first `max_col` columns of `A`, calling
+`f(values, rows, col)` once per nonempty column.
+
+`values` and `rows` are views into the CSC storage of `A` for that column, so
+the callback must not retain them beyond the call.
+"""
+function for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int)::Nothing
+  @assert max_col <= size(A, 2) "$max_col, $(size(A, 2))"
+
+  rows = rowvals(A)
+  vals = nonzeros(A)
+  for col in 1:max_col
+    range = nzrange(A, col)
+    isempty(range) && continue
+    f((@view vals[range]), (@view rows[range]), col)
+  end
+end
+
+"""
+    for_non_zeros_batch(f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int) -> Nothing
+
+Iterate over the first `max_col` columns of a sparse SPQR `Q` factor, forming
+each column explicitly and calling `f(column, col)`. The dense column workspace
+is reused between calls, so the callback must copy it if it needs to keep it.
+"""
+function for_non_zeros_batch(
+  f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int
+)::Nothing
+  @assert max_col <= size(Q, 2) "$max_col, $(size(Q, 2))"
+
+  function get_column!(Q::SparseArrays.SPQR.QRSparseQ, col::Int, res::Vector)
+    res .= 0
+    res[col] = 1
+
+    for l in size(Q.factors, 2):-1:1
+      τl = -Q.τ[l]
+      h = view(Q.factors, :, l)
+      axpy!(τl*dot(h, res), h, res)
+    end
+
+    return res
+  end
+
+  res = zeros(eltype(Q), size(Q, 1))
+  for col in 1:max_col
+    get_column!(Q, col, res)
+    f(res, col)
+  end
+end
+
+"""
+    process_qr(
+      matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, tol, absolute_tol, op_cache_vec
+    ) -> Nothing
+
+Process every connected component using the sparse-QR path.
+
+Single-left-vertex components use `process_single_left_vertex_cc!`; larger
+components are extracted as sparse weighted adjacency matrices, decomposed with
+`sparse_qr`, and translated into local tensor blocks plus outgoing graph edges.
+On the final site, the scalar `R` factor is applied directly to the local
+blocks.
+"""
+@timeit function process_qr(
+  matrix_of_cc::Vector{BlockSparseMatrix{ValType}},
+  rank_of_cc::Vector{Int},
+  next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}},
+  g::MPOGraph{N,C,Ti},
+  ccs::BipartiteGraphConnectedComponents,
+  n::Int,
+  sites::Vector{<:Index},
+  tol::Real,
+  absolute_tol::Bool,
+  op_cache_vec::OpCacheVec,
+)::Nothing where {ValType<:Number,N,C,Ti}
+  Threads.@threads for cc in 1:num_connected_components(ccs)
+    ## A specialization for when there is only one vertex on the left. This is
+    ## a very common case that can be sped up significantly.
+    if left_size(ccs, cc) == 1
+      process_single_left_vertex_cc!(
+        matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, cc, n, sites, op_cache_vec
+      )
+      continue
+    end
+
+    matrix = matrix_of_cc[cc]
+    W, left_map, right_map = get_cc_matrix(g, ccs, cc; clear_edges=true)
+
+    ## Compute the decomposition and then free W
+    Q, R, prow, pcol, rank = sparse_qr(W, tol, absolute_tol)
+    W = nothing
+
+    rank_of_cc[cc] = rank
+
+    ## Form the local transformation tensor.
+    for_non_zeros_batch(Q, rank) do weights::AbstractVector{C}, m::Int
+      for (i, weight) in enumerate(weights)
+        weight == 0 && continue
+
+        ## TODO: This allocates for some reason
+        lv = left_vertex(g, left_map[prow[i]])
+        local_op = op_cache_vec[n][lv.op_id].matrix
+
+        matrix_element = get!(matrix, (lv.link, m)) do
+          return zeros(ValType, dim(sites[n]), dim(sites[n]))
+        end
+
+        add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string)
+      end
+    end
+
+    ## Q and prow are no longer needed.
+    Q = nothing
+    prow = nothing
+
+    ## If we are at the last site, then R will be a 1x1 matrix containing an overall scaling.
+    if n == length(sites)
+      scaling = only(R)
+      for block in values(matrix)
+        block .*= scaling
+      end
+
+      continue
+    end
+
+    next_edges_size = zeros(Int, rank, length(op_cache_vec[n + 1]))
+    rv_id_lookup = Vector{Int}(undef, size(R, 2))
+    op_id_lookup = Vector{Ti}(undef, size(R, 2))
+
+    for_non_zeros_batch(
+      R, length(right_map)
+    ) do _::AbstractVector{C}, ms::AbstractVector{Int}, j::Int
+      ## Convert j, which has been permuted first by the connected components
+      ## and then again by SPQR into a right vertex Id.
+      rv_id = right_map[pcol[j]]
+
+      ## Get the operator acting on site (n + 1) of this right vertex.
+      op_id = get_onsite_op(right_vertex(g, rv_id), n + 1)
+
+      rv_id_lookup[j] = rv_id
+      op_id_lookup[j] = op_id
+
+      for m in ms
+        next_edges_size[m, op_id] += 1
+      end
+    end
+
+    ## Build the graph for the next site out of this component.
+    next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}(
+      undef, rank, length(op_cache_vec[n + 1])
+    )
+    for i in eachindex(next_edges)
+      right_vertex_ids = Int[]
+      edge_weights = C[]
+      sizehint!(right_vertex_ids, next_edges_size[i])
+      sizehint!(edge_weights, next_edges_size[i])
+      next_edges[i] = (right_vertex_ids, edge_weights)
+    end
+
+    for_non_zeros_batch(
+      R, length(right_map)
+    ) do weights::AbstractVector{C}, ms::AbstractVector{Int}, j::Int
+      rv_id = rv_id_lookup[j]
+      op_id = op_id_lookup[j]
+
+      ## Add the edges.
+      for (m, weight) in zip(ms, weights)
+        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
+        push!(next_right_vertex_ids, rv_id)
+        push!(next_edge_weights, weight)
+      end
+    end
+
+    next_edges_of_cc[cc] = next_edges
+  end
+
+  return nothing
+end
+
diff --git a/src/large-graph-mpo-vertex-cover.jl b/src/large-graph-mpo-vertex-cover.jl
new file mode 100644
index 0000000..2909a5d
--- /dev/null
+++ b/src/large-graph-mpo-vertex-cover.jl
@@ -0,0 +1,161 @@
+"""
+    process_vertex_cover!(
+      matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, op_cache_vec
+    ) -> Nothing
+
+Process every connected component using the minimum-vertex-cover specialization.
+
+For each component, the cover columns are laid out as `[left_cover; right_cover]`
+in the local bond dimension. Left-cover columns emit single local operators with
+unit graph weight; right-cover columns accumulate uncovered edges with their
+stored edge weights. On the final site, only the local tensor blocks are needed;
+otherwise this also builds `next_edges_of_cc` for the graph at site `n + 1`.
+"""
+@timeit function process_vertex_cover!(
+  matrix_of_cc::Vector{BlockSparseMatrix{ValType}},
+  rank_of_cc::Vector{Int},
+  next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}},
+  g::MPOGraph{N,C,Ti},
+  ccs::BipartiteGraphConnectedComponents,
+  n::Int,
+  sites::Vector{<:Index},
+  op_cache_vec::OpCacheVec,
+)::Nothing where {ValType<:Number,N,C,Ti}
+  site_dim = dim(sites[n])
+  op_cache = op_cache_vec[n]
+
+  next_op_of_rv_id = Ti[]
+  if n != length(sites)
+    resize!(next_op_of_rv_id, right_size(g))
+    Threads.@threads for rv_id in 1:right_size(g)
+      next_op_of_rv_id[rv_id] = get_onsite_op(right_vertex(g, rv_id), n + 1)
+    end
+  end
+
+  # TODO: Consider nested multithreading
+  Threads.@threads for cc in 1:num_connected_components(ccs)
+    matrix = matrix_of_cc[cc]
+    lvs_of_component::Vector{Int} = ccs.lvs_of_component[cc]
+    position_of_rvs_in_component = ccs.position_of_rvs_in_component
+    rv_size_of_component = ccs.rv_size_of_component[cc]
+
+    ## No idea why, but these need to be typed or allocations go nuts.
+    left_cover::Vector{Int}, right_cover::Vector{Int} = minimum_vertex_cover(g, ccs, cc)
+
+    rank = length(left_cover) + length(right_cover)
+    rank_of_cc[cc] = rank
+
+    ## Construct the tensor from the left cover.
+    @inbounds for m in eachindex(left_cover)
+      lv_id = lvs_of_component[left_cover[m]]
+      lv = left_vertex(g, lv_id)
+      local_op = op_cache[lv.op_id].matrix
+
+      matrix_element = zeros(ValType, site_dim, site_dim)
+      add_to_local_matrix!(matrix_element, one(ValType), local_op, lv.needs_JW_string)
+      matrix[lv.link, m] = matrix_element
+    end
+
+    ## Construct the tensor from the right cover.
+    # TODO: Merge this loop with the one above, using `in_left_cover`
+    let
+      in_left_cover = falses(length(lvs_of_component))
+      @inbounds for local_id in left_cover
+        in_left_cover[local_id] = true
+      end
+
+      uncovered_left_ids = Vector{Int}(undef, length(lvs_of_component) - length(left_cover))
+      next_uncovered = 1
+      @inbounds for local_id in eachindex(lvs_of_component)
+        if !in_left_cover[local_id]
+          uncovered_left_ids[next_uncovered] = lvs_of_component[local_id]
+          next_uncovered += 1
+        end
+      end
+
+      right_cover_m = Vector{Int}(undef, rv_size_of_component)
+      @inbounds for (m, local_rv) in enumerate(right_cover)
+        right_cover_m[local_rv] = length(left_cover) + m
+      end
+
+      @inbounds for lv_id in uncovered_left_ids
+        lv = left_vertex(g, lv_id)
+        local_op = op_cache[lv.op_id].matrix
+
+        for (rv_id, weight) in weighted_edge_iterator(g, lv_id)
+          m = right_cover_m[position_of_rvs_in_component[rv_id]]
+
+          matrix_element = get!(matrix, (lv.link, m)) do
+            zeros(ValType, site_dim, site_dim)
+          end
+
+          add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string)
+        end
+      end
+    end
+
+    n == length(sites) && continue
+
+    ## Preallocate space for next_edges
+    next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}(
+      undef, rank, length(op_cache_vec[n + 1])
+    )
+    let
+      next_edge_sizes = zeros(Int, rank, length(op_cache_vec[n + 1]))
+      @inbounds for m in eachindex(left_cover)
+        lv_id = lvs_of_component[left_cover[m]]
+        for rv_id in g.right_vertex_ids_from_left[lv_id]
+          op_id = next_op_of_rv_id[rv_id]
+          next_edge_sizes[m, op_id] += 1
+        end
+      end
+
+      @inbounds for i in eachindex(next_edges)
+        n_edges = next_edge_sizes[i]
+        next_edges[i] = sizehint!(Int[], n_edges), sizehint!(C[], n_edges)
+      end
+    end
+
+    ## Construct next_edges for the left_cover
+    @inbounds for m in eachindex(left_cover)
+      lv_id = lvs_of_component[left_cover[m]]
+      for (rv_id, weight) in weighted_edge_iterator(g, lv_id)
+        op_id = next_op_of_rv_id[rv_id]
+
+        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
+        push!(next_right_vertex_ids, rv_id)
+        push!(next_edge_weights, weight)
+      end
+    end
+
+    ## Construct next_edges for the right_cover
+    let
+      rvs_of_component = Vector{Int}(undef, rv_size_of_component)
+      @inbounds for lv_id in lvs_of_component
+        right_vertex_ids = g.right_vertex_ids_from_left[lv_id]
+        for edge_id in eachindex(right_vertex_ids)
+          rv_id = right_vertex_ids[edge_id]
+          rvs_of_component[position_of_rvs_in_component[rv_id]] = rv_id
+        end
+      end
+
+      @inbounds for m in eachindex(right_cover)
+        rv_id = rvs_of_component[right_cover[m]]
+        m += length(left_cover)
+
+        op_id = next_op_of_rv_id[rv_id]
+        next_right_vertex_ids, next_edge_weights = next_edges[m, op_id]
+        resize!(next_right_vertex_ids, 1)
+        resize!(next_edge_weights, 1)
+
+        next_right_vertex_ids[1] = rv_id
+        next_edge_weights[1] = one(C)
+      end
+    end
+
+    next_edges_of_cc[cc] = next_edges
+  end
+
+  return nothing
+end
+