From 3a0e4c52063824b54f6bad16a4480fa8b9caf664 Mon Sep 17 00:00:00 2001 From: Ben Corbett Date: Sat, 25 Apr 2026 23:45:11 -0600 Subject: [PATCH] Refactored large-graph-mpo.jl --- src/ITensorMPOConstruction.jl | 4 +- ...graph-mpo.jl => large-graph-mpo-common.jl} | 374 ------------------ src/large-graph-mpo-qr.jl | 211 ++++++++++ src/large-graph-mpo-vertex-cover.jl | 161 ++++++++ 4 files changed, 375 insertions(+), 375 deletions(-) rename src/{large-graph-mpo.jl => large-graph-mpo-common.jl} (57%) create mode 100644 src/large-graph-mpo-qr.jl create mode 100644 src/large-graph-mpo-vertex-cover.jl diff --git a/src/ITensorMPOConstruction.jl b/src/ITensorMPOConstruction.jl index 0fdc75b..9f4d46f 100644 --- a/src/ITensorMPOConstruction.jl +++ b/src/ITensorMPOConstruction.jl @@ -21,7 +21,9 @@ include("ops.jl") include("BipartiteGraph.jl") include("connected-components.jl") include("minimum-vertex-cover.jl") -include("large-graph-mpo.jl") +include("large-graph-mpo-common.jl") +include("large-graph-mpo-qr.jl") +include("large-graph-mpo-vertex-cover.jl") include("sparse_tensor_construction.jl") include("MPOConstruction.jl") diff --git a/src/large-graph-mpo.jl b/src/large-graph-mpo-common.jl similarity index 57% rename from src/large-graph-mpo.jl rename to src/large-graph-mpo-common.jl index 8fb4970..aec8010 100644 --- a/src/large-graph-mpo.jl +++ b/src/large-graph-mpo-common.jl @@ -1,5 +1,3 @@ -# TODO: Split into multiple files - """ BlockSparseMatrix{C} @@ -270,88 +268,6 @@ site 1, while right vertices carry the remaining operator tuples. return g end -""" - sparse_qr(A::SparseMatrixCSC, tol::Real, absolute_tol::Bool) - -> Tuple{Q,R,prow,pcol,rank} - -Compute a sparse QR factorization of `A` using SuiteSparse SPQR. - -If `absolute_tol` is `false`, `tol` is interpreted relative to SPQR's default -tolerance scale. The return values are the orthogonal factor `Q`, upper-triangular -factor `R`, row and column permutations, and the numerical rank. SPQR uses the -current BLAS thread count for its internal thread setting. -""" -function sparse_qr( - A::SparseMatrixCSC, tol::Real, absolute_tol::Bool -)::Tuple{SparseArrays.SPQR.QRSparseQ,SparseMatrixCSC,Vector{Int},Vector{Int},Int} - ret = nothing - - ## The tolerance is specified in Section 2.3 - ## https://fossies.org/linux/SuiteSparse/SPQR/Doc/spqr_user_guide.pdf - if !absolute_tol - tol *= SparseArrays.SPQR._default_tol(A) - end - - SparseArrays.CHOLMOD.@cholmod_param SPQR_nthreads = BLAS.get_num_threads() begin - ret = qr(A; tol) - end - - return ret.Q, ret.R, ret.prow, ret.pcol, rank(ret) -end - -""" - for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int) -> Nothing - -Iterate over the nonzero entries of the first `max_col` columns of `A`, calling -`f(values, rows, col)` once per nonempty column. - -`values` and `rows` are views into the CSC storage of `A` for that column, so -the callback must not retain them beyond the call. -""" -function for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int)::Nothing - @assert max_col <= size(A, 2) "$max_col, $(size(A, 2))" - - rows = rowvals(A) - vals = nonzeros(A) - for col in 1:max_col - range = nzrange(A, col) - isempty(range) && continue - f((@view vals[range]), (@view rows[range]), col) - end -end - -""" - for_non_zeros_batch(f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int) -> Nothing - -Iterate over the first `max_col` columns of a sparse SPQR `Q` factor, forming -each column explicitly and calling `f(column, col)`. The dense column workspace -is reused between calls, so the callback must copy it if it needs to keep it. -""" -function for_non_zeros_batch( - f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int -)::Nothing - @assert max_col <= size(Q, 2) "$max_col, $(size(Q, 2))" - - function get_column!(Q::SparseArrays.SPQR.QRSparseQ, col::Int, res::Vector) - res .= 0 - res[col] = 1 - - for l in size(Q.factors, 2):-1:1 - τl = -Q.τ[l] - h = view(Q.factors, :, l) - axpy!(τl*dot(h, res), h, res) - end - - return res - end - - res = zeros(eltype(Q), size(Q, 1)) - for col in 1:max_col - get_column!(Q, col, res) - f(res, col) - end -end - """ add_to_local_matrix!(a, weight, local_op, needs_JW_string) -> Nothing @@ -480,296 +396,6 @@ function process_single_left_vertex_cc!( return nothing end -""" - process_vertex_cover!( - matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, op_cache_vec - ) -> Nothing - -Process every connected component using the minimum-vertex-cover specialization. - -For each component, the cover columns are laid out as `[left_cover; right_cover]` -in the local bond dimension. Left-cover columns emit single local operators with -unit graph weight; right-cover columns accumulate uncovered edges with their -stored edge weights. On the final site, only the local tensor blocks are needed; -otherwise this also builds `next_edges_of_cc` for the graph at site `n + 1`. -""" -@timeit function process_vertex_cover!( - matrix_of_cc::Vector{BlockSparseMatrix{ValType}}, - rank_of_cc::Vector{Int}, - next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}}, - g::MPOGraph{N,C,Ti}, - ccs::BipartiteGraphConnectedComponents, - n::Int, - sites::Vector{<:Index}, - op_cache_vec::OpCacheVec, -)::Nothing where {ValType<:Number,N,C,Ti} - site_dim = dim(sites[n]) - op_cache = op_cache_vec[n] - - next_op_of_rv_id = Ti[] - if n != length(sites) - resize!(next_op_of_rv_id, right_size(g)) - Threads.@threads for rv_id in 1:right_size(g) - next_op_of_rv_id[rv_id] = get_onsite_op(right_vertex(g, rv_id), n + 1) - end - end - - # TODO: Consider nested multithreading - Threads.@threads for cc in 1:num_connected_components(ccs) - matrix = matrix_of_cc[cc] - lvs_of_component::Vector{Int} = ccs.lvs_of_component[cc] - position_of_rvs_in_component = ccs.position_of_rvs_in_component - rv_size_of_component = ccs.rv_size_of_component[cc] - - ## No idea why, but these need to be typed or allocations go nuts. - left_cover::Vector{Int}, right_cover::Vector{Int} = minimum_vertex_cover(g, ccs, cc) - - rank = length(left_cover) + length(right_cover) - rank_of_cc[cc] = rank - - ## Construct the tensor from the left cover. - @inbounds for m in eachindex(left_cover) - lv_id = lvs_of_component[left_cover[m]] - lv = left_vertex(g, lv_id) - local_op = op_cache[lv.op_id].matrix - - matrix_element = zeros(ValType, site_dim, site_dim) - add_to_local_matrix!(matrix_element, one(ValType), local_op, lv.needs_JW_string) - matrix[lv.link, m] = matrix_element - end - - ## Construct the tensor from the right cover. - # TODO: Merge this loop with the one above, using `in_left_cover` - let - in_left_cover = falses(length(lvs_of_component)) - @inbounds for local_id in left_cover - in_left_cover[local_id] = true - end - - uncovered_left_ids = Vector{Int}(undef, length(lvs_of_component) - length(left_cover)) - next_uncovered = 1 - @inbounds for local_id in eachindex(lvs_of_component) - if !in_left_cover[local_id] - uncovered_left_ids[next_uncovered] = lvs_of_component[local_id] - next_uncovered += 1 - end - end - - right_cover_m = Vector{Int}(undef, rv_size_of_component) - @inbounds for (m, local_rv) in enumerate(right_cover) - right_cover_m[local_rv] = length(left_cover) + m - end - - @inbounds for lv_id in uncovered_left_ids - lv = left_vertex(g, lv_id) - local_op = op_cache[lv.op_id].matrix - - for (rv_id, weight) in weighted_edge_iterator(g, lv_id) - m = right_cover_m[position_of_rvs_in_component[rv_id]] - - matrix_element = get!(matrix, (lv.link, m)) do - zeros(ValType, site_dim, site_dim) - end - - add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string) - end - end - end - - n == length(sites) && continue - - ## Preallocate space for next_edges - next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}( - undef, rank, length(op_cache_vec[n + 1]) - ) - let - next_edge_sizes = zeros(Int, rank, length(op_cache_vec[n + 1])) - @inbounds for m in eachindex(left_cover) - lv_id = lvs_of_component[left_cover[m]] - for rv_id in g.right_vertex_ids_from_left[lv_id] - op_id = next_op_of_rv_id[rv_id] - next_edge_sizes[m, op_id] += 1 - end - end - - @inbounds for i in eachindex(next_edges) - n_edges = next_edge_sizes[i] - next_edges[i] = sizehint!(Int[], n_edges), sizehint!(C[], n_edges) - end - end - - ## Construct next_edges for the left_cover - @inbounds for m in eachindex(left_cover) - lv_id = lvs_of_component[left_cover[m]] - for (rv_id, weight) in weighted_edge_iterator(g, lv_id) - op_id = next_op_of_rv_id[rv_id] - - next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] - push!(next_right_vertex_ids, rv_id) - push!(next_edge_weights, weight) - end - end - - ## Construct next_edges for the right_cover - let - rvs_of_component = Vector{Int}(undef, rv_size_of_component) - @inbounds for lv_id in lvs_of_component - right_vertex_ids = g.right_vertex_ids_from_left[lv_id] - for edge_id in eachindex(right_vertex_ids) - rv_id = right_vertex_ids[edge_id] - rvs_of_component[position_of_rvs_in_component[rv_id]] = rv_id - end - end - - @inbounds for m in eachindex(right_cover) - rv_id = rvs_of_component[right_cover[m]] - m += length(left_cover) - - op_id = next_op_of_rv_id[rv_id] - next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] - resize!(next_right_vertex_ids, 1) - resize!(next_edge_weights, 1) - - next_right_vertex_ids[1] = rv_id - next_edge_weights[1] = one(C) - end - end - - next_edges_of_cc[cc] = next_edges - end - - return nothing -end - -""" - process_qr( - matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, tol, absolute_tol, op_cache_vec - ) -> Nothing - -Process every connected component using the sparse-QR path. - -Single-left-vertex components use `process_single_left_vertex_cc!`; larger -components are extracted as sparse weighted adjacency matrices, decomposed with -`sparse_qr`, and translated into local tensor blocks plus outgoing graph edges. -On the final site, the scalar `R` factor is applied directly to the local -blocks. -""" -@timeit function process_qr( - matrix_of_cc::Vector{BlockSparseMatrix{ValType}}, - rank_of_cc::Vector{Int}, - next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}}, - g::MPOGraph{N,C,Ti}, - ccs::BipartiteGraphConnectedComponents, - n::Int, - sites::Vector{<:Index}, - tol::Real, - absolute_tol::Bool, - op_cache_vec::OpCacheVec, -)::Nothing where {ValType<:Number,N,C,Ti} - Threads.@threads for cc in 1:num_connected_components(ccs) - ## A specialization for when there is only one vertex on the left. This is - ## a very common case that can be sped up significantly. - if left_size(ccs, cc) == 1 - process_single_left_vertex_cc!( - matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, cc, n, sites, op_cache_vec - ) - continue - end - - matrix = matrix_of_cc[cc] - W, left_map, right_map = get_cc_matrix(g, ccs, cc; clear_edges=true) - - ## Compute the decomposition and then free W - Q, R, prow, pcol, rank = sparse_qr(W, tol, absolute_tol) - W = nothing - - rank_of_cc[cc] = rank - - ## Form the local transformation tensor. - for_non_zeros_batch(Q, rank) do weights::AbstractVector{C}, m::Int - for (i, weight) in enumerate(weights) - weight == 0 && continue - - ## TODO: This allocates for some reason - lv = left_vertex(g, left_map[prow[i]]) - local_op = op_cache_vec[n][lv.op_id].matrix - - matrix_element = get!(matrix, (lv.link, m)) do - return zeros(ValType, dim(sites[n]), dim(sites[n])) - end - - add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string) - end - end - - ## Q and prow are no longer needed. - Q = nothing - prow = nothing - - ## If we are at the last site, then R will be a 1x1 matrix containing an overall scaling. - if n == length(sites) - scaling = only(R) - for block in values(matrix) - block .*= scaling - end - - continue - end - - next_edges_size = zeros(Int, rank, length(op_cache_vec[n + 1])) - rv_id_lookup = Vector{Int}(undef, size(R, 2)) - op_id_lookup = Vector{Ti}(undef, size(R, 2)) - - for_non_zeros_batch( - R, length(right_map) - ) do _::AbstractVector{C}, ms::AbstractVector{Int}, j::Int - ## Convert j, which has been permuted first by the connected components - ## and then again by SPQR into a right vertex Id. - rv_id = right_map[pcol[j]] - - ## Get the operator acting on site (n + 1) of this right vertex. - op_id = get_onsite_op(right_vertex(g, rv_id), n + 1) - - rv_id_lookup[j] = rv_id - op_id_lookup[j] = op_id - - for m in ms - next_edges_size[m, op_id] += 1 - end - end - - ## Build the graph for the next site out of this component. - next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}( - undef, rank, length(op_cache_vec[n + 1]) - ) - for i in eachindex(next_edges) - right_vertex_ids = Int[] - edge_weights = C[] - sizehint!(right_vertex_ids, next_edges_size[i]) - sizehint!(edge_weights, next_edges_size[i]) - next_edges[i] = (right_vertex_ids, edge_weights) - end - - for_non_zeros_batch( - R, length(right_map) - ) do weights::AbstractVector{C}, ms::AbstractVector{Int}, j::Int - rv_id = rv_id_lookup[j] - op_id = op_id_lookup[j] - - ## Add the edges. - for (m, weight) in zip(ms, weights) - next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] - push!(next_right_vertex_ids, rv_id) - push!(next_edge_weights, weight) - end - end - - next_edges_of_cc[cc] = next_edges - end - - return nothing -end - """ at_site!(ValType, g, n, sites, tol, absolute_tol, op_cache_vec, alg; combine_qn_sectors, output_level=0) diff --git a/src/large-graph-mpo-qr.jl b/src/large-graph-mpo-qr.jl new file mode 100644 index 0000000..d632b17 --- /dev/null +++ b/src/large-graph-mpo-qr.jl @@ -0,0 +1,211 @@ +""" + sparse_qr(A::SparseMatrixCSC, tol::Real, absolute_tol::Bool) + -> Tuple{Q,R,prow,pcol,rank} + +Compute a sparse QR factorization of `A` using SuiteSparse SPQR. + +If `absolute_tol` is `false`, `tol` is interpreted relative to SPQR's default +tolerance scale. The return values are the orthogonal factor `Q`, upper-triangular +factor `R`, row and column permutations, and the numerical rank. SPQR uses the +current BLAS thread count for its internal thread setting. +""" +function sparse_qr( + A::SparseMatrixCSC, tol::Real, absolute_tol::Bool +)::Tuple{SparseArrays.SPQR.QRSparseQ,SparseMatrixCSC,Vector{Int},Vector{Int},Int} + ret = nothing + + ## The tolerance is specified in Section 2.3 + ## https://fossies.org/linux/SuiteSparse/SPQR/Doc/spqr_user_guide.pdf + if !absolute_tol + tol *= SparseArrays.SPQR._default_tol(A) + end + + SparseArrays.CHOLMOD.@cholmod_param SPQR_nthreads = BLAS.get_num_threads() begin + ret = qr(A; tol) + end + + return ret.Q, ret.R, ret.prow, ret.pcol, rank(ret) +end + +""" + for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int) -> Nothing + +Iterate over the nonzero entries of the first `max_col` columns of `A`, calling +`f(values, rows, col)` once per nonempty column. + +`values` and `rows` are views into the CSC storage of `A` for that column, so +the callback must not retain them beyond the call. +""" +function for_non_zeros_batch(f::Function, A::SparseMatrixCSC, max_col::Int)::Nothing + @assert max_col <= size(A, 2) "$max_col, $(size(A, 2))" + + rows = rowvals(A) + vals = nonzeros(A) + for col in 1:max_col + range = nzrange(A, col) + isempty(range) && continue + f((@view vals[range]), (@view rows[range]), col) + end +end + +""" + for_non_zeros_batch(f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int) -> Nothing + +Iterate over the first `max_col` columns of a sparse SPQR `Q` factor, forming +each column explicitly and calling `f(column, col)`. The dense column workspace +is reused between calls, so the callback must copy it if it needs to keep it. +""" +function for_non_zeros_batch( + f::Function, Q::SparseArrays.SPQR.QRSparseQ, max_col::Int +)::Nothing + @assert max_col <= size(Q, 2) "$max_col, $(size(Q, 2))" + + function get_column!(Q::SparseArrays.SPQR.QRSparseQ, col::Int, res::Vector) + res .= 0 + res[col] = 1 + + for l in size(Q.factors, 2):-1:1 + τl = -Q.τ[l] + h = view(Q.factors, :, l) + axpy!(τl*dot(h, res), h, res) + end + + return res + end + + res = zeros(eltype(Q), size(Q, 1)) + for col in 1:max_col + get_column!(Q, col, res) + f(res, col) + end +end + +""" + process_qr( + matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, tol, absolute_tol, op_cache_vec + ) -> Nothing + +Process every connected component using the sparse-QR path. + +Single-left-vertex components use `process_single_left_vertex_cc!`; larger +components are extracted as sparse weighted adjacency matrices, decomposed with +`sparse_qr`, and translated into local tensor blocks plus outgoing graph edges. +On the final site, the scalar `R` factor is applied directly to the local +blocks. +""" +@timeit function process_qr( + matrix_of_cc::Vector{BlockSparseMatrix{ValType}}, + rank_of_cc::Vector{Int}, + next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}}, + g::MPOGraph{N,C,Ti}, + ccs::BipartiteGraphConnectedComponents, + n::Int, + sites::Vector{<:Index}, + tol::Real, + absolute_tol::Bool, + op_cache_vec::OpCacheVec, +)::Nothing where {ValType<:Number,N,C,Ti} + Threads.@threads for cc in 1:num_connected_components(ccs) + ## A specialization for when there is only one vertex on the left. This is + ## a very common case that can be sped up significantly. + if left_size(ccs, cc) == 1 + process_single_left_vertex_cc!( + matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, cc, n, sites, op_cache_vec + ) + continue + end + + matrix = matrix_of_cc[cc] + W, left_map, right_map = get_cc_matrix(g, ccs, cc; clear_edges=true) + + ## Compute the decomposition and then free W + Q, R, prow, pcol, rank = sparse_qr(W, tol, absolute_tol) + W = nothing + + rank_of_cc[cc] = rank + + ## Form the local transformation tensor. + for_non_zeros_batch(Q, rank) do weights::AbstractVector{C}, m::Int + for (i, weight) in enumerate(weights) + weight == 0 && continue + + ## TODO: This allocates for some reason + lv = left_vertex(g, left_map[prow[i]]) + local_op = op_cache_vec[n][lv.op_id].matrix + + matrix_element = get!(matrix, (lv.link, m)) do + return zeros(ValType, dim(sites[n]), dim(sites[n])) + end + + add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string) + end + end + + ## Q and prow are no longer needed. + Q = nothing + prow = nothing + + ## If we are at the last site, then R will be a 1x1 matrix containing an overall scaling. + if n == length(sites) + scaling = only(R) + for block in values(matrix) + block .*= scaling + end + + continue + end + + next_edges_size = zeros(Int, rank, length(op_cache_vec[n + 1])) + rv_id_lookup = Vector{Int}(undef, size(R, 2)) + op_id_lookup = Vector{Ti}(undef, size(R, 2)) + + for_non_zeros_batch( + R, length(right_map) + ) do _::AbstractVector{C}, ms::AbstractVector{Int}, j::Int + ## Convert j, which has been permuted first by the connected components + ## and then again by SPQR into a right vertex Id. + rv_id = right_map[pcol[j]] + + ## Get the operator acting on site (n + 1) of this right vertex. + op_id = get_onsite_op(right_vertex(g, rv_id), n + 1) + + rv_id_lookup[j] = rv_id + op_id_lookup[j] = op_id + + for m in ms + next_edges_size[m, op_id] += 1 + end + end + + ## Build the graph for the next site out of this component. + next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}( + undef, rank, length(op_cache_vec[n + 1]) + ) + for i in eachindex(next_edges) + right_vertex_ids = Int[] + edge_weights = C[] + sizehint!(right_vertex_ids, next_edges_size[i]) + sizehint!(edge_weights, next_edges_size[i]) + next_edges[i] = (right_vertex_ids, edge_weights) + end + + for_non_zeros_batch( + R, length(right_map) + ) do weights::AbstractVector{C}, ms::AbstractVector{Int}, j::Int + rv_id = rv_id_lookup[j] + op_id = op_id_lookup[j] + + ## Add the edges. + for (m, weight) in zip(ms, weights) + next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] + push!(next_right_vertex_ids, rv_id) + push!(next_edge_weights, weight) + end + end + + next_edges_of_cc[cc] = next_edges + end + + return nothing +end + diff --git a/src/large-graph-mpo-vertex-cover.jl b/src/large-graph-mpo-vertex-cover.jl new file mode 100644 index 0000000..2909a5d --- /dev/null +++ b/src/large-graph-mpo-vertex-cover.jl @@ -0,0 +1,161 @@ +""" + process_vertex_cover!( + matrix_of_cc, rank_of_cc, next_edges_of_cc, g, ccs, n, sites, op_cache_vec + ) -> Nothing + +Process every connected component using the minimum-vertex-cover specialization. + +For each component, the cover columns are laid out as `[left_cover; right_cover]` +in the local bond dimension. Left-cover columns emit single local operators with +unit graph weight; right-cover columns accumulate uncovered edges with their +stored edge weights. On the final site, only the local tensor blocks are needed; +otherwise this also builds `next_edges_of_cc` for the graph at site `n + 1`. +""" +@timeit function process_vertex_cover!( + matrix_of_cc::Vector{BlockSparseMatrix{ValType}}, + rank_of_cc::Vector{Int}, + next_edges_of_cc::Vector{Matrix{Tuple{Vector{Int},Vector{C}}}}, + g::MPOGraph{N,C,Ti}, + ccs::BipartiteGraphConnectedComponents, + n::Int, + sites::Vector{<:Index}, + op_cache_vec::OpCacheVec, +)::Nothing where {ValType<:Number,N,C,Ti} + site_dim = dim(sites[n]) + op_cache = op_cache_vec[n] + + next_op_of_rv_id = Ti[] + if n != length(sites) + resize!(next_op_of_rv_id, right_size(g)) + Threads.@threads for rv_id in 1:right_size(g) + next_op_of_rv_id[rv_id] = get_onsite_op(right_vertex(g, rv_id), n + 1) + end + end + + # TODO: Consider nested multithreading + Threads.@threads for cc in 1:num_connected_components(ccs) + matrix = matrix_of_cc[cc] + lvs_of_component::Vector{Int} = ccs.lvs_of_component[cc] + position_of_rvs_in_component = ccs.position_of_rvs_in_component + rv_size_of_component = ccs.rv_size_of_component[cc] + + ## No idea why, but these need to be typed or allocations go nuts. + left_cover::Vector{Int}, right_cover::Vector{Int} = minimum_vertex_cover(g, ccs, cc) + + rank = length(left_cover) + length(right_cover) + rank_of_cc[cc] = rank + + ## Construct the tensor from the left cover. + @inbounds for m in eachindex(left_cover) + lv_id = lvs_of_component[left_cover[m]] + lv = left_vertex(g, lv_id) + local_op = op_cache[lv.op_id].matrix + + matrix_element = zeros(ValType, site_dim, site_dim) + add_to_local_matrix!(matrix_element, one(ValType), local_op, lv.needs_JW_string) + matrix[lv.link, m] = matrix_element + end + + ## Construct the tensor from the right cover. + # TODO: Merge this loop with the one above, using `in_left_cover` + let + in_left_cover = falses(length(lvs_of_component)) + @inbounds for local_id in left_cover + in_left_cover[local_id] = true + end + + uncovered_left_ids = Vector{Int}(undef, length(lvs_of_component) - length(left_cover)) + next_uncovered = 1 + @inbounds for local_id in eachindex(lvs_of_component) + if !in_left_cover[local_id] + uncovered_left_ids[next_uncovered] = lvs_of_component[local_id] + next_uncovered += 1 + end + end + + right_cover_m = Vector{Int}(undef, rv_size_of_component) + @inbounds for (m, local_rv) in enumerate(right_cover) + right_cover_m[local_rv] = length(left_cover) + m + end + + @inbounds for lv_id in uncovered_left_ids + lv = left_vertex(g, lv_id) + local_op = op_cache[lv.op_id].matrix + + for (rv_id, weight) in weighted_edge_iterator(g, lv_id) + m = right_cover_m[position_of_rvs_in_component[rv_id]] + + matrix_element = get!(matrix, (lv.link, m)) do + zeros(ValType, site_dim, site_dim) + end + + add_to_local_matrix!(matrix_element, weight, local_op, lv.needs_JW_string) + end + end + end + + n == length(sites) && continue + + ## Preallocate space for next_edges + next_edges = Matrix{Tuple{Vector{Int},Vector{C}}}( + undef, rank, length(op_cache_vec[n + 1]) + ) + let + next_edge_sizes = zeros(Int, rank, length(op_cache_vec[n + 1])) + @inbounds for m in eachindex(left_cover) + lv_id = lvs_of_component[left_cover[m]] + for rv_id in g.right_vertex_ids_from_left[lv_id] + op_id = next_op_of_rv_id[rv_id] + next_edge_sizes[m, op_id] += 1 + end + end + + @inbounds for i in eachindex(next_edges) + n_edges = next_edge_sizes[i] + next_edges[i] = sizehint!(Int[], n_edges), sizehint!(C[], n_edges) + end + end + + ## Construct next_edges for the left_cover + @inbounds for m in eachindex(left_cover) + lv_id = lvs_of_component[left_cover[m]] + for (rv_id, weight) in weighted_edge_iterator(g, lv_id) + op_id = next_op_of_rv_id[rv_id] + + next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] + push!(next_right_vertex_ids, rv_id) + push!(next_edge_weights, weight) + end + end + + ## Construct next_edges for the right_cover + let + rvs_of_component = Vector{Int}(undef, rv_size_of_component) + @inbounds for lv_id in lvs_of_component + right_vertex_ids = g.right_vertex_ids_from_left[lv_id] + for edge_id in eachindex(right_vertex_ids) + rv_id = right_vertex_ids[edge_id] + rvs_of_component[position_of_rvs_in_component[rv_id]] = rv_id + end + end + + @inbounds for m in eachindex(right_cover) + rv_id = rvs_of_component[right_cover[m]] + m += length(left_cover) + + op_id = next_op_of_rv_id[rv_id] + next_right_vertex_ids, next_edge_weights = next_edges[m, op_id] + resize!(next_right_vertex_ids, 1) + resize!(next_edge_weights, 1) + + next_right_vertex_ids[1] = rv_id + next_edge_weights[1] = one(C) + end + end + + next_edges_of_cc[cc] = next_edges + end + + return nothing +end +