m3g · lmiq · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/src/ComplexMixtures.jl b/src/ComplexMixtures.jl
@@ -79,6 +79,7 @@ include("./parallel_setup.jl")
 include("./mddf.jl")
 
 # Tools
+include("./tools/merge.jl")
 include("./tools/contributions.jl")
 include("./tools/residue_contributions.jl")
 include("./tools/coordination_number.jl")

diff --git a/src/results.jl b/src/results.jl
@@ -86,12 +86,14 @@ The Result{Vector{Float64}} parametric type is necessary only for reading the JS
 
     # Group (atomic type by default) contributions to 
     # the coordination number counts. These are used to
-    # compute group contributions to the MDDFs
+    # compute group contributions to the MDDFs and KBIs
     # Note: These could be Matrix{Float64}, but for the convenience
     # of using JSON3, we use Vector{Vector{Float64}}, which is 
     # read directly.
     solute_group_count::Vector{Vector{Float64}}
     solvent_group_count::Vector{Vector{Float64}}
+    solute_group_count_random::Vector{Vector{Float64}}
+    solvent_group_count_random::Vector{Vector{Float64}}
 
     # Data to compute a RDF and the KB integral from this count
     rdf_count::Vector{Float64} = zeros(nbins)
@@ -162,6 +164,8 @@ function Result(
     # Initialize group count arrays
     solute_group_count = [zeros(nbins) for _ in 1:trajectory_data.n_groups_solute]
     solvent_group_count = [zeros(nbins) for _ in 1:trajectory_data.n_groups_solvent]
+    solute_group_count_random = [zeros(nbins) for _ in 1:trajectory_data.n_groups_solute]
+    solvent_group_count_random = [zeros(nbins) for _ in 1:trajectory_data.n_groups_solvent]
 
     return Result(
         nbins=nbins,
@@ -172,6 +176,8 @@ function Result(
         solvent=trajectory.solvent,
         solute_group_count=solute_group_count,
         solvent_group_count=solvent_group_count,
+        solute_group_count_random=solute_group_count_random,
+        solvent_group_count_random=solvent_group_count_random,
         files=[
             TrajectoryFileOptions(
                 filename=trajectory.filename,
@@ -325,10 +331,13 @@ function _mddf_final_results!(R::Result, options::Options)
     # Scale counters by number of samples and frames
     @. R.md_count = R.md_count / (R.solute.nmols * Q)
     @. R.solute_group_count = R.solute_group_count / (R.solute.nmols * Q)
+    @. R.solute_group_count_random = R.solute_group_count_random / (samples.random * Q)
     if R.autocorrelation
         R.solvent_group_count .= R.solute_group_count
+        R.solvent_group_count_random .= R.solute_group_count_random 
     else
         @. R.solvent_group_count = R.solvent_group_count / (R.solute.nmols * Q)
+        @. R.solvent_group_count_random = R.solvent_group_count_random / (samples.random * Q)
     end
     @. R.md_count_random = R.md_count_random / (samples.random * Q)
     @. R.rdf_count = R.rdf_count / (R.solute.nmols * Q)
@@ -367,8 +376,10 @@ function _mddf_final_results!(R::Result, options::Options)
 end
 
 function renormalize!(R::Result, density_fix::Number; silent)
-    R.md_count_random .= density_fix * R.md_count_random
-    R.rdf_count_random .= density_fix * R.rdf_count_random
+    R.md_count_random .*= density_fix 
+    R.rdf_count_random .*= density_fix
+    R.solute_group_count_random .*= density_fix
+    R.solvent_group_count_random .*= density_fix
 
     # Computing the distribution functions and KB integrals, from the MDDF and from the RDF
     #
@@ -457,226 +468,6 @@ function _coordination_number_final_results!(R::Result, options::Options)
     return R
 end
 
-"""
-    merge(r::Vector{Result})
-
-This function merges the results of MDDF calculations obtained by running the same
-analysis on multiple trajectories, or multiple parts of the same trajectory. It returns
-a Result structure of the same type, with all the functions and counters representing averages
-of the set provided weighted by the number of frames read in each Result set.
-
-"""
-function Base.merge(results::Vector{<:Result})
-    cannot_merge = false
-    nframes_read = 0
-    for ir in eachindex(results)
-        for file in results[ir].files
-            nframes_read += file.nframes_read
-        end
-        for jr in ir+1:lastindex(results)
-            if results[ir].nbins != results[jr].nbins
-                println(
-                    "ERROR: To merge Results, the number of bins of the histograms of both sets must be the same.",
-                )
-                cannot_merge = true
-            end
-            if !(results[ir].cutoff ≈ results[ir].cutoff)
-                println(
-                    "ERROR: To merge Results, cutoff distance of the of the histograms of both sets must be the same.",
-                )
-                cannot_merge = true
-            end
-            if (results[ir].solute != results[jr].solute) || (results[ir].solvent != results[jr].solvent)
-                println(
-                    "ERROR: To merge Results, the solute and solvent selections of both sets must be the same.",
-                )
-                cannot_merge = true
-            end
-        end
-    end
-    if cannot_merge
-        throw(ArgumentError(" Incompatible set of results to merge. "))
-    end
-
-    # Total number of frames of all results, and total weight of frames
-    nfiles = 0
-    ntot_frames = 0
-    tot_frame_weight = 0.0
-    for result in results
-        nfiles += length(result.files)
-        ntot_frames += sum(file.nframes_read for file in result.files)
-        tot_frame_weight += sum_frame_weights(result)
-    end
-
-    # Compute weight of each file, given the number frames. Also compute 
-    # the weight of the data of each file, given the weights of the frames
-    weights = zeros(nfiles)
-    ifile = 0
-    for result in results
-        for file in result.files
-            ifile += 1
-            weights[ifile] = file.nframes_read / ntot_frames
-        end
-    end
-
-    # Append all TrajectoryFileOptions to single vector
-    files = TrajectoryFileOptions[]
-    for result in results
-        for file in result.files
-            push!(files, file)
-        end
-    end
-
-    # Initialize group counts
-    solute_group_count = [zeros(results[1].nbins) for _ in 1:length(results[1].solute_group_count)]
-    solvent_group_count = [zeros(results[1].nbins) for _ in 1:length(results[1].solvent_group_count)]
-
-    # Structure for merged results
-    R = Result(
-        nbins=results[1].nbins,
-        dbulk=results[1].dbulk,
-        cutoff=results[1].cutoff,
-        autocorrelation=results[1].autocorrelation,
-        solute=results[1].solute,
-        solvent=results[1].solvent,
-        solute_group_count=solute_group_count,
-        solvent_group_count=solvent_group_count,
-        files=files,
-        weights=weights,
-    )
-
-    # Average results weighting the data considering the weights of the frames of each data set
-    warn = false
-    @. R.d = results[1].d
-    for ifile in eachindex(results)
-        result = results[ifile]
-        w = sum_frame_weights(result) / tot_frame_weight
-        if !(w ≈ R.weights[ifile]) && !warn
-            warn = true
-            @warn begin
-                """\n
-                    Frame weights and file weights differ, because crustom frame weights were provided.
-
-                """
-            end _file = nothing _line = nothing
-        end
-        @. R.mddf += w * result.mddf
-        @. R.kb += w * result.kb
-        @. R.rdf += w * result.rdf
-        @. R.kb_rdf += w * result.kb_rdf
-        @. R.md_count += w * result.md_count
-        @. R.md_count_random += w * result.md_count_random
-        @. R.coordination_number += w * result.coordination_number
-        @. R.coordination_number_random += w * result.coordination_number_random
-        for i in eachindex(R.solute_group_count, result.solute_group_count)
-            @. R.solute_group_count[i] += w * result.solute_group_count[i]
-        end
-        for i in eachindex(R.solvent_group_count, result.solvent_group_count)
-            @. R.solvent_group_count[i] += w * result.solvent_group_count[i]
-        end
-        @. R.rdf_count += w * result.rdf_count
-        @. R.rdf_count_random += w * result.rdf_count_random
-        @. R.sum_rdf_count += w * result.sum_rdf_count
-        @. R.sum_rdf_count_random += w * result.sum_rdf_count_random
-        R.density.solute += w * result.density.solute
-        R.density.solvent += w * result.density.solvent
-        R.density.solvent_bulk += w * result.density.solvent_bulk
-        R.volume.total += w * result.volume.total
-        R.volume.bulk += w * result.volume.bulk
-        R.volume.domain += w * result.volume.domain
-        R.volume.shell += w * result.volume.shell
-    end
-    return R
-end
-
-@testitem "merge" begin
-    using ComplexMixtures: mddf, merge
-    using PDBTools: read_pdb, select, selindex
-    using ComplexMixtures: data_dir
-
-    # Test simple three-molecule system
-    atoms = read_pdb("$data_dir/toy/cross.pdb")
-    protein = AtomSelection(select(atoms, "protein and model 1"), nmols=1)
-    water = AtomSelection(select(atoms, "resname WAT and model 1"), natomspermol=3)
-    traj = Trajectory("$data_dir/toy/cross.pdb", protein, water, format="PDBTraj")
-
-    options = Options(
-        seed=321,
-        StableRNG=true,
-        nthreads=1,
-        silent=true,
-        n_random_samples=10^5,
-        lastframe=1,
-    )
-    R1 = mddf(traj, options)
-
-    options = Options(
-        seed=321,
-        StableRNG=true,
-        nthreads=1,
-        silent=true,
-        n_random_samples=10^5,
-        firstframe=2,
-    )
-    R2 = mddf(traj, options)
-
-    R = merge([R1, R2])
-    @test R.volume.total == 27000.0
-    @test R.volume.domain ≈ R.volume.total - R.volume.bulk
-    @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.01)
-    @test R.density.solute ≈ 1 / R.volume.total
-    @test R.density.solvent ≈ 3 / R.volume.total
-    @test R.density.solvent_bulk ≈ 2 / R.volume.bulk
-    @test R.weights == [0.5, 0.5]
-
-    # Test loading a saved merged file
-    dir = mktempdir()
-    save(R, "$dir/merged.json")
-    R_save = load("$dir/merged.json")
-    @test isapprox(R, R_save, debug=true)
-
-    # Test merging files for which weights are provided for the frames
-    R2 = mddf(traj, options, frame_weights=[0.0, 2.0])
-    @test R.weights == [0.5, 0.5]
-    @test length(R.files) == 2
-
-    # Two-atom system
-    at1 = AtomSelection([1], nmols=1)
-    at2 = AtomSelection([2], nmols=1)
-    traj = Trajectory("$data_dir/toy/self_monoatomic.pdb", at1, at2, format="PDBTraj")
-    R1 = mddf(traj, Options(lastframe=1))
-    @test sum(R1.md_count) == 1
-    R2 = mddf(traj, Options(firstframe=2))
-    @test sum(R2.md_count) == 0
-    R = merge([R1, R2])
-    @test R.weights == [0.5, 0.5]
-    @test sum(R.md_count) == 0.5
-    R1 = mddf(traj, Options(lastframe=1), frame_weights=[2.0])
-    @test sum(R1.md_count) == 1
-    R = merge([R1, R2])
-    @test sum(R.md_count) == 2 / 3
-    @test sum(sum.(R1.solute_group_count)) == 1
-    @test sum(sum.(R1.solvent_group_count)) == 1
-    @test sum(sum.(R2.solute_group_count)) == 0
-    @test sum(sum.(R2.solvent_group_count)) == 0
-    @test sum(sum.(R.solute_group_count)) == 2 / 3
-    @test sum(sum.(R.solvent_group_count)) == 2 / 3
-
-    # Test throwing merging incompatible results
-    protein = AtomSelection(select(atoms, "protein and model 1"), nmols=1)
-    traj = Trajectory("$data_dir/toy/cross.pdb", protein, water, format="PDBTraj")
-    R1 = mddf(traj, options)
-    protein = AtomSelection(
-        select(atoms, "protein and model 1"), nmols=1,
-        group_names=["acidic"],
-        group_atom_indices=[selindex(atoms, "protein and acidic")]
-    )
-    traj = Trajectory("$data_dir/toy/cross.pdb", protein, water, format="PDBTraj")
-    R2 = mddf(traj, options)
-
-
-end
-
 @testitem "Result - empty" begin
     #using ComplexMixtures: Result, Trajectory, Options, AtomSelection
     using ComplexMixtures: data_dir, pdb_file_example
@@ -838,9 +629,11 @@ function sum!(R1::Result, R2::Result)
 
     for i in eachindex(R1.solute_group_count)
         @. R1.solute_group_count[i] += R2.solute_group_count[i]
+        @. R1.solute_group_count_random[i] += R2.solute_group_count_random[i]
     end
     for i in eachindex(R1.solvent_group_count)
         @. R1.solvent_group_count[i] += R2.solvent_group_count[i]
+        @. R1.solvent_group_count_random[i] += R2.solvent_group_count_random[i]
     end
 
     @. R1.rdf_count += R2.rdf_count
@@ -850,7 +643,6 @@ function sum!(R1::Result, R2::Result)
 
     return R1
 end
-
 #=
     title(R::Result, solute::AtomSelection, solvent::AtomSelection)
     title(R::Result, solute::AtomSelection, solvent::AtomSelection, nspawn::Int)