From 316557f77d256200cb9dcfb3f93d47c975d39146 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 09:16:59 +0100 Subject: [PATCH 01/24] add sorting callback --- src/TrixiParticles.jl | 2 +- src/callbacks/callbacks.jl | 1 + src/callbacks/sorting.jl | 121 +++++++++++++++++++++++++++++++++++++ src/general/buffer.jl | 10 +++ 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 src/callbacks/sorting.jl diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl index 68e1e87a5a..74802b5790 100644 --- a/src/TrixiParticles.jl +++ b/src/TrixiParticles.jl @@ -69,7 +69,7 @@ export WeaklyCompressibleSPHSystem, EntropicallyDampedSPHSystem, TotalLagrangian export BoundaryZone, InFlow, OutFlow, BidirectionalFlow export InfoCallback, SolutionSavingCallback, DensityReinitializationCallback, PostprocessCallback, StepsizeCallback, UpdateCallback, SteadyStateReachedCallback, - SplitIntegrationCallback + SplitIntegrationCallback, SortingCallback export ContinuityDensity, SummationDensity export PenaltyForceGanzenmueller, TransportVelocityAdami, ParticleShiftingTechnique, ParticleShiftingTechniqueSun2017, ConsistentShiftingSun2019, diff --git a/src/callbacks/callbacks.jl b/src/callbacks/callbacks.jl index b1bd602269..510eaef3db 100644 --- a/src/callbacks/callbacks.jl +++ b/src/callbacks/callbacks.jl @@ -33,3 +33,4 @@ include("stepsize.jl") include("update.jl") include("steady_state_reached.jl") include("split_integration.jl") +include("sorting.jl") diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl new file mode 100644 index 0000000000..d1b6d7de03 --- /dev/null +++ b/src/callbacks/sorting.jl @@ -0,0 +1,121 @@ +struct SortingCallback{I} + interval::I +end + +""" + SortingCallback(; interval::Integer=-1, dt=0.0) + +# Keywords +- `interval=1`: Sort particles at the end of every `interval` time steps. +- `dt`: Sort particles in regular intervals of `dt` in terms of integration time + by adding additional `tstops` (note that this may change the solution). +""" +function SortingCallback(; interval::Integer=-1, dt=0.0) + if dt > 0 && interval !== -1 + throw(ArgumentError("Setting both interval and dt is not supported!")) + end + + # Update in intervals in terms of simulation time + if dt > 0 + interval = Float64(dt) + + # Update every time step (default) + elseif interval == -1 + interval = 1 + end + + sorting_callback! = SortingCallback(interval) + + if dt > 0 + # Add a `tstop` every `dt` + return PeriodicCallback(sorting_callback!, dt, + initialize=(initial_sort!), + save_positions=(false, false)) + else + # The first one is the `condition`, the second the `affect!` + return DiscreteCallback(sorting_callback!, sorting_callback!, + initialize=(initial_sort!), + save_positions=(false, false)) + end +end + +# `initialize` +function initial_sort!(cb, u, t, integrator) + # The `SortingCallback` is either `cb.affect!` (with `DiscreteCallback`) + # or `cb.affect!.affect!` (with `PeriodicCallback`). + # Let recursive dispatch handle this. + + initial_sort!(cb.affect!, u, t, integrator) +end + +function initial_sort!(cb::SortingCallback, u, t, integrator) + return cb(integrator) +end + +# `condition` +function (sorting_callback!::SortingCallback)(u, t, integrator) + (; interval) = sorting_callback! + + return condition_integrator_interval(integrator, interval) +end + +# `affect!` +function (sorting_callback!::SortingCallback)(integrator) + semi = integrator.p + v_ode, u_ode = integrator.u.x + + @trixi_timeit timer() "sorting callback" begin + foreach_system(semi) do system + v = wrap_v(v_ode, system, semi) + u = wrap_u(u_ode, system, semi) + + sort_particles!(system, v, u, semi) + end + end + + # Tell OrdinaryDiffEq that `integrator.u` has been modified + u_modified!(integrator, true) + + return integrator +end + +sort_particles!(system, v, u, semi) = system + +function sort_particles!(system::AbstractFluidSystem, v, u, semi) + nhs = get_neighborhood_search(system, semi) + cell_list = nhs.cell_list + + sort_particles!(system, v, u, nhs, cell_list, semi) +end + +# TODO: Sort also masses and particle spacings for varialble smoothing lengths. +function sort_particles!(system::AbstractFluidSystem, v, u, nhs, + cell_list::FullGridCellList, semi) + # Note that the following contain also inactive particles + system_coords = current_coordinates(u, system) + system_velocity = current_velocity(v, system) + system_density = current_density(v, system) + system_pressure = current_pressure(v, system) + + + cell_ids = zeros(Int, nparticles(system)) + @threaded semi for particle in eachparticle(system) + point_coords = current_coords(u, system, particle) + cell_ids[particle] = PointNeighbors.cell_index(cell_list, + PointNeighbors.cell_coords(point_coords, + nhs)) + end + + perm = sortperm(cell_ids) + + system_coords .= system_coords[:, perm] + system_velocity .= system_velocity[:, perm] + system_pressure .= system_pressure[perm] + system_density .= system_density[perm] + + sort_buffer!(system.buffer, perm, semi) + + return system +end + +sort_buffer!(buffer, perm, semi) = buffer diff --git a/src/general/buffer.jl b/src/general/buffer.jl index 1e2c196799..5df76a5ae1 100644 --- a/src/general/buffer.jl +++ b/src/general/buffer.jl @@ -80,3 +80,13 @@ end return system end + +function sort_buffer!(buffer::SystemBuffer, perm, semi) + (; active_particle) = buffer + + active_particle .= active_particle[perm] + + update_system_buffer!(buffer, semi) + + return buffer +end From ce11e315d26d61cbd53ff423a0ed8570dbedbdf2 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 09:21:14 +0100 Subject: [PATCH 02/24] fix typo --- src/callbacks/sorting.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index d1b6d7de03..7d85eec382 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -15,11 +15,11 @@ function SortingCallback(; interval::Integer=-1, dt=0.0) throw(ArgumentError("Setting both interval and dt is not supported!")) end - # Update in intervals in terms of simulation time + # Sort in intervals in terms of simulation time if dt > 0 interval = Float64(dt) - # Update every time step (default) + # Sort every time step (default) elseif interval == -1 interval = 1 end @@ -88,7 +88,7 @@ function sort_particles!(system::AbstractFluidSystem, v, u, semi) sort_particles!(system, v, u, nhs, cell_list, semi) end -# TODO: Sort also masses and particle spacings for varialble smoothing lengths. +# TODO: Sort also masses and particle spacings for variable smoothing lengths. function sort_particles!(system::AbstractFluidSystem, v, u, nhs, cell_list::FullGridCellList, semi) # Note that the following contain also inactive particles From 42abb9d920178be4573d817ca5053c1393575f62 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 09:21:44 +0100 Subject: [PATCH 03/24] apply formatter --- src/callbacks/sorting.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 7d85eec382..630bb18424 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -97,7 +97,6 @@ function sort_particles!(system::AbstractFluidSystem, v, u, nhs, system_density = current_density(v, system) system_pressure = current_pressure(v, system) - cell_ids = zeros(Int, nparticles(system)) @threaded semi for particle in eachparticle(system) point_coords = current_coords(u, system, particle) From ab56ed40d2985ccfd988653b6f540b569b8bab71 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 10:09:28 +0100 Subject: [PATCH 04/24] move inactive particles to end --- src/callbacks/sorting.jl | 24 +++++++++++++----------- src/general/buffer.jl | 28 +++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 630bb18424..057de4d031 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -91,14 +91,8 @@ end # TODO: Sort also masses and particle spacings for variable smoothing lengths. function sort_particles!(system::AbstractFluidSystem, v, u, nhs, cell_list::FullGridCellList, semi) - # Note that the following contain also inactive particles - system_coords = current_coordinates(u, system) - system_velocity = current_velocity(v, system) - system_density = current_density(v, system) - system_pressure = current_pressure(v, system) - cell_ids = zeros(Int, nparticles(system)) - @threaded semi for particle in eachparticle(system) + @threaded semi for particle in each_active_particle(system) point_coords = current_coords(u, system, particle) cell_ids[particle] = PointNeighbors.cell_index(cell_list, PointNeighbors.cell_coords(point_coords, @@ -107,14 +101,22 @@ function sort_particles!(system::AbstractFluidSystem, v, u, nhs, perm = sortperm(cell_ids) + sort_system!(system, v, u, perm, system.buffer) + + return system +end + +function sort_system!(system, v, u, perm, buffer::Nothing) + # Note that the following contain also inactive particles + system_coords = current_coordinates(u, system) + system_velocity = current_velocity(v, system) + system_density = current_density(v, system) + system_pressure = current_pressure(v, system) + system_coords .= system_coords[:, perm] system_velocity .= system_velocity[:, perm] system_pressure .= system_pressure[perm] system_density .= system_density[perm] - sort_buffer!(system.buffer, perm, semi) - return system end - -sort_buffer!(buffer, perm, semi) = buffer diff --git a/src/general/buffer.jl b/src/general/buffer.jl index 5df76a5ae1..efdb122541 100644 --- a/src/general/buffer.jl +++ b/src/general/buffer.jl @@ -81,12 +81,34 @@ end return system end -function sort_buffer!(buffer::SystemBuffer, perm, semi) +function sort_system!(system, v, u, perm, buffer::SystemBuffer) (; active_particle) = buffer - active_particle .= active_particle[perm] + # Note that the following contain also inactive particles + system_coords = current_coordinates(u, system) + system_velocity = current_velocity(v, system) + system_density = current_density(v, system) + system_pressure = current_pressure(v, system) - update_system_buffer!(buffer, semi) + # First permutation: sort by desired `perm` + active_particle_sorted = active_particle[perm] + + # Second permutation: move inactive particles to the end (true first, false last) + perm2 = sortperm(active_particle_sorted, rev=true) + + # Combined permutation + combined_perm = perm[perm2] + + # Apply to all data + active_particle .= active_particle_sorted[perm2] + system_coords .= system_coords[:, combined_perm] + system_velocity .= system_velocity[:, combined_perm] + system_pressure .= system_pressure[combined_perm] + system_density .= system_density[combined_perm] + + # Update buffer + buffer.active_particle_count[] = count(active_particle) + buffer.eachparticle[1:buffer.active_particle_count[]] .= 1:buffer.active_particle_count[] return buffer end From 4f088d38b7609726b78f5ed2f90beb688785e8f1 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 10:33:00 +0100 Subject: [PATCH 05/24] add show --- src/callbacks/sorting.jl | 43 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 057de4d031..79cb610c1d 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -120,3 +120,46 @@ function sort_system!(system, v, u, perm, buffer::Nothing) return system end + +function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SortingCallback}) + @nospecialize cb # reduce precompilation time + print(io, "SortingCallback(interval=", cb.affect!.interval, ")") +end + +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SortingCallback}}) + @nospecialize cb # reduce precompilation time + print(io, "SortingCallback(dt=", cb.affect!.affect!.interval, ")") +end + +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SortingCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + sorting_cb = cb.affect! + setup = [ + "interval" => sorting_cb.interval + ] + summary_box(io, "SortingCallback", setup) + end +end + +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SortingCallback}}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + sorting_cb = cb.affect!.affect! + setup = [ + "dt" => sorting_cb.interval + ] + summary_box(io, "SortingCallback", setup) + end +end From 398daf0a8b20ef46b381d7a2cab240a4d9860525 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Tue, 13 Jan 2026 14:36:44 +0100 Subject: [PATCH 06/24] gpu compatible --- src/callbacks/sorting.jl | 4 ++-- src/general/buffer.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 79cb610c1d..ce0925cd85 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -91,7 +91,7 @@ end # TODO: Sort also masses and particle spacings for variable smoothing lengths. function sort_particles!(system::AbstractFluidSystem, v, u, nhs, cell_list::FullGridCellList, semi) - cell_ids = zeros(Int, nparticles(system)) + cell_ids = zero(allocate(semi.parallelization_backend, Int, nparticles(system))) @threaded semi for particle in each_active_particle(system) point_coords = current_coords(u, system, particle) cell_ids[particle] = PointNeighbors.cell_index(cell_list, @@ -99,7 +99,7 @@ function sort_particles!(system::AbstractFluidSystem, v, u, nhs, nhs)) end - perm = sortperm(cell_ids) + perm = sortperm(transfer2cpu(cell_ids)) sort_system!(system, v, u, perm, system.buffer) diff --git a/src/general/buffer.jl b/src/general/buffer.jl index efdb122541..6b0d0f7f61 100644 --- a/src/general/buffer.jl +++ b/src/general/buffer.jl @@ -94,7 +94,7 @@ function sort_system!(system, v, u, perm, buffer::SystemBuffer) active_particle_sorted = active_particle[perm] # Second permutation: move inactive particles to the end (true first, false last) - perm2 = sortperm(active_particle_sorted, rev=true) + perm2 = sortperm(transfer2cpu(active_particle_sorted), rev=true) # Combined permutation combined_perm = perm[perm2] From 2c3cf91c42c99f12b43726ee3f1974e7063a702f Mon Sep 17 00:00:00 2001 From: LasNikas Date: Mon, 9 Mar 2026 15:32:39 +0100 Subject: [PATCH 07/24] sort by `cell_coords` --- src/callbacks/sorting.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index ce0925cd85..8288163373 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -91,15 +91,14 @@ end # TODO: Sort also masses and particle spacings for variable smoothing lengths. function sort_particles!(system::AbstractFluidSystem, v, u, nhs, cell_list::FullGridCellList, semi) - cell_ids = zero(allocate(semi.parallelization_backend, Int, nparticles(system))) + cell_coords = allocate(semi.parallelization_backend, SVector{ndims(system), Int}, + nparticles(system)) @threaded semi for particle in each_active_particle(system) point_coords = current_coords(u, system, particle) - cell_ids[particle] = PointNeighbors.cell_index(cell_list, - PointNeighbors.cell_coords(point_coords, - nhs)) + cell_coords[particle] = PointNeighbors.cell_coords(point_coords, nhs) end - perm = sortperm(transfer2cpu(cell_ids)) + perm = sortperm(transfer2cpu(cell_coords)) sort_system!(system, v, u, perm, system.buffer) From 78ef5a2d23de16f8138b6d735f19e40093acf551 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Wed, 18 Mar 2026 15:08:56 +0100 Subject: [PATCH 08/24] implement suggestions --- src/callbacks/sorting.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 8288163373..458a511332 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -1,3 +1,8 @@ +# These are the systems that require sorting. +# TODO: The `DEMSystem` should be added here in the future. +# Boundary particles always stay fixed relative to each other, TLSPH computes in the initial configuration. +const RequiresSortingSystem = AbstractFluidSystem + struct SortingCallback{I} interval::I end @@ -81,15 +86,18 @@ end sort_particles!(system, v, u, semi) = system -function sort_particles!(system::AbstractFluidSystem, v, u, semi) +function sort_particles!(system::RequiresSortingSystem, v, u, semi) nhs = get_neighborhood_search(system, semi) - cell_list = nhs.cell_list - sort_particles!(system, v, u, nhs, cell_list, semi) + if !(nhs isa GridNeighborhoodSearch) + throw(ArgumentError("`SortingCallback` can only be used with a `GridNeighborhoodSearch`")) + end + + sort_particles!(system, v, u, nhs, nhs.cell_list, semi) end # TODO: Sort also masses and particle spacings for variable smoothing lengths. -function sort_particles!(system::AbstractFluidSystem, v, u, nhs, +function sort_particles!(system::RequiresSortingSystem, v, u, nhs, cell_list::FullGridCellList, semi) cell_coords = allocate(semi.parallelization_backend, SVector{ndims(system), Int}, nparticles(system)) From 26a15db4fc665f588b1e3ad09e6ccc32e9783a44 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 11:22:45 +0100 Subject: [PATCH 09/24] add docs --- src/callbacks/sorting.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 458a511332..43b30a44bc 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -10,6 +10,8 @@ end """ SortingCallback(; interval::Integer=-1, dt=0.0) +Reorders particles according to neighborhood-search cells for performance optimization. + # Keywords - `interval=1`: Sort particles at the end of every `interval` time steps. - `dt`: Sort particles in regular intervals of `dt` in terms of integration time From a41c2b3efeb19cfb735757bd226eab14a8f03b56 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 11:42:57 +0100 Subject: [PATCH 10/24] implement suggestions --- src/callbacks/sorting.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 43b30a44bc..fd54de4784 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -12,6 +12,13 @@ end Reorders particles according to neighborhood-search cells for performance optimization. +When particles become very unordered throughout a long-running simulation, performance +decreases due to increased cache-misses (on CPUs) and lack of block structure (on GPUs). +On GPUs, a fully shuffled particle ordering causes a 3-4x slowdown compared to a sorted configuration. +On CPUs, there is no difference for small problems (<16k particles), but the performance penalty +grows linearly with the problem size up to 10x slowdown for very large problems (65M particles). +See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for more details. + # Keywords - `interval=1`: Sort particles at the end of every `interval` time steps. - `dt`: Sort particles in regular intervals of `dt` in terms of integration time @@ -116,7 +123,6 @@ function sort_particles!(system::RequiresSortingSystem, v, u, nhs, end function sort_system!(system, v, u, perm, buffer::Nothing) - # Note that the following contain also inactive particles system_coords = current_coordinates(u, system) system_velocity = current_velocity(v, system) system_density = current_density(v, system) From 0b2147a44e9f19dbb99abfc3563da28769ce6752 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 11:46:27 +0100 Subject: [PATCH 11/24] rm dt --- src/callbacks/sorting.jl | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index fd54de4784..cf4891d252 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -8,7 +8,7 @@ struct SortingCallback{I} end """ - SortingCallback(; interval::Integer=-1, dt=0.0) + SortingCallback(; interval::Integer) Reorders particles according to neighborhood-search cells for performance optimization. @@ -20,37 +20,14 @@ grows linearly with the problem size up to 10x slowdown for very large problems See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for more details. # Keywords -- `interval=1`: Sort particles at the end of every `interval` time steps. -- `dt`: Sort particles in regular intervals of `dt` in terms of integration time - by adding additional `tstops` (note that this may change the solution). +- `interval::Integer`: Sort particles at the end of every `interval` time steps. """ -function SortingCallback(; interval::Integer=-1, dt=0.0) - if dt > 0 && interval !== -1 - throw(ArgumentError("Setting both interval and dt is not supported!")) - end - - # Sort in intervals in terms of simulation time - if dt > 0 - interval = Float64(dt) - - # Sort every time step (default) - elseif interval == -1 - interval = 1 - end - +function SortingCallback(; interval::Integer) sorting_callback! = SortingCallback(interval) - if dt > 0 - # Add a `tstop` every `dt` - return PeriodicCallback(sorting_callback!, dt, - initialize=(initial_sort!), - save_positions=(false, false)) - else - # The first one is the `condition`, the second the `affect!` - return DiscreteCallback(sorting_callback!, sorting_callback!, - initialize=(initial_sort!), - save_positions=(false, false)) - end + # The first one is the `condition`, the second the `affect!` + return DiscreteCallback(sorting_callback!, sorting_callback!, + initialize=(initial_sort!), save_positions=(false, false)) end # `initialize` From 13c7c6227702dff62f8a41c6cef8df036d05704a Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 12:11:40 +0100 Subject: [PATCH 12/24] add optional initial sort --- src/callbacks/sorting.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index cf4891d252..dbd10a4b33 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -8,7 +8,7 @@ struct SortingCallback{I} end """ - SortingCallback(; interval::Integer) + SortingCallback(; interval::Integer, initial_sort=true) Reorders particles according to neighborhood-search cells for performance optimization. @@ -21,13 +21,19 @@ See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for # Keywords - `interval::Integer`: Sort particles at the end of every `interval` time steps. +- `initial_sort=true`: When enabled, particles are sorted at the beginning of the simulation. + When the initial configuration is a perfect grid of particles, + sorting at the beginning is not necessary and might even slightly + slow down the first time steps, since a perfect grid is even better + than sorting by NHS cell index. """ -function SortingCallback(; interval::Integer) +function SortingCallback(; interval::Integer, initial_sort=true) sorting_callback! = SortingCallback(interval) # The first one is the `condition`, the second the `affect!` return DiscreteCallback(sorting_callback!, sorting_callback!, - initialize=(initial_sort!), save_positions=(false, false)) + initialize=initial_sort ? (initial_sort!) : nothing, + save_positions=(false, false)) end # `initialize` From b45f738108537fc356f3969f32f229fa959712c4 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 12:29:13 +0100 Subject: [PATCH 13/24] fix --- src/callbacks/sorting.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index dbd10a4b33..e02b5489d3 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -32,7 +32,8 @@ function SortingCallback(; interval::Integer, initial_sort=true) # The first one is the `condition`, the second the `affect!` return DiscreteCallback(sorting_callback!, sorting_callback!, - initialize=initial_sort ? (initial_sort!) : nothing, + initialize=initial_sort ? (initial_sort!) : + SciMLBase.INITIALIZE_DEFAULT, save_positions=(false, false)) end From d68ec5fd9a70089d3f31e1a21113ddf5883f1661 Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 12:32:53 +0100 Subject: [PATCH 14/24] add comment --- src/callbacks/sorting.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index e02b5489d3..5964ca0036 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -99,6 +99,7 @@ function sort_particles!(system::RequiresSortingSystem, v, u, nhs, cell_coords[particle] = PointNeighbors.cell_coords(point_coords, nhs) end + # TODO `sortperm` works on CUDA but not (yet) on Metal perm = sortperm(transfer2cpu(cell_coords)) sort_system!(system, v, u, perm, system.buffer) From de8304bdae04d8248e71aec0be482b9233ec1f2f Mon Sep 17 00:00:00 2001 From: LasNikas Date: Thu, 19 Mar 2026 16:44:15 +0100 Subject: [PATCH 15/24] implement suggestions --- src/callbacks/sorting.jl | 50 ++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl index 5964ca0036..23b5a46ebc 100644 --- a/src/callbacks/sorting.jl +++ b/src/callbacks/sorting.jl @@ -5,10 +5,11 @@ const RequiresSortingSystem = AbstractFluidSystem struct SortingCallback{I} interval::I + last_t::Float64 end """ - SortingCallback(; interval::Integer, initial_sort=true) + SortingCallback(; interval=-1, dt=0.0, initial_sort=true) Reorders particles according to neighborhood-search cells for performance optimization. @@ -20,21 +21,35 @@ grows linearly with the problem size up to 10x slowdown for very large problems See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for more details. # Keywords -- `interval::Integer`: Sort particles at the end of every `interval` time steps. +- `interval`: Sort particles at the end of every `interval` time steps. +- `dt`: Sort particles in regular intervals of `dt` in terms of integration time. + This callback does not add extra time steps / `tstops`; instead, reinitialization is + triggered at the first solver step after each `dt` interval has elapsed. - `initial_sort=true`: When enabled, particles are sorted at the beginning of the simulation. When the initial configuration is a perfect grid of particles, sorting at the beginning is not necessary and might even slightly slow down the first time steps, since a perfect grid is even better than sorting by NHS cell index. """ -function SortingCallback(; interval::Integer, initial_sort=true) - sorting_callback! = SortingCallback(interval) +function SortingCallback(; interval::Integer=-1, dt=0.0) + if dt > 0 && interval !== -1 + throw(ArgumentError("Setting both interval and dt is not supported!")) + end + + # Sort in intervals in terms of simulation time + if dt > 0 + interval = Float64(dt) + + # Sort every time step (default) + elseif interval == -1 + interval = 1 + end + + sorting_callback! = SortingCallback(interval, last_t) # The first one is the `condition`, the second the `affect!` return DiscreteCallback(sorting_callback!, sorting_callback!, - initialize=initial_sort ? (initial_sort!) : - SciMLBase.INITIALIZE_DEFAULT, - save_positions=(false, false)) + initialize=(initial_sort!), save_positions=(false, false)) end # `initialize` @@ -50,13 +65,20 @@ function initial_sort!(cb::SortingCallback, u, t, integrator) return cb(integrator) end -# `condition` -function (sorting_callback!::SortingCallback)(u, t, integrator) +# `condition` with `interval` +function (sorting_callback!::SortingCallback{Int})(u, t, integrator) (; interval) = sorting_callback! return condition_integrator_interval(integrator, interval) end +# condition with `dt` +function (sorting_callback!::SortingCallback)(u, t, integrator) + (; interval, last_t) = sorting_callback! + + return (t - last_t) > interval +end + # `affect!` function (sorting_callback!::SortingCallback)(integrator) semi = integrator.p @@ -121,20 +143,19 @@ function sort_system!(system, v, u, perm, buffer::Nothing) return system end -function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SortingCallback}) +function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) @nospecialize cb # reduce precompilation time print(io, "SortingCallback(interval=", cb.affect!.interval, ")") end function Base.show(io::IO, - cb::DiscreteCallback{<:Any, - <:PeriodicCallbackAffect{<:SortingCallback}}) + cb::DiscreteCallback{<:Any, <:SortingCallback}) @nospecialize cb # reduce precompilation time print(io, "SortingCallback(dt=", cb.affect!.affect!.interval, ")") end function Base.show(io::IO, ::MIME"text/plain", - cb::DiscreteCallback{<:Any, <:SortingCallback}) + cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) @nospecialize cb # reduce precompilation time if get(io, :compact, false) @@ -149,8 +170,7 @@ function Base.show(io::IO, ::MIME"text/plain", end function Base.show(io::IO, ::MIME"text/plain", - cb::DiscreteCallback{<:Any, - <:PeriodicCallbackAffect{<:SortingCallback}}) + cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) @nospecialize cb # reduce precompilation time if get(io, :compact, false) From b71c51d1c3b4627431b657265f911cf3e3a07971 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 19 Mar 2026 17:35:56 +0100 Subject: [PATCH 16/24] Implement comparison with DualSPHysics --- Project.toml | 2 + examples/fluid/dam_break_2d_dualsphysics.jl | 70 +++++ examples/fluid/dam_break_3d.jl | 5 +- examples/fluid/dam_break_3d_dualsphysics.jl | 65 +++++ src/TrixiParticles.jl | 1 + src/schemes/fluid/fluid.jl | 43 ++- src/schemes/fluid/pressure_acceleration.jl | 3 +- src/schemes/fluid/viscosity.jl | 66 ++--- .../fluid/weakly_compressible_sph/rhs.jl | 267 +++++++++++++++++- .../fluid/weakly_compressible_sph/system.jl | 3 +- 10 files changed, 473 insertions(+), 52 deletions(-) create mode 100644 examples/fluid/dam_break_2d_dualsphysics.jl create mode 100644 examples/fluid/dam_break_3d_dualsphysics.jl diff --git a/Project.toml b/Project.toml index dff0de0627..84b5497b2a 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -62,6 +63,7 @@ Polyester = "0.7.10" ReadVTK = "0.2" RecipesBase = "1" Reexport = "1" +SIMD = "3.7.2" SciMLBase = "2" StaticArrays = "1" Statistics = "1" diff --git a/examples/fluid/dam_break_2d_dualsphysics.jl b/examples/fluid/dam_break_2d_dualsphysics.jl new file mode 100644 index 0000000000..8e55ccc4a7 --- /dev/null +++ b/examples/fluid/dam_break_2d_dualsphysics.jl @@ -0,0 +1,70 @@ +# Modify the 01_DamBreak example of DualSPHysics like this: +# +# +# +# +# When comparing with high resolution, change the resolution here: +# +# With this resolution, use: +# + +using TrixiParticles, TrixiParticles.PointNeighbors + +fluid_particle_spacing = 0.002 + +# Load setup from dam break example +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_2d.jl"), + fluid_particle_spacing=fluid_particle_spacing, + smoothing_length=1.414216 * fluid_particle_spacing, + tank_size=(4.0, 3.0), W=1.0, H=2.0, + spacing_ratio=1, boundary_layers=1, + sol=nothing, ode=nothing) + +tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density, + n_layers=boundary_layers, spacing_ratio=spacing_ratio, + coordinates_eltype=Float64) + +tank.fluid.coordinates .+= 0.005 +tank.boundary.coordinates .+= 0.005 + +# Define a GPU-compatible neighborhood search +min_corner = minimum(tank.boundary.coordinates, dims=2) +max_corner = maximum(tank.boundary.coordinates, dims=2) +cell_list = FullGridCellList(; min_corner, max_corner, backend=PointNeighbors.CompactVectorOfVectors{Int32}) +neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, + update_strategy=ParallelUpdate()) + +search_radius = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi).search_radius +nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0) +cell_coords(x) = PointNeighbors.cell_coords(x, nhs) +cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x)) +coords = reinterpret(reshape, SVector{ndims(fluid_system), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates) +sort!(coords, by=cell_index) + +# function cells(coordinates, system, semi) +# nhs = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi) +# coords = reinterpret(reshape, SVector{ndims(system), eltype(coordinates)}, coordinates) +# return TrixiParticles.PointNeighbors.cell_coords.(coords, Ref(nhs)) +# end + +# Run the dam break simulation with this neighborhood search +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_2d.jl"), + tank=tank, + smoothing_length=1.414216 * fluid_particle_spacing, + time_integration_scheme=SymplecticPositionVerlet(), + boundary_density_calculator=ContinuityDensity(), + fluid_particle_spacing=fluid_particle_spacing, + tank_size=(4.0, 3.0), W=1.0, H=2.0, + spacing_ratio=1, boundary_layers=1, + tspan=(0.0, 1.0), cfl=0.2, + neighborhood_search=neighborhood_search, + viscosity_wall=viscosity_fluid, + # This is the same saving frequency as in DualSPHysics for easier comparison + saving_callback=SolutionSavingCallback(dt=0.01), + extra_callback=SortingCallback(interval=1), + density_diffusion=nothing, # TODO only for benchmark + # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: + dt=1e-5, stepsize_callback=nothing, #saving_callback=nothing, + parallelization_backend=PolyesterBackend()) diff --git a/examples/fluid/dam_break_3d.jl b/examples/fluid/dam_break_3d.jl index a073c001ec..936a88c704 100644 --- a/examples/fluid/dam_break_3d.jl +++ b/examples/fluid/dam_break_3d.jl @@ -66,6 +66,7 @@ boundary_system = WallBoundarySystem(tank.boundary, boundary_model) # ========================================================================================== # ==== Simulation semi = Semidiscretization(fluid_system, boundary_system, + neighborhood_search=GridNeighborhoodSearch{3}(), parallelization_backend=PolyesterBackend()) ode = semidiscretize(semi, tspan) @@ -80,8 +81,10 @@ callbacks = CallbackSet(info_callback, saving_callback) # Sometimes, the method fails to do so because forces become extremely large when # fluid particles are very close to boundary particles, and the time integration method # interprets this as an instability. -sol = solve(ode, RDPK3SpFSAL35(), +time_integration_scheme = RDPK3SpFSAL35() +sol = solve(ode, time_integration_scheme, abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration) reltol=1e-4, # Default reltol is 1e-3 (may need to be tuned to prevent boundary penetration) dtmax=1e-2, # Limit stepsize to prevent crashing + dt=1.0, save_everystep=false, callback=callbacks); diff --git a/examples/fluid/dam_break_3d_dualsphysics.jl b/examples/fluid/dam_break_3d_dualsphysics.jl new file mode 100644 index 0000000000..9435bd2b63 --- /dev/null +++ b/examples/fluid/dam_break_3d_dualsphysics.jl @@ -0,0 +1,65 @@ +# Modify the 01_DamBreak example of DualSPHysics like this: +# +# +# +# +# When comparing with high resolution, change the resolution here: +# +# With this resolution, use: +# + +using TrixiParticles, TrixiParticles.PointNeighbors, OrdinaryDiffEq + +fluid_particle_spacing = 0.0085 + +smoothing_length = 1.414216 * fluid_particle_spacing +tank_size = (1.6, 0.665, 0.4) +initial_fluid_size = (0.4, 0.665, 0.3) +acceleration = (0.0, 0.0, -9.81) +spacing_ratio = 1 +boundary_layers = 1 +fluid_density = 1000.0 + +tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density, + n_layers=boundary_layers, spacing_ratio=spacing_ratio, + coordinates_eltype=Float64) + +tank.fluid.coordinates .+= 0.005 +tank.boundary.coordinates .+= 0.005 + +# Define a GPU-compatible neighborhood search +min_corner = minimum(tank.boundary.coordinates, dims=2) +max_corner = maximum(tank.boundary.coordinates, dims=2) +cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32}) +neighborhood_search = GridNeighborhoodSearch{3}(; cell_list, + update_strategy=ParallelUpdate()) + +search_radius = TrixiParticles.compact_support(WendlandC2Kernel{3}(), smoothing_length) +nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0) +cell_coords(x) = PointNeighbors.cell_coords(x, nhs) +cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x)) +coords = reinterpret(reshape, SVector{ndims(nhs), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates) +sort!(coords, by=cell_index) + +# Run the dam break simulation with this neighborhood search +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_3d.jl"), + tank=tank, + smoothing_length=1.7320508 * fluid_particle_spacing, + time_integration_scheme=SymplecticPositionVerlet(), + boundary_density_calculator=ContinuityDensity(), + fluid_particle_spacing=fluid_particle_spacing, + tank_size=tank_size, initial_fluid_size=initial_fluid_size, + acceleration=acceleration, + alpha=0.1, + spacing_ratio=spacing_ratio, boundary_layers=boundary_layers, + tspan=(0.0, 1.0), #cfl=0.2, + neighborhood_search=neighborhood_search, + # viscosity_wall=viscosity_fluid, TODO + # This is the same saving frequency as in DualSPHysics for easier comparison + # saving_callback=SolutionSavingCallback(dt=0.01), + extra_callback=SortingCallback(interval=1), + density_diffusion=nothing, # TODO only for benchmark + # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: + dt=8e-5, #stepsize_callback=nothing, saving_callback=nothing, + parallelization_backend=PolyesterBackend()) diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl index faa1fd2956..c6b07787a4 100644 --- a/src/TrixiParticles.jl +++ b/src/TrixiParticles.jl @@ -26,6 +26,7 @@ using Random: seed! using SciMLBase: SciMLBase, CallbackSet, DiscreteCallback, DynamicalODEProblem, u_modified!, get_tmp_cache, set_proposed_dt!, ODESolution, ODEProblem, terminate!, add_tstop! +using SIMD: vloada, Vec @reexport using StaticArrays: SVector using StaticArrays: @SMatrix, SMatrix, setindex using Statistics: Statistics diff --git a/src/schemes/fluid/fluid.jl b/src/schemes/fluid/fluid.jl index 8725190506..20fd08a4dc 100644 --- a/src/schemes/fluid/fluid.jl +++ b/src/schemes/fluid/fluid.jl @@ -137,7 +137,7 @@ end end # This formulation was chosen to be consistent with the used pressure_acceleration formulations -@propagate_inbounds function continuity_equation!(dv, density_calculator::ContinuityDensity, +@inline function continuity_equation!(dv, density_calculator::ContinuityDensity, particle_system::AbstractFluidSystem, neighbor_system, v_particle_system, v_neighbor_system, @@ -146,21 +146,44 @@ end vdiff = current_velocity(v_particle_system, particle_system, particle) - current_velocity(v_neighbor_system, neighbor_system, neighbor) - vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), - particle_system, neighbor_system, - particle, neighbor, rho_a, rho_b) + # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), + # particle_system, neighbor_system, + # particle, neighbor, rho_a, rho_b) dv[end, particle] += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) # Artificial density diffusion should only be applied to systems representing a fluid # with the same physical properties i.e. density and viscosity. # TODO: shouldn't be applied to particles on the interface (depends on PR #539) - if particle_system === neighbor_system - density_diffusion!(dv, density_diffusion(particle_system), - v_particle_system, particle, neighbor, - pos_diff, distance, m_b, rho_a, rho_b, particle_system, - grad_kernel) - end + # if particle_system === neighbor_system + # density_diffusion!(dv, density_diffusion(particle_system), + # v_particle_system, particle, neighbor, + # pos_diff, distance, m_b, rho_a, rho_b, particle_system, + # grad_kernel) + # end +end + +@propagate_inbounds function continuity_equation!(drho_particle, density_calculator::ContinuityDensity, + particle_system::AbstractFluidSystem, + neighbor_system, + particle, neighbor, pos_diff, distance, + vdiff, m_b, rho_a, rho_b, grad_kernel) + + # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), + # particle_system, neighbor_system, + # particle, neighbor, rho_a, rho_b) + + drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + # Artificial density diffusion should only be applied to systems representing a fluid + # with the same physical properties i.e. density and viscosity. + # TODO: shouldn't be applied to particles on the interface (depends on PR #539) + # if particle_system === neighbor_system + # density_diffusion!(dv, density_diffusion(particle_system), + # v_particle_system, particle, neighbor, + # pos_diff, distance, m_b, rho_a, rho_b, particle_system, + # grad_kernel) + # end end function calculate_dt(v_ode, u_ode, cfl_number, system::AbstractFluidSystem, semi) diff --git a/src/schemes/fluid/pressure_acceleration.jl b/src/schemes/fluid/pressure_acceleration.jl index 7ffa5850b3..6dbb6e2c64 100644 --- a/src/schemes/fluid/pressure_acceleration.jl +++ b/src/schemes/fluid/pressure_acceleration.jl @@ -26,7 +26,8 @@ end # asymmetric version. @inline function pressure_acceleration_continuity_density(m_a, m_b, rho_a, rho_b, p_a, p_b, W_a) - return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a + # return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a + return -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * W_a end # Same as above, but not assuming symmetry of the kernel gradient. To be used with diff --git a/src/schemes/fluid/viscosity.jl b/src/schemes/fluid/viscosity.jl index 2a6c51ba71..4b6eca4a42 100644 --- a/src/schemes/fluid/viscosity.jl +++ b/src/schemes/fluid/viscosity.jl @@ -1,24 +1,24 @@ # Unpack the neighboring systems viscosity to dispatch on the viscosity type. # This function is only necessary to allow `nothing` as viscosity. # Otherwise, we could just apply the viscosity as a function directly. -@propagate_inbounds function dv_viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@propagate_inbounds function dv_viscosity(dv_particle, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) viscosity = viscosity_model(particle_system, neighbor_system) - return dv_viscosity(viscosity, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, + return dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) end -@propagate_inbounds function dv_viscosity(viscosity, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@propagate_inbounds function dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) - return viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, + return viscosity(dv_particle, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) end @@ -82,41 +82,42 @@ function kinematic_viscosity(system, viscosity::ViscosityMorris, smoothing_lengt end @propagate_inbounds function (viscosity::Union{ArtificialViscosityMonaghan, - ViscosityMorris})(particle_system, + ViscosityMorris})(dv_particle, particle_system, neighbor_system, - v_particle_system, - v_neighbor_system, + v_diff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) - rho_mean = (rho_a + rho_b) / 2 + rho_mean = 0#(rho_a + rho_b) / 2 - v_a = viscous_velocity(v_particle_system, particle_system, particle) - v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor) - v_diff = v_a - v_b + # v_a = viscous_velocity(v_particle_system, particle_system, particle) + # v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor) + # v_diff = v_a - v_b - smoothing_length_particle = smoothing_length(particle_system, particle) - smoothing_length_neighbor = smoothing_length(particle_system, neighbor) - smoothing_length_average = (smoothing_length_particle + smoothing_length_neighbor) / 2 + # smoothing_length_particle = smoothing_length(particle_system, particle) + # smoothing_length_neighbor = smoothing_length(particle_system, neighbor) + smoothing_length_average = smoothing_length(particle_system, particle)#(smoothing_length_particle + smoothing_length_neighbor) / 2 - nu_a = kinematic_viscosity(particle_system, - viscosity_model(neighbor_system, particle_system), - smoothing_length_particle, sound_speed) - nu_b = kinematic_viscosity(neighbor_system, - viscosity_model(particle_system, neighbor_system), - smoothing_length_neighbor, sound_speed) + # nu_a = kinematic_viscosity(particle_system, + # viscosity_model(neighbor_system, particle_system), + # smoothing_length_particle, sound_speed) + # nu_b = kinematic_viscosity(neighbor_system, + # viscosity_model(particle_system, neighbor_system), + # smoothing_length_neighbor, sound_speed) + nu_a = 0 + nu_b = 0 - pi_ab = viscosity(sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, - smoothing_length_average, grad_kernel, nu_a, nu_b) + pi_ab = viscosity(dv_particle, sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, + smoothing_length_average, grad_kernel, nu_a, nu_b, m_b) - return m_b * pi_ab + # return m_b * pi_ab end -@inline function (viscosity::ArtificialViscosityMonaghan)(c, v_diff, pos_diff, distance, +@inline function (viscosity::ArtificialViscosityMonaghan)(dv_particle, c, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, h, - grad_kernel, nu_a, nu_b) + grad_kernel, nu_a, nu_b, m_b) (; alpha, beta, epsilon) = viscosity # v_ab ⋅ r_ab @@ -127,11 +128,12 @@ end # approaching particles and turn it off for receding particles. In this way, the # viscosity is used for shocks and not rarefactions." if vr < 0 - mu = h * vr / (distance^2 + epsilon * h^2) - return (alpha * c * mu + beta * mu^2) / rho_mean * grad_kernel + mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon * h^2) + rho_mean = (rho_a + rho_b) / 2 + dv_particle[] += m_b * Base.FastMath.div_fast(alpha * c * mu + beta * mu^2, rho_mean) * grad_kernel end - return zero(v_diff) + return nothing#zero(v_diff) end @inline function (viscosity::ViscosityMorris)(c, v_diff, pos_diff, distance, rho_mean, diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index acb0f06894..198996cc48 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -2,7 +2,7 @@ # in `neighbor_system` and updates `dv` accordingly. # It takes into account pressure forces, viscosity, and for `ContinuityDensity` updates # the density using the continuity equation. -function interact!(dv, v_particle_system, u_particle_system, +function interact_old!(dv, v_particle_system, u_particle_system, v_neighbor_system, u_neighbor_system, particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi) (; density_calculator, correction) = particle_system @@ -63,12 +63,13 @@ function interact!(dv, v_particle_system, u_particle_system, distance, grad_kernel, correction) # Propagate `@inbounds` to the viscosity function, which accesses particle data - dv_viscosity_ = viscosity_correction * - @inbounds dv_viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, - particle, neighbor, pos_diff, distance, - sound_speed, m_a, m_b, rho_a, rho_b, - grad_kernel) + dv_viscosity_ = zero(pos_diff) + # viscosity_correction * + # @inbounds dv_viscosity(particle_system, neighbor_system, + # v_particle_system, v_neighbor_system, + # particle, neighbor, pos_diff, distance, + # sound_speed, m_a, m_b, rho_a, rho_b, + # grad_kernel) # Extra terms in the momentum equation when using a shifting technique dv_tvf = @inbounds dv_shifting(shifting_technique(particle_system), @@ -112,6 +113,258 @@ function interact!(dv, v_particle_system, u_particle_system, return dv end +function interact!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi) + interact_old!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system, neighbor_system, semi) +end + +function interact!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem, + neighbor_system::WeaklyCompressibleSPHSystem, semi) + dv_ = view(dv, 1:ndims(particle_system), :) + drho = view(dv, ndims(particle_system) + 1, :) + interact!(dv_, drho, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system, neighbor_system, semi) +end + +function interact!(dv, drho, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system::WeaklyCompressibleSPHSystem, semi) where NDIMS + system_coords = current_coordinates(u_particle_system, particle_system) + neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system) + # system_coords = vcat(system_coords, zero(drho)') + # neighbor_system_coords = vcat(neighbor_system_coords, zero(drho)') + + neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi) + cell_list = neighborhood_search.cell_list + search_radius2 = PointNeighbors.search_radius(neighborhood_search)^2 + + backend = semi.parallelization_backend + ndrange = n_integrated_particles(particle_system) + mykernel(backend)(dv, drho, system_coords, neighbor_system_coords, neighborhood_search, + cell_list, search_radius2, v_particle_system, v_neighbor_system, + particle_system, neighbor_system; ndrange=ndrange) + + KernelAbstractions.synchronize(backend) + + return dv +end + +@kernel function mykernel(dv, drho, + system_coords, neighbor_system_coords, + nhs, cell_list, search_radius2, + v_particle_system, v_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system::WeaklyCompressibleSPHSystem) where NDIMS + particle = @index(Global) + + sound_speed = particle_system.state_equation.sound_speed + # VT_coords = Vec{4, eltype(system_coords)} + # point_coords_ = vloada(VT_coords, pointer(system_coords, 4*(particle-1)+1)) + # a, b, c, d = Tuple(point_coords_) + # point_coords = SVector(a, b, c) + point_coords = @inbounds extract_svector(system_coords, Val(NDIMS), particle) + p_a = @inbounds particle_system.pressure[particle] + + VT = Vec{4, eltype(v_particle_system)} + vrho_a = vloada(VT, pointer(v_particle_system, 4*(particle-1)+1)) + a, b, c, d = Tuple(vrho_a) + v_a = SVector(a, b, c) + rho_a = d + # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle) + # rho_a = @inbounds v_particle_system[end, particle] + + dv_particle = zero(v_a) + drho_particle = zero(rho_a) + + cell = PointNeighbors.cell_coords(point_coords, nhs) + + # cell_blocks = ((cell[1] - 1, cell[2] - 1), (cell[1] - 1, cell[2]), (cell[1] - 1, cell[2] + 1)) + # cell_blocks = CartesianIndices(ntuple(i -> (cell[i + 1] - 1):(cell[i + 1] + 1), Val(NDIMS - 1))) + # for cell_block in cell_blocks + # cell_block_start = (cell[1] - 1, Tuple(cell_block)...) + # cell_index = @inbounds PointNeighbors.cell_index(cell_list, cell_block_start) + # start = @inbounds cell_list.cells.first_bin_index[cell_index] + # stop = @inbounds cell_list.cells.first_bin_index[cell_index + 3] - 1 + + # for neighbor in start:stop + + for neighbor_cell_ in PointNeighbors.neighboring_cells(cell, nhs) + neighbor_cell = Tuple(neighbor_cell_) + neighbors = @inbounds PointNeighbors.points_in_cell(neighbor_cell, nhs) + + for neighbor_ in eachindex(neighbors) + neighbor = @inbounds neighbors[neighbor_] + + # neighbor_coords_ = vloada(VT_coords, pointer(neighbor_system_coords, 4*(neighbor-1)+1)) + # a, b, c, d = Tuple(neighbor_coords_) + # neighbor_coords = SVector(a, b, c) + neighbor_coords = @inbounds extract_svector(neighbor_system_coords, + Val(NDIMS), neighbor) + + # pos_diff = convert.(eltype(particle_system), point_coords - neighbor_coords) + pos_diff = point_coords - neighbor_coords + distance2 = dot(pos_diff, pos_diff) + + if eps(search_radius2) <= distance2 <= search_radius2 + distance = sqrt(distance2) + + m_b = @inbounds neighbor_system.mass[neighbor] + p_b = @inbounds neighbor_system.pressure[neighbor] + + vrho_b = vloada(VT, pointer(v_neighbor_system, 4*(neighbor-1)+1)) + a, b, c, d = Tuple(vrho_b) + v_b = SVector(a, b, c) + rho_b = d + + # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor) + # rho_b = @inbounds v_neighbor_system[end, neighbor] + + grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + + # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel + dv_particle += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel + + vdiff = v_a - v_b + # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) + drho_particle += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + h = particle_system.cache.smoothing_length + alpha = particle_system.viscosity.alpha + epsilon = particle_system.viscosity.epsilon + + vr = dot(vdiff, pos_diff) + if vr < 0 + # mu = h * vr / (distance2 + epsilon) + mu = Base.FastMath.div_fast(h * vr, distance2 + epsilon) + rho_mean = (rho_a + rho_b) / 2 + # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel + pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel + dv_particle += m_b * pi_ab + end + end + end + end + + for i in eachindex(dv_particle) + @inbounds dv[i, particle] += dv_particle[i] + # Debug example + # debug_array[i, particle] += dv_pressure[i] + end + @inbounds drho[particle] += drho_particle +end + +@inline function kernel_grad_ds(system, pos_diff, r) + h = system.cache.smoothing_length + normalization_factor = system.cache.normalization_factor + + # q = r / h + q = Base.FastMath.div_fast(r, h) + wqq1 = (1 - q / 2) + return normalization_factor * wqq1 * wqq1 * wqq1 * pos_diff +end + +function interact_reassembled!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system::WeaklyCompressibleSPHSystem, semi) where NDIMS + (; density_calculator, correction) = particle_system + + system_coords = current_coordinates(u_particle_system, particle_system) + neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system) + + neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi) + sound_speed = particle_system.state_equation.sound_speed + + @threaded semi for particle in each_integrated_particle(particle_system) + p_a = @inbounds current_pressure(v_particle_system, particle_system, particle) + m_a = @inbounds hydrodynamic_mass(particle_system, particle) + + # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle) + # rho_a = @inbounds v_particle_system[end, particle] + v_a, rho_a = @inbounds velocity_and_density(v_particle_system, particle_system, particle) + + dv_particle = Ref(zero(v_a)) + drho_particle = Ref(zero(rho_a)) + + PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords, + neighborhood_search, particle) do _, neighbor, pos_diff, distance + m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor) + p_b = @inbounds current_pressure(v_neighbor_system, neighbor_system, neighbor) + + # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor) + # rho_b = @inbounds v_neighbor_system[end, neighbor] + v_b, rho_b = @inbounds velocity_and_density(v_neighbor_system, neighbor_system, neighbor) + + grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + # grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) + + # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel + # dv_particle[] += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel + + dv_particle[] += pressure_acceleration(particle_system, neighbor_system, + particle, neighbor, + m_a, m_b, p_a, p_b, rho_a, rho_b, pos_diff, + distance, grad_kernel, correction) + + vdiff = v_a - v_b + # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) + # drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + @inbounds dv_viscosity(dv_particle, particle_system, neighbor_system, + vdiff, + particle, neighbor, pos_diff, distance, + sound_speed, m_a, m_b, rho_a, rho_b, + grad_kernel) + + @inbounds continuity_equation!(drho_particle, density_calculator, particle_system, + neighbor_system, particle, neighbor, + pos_diff, distance, vdiff, m_b, rho_a, rho_b, grad_kernel) + + # h = particle_system.cache.smoothing_length + # alpha = particle_system.viscosity.alpha + # epsilon = particle_system.viscosity.epsilon + + # vr = dot(vdiff, pos_diff) + # if vr < 0 + # # mu = h * vr / (distance2 + epsilon) + # mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon) + # rho_mean = (rho_a + rho_b) / 2 + # # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel + # pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel + # dv_particle[] += m_b * pi_ab + # end + end + + for i in eachindex(dv_particle[]) + @inbounds dv[i, particle] += dv_particle[][i] + end + @inbounds dv[end, particle] += drho_particle[] + end +end + +@propagate_inbounds function velocity_and_density(v, system, particle) + v_particle = current_velocity(v, system, particle) + rho_particle = current_density(v, system, particle) + + return v_particle, rho_particle +end + +@inline function velocity_and_density(v, ::WeaklyCompressibleSPHSystem{3}, particle) + vrho_a = vloada(Vec{4, eltype(v)}, pointer(v, 4 * (particle - 1) + 1)) + a, b, c, d = Tuple(vrho_a) + v_particle = SVector(a, b, c) + rho_particle = d + + return v_particle, rho_particle +end + @propagate_inbounds function particle_neighbor_pressure(v_particle_system, v_neighbor_system, particle_system, neighbor_system, diff --git a/src/schemes/fluid/weakly_compressible_sph/system.jl b/src/schemes/fluid/weakly_compressible_sph/system.jl index f84849f096..1f0b32d1f0 100644 --- a/src/schemes/fluid/weakly_compressible_sph/system.jl +++ b/src/schemes/fluid/weakly_compressible_sph/system.jl @@ -154,7 +154,8 @@ function WeaklyCompressibleSPHSystem(initial_condition, density_calculator, stat smoothing_length)..., create_cache_shifting(initial_condition, shifting_technique)..., # Per-system color tag for colorfield surface-normal logic and VTK output. - color=Int(color_value)) + color=Int(color_value), + normalization_factor = Float32(-2.7852 / smoothing_length^4)) # If the `reference_density_spacing` is set calculate the `ideal_neighbor_count` if reference_particle_spacing > 0 From a6c8948c928923ca20ab2e5d32fd398b4f7272d0 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 19 Mar 2026 17:48:32 +0100 Subject: [PATCH 17/24] Fix example file --- examples/fluid/dam_break_3d_dualsphysics.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/fluid/dam_break_3d_dualsphysics.jl b/examples/fluid/dam_break_3d_dualsphysics.jl index 9435bd2b63..db02ad7c40 100644 --- a/examples/fluid/dam_break_3d_dualsphysics.jl +++ b/examples/fluid/dam_break_3d_dualsphysics.jl @@ -19,6 +19,9 @@ acceleration = (0.0, 0.0, -9.81) spacing_ratio = 1 boundary_layers = 1 fluid_density = 1000.0 +sound_speed = 20 * sqrt(9.81 * tank_size[2]) +state_equation = StateEquationCole(; sound_speed, reference_density=fluid_density, + exponent=7) tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density, n_layers=boundary_layers, spacing_ratio=spacing_ratio, @@ -48,6 +51,7 @@ trixi_include(@__MODULE__, smoothing_length=1.7320508 * fluid_particle_spacing, time_integration_scheme=SymplecticPositionVerlet(), boundary_density_calculator=ContinuityDensity(), + state_equation=state_equation, fluid_particle_spacing=fluid_particle_spacing, tank_size=tank_size, initial_fluid_size=initial_fluid_size, acceleration=acceleration, From 45560dfa8546df6f90a32e50733c36cb9304d914 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 19 Mar 2026 17:54:56 +0100 Subject: [PATCH 18/24] Fix example --- examples/fluid/dam_break_3d.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/fluid/dam_break_3d.jl b/examples/fluid/dam_break_3d.jl index 936a88c704..2fdd752732 100644 --- a/examples/fluid/dam_break_3d.jl +++ b/examples/fluid/dam_break_3d.jl @@ -72,7 +72,8 @@ ode = semidiscretize(semi, tspan) info_callback = InfoCallback(interval=100) saving_callback = SolutionSavingCallback(dt=0.1, prefix="") -callbacks = CallbackSet(info_callback, saving_callback) +extra_callback = nothing +callbacks = CallbackSet(info_callback, saving_callback, extra_callback) # Use a Runge-Kutta method with automatic (error based) time step size control. # Limiting of the maximum stepsize is necessary to prevent crashing. From 857a7d3ecda65c0b4070d2f4c73887587004caf6 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 19 Mar 2026 18:38:05 +0100 Subject: [PATCH 19/24] Update --- examples/fluid/dam_break_2d_dualsphysics.jl | 7 ++++- examples/fluid/dam_break_3d_dualsphysics.jl | 4 +-- .../fluid/weakly_compressible_sph/rhs.jl | 26 +++++++++---------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/examples/fluid/dam_break_2d_dualsphysics.jl b/examples/fluid/dam_break_2d_dualsphysics.jl index 8e55ccc4a7..6f67c70875 100644 --- a/examples/fluid/dam_break_2d_dualsphysics.jl +++ b/examples/fluid/dam_break_2d_dualsphysics.jl @@ -48,6 +48,11 @@ sort!(coords, by=cell_index) # return TrixiParticles.PointNeighbors.cell_coords.(coords, Ref(nhs)) # end +# For benchmarking, run: +# trixi_include_changeprecision(Float32, "../TrixiParticles.jl/examples/fluid/dam_break_2d_dualsphysics.jl", parallelization_backend=CUDABackend(), tspan=(0.0f0, 1.0f-10), fluid_particle_spacing=0.001, coordinates_eltype=Float32); +# dv_ode, du_ode = copy(sol.u[end]).x; v_ode, u_ode = copy(sol.u[end]).x; semi = ode.p; system = semi.systems[1]; dv = TrixiParticles.wrap_v(dv_ode, system, semi); v = TrixiParticles.wrap_v(v_ode, system, semi); u = TrixiParticles.wrap_u(u_ode, system, semi); +# @benchmark TrixiParticles.interact_reassembled!($dv, $v, $u, $v, $u, $system, $system, $semi) + # Run the dam break simulation with this neighborhood search trixi_include(@__MODULE__, joinpath(examples_dir(), "fluid", "dam_break_2d.jl"), @@ -63,7 +68,7 @@ trixi_include(@__MODULE__, viscosity_wall=viscosity_fluid, # This is the same saving frequency as in DualSPHysics for easier comparison saving_callback=SolutionSavingCallback(dt=0.01), - extra_callback=SortingCallback(interval=1), + # extra_callback=SortingCallback(interval=1), density_diffusion=nothing, # TODO only for benchmark # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: dt=1e-5, stepsize_callback=nothing, #saving_callback=nothing, diff --git a/examples/fluid/dam_break_3d_dualsphysics.jl b/examples/fluid/dam_break_3d_dualsphysics.jl index db02ad7c40..c2aeceda81 100644 --- a/examples/fluid/dam_break_3d_dualsphysics.jl +++ b/examples/fluid/dam_break_3d_dualsphysics.jl @@ -12,7 +12,7 @@ using TrixiParticles, TrixiParticles.PointNeighbors, OrdinaryDiffEq fluid_particle_spacing = 0.0085 -smoothing_length = 1.414216 * fluid_particle_spacing +smoothing_length = 1.7320508 * fluid_particle_spacing tank_size = (1.6, 0.665, 0.4) initial_fluid_size = (0.4, 0.665, 0.3) acceleration = (0.0, 0.0, -9.81) @@ -62,7 +62,7 @@ trixi_include(@__MODULE__, # viscosity_wall=viscosity_fluid, TODO # This is the same saving frequency as in DualSPHysics for easier comparison # saving_callback=SolutionSavingCallback(dt=0.01), - extra_callback=SortingCallback(interval=1), + # extra_callback=SortingCallback(interval=1), density_diffusion=nothing, # TODO only for benchmark # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: dt=8e-5, #stepsize_callback=nothing, saving_callback=nothing, diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index 198996cc48..19e7103cbc 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -121,7 +121,7 @@ function interact!(dv, v_particle_system, u_particle_system, particle_system, neighbor_system, semi) end -function interact!(dv, v_particle_system, u_particle_system, +function interact2!(dv, v_particle_system, u_particle_system, v_neighbor_system, u_neighbor_system, particle_system::WeaklyCompressibleSPHSystem, neighbor_system::WeaklyCompressibleSPHSystem, semi) @@ -186,21 +186,21 @@ end cell = PointNeighbors.cell_coords(point_coords, nhs) # cell_blocks = ((cell[1] - 1, cell[2] - 1), (cell[1] - 1, cell[2]), (cell[1] - 1, cell[2] + 1)) - # cell_blocks = CartesianIndices(ntuple(i -> (cell[i + 1] - 1):(cell[i + 1] + 1), Val(NDIMS - 1))) - # for cell_block in cell_blocks - # cell_block_start = (cell[1] - 1, Tuple(cell_block)...) - # cell_index = @inbounds PointNeighbors.cell_index(cell_list, cell_block_start) - # start = @inbounds cell_list.cells.first_bin_index[cell_index] - # stop = @inbounds cell_list.cells.first_bin_index[cell_index + 3] - 1 + cell_blocks = CartesianIndices(ntuple(i -> (cell[i + 1] - 1):(cell[i + 1] + 1), Val(NDIMS - 1))) + for cell_block in cell_blocks + cell_block_start = (cell[1] - 1, Tuple(cell_block)...) + cell_index = @inbounds PointNeighbors.cell_index(cell_list, cell_block_start) + start = @inbounds cell_list.cells.first_bin_index[cell_index] + stop = @inbounds cell_list.cells.first_bin_index[cell_index + 3] - 1 - # for neighbor in start:stop + for neighbor in start:stop - for neighbor_cell_ in PointNeighbors.neighboring_cells(cell, nhs) - neighbor_cell = Tuple(neighbor_cell_) - neighbors = @inbounds PointNeighbors.points_in_cell(neighbor_cell, nhs) + # for neighbor_cell_ in PointNeighbors.neighboring_cells(cell, nhs) + # neighbor_cell = Tuple(neighbor_cell_) + # neighbors = @inbounds PointNeighbors.points_in_cell(neighbor_cell, nhs) - for neighbor_ in eachindex(neighbors) - neighbor = @inbounds neighbors[neighbor_] + # for neighbor_ in eachindex(neighbors) + # neighbor = @inbounds neighbors[neighbor_] # neighbor_coords_ = vloada(VT_coords, pointer(neighbor_system_coords, 4*(neighbor-1)+1)) # a, b, c, d = Tuple(neighbor_coords_) From 46d54fdd87fb1df225921ae8e9652296e416ea65 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 19 Mar 2026 18:55:41 +0100 Subject: [PATCH 20/24] Add benchmark numbers --- dualsphysics_2d_benchmark_2m_particles.txt | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 dualsphysics_2d_benchmark_2m_particles.txt diff --git a/dualsphysics_2d_benchmark_2m_particles.txt b/dualsphysics_2d_benchmark_2m_particles.txt new file mode 100644 index 0000000000..e1f32dbeab --- /dev/null +++ b/dualsphysics_2d_benchmark_2m_particles.txt @@ -0,0 +1,105 @@ +2D benchmark 2M particles + +DualSPHysics: +1.55ms + +DualSPHysics no density diffusion: +1.1ms + +main FP32, no density diffusion: +3.1ms + +now with compact vov +main mixed precision: +5ms (3.2x) + +main FP32: +4ms (2.6x) + +with dd, sorted by cell coords: +4.8ms + +sorted by cell index: +4.9ms + +blocks of three cells: +4.9ms + +optimizations (inbounds, zero distance skip): +4.7ms + +simplified kernel gradient: +4.3ms + +no density diffusion: +3.1ms + +unrolled continuity equation: +3.1ms + +unrolled viscosity: +2.9ms + +pull *_a quantities before the neighbor loop: +2.7ms + +split dv and drho: +2.55ms (2.3x) + +add to arrays after loop: +2.17ms (1.97x) + +optimized day 1: +1.3ms + +optimized day 1 mixed precision: +3.7ms + +full grid NHS: +1.5ms (2.1x) + + +3D DualSPHysics 638400 fluid particles: +4.9ms ++ 200µs KerUpdatePosCell + +main 3D TP, no DD: +12ms + +3D TP stripped (above with pull quantities...) and compact vov: +8.3ms (3.1x, 1.7x?) + +add to arrays after loop: +7.5ms (2.8x, 1.5x?) + +split dv and drho: +7.4ms + +fixed @inbounds: +6.8ms + +fastmath if distance: +4.7ms + +first vloada: +4.6ms + +second vloada: +4.3ms + +mixed precision: +21.8ms??? + +kernel grad fastmath: +4ms + +targeted fastdiv with sorted NHS: +4ms +fluid-boundary: 0.4ms. fluid-*: 4.4ms + +full grid NHS: +4.9ms (2.4x) +total 5.3ms + +full grid with foreach_point_neighbor: +7.7ms From 8cb19b6885ab5029c878fc065106520f4239caad Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Fri, 20 Mar 2026 12:08:11 +0100 Subject: [PATCH 21/24] Use regular smoothing kernel implementation with div_fast --- examples/fluid/dam_break_2d_dualsphysics.jl | 2 +- src/general/smoothing_kernels.jl | 3 ++- src/schemes/fluid/viscosity.jl | 4 ++-- src/schemes/fluid/weakly_compressible_sph/rhs.jl | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/fluid/dam_break_2d_dualsphysics.jl b/examples/fluid/dam_break_2d_dualsphysics.jl index 6f67c70875..472801d5a3 100644 --- a/examples/fluid/dam_break_2d_dualsphysics.jl +++ b/examples/fluid/dam_break_2d_dualsphysics.jl @@ -31,7 +31,7 @@ tank.boundary.coordinates .+= 0.005 # Define a GPU-compatible neighborhood search min_corner = minimum(tank.boundary.coordinates, dims=2) max_corner = maximum(tank.boundary.coordinates, dims=2) -cell_list = FullGridCellList(; min_corner, max_corner, backend=PointNeighbors.CompactVectorOfVectors{Int32}) +cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32}) neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, update_strategy=ParallelUpdate()) diff --git a/src/general/smoothing_kernels.jl b/src/general/smoothing_kernels.jl index 4a97564648..87622065e9 100644 --- a/src/general/smoothing_kernels.jl +++ b/src/general/smoothing_kernels.jl @@ -11,7 +11,8 @@ abstract type AbstractSmoothingKernel{NDIMS} end # Also note that `sqrt(eps(h^2)) != eps(h)`. distance^2 < eps(h^2) && return zero(pos_diff) - return kernel_deriv(kernel, distance, h) / distance * pos_diff + distance_inv = Base.FastMath.div_fast(1, distance) + return kernel_deriv(kernel, distance, h) * distance_inv * pos_diff end @inline function corrected_kernel_grad(kernel, pos_diff, distance, h, correction, system, diff --git a/src/schemes/fluid/viscosity.jl b/src/schemes/fluid/viscosity.jl index 4b6eca4a42..b178749f35 100644 --- a/src/schemes/fluid/viscosity.jl +++ b/src/schemes/fluid/viscosity.jl @@ -23,8 +23,8 @@ end sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) end -@inline function dv_viscosity(viscosity::Nothing, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@inline function dv_viscosity(dv_particle, viscosity::Nothing, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) return zero(pos_diff) diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index 19e7103cbc..6be665550b 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -273,7 +273,7 @@ end function interact_reassembled!(dv, v_particle_system, u_particle_system, v_neighbor_system, u_neighbor_system, particle_system::WeaklyCompressibleSPHSystem{NDIMS}, - neighbor_system::WeaklyCompressibleSPHSystem, semi) where NDIMS + neighbor_system, semi) where NDIMS (; density_calculator, correction) = particle_system system_coords = current_coordinates(u_particle_system, particle_system) @@ -302,8 +302,8 @@ function interact_reassembled!(dv, v_particle_system, u_particle_system, # rho_b = @inbounds v_neighbor_system[end, neighbor] v_b, rho_b = @inbounds velocity_and_density(v_neighbor_system, neighbor_system, neighbor) - grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) - # grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) + # grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel # dv_particle[] += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel From ef509b589706b0991002e31cc1e8fcab29030ee4 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:40:01 +0100 Subject: [PATCH 22/24] Prepare for maximum performance --- src/schemes/fluid/weakly_compressible_sph/rhs.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index 6be665550b..13f698aaf7 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -293,7 +293,7 @@ function interact_reassembled!(dv, v_particle_system, u_particle_system, dv_particle = Ref(zero(v_a)) drho_particle = Ref(zero(rho_a)) - PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords, + @inbounds PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords, neighborhood_search, particle) do _, neighbor, pos_diff, distance m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor) p_b = @inbounds current_pressure(v_neighbor_system, neighbor_system, neighbor) @@ -302,8 +302,8 @@ function interact_reassembled!(dv, v_particle_system, u_particle_system, # rho_b = @inbounds v_neighbor_system[end, neighbor] v_b, rho_b = @inbounds velocity_and_density(v_neighbor_system, neighbor_system, neighbor) - # grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) - grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) + grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + # grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel # dv_particle[] += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel From 2203849e598d336a86950e706a494851309a7499 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:40:46 +0100 Subject: [PATCH 23/24] Update timings --- dualsphysics_2d_benchmark_2m_particles.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dualsphysics_2d_benchmark_2m_particles.txt b/dualsphysics_2d_benchmark_2m_particles.txt index e1f32dbeab..12e3c2cafb 100644 --- a/dualsphysics_2d_benchmark_2m_particles.txt +++ b/dualsphysics_2d_benchmark_2m_particles.txt @@ -101,5 +101,8 @@ full grid NHS: 4.9ms (2.4x) total 5.3ms +full grid with foreach_neighbor inbounds: +4.8ms + full grid with foreach_point_neighbor: 7.7ms From b392d3d4ea304eab85c2153fa69f7dad7fd9c570 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:06:03 +0200 Subject: [PATCH 24/24] Fix zero distance --- src/schemes/fluid/weakly_compressible_sph/rhs.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index 13f698aaf7..086816e767 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -295,6 +295,8 @@ function interact_reassembled!(dv, v_particle_system, u_particle_system, @inbounds PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords, neighborhood_search, particle) do _, neighbor, pos_diff, distance + distance < eps() && return + m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor) p_b = @inbounds current_pressure(v_neighbor_system, neighbor_system, neighbor)