diff --git a/Project.toml b/Project.toml
index dff0de0627..84b5497b2a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,6 +26,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -62,6 +63,7 @@ Polyester = "0.7.10"
ReadVTK = "0.2"
RecipesBase = "1"
Reexport = "1"
+SIMD = "3.7.2"
SciMLBase = "2"
StaticArrays = "1"
Statistics = "1"
diff --git a/dualsphysics_2d_benchmark_2m_particles.txt b/dualsphysics_2d_benchmark_2m_particles.txt
new file mode 100644
index 0000000000..12e3c2cafb
--- /dev/null
+++ b/dualsphysics_2d_benchmark_2m_particles.txt
@@ -0,0 +1,108 @@
+2D benchmark 2M particles
+
+DualSPHysics:
+1.55ms
+
+DualSPHysics no density diffusion:
+1.1ms
+
+main FP32, no density diffusion:
+3.1ms
+
+now with compact vov
+main mixed precision:
+5ms (3.2x)
+
+main FP32:
+4ms (2.6x)
+
+with dd, sorted by cell coords:
+4.8ms
+
+sorted by cell index:
+4.9ms
+
+blocks of three cells:
+4.9ms
+
+optimizations (inbounds, zero distance skip):
+4.7ms
+
+simplified kernel gradient:
+4.3ms
+
+no density diffusion:
+3.1ms
+
+unrolled continuity equation:
+3.1ms
+
+unrolled viscosity:
+2.9ms
+
+pull *_a quantities before the neighbor loop:
+2.7ms
+
+split dv and drho:
+2.55ms (2.3x)
+
+add to arrays after loop:
+2.17ms (1.97x)
+
+optimized day 1:
+1.3ms
+
+optimized day 1 mixed precision:
+3.7ms
+
+full grid NHS:
+1.5ms (2.1x)
+
+
+3D DualSPHysics 638400 fluid particles:
+4.9ms
++ 200µs KerUpdatePosCell
+
+main 3D TP, no DD:
+12ms
+
+3D TP stripped (above with pull quantities...) and compact vov:
+8.3ms (3.1x, 1.7x?)
+
+add to arrays after loop:
+7.5ms (2.8x, 1.5x?)
+
+split dv and drho:
+7.4ms
+
+fixed @inbounds:
+6.8ms
+
+fastmath if distance:
+4.7ms
+
+first vloada:
+4.6ms
+
+second vloada:
+4.3ms
+
+mixed precision:
+21.8ms???
+
+kernel grad fastmath:
+4ms
+
+targeted fastdiv with sorted NHS:
+4ms
+fluid-boundary: 0.4ms. fluid-*: 4.4ms
+
+full grid NHS:
+4.9ms (2.4x)
+total 5.3ms
+
+full grid with foreach_neighbor inbounds:
+4.8ms
+
+full grid with foreach_point_neighbor:
+7.7ms
diff --git a/examples/fluid/dam_break_2d_dualsphysics.jl b/examples/fluid/dam_break_2d_dualsphysics.jl
new file mode 100644
index 0000000000..472801d5a3
--- /dev/null
+++ b/examples/fluid/dam_break_2d_dualsphysics.jl
@@ -0,0 +1,75 @@
+# Modify the 01_DamBreak example of DualSPHysics like this:
+#
+#
+#
+#
+# When comparing with high resolution, change the resolution here:
+#
+# With this resolution, use:
+#
+
+using TrixiParticles, TrixiParticles.PointNeighbors
+
+fluid_particle_spacing = 0.002
+
+# Load setup from dam break example
+trixi_include(@__MODULE__,
+ joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
+ fluid_particle_spacing=fluid_particle_spacing,
+ smoothing_length=1.414216 * fluid_particle_spacing,
+ tank_size=(4.0, 3.0), W=1.0, H=2.0,
+ spacing_ratio=1, boundary_layers=1,
+ sol=nothing, ode=nothing)
+
+tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density,
+ n_layers=boundary_layers, spacing_ratio=spacing_ratio,
+ coordinates_eltype=Float64)
+
+tank.fluid.coordinates .+= 0.005
+tank.boundary.coordinates .+= 0.005
+
+# Define a GPU-compatible neighborhood search
+min_corner = minimum(tank.boundary.coordinates, dims=2)
+max_corner = maximum(tank.boundary.coordinates, dims=2)
+cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32})
+neighborhood_search = GridNeighborhoodSearch{2}(; cell_list,
+ update_strategy=ParallelUpdate())
+
+search_radius = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi).search_radius
+nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0)
+cell_coords(x) = PointNeighbors.cell_coords(x, nhs)
+cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x))
+coords = reinterpret(reshape, SVector{ndims(fluid_system), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates)
+sort!(coords, by=cell_index)
+
+# function cells(coordinates, system, semi)
+# nhs = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi)
+# coords = reinterpret(reshape, SVector{ndims(system), eltype(coordinates)}, coordinates)
+# return TrixiParticles.PointNeighbors.cell_coords.(coords, Ref(nhs))
+# end
+
+# For benchmarking, run:
+# trixi_include_changeprecision(Float32, "../TrixiParticles.jl/examples/fluid/dam_break_2d_dualsphysics.jl", parallelization_backend=CUDABackend(), tspan=(0.0f0, 1.0f-10), fluid_particle_spacing=0.001, coordinates_eltype=Float32);
+# dv_ode, du_ode = copy(sol.u[end]).x; v_ode, u_ode = copy(sol.u[end]).x; semi = ode.p; system = semi.systems[1]; dv = TrixiParticles.wrap_v(dv_ode, system, semi); v = TrixiParticles.wrap_v(v_ode, system, semi); u = TrixiParticles.wrap_u(u_ode, system, semi);
+# @benchmark TrixiParticles.interact_reassembled!($dv, $v, $u, $v, $u, $system, $system, $semi)
+
+# Run the dam break simulation with this neighborhood search
+trixi_include(@__MODULE__,
+ joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
+ tank=tank,
+ smoothing_length=1.414216 * fluid_particle_spacing,
+ time_integration_scheme=SymplecticPositionVerlet(),
+ boundary_density_calculator=ContinuityDensity(),
+ fluid_particle_spacing=fluid_particle_spacing,
+ tank_size=(4.0, 3.0), W=1.0, H=2.0,
+ spacing_ratio=1, boundary_layers=1,
+ tspan=(0.0, 1.0), cfl=0.2,
+ neighborhood_search=neighborhood_search,
+ viscosity_wall=viscosity_fluid,
+ # This is the same saving frequency as in DualSPHysics for easier comparison
+ saving_callback=SolutionSavingCallback(dt=0.01),
+ # extra_callback=SortingCallback(interval=1),
+ density_diffusion=nothing, # TODO only for benchmark
+ # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving:
+ dt=1e-5, stepsize_callback=nothing, #saving_callback=nothing,
+ parallelization_backend=PolyesterBackend())
diff --git a/examples/fluid/dam_break_3d.jl b/examples/fluid/dam_break_3d.jl
index a073c001ec..2fdd752732 100644
--- a/examples/fluid/dam_break_3d.jl
+++ b/examples/fluid/dam_break_3d.jl
@@ -66,12 +66,14 @@ boundary_system = WallBoundarySystem(tank.boundary, boundary_model)
# ==========================================================================================
# ==== Simulation
semi = Semidiscretization(fluid_system, boundary_system,
+ neighborhood_search=GridNeighborhoodSearch{3}(),
parallelization_backend=PolyesterBackend())
ode = semidiscretize(semi, tspan)
info_callback = InfoCallback(interval=100)
saving_callback = SolutionSavingCallback(dt=0.1, prefix="")
-callbacks = CallbackSet(info_callback, saving_callback)
+extra_callback = nothing
+callbacks = CallbackSet(info_callback, saving_callback, extra_callback)
# Use a Runge-Kutta method with automatic (error based) time step size control.
# Limiting of the maximum stepsize is necessary to prevent crashing.
@@ -80,8 +82,10 @@ callbacks = CallbackSet(info_callback, saving_callback)
# Sometimes, the method fails to do so because forces become extremely large when
# fluid particles are very close to boundary particles, and the time integration method
# interprets this as an instability.
-sol = solve(ode, RDPK3SpFSAL35(),
+time_integration_scheme = RDPK3SpFSAL35()
+sol = solve(ode, time_integration_scheme,
abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration)
reltol=1e-4, # Default reltol is 1e-3 (may need to be tuned to prevent boundary penetration)
dtmax=1e-2, # Limit stepsize to prevent crashing
+ dt=1.0,
save_everystep=false, callback=callbacks);
diff --git a/examples/fluid/dam_break_3d_dualsphysics.jl b/examples/fluid/dam_break_3d_dualsphysics.jl
new file mode 100644
index 0000000000..c2aeceda81
--- /dev/null
+++ b/examples/fluid/dam_break_3d_dualsphysics.jl
@@ -0,0 +1,69 @@
+# Modify the 01_DamBreak example of DualSPHysics like this:
+#
+#
+#
+#
+# When comparing with high resolution, change the resolution here:
+#
+# With this resolution, use:
+#
+
+using TrixiParticles, TrixiParticles.PointNeighbors, OrdinaryDiffEq
+
+fluid_particle_spacing = 0.0085
+
+smoothing_length = 1.7320508 * fluid_particle_spacing
+tank_size = (1.6, 0.665, 0.4)
+initial_fluid_size = (0.4, 0.665, 0.3)
+acceleration = (0.0, 0.0, -9.81)
+spacing_ratio = 1
+boundary_layers = 1
+fluid_density = 1000.0
+sound_speed = 20 * sqrt(9.81 * tank_size[2])
+state_equation = StateEquationCole(; sound_speed, reference_density=fluid_density,
+ exponent=7)
+
+tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density,
+ n_layers=boundary_layers, spacing_ratio=spacing_ratio,
+ coordinates_eltype=Float64)
+
+tank.fluid.coordinates .+= 0.005
+tank.boundary.coordinates .+= 0.005
+
+# Define a GPU-compatible neighborhood search
+min_corner = minimum(tank.boundary.coordinates, dims=2)
+max_corner = maximum(tank.boundary.coordinates, dims=2)
+cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32})
+neighborhood_search = GridNeighborhoodSearch{3}(; cell_list,
+ update_strategy=ParallelUpdate())
+
+search_radius = TrixiParticles.compact_support(WendlandC2Kernel{3}(), smoothing_length)
+nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0)
+cell_coords(x) = PointNeighbors.cell_coords(x, nhs)
+cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x))
+coords = reinterpret(reshape, SVector{ndims(nhs), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates)
+sort!(coords, by=cell_index)
+
+# Run the dam break simulation with this neighborhood search
+trixi_include(@__MODULE__,
+ joinpath(examples_dir(), "fluid", "dam_break_3d.jl"),
+ tank=tank,
+ smoothing_length=1.7320508 * fluid_particle_spacing,
+ time_integration_scheme=SymplecticPositionVerlet(),
+ boundary_density_calculator=ContinuityDensity(),
+ state_equation=state_equation,
+ fluid_particle_spacing=fluid_particle_spacing,
+ tank_size=tank_size, initial_fluid_size=initial_fluid_size,
+ acceleration=acceleration,
+ alpha=0.1,
+ spacing_ratio=spacing_ratio, boundary_layers=boundary_layers,
+ tspan=(0.0, 1.0), #cfl=0.2,
+ neighborhood_search=neighborhood_search,
+ # viscosity_wall=viscosity_fluid, TODO
+ # This is the same saving frequency as in DualSPHysics for easier comparison
+ # saving_callback=SolutionSavingCallback(dt=0.01),
+ # extra_callback=SortingCallback(interval=1),
+ density_diffusion=nothing, # TODO only for benchmark
+ # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving:
+ dt=8e-5, #stepsize_callback=nothing, saving_callback=nothing,
+ parallelization_backend=PolyesterBackend())
diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl
index fa87c296ca..c6b07787a4 100644
--- a/src/TrixiParticles.jl
+++ b/src/TrixiParticles.jl
@@ -26,6 +26,7 @@ using Random: seed!
using SciMLBase: SciMLBase, CallbackSet, DiscreteCallback, DynamicalODEProblem, u_modified!,
get_tmp_cache, set_proposed_dt!, ODESolution, ODEProblem, terminate!,
add_tstop!
+using SIMD: vloada, Vec
@reexport using StaticArrays: SVector
using StaticArrays: @SMatrix, SMatrix, setindex
using Statistics: Statistics
@@ -73,7 +74,7 @@ export WeaklyCompressibleSPHSystem, EntropicallyDampedSPHSystem, TotalLagrangian
export BoundaryZone, InFlow, OutFlow, BidirectionalFlow
export InfoCallback, SolutionSavingCallback, DensityReinitializationCallback,
PostprocessCallback, StepsizeCallback, UpdateCallback, SteadyStateReachedCallback,
- SplitIntegrationCallback
+ SplitIntegrationCallback, SortingCallback
export ContinuityDensity, SummationDensity
export PenaltyForceGanzenmueller, TransportVelocityAdami, ParticleShiftingTechnique,
ParticleShiftingTechniqueSun2017, ConsistentShiftingSun2019,
diff --git a/src/callbacks/callbacks.jl b/src/callbacks/callbacks.jl
index 975c760a8e..eba012c4af 100644
--- a/src/callbacks/callbacks.jl
+++ b/src/callbacks/callbacks.jl
@@ -76,3 +76,4 @@ include("stepsize.jl")
include("update.jl")
include("steady_state_reached.jl")
include("split_integration.jl")
+include("sorting.jl")
diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl
new file mode 100644
index 0000000000..23b5a46ebc
--- /dev/null
+++ b/src/callbacks/sorting.jl
@@ -0,0 +1,185 @@
+# These are the systems that require sorting.
+# TODO: The `DEMSystem` should be added here in the future.
+# Boundary particles always stay fixed relative to each other, TLSPH computes in the initial configuration.
+const RequiresSortingSystem = AbstractFluidSystem
+
+struct SortingCallback{I}
+ interval::I
+ last_t::Float64
+end
+
+"""
+ SortingCallback(; interval=-1, dt=0.0, initial_sort=true)
+
+Reorders particles according to neighborhood-search cells for performance optimization.
+
+When particles become very unordered throughout a long-running simulation, performance
+decreases due to increased cache-misses (on CPUs) and lack of block structure (on GPUs).
+On GPUs, a fully shuffled particle ordering causes a 3-4x slowdown compared to a sorted configuration.
+On CPUs, there is no difference for small problems (<16k particles), but the performance penalty
+grows linearly with the problem size up to 10x slowdown for very large problems (65M particles).
+See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for more details.
+
+# Keywords
+- `interval`: Sort particles at the end of every `interval` time steps.
+- `dt`: Sort particles in regular intervals of `dt` in terms of integration time.
+ This callback does not add extra time steps / `tstops`; instead, reinitialization is
+ triggered at the first solver step after each `dt` interval has elapsed.
+- `initial_sort=true`: When enabled, particles are sorted at the beginning of the simulation.
+ When the initial configuration is a perfect grid of particles,
+ sorting at the beginning is not necessary and might even slightly
+ slow down the first time steps, since a perfect grid is even better
+ than sorting by NHS cell index.
+"""
+function SortingCallback(; interval::Integer=-1, dt=0.0)
+ if dt > 0 && interval !== -1
+ throw(ArgumentError("Setting both interval and dt is not supported!"))
+ end
+
+ # Sort in intervals in terms of simulation time
+ if dt > 0
+ interval = Float64(dt)
+
+ # Sort every time step (default)
+ elseif interval == -1
+ interval = 1
+ end
+
+ sorting_callback! = SortingCallback(interval, last_t)
+
+ # The first one is the `condition`, the second the `affect!`
+ return DiscreteCallback(sorting_callback!, sorting_callback!,
+ initialize=(initial_sort!), save_positions=(false, false))
+end
+
+# `initialize`
+function initial_sort!(cb, u, t, integrator)
+ # The `SortingCallback` is either `cb.affect!` (with `DiscreteCallback`)
+ # or `cb.affect!.affect!` (with `PeriodicCallback`).
+ # Let recursive dispatch handle this.
+
+ initial_sort!(cb.affect!, u, t, integrator)
+end
+
+function initial_sort!(cb::SortingCallback, u, t, integrator)
+ return cb(integrator)
+end
+
+# `condition` with `interval`
+function (sorting_callback!::SortingCallback{Int})(u, t, integrator)
+ (; interval) = sorting_callback!
+
+ return condition_integrator_interval(integrator, interval)
+end
+
+# condition with `dt`
+function (sorting_callback!::SortingCallback)(u, t, integrator)
+ (; interval, last_t) = sorting_callback!
+
+ return (t - last_t) > interval
+end
+
+# `affect!`
+function (sorting_callback!::SortingCallback)(integrator)
+ semi = integrator.p
+ v_ode, u_ode = integrator.u.x
+
+ @trixi_timeit timer() "sorting callback" begin
+ foreach_system(semi) do system
+ v = wrap_v(v_ode, system, semi)
+ u = wrap_u(u_ode, system, semi)
+
+ sort_particles!(system, v, u, semi)
+ end
+ end
+
+ # Tell OrdinaryDiffEq that `integrator.u` has been modified
+ u_modified!(integrator, true)
+
+ return integrator
+end
+
+sort_particles!(system, v, u, semi) = system
+
+function sort_particles!(system::RequiresSortingSystem, v, u, semi)
+ nhs = get_neighborhood_search(system, semi)
+
+ if !(nhs isa GridNeighborhoodSearch)
+ throw(ArgumentError("`SortingCallback` can only be used with a `GridNeighborhoodSearch`"))
+ end
+
+ sort_particles!(system, v, u, nhs, nhs.cell_list, semi)
+end
+
+# TODO: Sort also masses and particle spacings for variable smoothing lengths.
+function sort_particles!(system::RequiresSortingSystem, v, u, nhs,
+ cell_list::FullGridCellList, semi)
+ cell_coords = allocate(semi.parallelization_backend, SVector{ndims(system), Int},
+ nparticles(system))
+ @threaded semi for particle in each_active_particle(system)
+ point_coords = current_coords(u, system, particle)
+ cell_coords[particle] = PointNeighbors.cell_coords(point_coords, nhs)
+ end
+
+ # TODO `sortperm` works on CUDA but not (yet) on Metal
+ perm = sortperm(transfer2cpu(cell_coords))
+
+ sort_system!(system, v, u, perm, system.buffer)
+
+ return system
+end
+
+function sort_system!(system, v, u, perm, buffer::Nothing)
+ system_coords = current_coordinates(u, system)
+ system_velocity = current_velocity(v, system)
+ system_density = current_density(v, system)
+ system_pressure = current_pressure(v, system)
+
+ system_coords .= system_coords[:, perm]
+ system_velocity .= system_velocity[:, perm]
+ system_pressure .= system_pressure[perm]
+ system_density .= system_density[perm]
+
+ return system
+end
+
+function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SortingCallback{Int}})
+ @nospecialize cb # reduce precompilation time
+ print(io, "SortingCallback(interval=", cb.affect!.interval, ")")
+end
+
+function Base.show(io::IO,
+ cb::DiscreteCallback{<:Any, <:SortingCallback})
+ @nospecialize cb # reduce precompilation time
+ print(io, "SortingCallback(dt=", cb.affect!.affect!.interval, ")")
+end
+
+function Base.show(io::IO, ::MIME"text/plain",
+ cb::DiscreteCallback{<:Any, <:SortingCallback{Int}})
+ @nospecialize cb # reduce precompilation time
+
+ if get(io, :compact, false)
+ show(io, cb)
+ else
+ sorting_cb = cb.affect!
+ setup = [
+ "interval" => sorting_cb.interval
+ ]
+ summary_box(io, "SortingCallback", setup)
+ end
+end
+
+function Base.show(io::IO, ::MIME"text/plain",
+ cb::DiscreteCallback{<:Any, <:SortingCallback{Int}})
+ @nospecialize cb # reduce precompilation time
+
+ if get(io, :compact, false)
+ show(io, cb)
+ else
+ sorting_cb = cb.affect!.affect!
+ setup = [
+ "dt" => sorting_cb.interval
+ ]
+ summary_box(io, "SortingCallback", setup)
+ end
+end
diff --git a/src/general/buffer.jl b/src/general/buffer.jl
index 1e2c196799..6b0d0f7f61 100644
--- a/src/general/buffer.jl
+++ b/src/general/buffer.jl
@@ -80,3 +80,35 @@ end
return system
end
+
+function sort_system!(system, v, u, perm, buffer::SystemBuffer)
+ (; active_particle) = buffer
+
+ # Note that the following contain also inactive particles
+ system_coords = current_coordinates(u, system)
+ system_velocity = current_velocity(v, system)
+ system_density = current_density(v, system)
+ system_pressure = current_pressure(v, system)
+
+ # First permutation: sort by desired `perm`
+ active_particle_sorted = active_particle[perm]
+
+ # Second permutation: move inactive particles to the end (true first, false last)
+ perm2 = sortperm(transfer2cpu(active_particle_sorted), rev=true)
+
+ # Combined permutation
+ combined_perm = perm[perm2]
+
+ # Apply to all data
+ active_particle .= active_particle_sorted[perm2]
+ system_coords .= system_coords[:, combined_perm]
+ system_velocity .= system_velocity[:, combined_perm]
+ system_pressure .= system_pressure[combined_perm]
+ system_density .= system_density[combined_perm]
+
+ # Update buffer
+ buffer.active_particle_count[] = count(active_particle)
+ buffer.eachparticle[1:buffer.active_particle_count[]] .= 1:buffer.active_particle_count[]
+
+ return buffer
+end
diff --git a/src/general/smoothing_kernels.jl b/src/general/smoothing_kernels.jl
index 4a97564648..87622065e9 100644
--- a/src/general/smoothing_kernels.jl
+++ b/src/general/smoothing_kernels.jl
@@ -11,7 +11,8 @@ abstract type AbstractSmoothingKernel{NDIMS} end
# Also note that `sqrt(eps(h^2)) != eps(h)`.
distance^2 < eps(h^2) && return zero(pos_diff)
- return kernel_deriv(kernel, distance, h) / distance * pos_diff
+ distance_inv = Base.FastMath.div_fast(1, distance)
+ return kernel_deriv(kernel, distance, h) * distance_inv * pos_diff
end
@inline function corrected_kernel_grad(kernel, pos_diff, distance, h, correction, system,
diff --git a/src/schemes/fluid/fluid.jl b/src/schemes/fluid/fluid.jl
index 8725190506..20fd08a4dc 100644
--- a/src/schemes/fluid/fluid.jl
+++ b/src/schemes/fluid/fluid.jl
@@ -137,7 +137,7 @@ end
end
# This formulation was chosen to be consistent with the used pressure_acceleration formulations
-@propagate_inbounds function continuity_equation!(dv, density_calculator::ContinuityDensity,
+@inline function continuity_equation!(dv, density_calculator::ContinuityDensity,
particle_system::AbstractFluidSystem,
neighbor_system,
v_particle_system, v_neighbor_system,
@@ -146,21 +146,44 @@ end
vdiff = current_velocity(v_particle_system, particle_system, particle) -
current_velocity(v_neighbor_system, neighbor_system, neighbor)
- vdiff += continuity_equation_shifting_term(shifting_technique(particle_system),
- particle_system, neighbor_system,
- particle, neighbor, rho_a, rho_b)
+ # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system),
+ # particle_system, neighbor_system,
+ # particle, neighbor, rho_a, rho_b)
dv[end, particle] += rho_a / rho_b * m_b * dot(vdiff, grad_kernel)
# Artificial density diffusion should only be applied to systems representing a fluid
# with the same physical properties i.e. density and viscosity.
# TODO: shouldn't be applied to particles on the interface (depends on PR #539)
- if particle_system === neighbor_system
- density_diffusion!(dv, density_diffusion(particle_system),
- v_particle_system, particle, neighbor,
- pos_diff, distance, m_b, rho_a, rho_b, particle_system,
- grad_kernel)
- end
+ # if particle_system === neighbor_system
+ # density_diffusion!(dv, density_diffusion(particle_system),
+ # v_particle_system, particle, neighbor,
+ # pos_diff, distance, m_b, rho_a, rho_b, particle_system,
+ # grad_kernel)
+ # end
+end
+
+@propagate_inbounds function continuity_equation!(drho_particle, density_calculator::ContinuityDensity,
+ particle_system::AbstractFluidSystem,
+ neighbor_system,
+ particle, neighbor, pos_diff, distance,
+ vdiff, m_b, rho_a, rho_b, grad_kernel)
+
+ # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system),
+ # particle_system, neighbor_system,
+ # particle, neighbor, rho_a, rho_b)
+
+ drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel)
+
+ # Artificial density diffusion should only be applied to systems representing a fluid
+ # with the same physical properties i.e. density and viscosity.
+ # TODO: shouldn't be applied to particles on the interface (depends on PR #539)
+ # if particle_system === neighbor_system
+ # density_diffusion!(dv, density_diffusion(particle_system),
+ # v_particle_system, particle, neighbor,
+ # pos_diff, distance, m_b, rho_a, rho_b, particle_system,
+ # grad_kernel)
+ # end
end
function calculate_dt(v_ode, u_ode, cfl_number, system::AbstractFluidSystem, semi)
diff --git a/src/schemes/fluid/pressure_acceleration.jl b/src/schemes/fluid/pressure_acceleration.jl
index 7ffa5850b3..6dbb6e2c64 100644
--- a/src/schemes/fluid/pressure_acceleration.jl
+++ b/src/schemes/fluid/pressure_acceleration.jl
@@ -26,7 +26,8 @@ end
# asymmetric version.
@inline function pressure_acceleration_continuity_density(m_a, m_b, rho_a, rho_b, p_a, p_b,
W_a)
- return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a
+ # return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a
+ return -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * W_a
end
# Same as above, but not assuming symmetry of the kernel gradient. To be used with
diff --git a/src/schemes/fluid/viscosity.jl b/src/schemes/fluid/viscosity.jl
index 2a6c51ba71..b178749f35 100644
--- a/src/schemes/fluid/viscosity.jl
+++ b/src/schemes/fluid/viscosity.jl
@@ -1,30 +1,30 @@
# Unpack the neighboring systems viscosity to dispatch on the viscosity type.
# This function is only necessary to allow `nothing` as viscosity.
# Otherwise, we could just apply the viscosity as a function directly.
-@propagate_inbounds function dv_viscosity(particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
+@propagate_inbounds function dv_viscosity(dv_particle, particle_system, neighbor_system,
+ vdiff,
particle, neighbor, pos_diff, distance,
sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel)
viscosity = viscosity_model(particle_system, neighbor_system)
- return dv_viscosity(viscosity, particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
+ return dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system,
+ vdiff,
particle, neighbor, pos_diff, distance,
sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel)
end
-@propagate_inbounds function dv_viscosity(viscosity, particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
+@propagate_inbounds function dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system,
+ vdiff,
particle, neighbor, pos_diff, distance,
sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel)
- return viscosity(particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
+ return viscosity(dv_particle, particle_system, neighbor_system,
+ vdiff,
particle, neighbor, pos_diff, distance,
sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel)
end
-@inline function dv_viscosity(viscosity::Nothing, particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
+@inline function dv_viscosity(dv_particle, viscosity::Nothing, particle_system, neighbor_system,
+ vdiff,
particle, neighbor, pos_diff, distance,
sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel)
return zero(pos_diff)
@@ -82,41 +82,42 @@ function kinematic_viscosity(system, viscosity::ViscosityMorris, smoothing_lengt
end
@propagate_inbounds function (viscosity::Union{ArtificialViscosityMonaghan,
- ViscosityMorris})(particle_system,
+ ViscosityMorris})(dv_particle, particle_system,
neighbor_system,
- v_particle_system,
- v_neighbor_system,
+ v_diff,
particle, neighbor,
pos_diff, distance,
sound_speed,
m_a, m_b, rho_a, rho_b,
grad_kernel)
- rho_mean = (rho_a + rho_b) / 2
+ rho_mean = 0#(rho_a + rho_b) / 2
- v_a = viscous_velocity(v_particle_system, particle_system, particle)
- v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor)
- v_diff = v_a - v_b
+ # v_a = viscous_velocity(v_particle_system, particle_system, particle)
+ # v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor)
+ # v_diff = v_a - v_b
- smoothing_length_particle = smoothing_length(particle_system, particle)
- smoothing_length_neighbor = smoothing_length(particle_system, neighbor)
- smoothing_length_average = (smoothing_length_particle + smoothing_length_neighbor) / 2
+ # smoothing_length_particle = smoothing_length(particle_system, particle)
+ # smoothing_length_neighbor = smoothing_length(particle_system, neighbor)
+ smoothing_length_average = smoothing_length(particle_system, particle)#(smoothing_length_particle + smoothing_length_neighbor) / 2
- nu_a = kinematic_viscosity(particle_system,
- viscosity_model(neighbor_system, particle_system),
- smoothing_length_particle, sound_speed)
- nu_b = kinematic_viscosity(neighbor_system,
- viscosity_model(particle_system, neighbor_system),
- smoothing_length_neighbor, sound_speed)
+ # nu_a = kinematic_viscosity(particle_system,
+ # viscosity_model(neighbor_system, particle_system),
+ # smoothing_length_particle, sound_speed)
+ # nu_b = kinematic_viscosity(neighbor_system,
+ # viscosity_model(particle_system, neighbor_system),
+ # smoothing_length_neighbor, sound_speed)
+ nu_a = 0
+ nu_b = 0
- pi_ab = viscosity(sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b,
- smoothing_length_average, grad_kernel, nu_a, nu_b)
+ pi_ab = viscosity(dv_particle, sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b,
+ smoothing_length_average, grad_kernel, nu_a, nu_b, m_b)
- return m_b * pi_ab
+ # return m_b * pi_ab
end
-@inline function (viscosity::ArtificialViscosityMonaghan)(c, v_diff, pos_diff, distance,
+@inline function (viscosity::ArtificialViscosityMonaghan)(dv_particle, c, v_diff, pos_diff, distance,
rho_mean, rho_a, rho_b, h,
- grad_kernel, nu_a, nu_b)
+ grad_kernel, nu_a, nu_b, m_b)
(; alpha, beta, epsilon) = viscosity
# v_ab ⋅ r_ab
@@ -127,11 +128,12 @@ end
# approaching particles and turn it off for receding particles. In this way, the
# viscosity is used for shocks and not rarefactions."
if vr < 0
- mu = h * vr / (distance^2 + epsilon * h^2)
- return (alpha * c * mu + beta * mu^2) / rho_mean * grad_kernel
+ mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon * h^2)
+ rho_mean = (rho_a + rho_b) / 2
+ dv_particle[] += m_b * Base.FastMath.div_fast(alpha * c * mu + beta * mu^2, rho_mean) * grad_kernel
end
- return zero(v_diff)
+ return nothing#zero(v_diff)
end
@inline function (viscosity::ViscosityMorris)(c, v_diff, pos_diff, distance, rho_mean,
diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl
index acb0f06894..086816e767 100644
--- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl
+++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl
@@ -2,7 +2,7 @@
# in `neighbor_system` and updates `dv` accordingly.
# It takes into account pressure forces, viscosity, and for `ContinuityDensity` updates
# the density using the continuity equation.
-function interact!(dv, v_particle_system, u_particle_system,
+function interact_old!(dv, v_particle_system, u_particle_system,
v_neighbor_system, u_neighbor_system,
particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi)
(; density_calculator, correction) = particle_system
@@ -63,12 +63,13 @@ function interact!(dv, v_particle_system, u_particle_system,
distance, grad_kernel, correction)
# Propagate `@inbounds` to the viscosity function, which accesses particle data
- dv_viscosity_ = viscosity_correction *
- @inbounds dv_viscosity(particle_system, neighbor_system,
- v_particle_system, v_neighbor_system,
- particle, neighbor, pos_diff, distance,
- sound_speed, m_a, m_b, rho_a, rho_b,
- grad_kernel)
+ dv_viscosity_ = zero(pos_diff)
+ # viscosity_correction *
+ # @inbounds dv_viscosity(particle_system, neighbor_system,
+ # v_particle_system, v_neighbor_system,
+ # particle, neighbor, pos_diff, distance,
+ # sound_speed, m_a, m_b, rho_a, rho_b,
+ # grad_kernel)
# Extra terms in the momentum equation when using a shifting technique
dv_tvf = @inbounds dv_shifting(shifting_technique(particle_system),
@@ -112,6 +113,260 @@ function interact!(dv, v_particle_system, u_particle_system,
return dv
end
+function interact!(dv, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi)
+ interact_old!(dv, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system, neighbor_system, semi)
+end
+
+function interact2!(dv, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system::WeaklyCompressibleSPHSystem,
+ neighbor_system::WeaklyCompressibleSPHSystem, semi)
+ dv_ = view(dv, 1:ndims(particle_system), :)
+ drho = view(dv, ndims(particle_system) + 1, :)
+ interact!(dv_, drho, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system, neighbor_system, semi)
+end
+
+function interact!(dv, drho, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system::WeaklyCompressibleSPHSystem{NDIMS},
+ neighbor_system::WeaklyCompressibleSPHSystem, semi) where NDIMS
+ system_coords = current_coordinates(u_particle_system, particle_system)
+ neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system)
+ # system_coords = vcat(system_coords, zero(drho)')
+ # neighbor_system_coords = vcat(neighbor_system_coords, zero(drho)')
+
+ neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi)
+ cell_list = neighborhood_search.cell_list
+ search_radius2 = PointNeighbors.search_radius(neighborhood_search)^2
+
+ backend = semi.parallelization_backend
+ ndrange = n_integrated_particles(particle_system)
+ mykernel(backend)(dv, drho, system_coords, neighbor_system_coords, neighborhood_search,
+ cell_list, search_radius2, v_particle_system, v_neighbor_system,
+ particle_system, neighbor_system; ndrange=ndrange)
+
+ KernelAbstractions.synchronize(backend)
+
+ return dv
+end
+
+@kernel function mykernel(dv, drho,
+ system_coords, neighbor_system_coords,
+ nhs, cell_list, search_radius2,
+ v_particle_system, v_neighbor_system,
+ particle_system::WeaklyCompressibleSPHSystem{NDIMS},
+ neighbor_system::WeaklyCompressibleSPHSystem) where NDIMS
+ particle = @index(Global)
+
+ sound_speed = particle_system.state_equation.sound_speed
+ # VT_coords = Vec{4, eltype(system_coords)}
+ # point_coords_ = vloada(VT_coords, pointer(system_coords, 4*(particle-1)+1))
+ # a, b, c, d = Tuple(point_coords_)
+ # point_coords = SVector(a, b, c)
+ point_coords = @inbounds extract_svector(system_coords, Val(NDIMS), particle)
+ p_a = @inbounds particle_system.pressure[particle]
+
+ VT = Vec{4, eltype(v_particle_system)}
+ vrho_a = vloada(VT, pointer(v_particle_system, 4*(particle-1)+1))
+ a, b, c, d = Tuple(vrho_a)
+ v_a = SVector(a, b, c)
+ rho_a = d
+ # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle)
+ # rho_a = @inbounds v_particle_system[end, particle]
+
+ dv_particle = zero(v_a)
+ drho_particle = zero(rho_a)
+
+ cell = PointNeighbors.cell_coords(point_coords, nhs)
+
+ # cell_blocks = ((cell[1] - 1, cell[2] - 1), (cell[1] - 1, cell[2]), (cell[1] - 1, cell[2] + 1))
+ cell_blocks = CartesianIndices(ntuple(i -> (cell[i + 1] - 1):(cell[i + 1] + 1), Val(NDIMS - 1)))
+ for cell_block in cell_blocks
+ cell_block_start = (cell[1] - 1, Tuple(cell_block)...)
+ cell_index = @inbounds PointNeighbors.cell_index(cell_list, cell_block_start)
+ start = @inbounds cell_list.cells.first_bin_index[cell_index]
+ stop = @inbounds cell_list.cells.first_bin_index[cell_index + 3] - 1
+
+ for neighbor in start:stop
+
+ # for neighbor_cell_ in PointNeighbors.neighboring_cells(cell, nhs)
+ # neighbor_cell = Tuple(neighbor_cell_)
+ # neighbors = @inbounds PointNeighbors.points_in_cell(neighbor_cell, nhs)
+
+ # for neighbor_ in eachindex(neighbors)
+ # neighbor = @inbounds neighbors[neighbor_]
+
+ # neighbor_coords_ = vloada(VT_coords, pointer(neighbor_system_coords, 4*(neighbor-1)+1))
+ # a, b, c, d = Tuple(neighbor_coords_)
+ # neighbor_coords = SVector(a, b, c)
+ neighbor_coords = @inbounds extract_svector(neighbor_system_coords,
+ Val(NDIMS), neighbor)
+
+ # pos_diff = convert.(eltype(particle_system), point_coords - neighbor_coords)
+ pos_diff = point_coords - neighbor_coords
+ distance2 = dot(pos_diff, pos_diff)
+
+ if eps(search_radius2) <= distance2 <= search_radius2
+ distance = sqrt(distance2)
+
+ m_b = @inbounds neighbor_system.mass[neighbor]
+ p_b = @inbounds neighbor_system.pressure[neighbor]
+
+ vrho_b = vloada(VT, pointer(v_neighbor_system, 4*(neighbor-1)+1))
+ a, b, c, d = Tuple(vrho_b)
+ v_b = SVector(a, b, c)
+ rho_b = d
+
+ # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor)
+ # rho_b = @inbounds v_neighbor_system[end, neighbor]
+
+ grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance)
+
+ # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel
+ dv_particle += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel
+
+ vdiff = v_a - v_b
+ # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel)
+ drho_particle += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel)
+
+ h = particle_system.cache.smoothing_length
+ alpha = particle_system.viscosity.alpha
+ epsilon = particle_system.viscosity.epsilon
+
+ vr = dot(vdiff, pos_diff)
+ if vr < 0
+ # mu = h * vr / (distance2 + epsilon)
+ mu = Base.FastMath.div_fast(h * vr, distance2 + epsilon)
+ rho_mean = (rho_a + rho_b) / 2
+ # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel
+ pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel
+ dv_particle += m_b * pi_ab
+ end
+ end
+ end
+ end
+
+ for i in eachindex(dv_particle)
+ @inbounds dv[i, particle] += dv_particle[i]
+ # Debug example
+ # debug_array[i, particle] += dv_pressure[i]
+ end
+ @inbounds drho[particle] += drho_particle
+end
+
+@inline function kernel_grad_ds(system, pos_diff, r)
+ h = system.cache.smoothing_length
+ normalization_factor = system.cache.normalization_factor
+
+ # q = r / h
+ q = Base.FastMath.div_fast(r, h)
+ wqq1 = (1 - q / 2)
+ return normalization_factor * wqq1 * wqq1 * wqq1 * pos_diff
+end
+
+function interact_reassembled!(dv, v_particle_system, u_particle_system,
+ v_neighbor_system, u_neighbor_system,
+ particle_system::WeaklyCompressibleSPHSystem{NDIMS},
+ neighbor_system, semi) where NDIMS
+ (; density_calculator, correction) = particle_system
+
+ system_coords = current_coordinates(u_particle_system, particle_system)
+ neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system)
+
+ neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi)
+ sound_speed = particle_system.state_equation.sound_speed
+
+ @threaded semi for particle in each_integrated_particle(particle_system)
+ p_a = @inbounds current_pressure(v_particle_system, particle_system, particle)
+ m_a = @inbounds hydrodynamic_mass(particle_system, particle)
+
+ # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle)
+ # rho_a = @inbounds v_particle_system[end, particle]
+ v_a, rho_a = @inbounds velocity_and_density(v_particle_system, particle_system, particle)
+
+ dv_particle = Ref(zero(v_a))
+ drho_particle = Ref(zero(rho_a))
+
+ @inbounds PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords,
+ neighborhood_search, particle) do _, neighbor, pos_diff, distance
+ distance < eps() && return
+
+ m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor)
+ p_b = @inbounds current_pressure(v_neighbor_system, neighbor_system, neighbor)
+
+ # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor)
+ # rho_b = @inbounds v_neighbor_system[end, neighbor]
+ v_b, rho_b = @inbounds velocity_and_density(v_neighbor_system, neighbor_system, neighbor)
+
+ grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance)
+ # grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle)
+
+ # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel
+ # dv_particle[] += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel
+
+ dv_particle[] += pressure_acceleration(particle_system, neighbor_system,
+ particle, neighbor,
+ m_a, m_b, p_a, p_b, rho_a, rho_b, pos_diff,
+ distance, grad_kernel, correction)
+
+ vdiff = v_a - v_b
+ # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel)
+ # drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel)
+
+ @inbounds dv_viscosity(dv_particle, particle_system, neighbor_system,
+ vdiff,
+ particle, neighbor, pos_diff, distance,
+ sound_speed, m_a, m_b, rho_a, rho_b,
+ grad_kernel)
+
+ @inbounds continuity_equation!(drho_particle, density_calculator, particle_system,
+ neighbor_system, particle, neighbor,
+ pos_diff, distance, vdiff, m_b, rho_a, rho_b, grad_kernel)
+
+ # h = particle_system.cache.smoothing_length
+ # alpha = particle_system.viscosity.alpha
+ # epsilon = particle_system.viscosity.epsilon
+
+ # vr = dot(vdiff, pos_diff)
+ # if vr < 0
+ # # mu = h * vr / (distance2 + epsilon)
+ # mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon)
+ # rho_mean = (rho_a + rho_b) / 2
+ # # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel
+ # pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel
+ # dv_particle[] += m_b * pi_ab
+ # end
+ end
+
+ for i in eachindex(dv_particle[])
+ @inbounds dv[i, particle] += dv_particle[][i]
+ end
+ @inbounds dv[end, particle] += drho_particle[]
+ end
+end
+
+@propagate_inbounds function velocity_and_density(v, system, particle)
+ v_particle = current_velocity(v, system, particle)
+ rho_particle = current_density(v, system, particle)
+
+ return v_particle, rho_particle
+end
+
+@inline function velocity_and_density(v, ::WeaklyCompressibleSPHSystem{3}, particle)
+ vrho_a = vloada(Vec{4, eltype(v)}, pointer(v, 4 * (particle - 1) + 1))
+ a, b, c, d = Tuple(vrho_a)
+ v_particle = SVector(a, b, c)
+ rho_particle = d
+
+ return v_particle, rho_particle
+end
+
@propagate_inbounds function particle_neighbor_pressure(v_particle_system,
v_neighbor_system,
particle_system, neighbor_system,
diff --git a/src/schemes/fluid/weakly_compressible_sph/system.jl b/src/schemes/fluid/weakly_compressible_sph/system.jl
index f84849f096..1f0b32d1f0 100644
--- a/src/schemes/fluid/weakly_compressible_sph/system.jl
+++ b/src/schemes/fluid/weakly_compressible_sph/system.jl
@@ -154,7 +154,8 @@ function WeaklyCompressibleSPHSystem(initial_condition, density_calculator, stat
smoothing_length)...,
create_cache_shifting(initial_condition, shifting_technique)...,
# Per-system color tag for colorfield surface-normal logic and VTK output.
- color=Int(color_value))
+ color=Int(color_value),
+ normalization_factor = Float32(-2.7852 / smoothing_length^4))
# If the `reference_density_spacing` is set calculate the `ideal_neighbor_count`
if reference_particle_spacing > 0