diff --git a/Project.toml b/Project.toml index dff0de0627..84b5497b2a 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -62,6 +63,7 @@ Polyester = "0.7.10" ReadVTK = "0.2" RecipesBase = "1" Reexport = "1" +SIMD = "3.7.2" SciMLBase = "2" StaticArrays = "1" Statistics = "1" diff --git a/dualsphysics_2d_benchmark_2m_particles.txt b/dualsphysics_2d_benchmark_2m_particles.txt new file mode 100644 index 0000000000..12e3c2cafb --- /dev/null +++ b/dualsphysics_2d_benchmark_2m_particles.txt @@ -0,0 +1,108 @@ +2D benchmark 2M particles + +DualSPHysics: +1.55ms + +DualSPHysics no density diffusion: +1.1ms + +main FP32, no density diffusion: +3.1ms + +now with compact vov +main mixed precision: +5ms (3.2x) + +main FP32: +4ms (2.6x) + +with dd, sorted by cell coords: +4.8ms + +sorted by cell index: +4.9ms + +blocks of three cells: +4.9ms + +optimizations (inbounds, zero distance skip): +4.7ms + +simplified kernel gradient: +4.3ms + +no density diffusion: +3.1ms + +unrolled continuity equation: +3.1ms + +unrolled viscosity: +2.9ms + +pull *_a quantities before the neighbor loop: +2.7ms + +split dv and drho: +2.55ms (2.3x) + +add to arrays after loop: +2.17ms (1.97x) + +optimized day 1: +1.3ms + +optimized day 1 mixed precision: +3.7ms + +full grid NHS: +1.5ms (2.1x) + + +3D DualSPHysics 638400 fluid particles: +4.9ms ++ 200µs KerUpdatePosCell + +main 3D TP, no DD: +12ms + +3D TP stripped (above with pull quantities...) and compact vov: +8.3ms (3.1x, 1.7x?) + +add to arrays after loop: +7.5ms (2.8x, 1.5x?) + +split dv and drho: +7.4ms + +fixed @inbounds: +6.8ms + +fastmath if distance: +4.7ms + +first vloada: +4.6ms + +second vloada: +4.3ms + +mixed precision: +21.8ms??? + +kernel grad fastmath: +4ms + +targeted fastdiv with sorted NHS: +4ms +fluid-boundary: 0.4ms. fluid-*: 4.4ms + +full grid NHS: +4.9ms (2.4x) +total 5.3ms + +full grid with foreach_neighbor inbounds: +4.8ms + +full grid with foreach_point_neighbor: +7.7ms diff --git a/examples/fluid/dam_break_2d_dualsphysics.jl b/examples/fluid/dam_break_2d_dualsphysics.jl new file mode 100644 index 0000000000..472801d5a3 --- /dev/null +++ b/examples/fluid/dam_break_2d_dualsphysics.jl @@ -0,0 +1,75 @@ +# Modify the 01_DamBreak example of DualSPHysics like this: +# +# +# +# +# When comparing with high resolution, change the resolution here: +# +# With this resolution, use: +# + +using TrixiParticles, TrixiParticles.PointNeighbors + +fluid_particle_spacing = 0.002 + +# Load setup from dam break example +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_2d.jl"), + fluid_particle_spacing=fluid_particle_spacing, + smoothing_length=1.414216 * fluid_particle_spacing, + tank_size=(4.0, 3.0), W=1.0, H=2.0, + spacing_ratio=1, boundary_layers=1, + sol=nothing, ode=nothing) + +tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density, + n_layers=boundary_layers, spacing_ratio=spacing_ratio, + coordinates_eltype=Float64) + +tank.fluid.coordinates .+= 0.005 +tank.boundary.coordinates .+= 0.005 + +# Define a GPU-compatible neighborhood search +min_corner = minimum(tank.boundary.coordinates, dims=2) +max_corner = maximum(tank.boundary.coordinates, dims=2) +cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32}) +neighborhood_search = GridNeighborhoodSearch{2}(; cell_list, + update_strategy=ParallelUpdate()) + +search_radius = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi).search_radius +nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0) +cell_coords(x) = PointNeighbors.cell_coords(x, nhs) +cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x)) +coords = reinterpret(reshape, SVector{ndims(fluid_system), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates) +sort!(coords, by=cell_index) + +# function cells(coordinates, system, semi) +# nhs = TrixiParticles.get_neighborhood_search(fluid_system, fluid_system, semi) +# coords = reinterpret(reshape, SVector{ndims(system), eltype(coordinates)}, coordinates) +# return TrixiParticles.PointNeighbors.cell_coords.(coords, Ref(nhs)) +# end + +# For benchmarking, run: +# trixi_include_changeprecision(Float32, "../TrixiParticles.jl/examples/fluid/dam_break_2d_dualsphysics.jl", parallelization_backend=CUDABackend(), tspan=(0.0f0, 1.0f-10), fluid_particle_spacing=0.001, coordinates_eltype=Float32); +# dv_ode, du_ode = copy(sol.u[end]).x; v_ode, u_ode = copy(sol.u[end]).x; semi = ode.p; system = semi.systems[1]; dv = TrixiParticles.wrap_v(dv_ode, system, semi); v = TrixiParticles.wrap_v(v_ode, system, semi); u = TrixiParticles.wrap_u(u_ode, system, semi); +# @benchmark TrixiParticles.interact_reassembled!($dv, $v, $u, $v, $u, $system, $system, $semi) + +# Run the dam break simulation with this neighborhood search +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_2d.jl"), + tank=tank, + smoothing_length=1.414216 * fluid_particle_spacing, + time_integration_scheme=SymplecticPositionVerlet(), + boundary_density_calculator=ContinuityDensity(), + fluid_particle_spacing=fluid_particle_spacing, + tank_size=(4.0, 3.0), W=1.0, H=2.0, + spacing_ratio=1, boundary_layers=1, + tspan=(0.0, 1.0), cfl=0.2, + neighborhood_search=neighborhood_search, + viscosity_wall=viscosity_fluid, + # This is the same saving frequency as in DualSPHysics for easier comparison + saving_callback=SolutionSavingCallback(dt=0.01), + # extra_callback=SortingCallback(interval=1), + density_diffusion=nothing, # TODO only for benchmark + # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: + dt=1e-5, stepsize_callback=nothing, #saving_callback=nothing, + parallelization_backend=PolyesterBackend()) diff --git a/examples/fluid/dam_break_3d.jl b/examples/fluid/dam_break_3d.jl index a073c001ec..2fdd752732 100644 --- a/examples/fluid/dam_break_3d.jl +++ b/examples/fluid/dam_break_3d.jl @@ -66,12 +66,14 @@ boundary_system = WallBoundarySystem(tank.boundary, boundary_model) # ========================================================================================== # ==== Simulation semi = Semidiscretization(fluid_system, boundary_system, + neighborhood_search=GridNeighborhoodSearch{3}(), parallelization_backend=PolyesterBackend()) ode = semidiscretize(semi, tspan) info_callback = InfoCallback(interval=100) saving_callback = SolutionSavingCallback(dt=0.1, prefix="") -callbacks = CallbackSet(info_callback, saving_callback) +extra_callback = nothing +callbacks = CallbackSet(info_callback, saving_callback, extra_callback) # Use a Runge-Kutta method with automatic (error based) time step size control. # Limiting of the maximum stepsize is necessary to prevent crashing. @@ -80,8 +82,10 @@ callbacks = CallbackSet(info_callback, saving_callback) # Sometimes, the method fails to do so because forces become extremely large when # fluid particles are very close to boundary particles, and the time integration method # interprets this as an instability. -sol = solve(ode, RDPK3SpFSAL35(), +time_integration_scheme = RDPK3SpFSAL35() +sol = solve(ode, time_integration_scheme, abstol=1e-5, # Default abstol is 1e-6 (may need to be tuned to prevent boundary penetration) reltol=1e-4, # Default reltol is 1e-3 (may need to be tuned to prevent boundary penetration) dtmax=1e-2, # Limit stepsize to prevent crashing + dt=1.0, save_everystep=false, callback=callbacks); diff --git a/examples/fluid/dam_break_3d_dualsphysics.jl b/examples/fluid/dam_break_3d_dualsphysics.jl new file mode 100644 index 0000000000..c2aeceda81 --- /dev/null +++ b/examples/fluid/dam_break_3d_dualsphysics.jl @@ -0,0 +1,69 @@ +# Modify the 01_DamBreak example of DualSPHysics like this: +# +# +# +# +# When comparing with high resolution, change the resolution here: +# +# With this resolution, use: +# + +using TrixiParticles, TrixiParticles.PointNeighbors, OrdinaryDiffEq + +fluid_particle_spacing = 0.0085 + +smoothing_length = 1.7320508 * fluid_particle_spacing +tank_size = (1.6, 0.665, 0.4) +initial_fluid_size = (0.4, 0.665, 0.3) +acceleration = (0.0, 0.0, -9.81) +spacing_ratio = 1 +boundary_layers = 1 +fluid_density = 1000.0 +sound_speed = 20 * sqrt(9.81 * tank_size[2]) +state_equation = StateEquationCole(; sound_speed, reference_density=fluid_density, + exponent=7) + +tank = RectangularTank(fluid_particle_spacing, initial_fluid_size, tank_size, fluid_density, + n_layers=boundary_layers, spacing_ratio=spacing_ratio, + coordinates_eltype=Float64) + +tank.fluid.coordinates .+= 0.005 +tank.boundary.coordinates .+= 0.005 + +# Define a GPU-compatible neighborhood search +min_corner = minimum(tank.boundary.coordinates, dims=2) +max_corner = maximum(tank.boundary.coordinates, dims=2) +cell_list = FullGridCellList(; min_corner, max_corner)#, backend=PointNeighbors.CompactVectorOfVectors{Int32}) +neighborhood_search = GridNeighborhoodSearch{3}(; cell_list, + update_strategy=ParallelUpdate()) + +search_radius = TrixiParticles.compact_support(WendlandC2Kernel{3}(), smoothing_length) +nhs = PointNeighbors.copy_neighborhood_search(neighborhood_search, search_radius, 0) +cell_coords(x) = PointNeighbors.cell_coords(x, nhs) +cell_index(x) = PointNeighbors. cell_index(nhs.cell_list, cell_coords(x)) +coords = reinterpret(reshape, SVector{ndims(nhs), eltype(tank.fluid.coordinates)}, tank.fluid.coordinates) +sort!(coords, by=cell_index) + +# Run the dam break simulation with this neighborhood search +trixi_include(@__MODULE__, + joinpath(examples_dir(), "fluid", "dam_break_3d.jl"), + tank=tank, + smoothing_length=1.7320508 * fluid_particle_spacing, + time_integration_scheme=SymplecticPositionVerlet(), + boundary_density_calculator=ContinuityDensity(), + state_equation=state_equation, + fluid_particle_spacing=fluid_particle_spacing, + tank_size=tank_size, initial_fluid_size=initial_fluid_size, + acceleration=acceleration, + alpha=0.1, + spacing_ratio=spacing_ratio, boundary_layers=boundary_layers, + tspan=(0.0, 1.0), #cfl=0.2, + neighborhood_search=neighborhood_search, + # viscosity_wall=viscosity_fluid, TODO + # This is the same saving frequency as in DualSPHysics for easier comparison + # saving_callback=SolutionSavingCallback(dt=0.01), + # extra_callback=SortingCallback(interval=1), + density_diffusion=nothing, # TODO only for benchmark + # For benchmarks, use spacing 0.002, fix time steps, and disable VTK saving: + dt=8e-5, #stepsize_callback=nothing, saving_callback=nothing, + parallelization_backend=PolyesterBackend()) diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl index fa87c296ca..c6b07787a4 100644 --- a/src/TrixiParticles.jl +++ b/src/TrixiParticles.jl @@ -26,6 +26,7 @@ using Random: seed! using SciMLBase: SciMLBase, CallbackSet, DiscreteCallback, DynamicalODEProblem, u_modified!, get_tmp_cache, set_proposed_dt!, ODESolution, ODEProblem, terminate!, add_tstop! +using SIMD: vloada, Vec @reexport using StaticArrays: SVector using StaticArrays: @SMatrix, SMatrix, setindex using Statistics: Statistics @@ -73,7 +74,7 @@ export WeaklyCompressibleSPHSystem, EntropicallyDampedSPHSystem, TotalLagrangian export BoundaryZone, InFlow, OutFlow, BidirectionalFlow export InfoCallback, SolutionSavingCallback, DensityReinitializationCallback, PostprocessCallback, StepsizeCallback, UpdateCallback, SteadyStateReachedCallback, - SplitIntegrationCallback + SplitIntegrationCallback, SortingCallback export ContinuityDensity, SummationDensity export PenaltyForceGanzenmueller, TransportVelocityAdami, ParticleShiftingTechnique, ParticleShiftingTechniqueSun2017, ConsistentShiftingSun2019, diff --git a/src/callbacks/callbacks.jl b/src/callbacks/callbacks.jl index 975c760a8e..eba012c4af 100644 --- a/src/callbacks/callbacks.jl +++ b/src/callbacks/callbacks.jl @@ -76,3 +76,4 @@ include("stepsize.jl") include("update.jl") include("steady_state_reached.jl") include("split_integration.jl") +include("sorting.jl") diff --git a/src/callbacks/sorting.jl b/src/callbacks/sorting.jl new file mode 100644 index 0000000000..23b5a46ebc --- /dev/null +++ b/src/callbacks/sorting.jl @@ -0,0 +1,185 @@ +# These are the systems that require sorting. +# TODO: The `DEMSystem` should be added here in the future. +# Boundary particles always stay fixed relative to each other, TLSPH computes in the initial configuration. +const RequiresSortingSystem = AbstractFluidSystem + +struct SortingCallback{I} + interval::I + last_t::Float64 +end + +""" + SortingCallback(; interval=-1, dt=0.0, initial_sort=true) + +Reorders particles according to neighborhood-search cells for performance optimization. + +When particles become very unordered throughout a long-running simulation, performance +decreases due to increased cache-misses (on CPUs) and lack of block structure (on GPUs). +On GPUs, a fully shuffled particle ordering causes a 3-4x slowdown compared to a sorted configuration. +On CPUs, there is no difference for small problems (<16k particles), but the performance penalty +grows linearly with the problem size up to 10x slowdown for very large problems (65M particles). +See [#1044](https://github.com/trixi-framework/TrixiParticles.jl/pull/1044) for more details. + +# Keywords +- `interval`: Sort particles at the end of every `interval` time steps. +- `dt`: Sort particles in regular intervals of `dt` in terms of integration time. + This callback does not add extra time steps / `tstops`; instead, reinitialization is + triggered at the first solver step after each `dt` interval has elapsed. +- `initial_sort=true`: When enabled, particles are sorted at the beginning of the simulation. + When the initial configuration is a perfect grid of particles, + sorting at the beginning is not necessary and might even slightly + slow down the first time steps, since a perfect grid is even better + than sorting by NHS cell index. +""" +function SortingCallback(; interval::Integer=-1, dt=0.0) + if dt > 0 && interval !== -1 + throw(ArgumentError("Setting both interval and dt is not supported!")) + end + + # Sort in intervals in terms of simulation time + if dt > 0 + interval = Float64(dt) + + # Sort every time step (default) + elseif interval == -1 + interval = 1 + end + + sorting_callback! = SortingCallback(interval, last_t) + + # The first one is the `condition`, the second the `affect!` + return DiscreteCallback(sorting_callback!, sorting_callback!, + initialize=(initial_sort!), save_positions=(false, false)) +end + +# `initialize` +function initial_sort!(cb, u, t, integrator) + # The `SortingCallback` is either `cb.affect!` (with `DiscreteCallback`) + # or `cb.affect!.affect!` (with `PeriodicCallback`). + # Let recursive dispatch handle this. + + initial_sort!(cb.affect!, u, t, integrator) +end + +function initial_sort!(cb::SortingCallback, u, t, integrator) + return cb(integrator) +end + +# `condition` with `interval` +function (sorting_callback!::SortingCallback{Int})(u, t, integrator) + (; interval) = sorting_callback! + + return condition_integrator_interval(integrator, interval) +end + +# condition with `dt` +function (sorting_callback!::SortingCallback)(u, t, integrator) + (; interval, last_t) = sorting_callback! + + return (t - last_t) > interval +end + +# `affect!` +function (sorting_callback!::SortingCallback)(integrator) + semi = integrator.p + v_ode, u_ode = integrator.u.x + + @trixi_timeit timer() "sorting callback" begin + foreach_system(semi) do system + v = wrap_v(v_ode, system, semi) + u = wrap_u(u_ode, system, semi) + + sort_particles!(system, v, u, semi) + end + end + + # Tell OrdinaryDiffEq that `integrator.u` has been modified + u_modified!(integrator, true) + + return integrator +end + +sort_particles!(system, v, u, semi) = system + +function sort_particles!(system::RequiresSortingSystem, v, u, semi) + nhs = get_neighborhood_search(system, semi) + + if !(nhs isa GridNeighborhoodSearch) + throw(ArgumentError("`SortingCallback` can only be used with a `GridNeighborhoodSearch`")) + end + + sort_particles!(system, v, u, nhs, nhs.cell_list, semi) +end + +# TODO: Sort also masses and particle spacings for variable smoothing lengths. +function sort_particles!(system::RequiresSortingSystem, v, u, nhs, + cell_list::FullGridCellList, semi) + cell_coords = allocate(semi.parallelization_backend, SVector{ndims(system), Int}, + nparticles(system)) + @threaded semi for particle in each_active_particle(system) + point_coords = current_coords(u, system, particle) + cell_coords[particle] = PointNeighbors.cell_coords(point_coords, nhs) + end + + # TODO `sortperm` works on CUDA but not (yet) on Metal + perm = sortperm(transfer2cpu(cell_coords)) + + sort_system!(system, v, u, perm, system.buffer) + + return system +end + +function sort_system!(system, v, u, perm, buffer::Nothing) + system_coords = current_coordinates(u, system) + system_velocity = current_velocity(v, system) + system_density = current_density(v, system) + system_pressure = current_pressure(v, system) + + system_coords .= system_coords[:, perm] + system_velocity .= system_velocity[:, perm] + system_pressure .= system_pressure[perm] + system_density .= system_density[perm] + + return system +end + +function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) + @nospecialize cb # reduce precompilation time + print(io, "SortingCallback(interval=", cb.affect!.interval, ")") +end + +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, <:SortingCallback}) + @nospecialize cb # reduce precompilation time + print(io, "SortingCallback(dt=", cb.affect!.affect!.interval, ")") +end + +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + sorting_cb = cb.affect! + setup = [ + "interval" => sorting_cb.interval + ] + summary_box(io, "SortingCallback", setup) + end +end + +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SortingCallback{Int}}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + sorting_cb = cb.affect!.affect! + setup = [ + "dt" => sorting_cb.interval + ] + summary_box(io, "SortingCallback", setup) + end +end diff --git a/src/general/buffer.jl b/src/general/buffer.jl index 1e2c196799..6b0d0f7f61 100644 --- a/src/general/buffer.jl +++ b/src/general/buffer.jl @@ -80,3 +80,35 @@ end return system end + +function sort_system!(system, v, u, perm, buffer::SystemBuffer) + (; active_particle) = buffer + + # Note that the following contain also inactive particles + system_coords = current_coordinates(u, system) + system_velocity = current_velocity(v, system) + system_density = current_density(v, system) + system_pressure = current_pressure(v, system) + + # First permutation: sort by desired `perm` + active_particle_sorted = active_particle[perm] + + # Second permutation: move inactive particles to the end (true first, false last) + perm2 = sortperm(transfer2cpu(active_particle_sorted), rev=true) + + # Combined permutation + combined_perm = perm[perm2] + + # Apply to all data + active_particle .= active_particle_sorted[perm2] + system_coords .= system_coords[:, combined_perm] + system_velocity .= system_velocity[:, combined_perm] + system_pressure .= system_pressure[combined_perm] + system_density .= system_density[combined_perm] + + # Update buffer + buffer.active_particle_count[] = count(active_particle) + buffer.eachparticle[1:buffer.active_particle_count[]] .= 1:buffer.active_particle_count[] + + return buffer +end diff --git a/src/general/smoothing_kernels.jl b/src/general/smoothing_kernels.jl index 4a97564648..87622065e9 100644 --- a/src/general/smoothing_kernels.jl +++ b/src/general/smoothing_kernels.jl @@ -11,7 +11,8 @@ abstract type AbstractSmoothingKernel{NDIMS} end # Also note that `sqrt(eps(h^2)) != eps(h)`. distance^2 < eps(h^2) && return zero(pos_diff) - return kernel_deriv(kernel, distance, h) / distance * pos_diff + distance_inv = Base.FastMath.div_fast(1, distance) + return kernel_deriv(kernel, distance, h) * distance_inv * pos_diff end @inline function corrected_kernel_grad(kernel, pos_diff, distance, h, correction, system, diff --git a/src/schemes/fluid/fluid.jl b/src/schemes/fluid/fluid.jl index 8725190506..20fd08a4dc 100644 --- a/src/schemes/fluid/fluid.jl +++ b/src/schemes/fluid/fluid.jl @@ -137,7 +137,7 @@ end end # This formulation was chosen to be consistent with the used pressure_acceleration formulations -@propagate_inbounds function continuity_equation!(dv, density_calculator::ContinuityDensity, +@inline function continuity_equation!(dv, density_calculator::ContinuityDensity, particle_system::AbstractFluidSystem, neighbor_system, v_particle_system, v_neighbor_system, @@ -146,21 +146,44 @@ end vdiff = current_velocity(v_particle_system, particle_system, particle) - current_velocity(v_neighbor_system, neighbor_system, neighbor) - vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), - particle_system, neighbor_system, - particle, neighbor, rho_a, rho_b) + # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), + # particle_system, neighbor_system, + # particle, neighbor, rho_a, rho_b) dv[end, particle] += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) # Artificial density diffusion should only be applied to systems representing a fluid # with the same physical properties i.e. density and viscosity. # TODO: shouldn't be applied to particles on the interface (depends on PR #539) - if particle_system === neighbor_system - density_diffusion!(dv, density_diffusion(particle_system), - v_particle_system, particle, neighbor, - pos_diff, distance, m_b, rho_a, rho_b, particle_system, - grad_kernel) - end + # if particle_system === neighbor_system + # density_diffusion!(dv, density_diffusion(particle_system), + # v_particle_system, particle, neighbor, + # pos_diff, distance, m_b, rho_a, rho_b, particle_system, + # grad_kernel) + # end +end + +@propagate_inbounds function continuity_equation!(drho_particle, density_calculator::ContinuityDensity, + particle_system::AbstractFluidSystem, + neighbor_system, + particle, neighbor, pos_diff, distance, + vdiff, m_b, rho_a, rho_b, grad_kernel) + + # vdiff += continuity_equation_shifting_term(shifting_technique(particle_system), + # particle_system, neighbor_system, + # particle, neighbor, rho_a, rho_b) + + drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + # Artificial density diffusion should only be applied to systems representing a fluid + # with the same physical properties i.e. density and viscosity. + # TODO: shouldn't be applied to particles on the interface (depends on PR #539) + # if particle_system === neighbor_system + # density_diffusion!(dv, density_diffusion(particle_system), + # v_particle_system, particle, neighbor, + # pos_diff, distance, m_b, rho_a, rho_b, particle_system, + # grad_kernel) + # end end function calculate_dt(v_ode, u_ode, cfl_number, system::AbstractFluidSystem, semi) diff --git a/src/schemes/fluid/pressure_acceleration.jl b/src/schemes/fluid/pressure_acceleration.jl index 7ffa5850b3..6dbb6e2c64 100644 --- a/src/schemes/fluid/pressure_acceleration.jl +++ b/src/schemes/fluid/pressure_acceleration.jl @@ -26,7 +26,8 @@ end # asymmetric version. @inline function pressure_acceleration_continuity_density(m_a, m_b, rho_a, rho_b, p_a, p_b, W_a) - return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a + # return -m_b * (p_a + p_b) / (rho_a * rho_b) * W_a + return -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * W_a end # Same as above, but not assuming symmetry of the kernel gradient. To be used with diff --git a/src/schemes/fluid/viscosity.jl b/src/schemes/fluid/viscosity.jl index 2a6c51ba71..b178749f35 100644 --- a/src/schemes/fluid/viscosity.jl +++ b/src/schemes/fluid/viscosity.jl @@ -1,30 +1,30 @@ # Unpack the neighboring systems viscosity to dispatch on the viscosity type. # This function is only necessary to allow `nothing` as viscosity. # Otherwise, we could just apply the viscosity as a function directly. -@propagate_inbounds function dv_viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@propagate_inbounds function dv_viscosity(dv_particle, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) viscosity = viscosity_model(particle_system, neighbor_system) - return dv_viscosity(viscosity, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, + return dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) end -@propagate_inbounds function dv_viscosity(viscosity, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@propagate_inbounds function dv_viscosity(dv_particle, viscosity, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) - return viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, + return viscosity(dv_particle, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) end -@inline function dv_viscosity(viscosity::Nothing, particle_system, neighbor_system, - v_particle_system, v_neighbor_system, +@inline function dv_viscosity(dv_particle, viscosity::Nothing, particle_system, neighbor_system, + vdiff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) return zero(pos_diff) @@ -82,41 +82,42 @@ function kinematic_viscosity(system, viscosity::ViscosityMorris, smoothing_lengt end @propagate_inbounds function (viscosity::Union{ArtificialViscosityMonaghan, - ViscosityMorris})(particle_system, + ViscosityMorris})(dv_particle, particle_system, neighbor_system, - v_particle_system, - v_neighbor_system, + v_diff, particle, neighbor, pos_diff, distance, sound_speed, m_a, m_b, rho_a, rho_b, grad_kernel) - rho_mean = (rho_a + rho_b) / 2 + rho_mean = 0#(rho_a + rho_b) / 2 - v_a = viscous_velocity(v_particle_system, particle_system, particle) - v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor) - v_diff = v_a - v_b + # v_a = viscous_velocity(v_particle_system, particle_system, particle) + # v_b = viscous_velocity(v_neighbor_system, neighbor_system, neighbor) + # v_diff = v_a - v_b - smoothing_length_particle = smoothing_length(particle_system, particle) - smoothing_length_neighbor = smoothing_length(particle_system, neighbor) - smoothing_length_average = (smoothing_length_particle + smoothing_length_neighbor) / 2 + # smoothing_length_particle = smoothing_length(particle_system, particle) + # smoothing_length_neighbor = smoothing_length(particle_system, neighbor) + smoothing_length_average = smoothing_length(particle_system, particle)#(smoothing_length_particle + smoothing_length_neighbor) / 2 - nu_a = kinematic_viscosity(particle_system, - viscosity_model(neighbor_system, particle_system), - smoothing_length_particle, sound_speed) - nu_b = kinematic_viscosity(neighbor_system, - viscosity_model(particle_system, neighbor_system), - smoothing_length_neighbor, sound_speed) + # nu_a = kinematic_viscosity(particle_system, + # viscosity_model(neighbor_system, particle_system), + # smoothing_length_particle, sound_speed) + # nu_b = kinematic_viscosity(neighbor_system, + # viscosity_model(particle_system, neighbor_system), + # smoothing_length_neighbor, sound_speed) + nu_a = 0 + nu_b = 0 - pi_ab = viscosity(sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, - smoothing_length_average, grad_kernel, nu_a, nu_b) + pi_ab = viscosity(dv_particle, sound_speed, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, + smoothing_length_average, grad_kernel, nu_a, nu_b, m_b) - return m_b * pi_ab + # return m_b * pi_ab end -@inline function (viscosity::ArtificialViscosityMonaghan)(c, v_diff, pos_diff, distance, +@inline function (viscosity::ArtificialViscosityMonaghan)(dv_particle, c, v_diff, pos_diff, distance, rho_mean, rho_a, rho_b, h, - grad_kernel, nu_a, nu_b) + grad_kernel, nu_a, nu_b, m_b) (; alpha, beta, epsilon) = viscosity # v_ab ⋅ r_ab @@ -127,11 +128,12 @@ end # approaching particles and turn it off for receding particles. In this way, the # viscosity is used for shocks and not rarefactions." if vr < 0 - mu = h * vr / (distance^2 + epsilon * h^2) - return (alpha * c * mu + beta * mu^2) / rho_mean * grad_kernel + mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon * h^2) + rho_mean = (rho_a + rho_b) / 2 + dv_particle[] += m_b * Base.FastMath.div_fast(alpha * c * mu + beta * mu^2, rho_mean) * grad_kernel end - return zero(v_diff) + return nothing#zero(v_diff) end @inline function (viscosity::ViscosityMorris)(c, v_diff, pos_diff, distance, rho_mean, diff --git a/src/schemes/fluid/weakly_compressible_sph/rhs.jl b/src/schemes/fluid/weakly_compressible_sph/rhs.jl index acb0f06894..086816e767 100644 --- a/src/schemes/fluid/weakly_compressible_sph/rhs.jl +++ b/src/schemes/fluid/weakly_compressible_sph/rhs.jl @@ -2,7 +2,7 @@ # in `neighbor_system` and updates `dv` accordingly. # It takes into account pressure forces, viscosity, and for `ContinuityDensity` updates # the density using the continuity equation. -function interact!(dv, v_particle_system, u_particle_system, +function interact_old!(dv, v_particle_system, u_particle_system, v_neighbor_system, u_neighbor_system, particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi) (; density_calculator, correction) = particle_system @@ -63,12 +63,13 @@ function interact!(dv, v_particle_system, u_particle_system, distance, grad_kernel, correction) # Propagate `@inbounds` to the viscosity function, which accesses particle data - dv_viscosity_ = viscosity_correction * - @inbounds dv_viscosity(particle_system, neighbor_system, - v_particle_system, v_neighbor_system, - particle, neighbor, pos_diff, distance, - sound_speed, m_a, m_b, rho_a, rho_b, - grad_kernel) + dv_viscosity_ = zero(pos_diff) + # viscosity_correction * + # @inbounds dv_viscosity(particle_system, neighbor_system, + # v_particle_system, v_neighbor_system, + # particle, neighbor, pos_diff, distance, + # sound_speed, m_a, m_b, rho_a, rho_b, + # grad_kernel) # Extra terms in the momentum equation when using a shifting technique dv_tvf = @inbounds dv_shifting(shifting_technique(particle_system), @@ -112,6 +113,260 @@ function interact!(dv, v_particle_system, u_particle_system, return dv end +function interact!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem, neighbor_system, semi) + interact_old!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system, neighbor_system, semi) +end + +function interact2!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem, + neighbor_system::WeaklyCompressibleSPHSystem, semi) + dv_ = view(dv, 1:ndims(particle_system), :) + drho = view(dv, ndims(particle_system) + 1, :) + interact!(dv_, drho, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system, neighbor_system, semi) +end + +function interact!(dv, drho, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system::WeaklyCompressibleSPHSystem, semi) where NDIMS + system_coords = current_coordinates(u_particle_system, particle_system) + neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system) + # system_coords = vcat(system_coords, zero(drho)') + # neighbor_system_coords = vcat(neighbor_system_coords, zero(drho)') + + neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi) + cell_list = neighborhood_search.cell_list + search_radius2 = PointNeighbors.search_radius(neighborhood_search)^2 + + backend = semi.parallelization_backend + ndrange = n_integrated_particles(particle_system) + mykernel(backend)(dv, drho, system_coords, neighbor_system_coords, neighborhood_search, + cell_list, search_radius2, v_particle_system, v_neighbor_system, + particle_system, neighbor_system; ndrange=ndrange) + + KernelAbstractions.synchronize(backend) + + return dv +end + +@kernel function mykernel(dv, drho, + system_coords, neighbor_system_coords, + nhs, cell_list, search_radius2, + v_particle_system, v_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system::WeaklyCompressibleSPHSystem) where NDIMS + particle = @index(Global) + + sound_speed = particle_system.state_equation.sound_speed + # VT_coords = Vec{4, eltype(system_coords)} + # point_coords_ = vloada(VT_coords, pointer(system_coords, 4*(particle-1)+1)) + # a, b, c, d = Tuple(point_coords_) + # point_coords = SVector(a, b, c) + point_coords = @inbounds extract_svector(system_coords, Val(NDIMS), particle) + p_a = @inbounds particle_system.pressure[particle] + + VT = Vec{4, eltype(v_particle_system)} + vrho_a = vloada(VT, pointer(v_particle_system, 4*(particle-1)+1)) + a, b, c, d = Tuple(vrho_a) + v_a = SVector(a, b, c) + rho_a = d + # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle) + # rho_a = @inbounds v_particle_system[end, particle] + + dv_particle = zero(v_a) + drho_particle = zero(rho_a) + + cell = PointNeighbors.cell_coords(point_coords, nhs) + + # cell_blocks = ((cell[1] - 1, cell[2] - 1), (cell[1] - 1, cell[2]), (cell[1] - 1, cell[2] + 1)) + cell_blocks = CartesianIndices(ntuple(i -> (cell[i + 1] - 1):(cell[i + 1] + 1), Val(NDIMS - 1))) + for cell_block in cell_blocks + cell_block_start = (cell[1] - 1, Tuple(cell_block)...) + cell_index = @inbounds PointNeighbors.cell_index(cell_list, cell_block_start) + start = @inbounds cell_list.cells.first_bin_index[cell_index] + stop = @inbounds cell_list.cells.first_bin_index[cell_index + 3] - 1 + + for neighbor in start:stop + + # for neighbor_cell_ in PointNeighbors.neighboring_cells(cell, nhs) + # neighbor_cell = Tuple(neighbor_cell_) + # neighbors = @inbounds PointNeighbors.points_in_cell(neighbor_cell, nhs) + + # for neighbor_ in eachindex(neighbors) + # neighbor = @inbounds neighbors[neighbor_] + + # neighbor_coords_ = vloada(VT_coords, pointer(neighbor_system_coords, 4*(neighbor-1)+1)) + # a, b, c, d = Tuple(neighbor_coords_) + # neighbor_coords = SVector(a, b, c) + neighbor_coords = @inbounds extract_svector(neighbor_system_coords, + Val(NDIMS), neighbor) + + # pos_diff = convert.(eltype(particle_system), point_coords - neighbor_coords) + pos_diff = point_coords - neighbor_coords + distance2 = dot(pos_diff, pos_diff) + + if eps(search_radius2) <= distance2 <= search_radius2 + distance = sqrt(distance2) + + m_b = @inbounds neighbor_system.mass[neighbor] + p_b = @inbounds neighbor_system.pressure[neighbor] + + vrho_b = vloada(VT, pointer(v_neighbor_system, 4*(neighbor-1)+1)) + a, b, c, d = Tuple(vrho_b) + v_b = SVector(a, b, c) + rho_b = d + + # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor) + # rho_b = @inbounds v_neighbor_system[end, neighbor] + + grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + + # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel + dv_particle += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel + + vdiff = v_a - v_b + # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) + drho_particle += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + h = particle_system.cache.smoothing_length + alpha = particle_system.viscosity.alpha + epsilon = particle_system.viscosity.epsilon + + vr = dot(vdiff, pos_diff) + if vr < 0 + # mu = h * vr / (distance2 + epsilon) + mu = Base.FastMath.div_fast(h * vr, distance2 + epsilon) + rho_mean = (rho_a + rho_b) / 2 + # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel + pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel + dv_particle += m_b * pi_ab + end + end + end + end + + for i in eachindex(dv_particle) + @inbounds dv[i, particle] += dv_particle[i] + # Debug example + # debug_array[i, particle] += dv_pressure[i] + end + @inbounds drho[particle] += drho_particle +end + +@inline function kernel_grad_ds(system, pos_diff, r) + h = system.cache.smoothing_length + normalization_factor = system.cache.normalization_factor + + # q = r / h + q = Base.FastMath.div_fast(r, h) + wqq1 = (1 - q / 2) + return normalization_factor * wqq1 * wqq1 * wqq1 * pos_diff +end + +function interact_reassembled!(dv, v_particle_system, u_particle_system, + v_neighbor_system, u_neighbor_system, + particle_system::WeaklyCompressibleSPHSystem{NDIMS}, + neighbor_system, semi) where NDIMS + (; density_calculator, correction) = particle_system + + system_coords = current_coordinates(u_particle_system, particle_system) + neighbor_system_coords = current_coordinates(u_neighbor_system, neighbor_system) + + neighborhood_search = get_neighborhood_search(particle_system, neighbor_system, semi) + sound_speed = particle_system.state_equation.sound_speed + + @threaded semi for particle in each_integrated_particle(particle_system) + p_a = @inbounds current_pressure(v_particle_system, particle_system, particle) + m_a = @inbounds hydrodynamic_mass(particle_system, particle) + + # v_a = @inbounds extract_svector(v_particle_system, Val(NDIMS), particle) + # rho_a = @inbounds v_particle_system[end, particle] + v_a, rho_a = @inbounds velocity_and_density(v_particle_system, particle_system, particle) + + dv_particle = Ref(zero(v_a)) + drho_particle = Ref(zero(rho_a)) + + @inbounds PointNeighbors.foreach_neighbor(system_coords, neighbor_system_coords, + neighborhood_search, particle) do _, neighbor, pos_diff, distance + distance < eps() && return + + m_b = @inbounds hydrodynamic_mass(neighbor_system, neighbor) + p_b = @inbounds current_pressure(v_neighbor_system, neighbor_system, neighbor) + + # v_b = @inbounds extract_svector(v_neighbor_system, Val(NDIMS), neighbor) + # rho_b = @inbounds v_neighbor_system[end, neighbor] + v_b, rho_b = @inbounds velocity_and_density(v_neighbor_system, neighbor_system, neighbor) + + grad_kernel = kernel_grad_ds(particle_system, pos_diff, distance) + # grad_kernel = smoothing_kernel_grad(particle_system, pos_diff, distance, particle) + + # dv_particle += -m_b * (p_a + p_b) / (rho_a * rho_b) * grad_kernel + # dv_particle[] += -m_b * Base.FastMath.div_fast(p_a + p_b, rho_a * rho_b) * grad_kernel + + dv_particle[] += pressure_acceleration(particle_system, neighbor_system, + particle, neighbor, + m_a, m_b, p_a, p_b, rho_a, rho_b, pos_diff, + distance, grad_kernel, correction) + + vdiff = v_a - v_b + # drho_particle += rho_a / rho_b * m_b * dot(vdiff, grad_kernel) + # drho_particle[] += Base.FastMath.div_fast(rho_a, rho_b) * m_b * dot(vdiff, grad_kernel) + + @inbounds dv_viscosity(dv_particle, particle_system, neighbor_system, + vdiff, + particle, neighbor, pos_diff, distance, + sound_speed, m_a, m_b, rho_a, rho_b, + grad_kernel) + + @inbounds continuity_equation!(drho_particle, density_calculator, particle_system, + neighbor_system, particle, neighbor, + pos_diff, distance, vdiff, m_b, rho_a, rho_b, grad_kernel) + + # h = particle_system.cache.smoothing_length + # alpha = particle_system.viscosity.alpha + # epsilon = particle_system.viscosity.epsilon + + # vr = dot(vdiff, pos_diff) + # if vr < 0 + # # mu = h * vr / (distance2 + epsilon) + # mu = Base.FastMath.div_fast(h * vr, distance^2 + epsilon) + # rho_mean = (rho_a + rho_b) / 2 + # # @fastmath pi_ab = (alpha * sound_speed * mu) / rho_mean * grad_kernel + # pi_ab = Base.FastMath.div_fast(alpha * sound_speed * mu, rho_mean) * grad_kernel + # dv_particle[] += m_b * pi_ab + # end + end + + for i in eachindex(dv_particle[]) + @inbounds dv[i, particle] += dv_particle[][i] + end + @inbounds dv[end, particle] += drho_particle[] + end +end + +@propagate_inbounds function velocity_and_density(v, system, particle) + v_particle = current_velocity(v, system, particle) + rho_particle = current_density(v, system, particle) + + return v_particle, rho_particle +end + +@inline function velocity_and_density(v, ::WeaklyCompressibleSPHSystem{3}, particle) + vrho_a = vloada(Vec{4, eltype(v)}, pointer(v, 4 * (particle - 1) + 1)) + a, b, c, d = Tuple(vrho_a) + v_particle = SVector(a, b, c) + rho_particle = d + + return v_particle, rho_particle +end + @propagate_inbounds function particle_neighbor_pressure(v_particle_system, v_neighbor_system, particle_system, neighbor_system, diff --git a/src/schemes/fluid/weakly_compressible_sph/system.jl b/src/schemes/fluid/weakly_compressible_sph/system.jl index f84849f096..1f0b32d1f0 100644 --- a/src/schemes/fluid/weakly_compressible_sph/system.jl +++ b/src/schemes/fluid/weakly_compressible_sph/system.jl @@ -154,7 +154,8 @@ function WeaklyCompressibleSPHSystem(initial_condition, density_calculator, stat smoothing_length)..., create_cache_shifting(initial_condition, shifting_technique)..., # Per-system color tag for colorfield surface-normal logic and VTK output. - color=Int(color_value)) + color=Int(color_value), + normalization_factor = Float32(-2.7852 / smoothing_length^4)) # If the `reference_density_spacing` is set calculate the `ideal_neighbor_count` if reference_particle_spacing > 0