-
Notifications
You must be signed in to change notification settings - Fork 19
Improve GPU performance of TLSPH #1084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
deb90dd
a11ce1f
beee2f7
09e04d3
af05303
4b05263
0aca765
2435602
3e173c3
37f388c
7a01935
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -413,62 +413,69 @@ end | |
|
|
||
| function drift!(du_ode, v_ode, u_ode, semi, t) | ||
| @trixi_timeit timer() "drift!" begin | ||
| @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du_ode) | ||
|
|
||
| @trixi_timeit timer() "velocity" begin | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you removing the timers?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because there is only one thing happening now in `drift!`. |
||
| # Set velocity and add acceleration for each system | ||
| foreach_system(semi) do system | ||
| du = wrap_u(du_ode, system, semi) | ||
| v = wrap_v(v_ode, system, semi) | ||
| u = wrap_u(u_ode, system, semi) | ||
|
|
||
| integrate_tlsph = semi.integrate_tlsph[] | ||
| @threaded semi for particle in each_integrated_particle(system) | ||
| # This can be dispatched per system | ||
| add_velocity!(du, v, u, particle, system, integrate_tlsph, t) | ||
| end | ||
| end | ||
| foreach_system(semi) do system | ||
| du = wrap_u(du_ode, system, semi) | ||
| v = wrap_v(v_ode, system, semi) | ||
| u = wrap_u(u_ode, system, semi) | ||
|
|
||
| set_velocity!(du, v, u, system, semi, t) | ||
| end | ||
| end | ||
|
|
||
| return du_ode | ||
| end | ||
|
|
||
| @inline function add_velocity!(du, v, u, particle, system, integrate_tlsph, t) | ||
| add_velocity!(du, v, u, particle, system, t) | ||
| end | ||
|
|
||
| @inline function add_velocity!(du, v, u, particle, system::TotalLagrangianSPHSystem, | ||
| integrate_tlsph, t) | ||
| # Only add velocity for TLSPH systems if they are integrated | ||
| if integrate_tlsph | ||
| add_velocity!(du, v, u, particle, system, t) | ||
| end | ||
| # Generic fallback for all systems that don't define this function | ||
| function set_velocity!(du, v, u, system, semi, t) | ||
| set_velocity_default!(du, v, u, system, semi, t) | ||
| end | ||
|
|
||
| @inline function add_velocity!(du, v, u, particle, system, t) | ||
| # Generic fallback for all systems that don't define this function | ||
| for i in 1:ndims(system) | ||
| @inbounds du[i, particle] = v[i, particle] | ||
| # Only set velocity for TLSPH systems if they are integrated | ||
| function set_velocity!(du, v, u, system::TotalLagrangianSPHSystem, semi, t) | ||
| if semi.integrate_tlsph[] | ||
| set_velocity_default!(du, v, u, system, semi, t) | ||
| else | ||
| set_zero!(du) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have you checked whether the broadcasting
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've never seen this. Please create an issue if you see this again. |
||
| end | ||
|
|
||
| return du | ||
| end | ||
|
|
||
| # Solid wall boundary system doesn't integrate the particle positions | ||
| @inline add_velocity!(du, v, u, particle, system::WallBoundarySystem, t) = du | ||
| function set_velocity!(du, v, u, system::WallBoundarySystem, semi, t) | ||
| # Note that `du` is of length zero, so we don't have to set it to zero | ||
| return du | ||
| end | ||
|
|
||
| @inline function add_velocity!(du, v, u, particle, system::AbstractFluidSystem, t) | ||
| # This is zero unless a shifting technique is used | ||
| delta_v_ = delta_v(system, particle) | ||
| # Fluid systems integrate the particle positions and can have a shifting velocity | ||
| function set_velocity!(du, v, u, system::AbstractFluidSystem, semi, t) | ||
| @threaded semi for particle in each_integrated_particle(system) | ||
| delta_v_ = @inbounds delta_v(system, particle) | ||
|
|
||
| for i in 1:ndims(system) | ||
| @inbounds du[i, particle] = v[i, particle] + delta_v_[i] | ||
| for i in 1:ndims(system) | ||
| @inbounds du[i, particle] = v[i, particle] + delta_v_[i] | ||
| end | ||
| end | ||
efaulhaber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return du | ||
| end | ||
|
|
||
| function set_velocity_default!(du, v, u, system, semi, t) | ||
| @threaded semi for particle in each_integrated_particle(system) | ||
| for i in 1:ndims(system) | ||
| @inbounds du[i, particle] = v[i, particle] | ||
| end | ||
| end | ||
|
|
||
| return du | ||
| end | ||
|
|
||
| # For GPU arrays, this defaults to an optimized copy that is about 4x faster than the threaded version above | ||
| function set_velocity_default!(du::AbstractGPUArray, v, u, system, semi, t) | ||
| indices = CartesianIndices(du) | ||
| copyto!(du, indices, v, indices) | ||
| end | ||
|
|
||
| function kick!(dv_ode, v_ode, u_ode, semi, t) | ||
| @trixi_timeit timer() "kick!" begin | ||
| # Check that the `UpdateCallback` is used if required | ||
|
|
@@ -482,8 +489,7 @@ function kick!(dv_ode, v_ode, u_ode, semi, t) | |
| @trixi_timeit timer() "system interaction" system_interaction!(dv_ode, v_ode, u_ode, | ||
| semi) | ||
|
|
||
| @trixi_timeit timer() "source terms" add_source_terms!(dv_ode, v_ode, u_ode, | ||
| semi, t) | ||
| add_source_terms!(dv_ode, v_ode, u_ode, semi, t) | ||
| end | ||
|
|
||
| return dv_ode | ||
|
|
@@ -545,6 +551,7 @@ end | |
|
|
||
| # The `SplitIntegrationCallback` overwrites `semi_wrap` to use a different | ||
| # semidiscretization for wrapping arrays. | ||
| # `semi_wrap` is the small semidiscretization, `semi` is the large semidiscretization. | ||
| # TODO `semi` is not used yet, but will be used when the source terms API is modified | ||
| # to match the custom quantities API. | ||
| function add_source_terms!(dv_ode, v_ode, u_ode, semi, t; semi_wrap=semi) | ||
|
|
@@ -555,54 +562,63 @@ function add_source_terms!(dv_ode, v_ode, u_ode, semi, t; semi_wrap=semi) | |
|
|
||
| # `integrate_tlsph` is extracted from the `semi_wrap`, so that this function | ||
| # can be used in the `SplitIntegrationCallback` as well. | ||
| integrate_tlsph = semi_wrap.integrate_tlsph[] | ||
|
|
||
| @threaded semi for particle in each_integrated_particle(system) | ||
| # Dispatch by system type to exclude boundary systems | ||
| add_acceleration!(dv, particle, system, integrate_tlsph) | ||
| add_source_terms_inner!(dv, v, u, particle, system, source_terms(system), t, | ||
| integrate_tlsph) | ||
| end | ||
| # In this case, `semi_wrap` will be the small sub-integration semidiscretization. | ||
| add_source_terms!(dv, v, u, system, semi, t, semi_wrap.integrate_tlsph[]) | ||
| end | ||
|
|
||
| return dv_ode | ||
| end | ||
|
|
||
| @inline source_terms(system) = nothing | ||
| @inline source_terms(system::Union{AbstractFluidSystem, AbstractStructureSystem}) = system.source_terms | ||
|
|
||
| @inline function add_acceleration!(dv, particle, system, integrate_tlsph) | ||
| add_acceleration!(dv, particle, system) | ||
| # This is a no-op by default but can be dispatched by system type | ||
| function add_source_terms!(dv, v, u, system, semi, t, integrate_tlsph) | ||
efaulhaber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return dv | ||
| end | ||
|
|
||
| @inline function add_acceleration!(dv, particle, system::TotalLagrangianSPHSystem, | ||
| integrate_tlsph) | ||
| integrate_tlsph && add_acceleration!(dv, particle, system) | ||
| function add_source_terms!(dv, v, u, | ||
| system::Union{AbstractFluidSystem, AbstractStructureSystem}, | ||
| semi, t, integrate_tlsph) | ||
| add_source_terms_inner!(dv, v, u, system, semi, t) | ||
| end | ||
efaulhaber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| @inline add_acceleration!(dv, particle, system) = dv | ||
| function add_source_terms!(dv, v, u, system::TotalLagrangianSPHSystem, | ||
| semi, t, integrate_tlsph) | ||
| if integrate_tlsph | ||
| add_source_terms_inner!(dv, v, u, system, semi, t) | ||
| end | ||
|
|
||
| @propagate_inbounds function add_acceleration!(dv, particle, | ||
| system::Union{AbstractFluidSystem, | ||
| AbstractStructureSystem}) | ||
| (; acceleration) = system | ||
| return dv | ||
| end | ||
|
|
||
| for i in 1:ndims(system) | ||
| dv[i, particle] += acceleration[i] | ||
| function add_source_terms_inner!(dv, v, u, | ||
| system::Union{AbstractFluidSystem, | ||
| AbstractStructureSystem}, | ||
| semi, t) | ||
| if iszero(system.acceleration) && isnothing(source_terms(system)) | ||
| # Nothing to do | ||
| return dv | ||
| end | ||
|
|
||
| @trixi_timeit timer() "source terms" begin | ||
| @threaded semi for particle in each_integrated_particle(system) | ||
| add_acceleration!(dv, system, particle) | ||
| add_source_terms_inner!(dv, v, u, particle, system, source_terms(system), t) | ||
| end | ||
| end | ||
efaulhaber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return dv | ||
| end | ||
|
|
||
| @inline function add_source_terms_inner!(dv, v, u, particle, system, source_terms_, t, | ||
| integrate_tlsph) | ||
| add_source_terms_inner!(dv, v, u, particle, system, source_terms_, t) | ||
| end | ||
| @inline source_terms(system) = nothing | ||
| @inline source_terms(system::Union{AbstractFluidSystem, AbstractStructureSystem}) = system.source_terms | ||
|
|
||
| @inline function add_source_terms_inner!(dv, v, u, particle, | ||
| system::TotalLagrangianSPHSystem, | ||
| source_terms_, t, integrate_tlsph) | ||
| integrate_tlsph && add_source_terms_inner!(dv, v, u, particle, system, source_terms_, t) | ||
| @inline function add_acceleration!(dv, system, particle) | ||
| (; acceleration) = system | ||
|
|
||
| for i in 1:ndims(system) | ||
| @inbounds dv[i, particle] += acceleration[i] | ||
| end | ||
|
|
||
| return dv | ||
| end | ||
|
|
||
| @propagate_inbounds function add_source_terms_inner!(dv, v, u, particle, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we ever need `set_zero!` for `du` in that case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we now need to be aware that we have to set this to zero for systems where it is not set to something non-zero. We can't just dispatch these functions to do nothing. Hence the comment
``# Note that `du` is of length zero, so we don't have to set it to zero`` for the boundary system, and the
`set_zero!(du)` for TLSPH.