Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PointNeighbors"
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
version = "0.6.6-dev"
version = "0.6.6"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
91 changes: 74 additions & 17 deletions benchmarks/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3,
```
"""
function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches;
search_radius_factor = 3.0,
parallelization_backend = PolyesterBackend(),
names = ["Neighborhood search $i"
for i in 1:length(neighborhood_searches)]',
seed = 1, perturbation_factor_position = 1.0)
seed = 1, perturbation_factor_position = 1.0, shuffle = false)
# Multiply number of points in each iteration (roughly) by this factor
scaling_factor = 4
per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension))
Expand All @@ -64,24 +65,29 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho
times = zeros(iterations, length(neighborhood_searches))

for iter in 1:iterations
coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position)
coordinates_ = point_cloud(sizes[iter], search_radius_factor;
seed, perturbation_factor_position, shuffle)
coordinates = convert.(typeof(search_radius_factor), coordinates_)
domain_size = maximum(sizes[iter]) + 1

# Normalize domain size to 1
coordinates ./= domain_size

# Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively
search_radius = 4.0f0 / domain_size
search_radius = search_radius_factor / domain_size
n_particles = size(coordinates, 2)

neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches,
search_radius, n_particles)

for i in eachindex(neighborhood_searches_copy)
neighborhood_search = neighborhood_searches_copy[i]
PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates)
neighborhood_search_ = neighborhood_searches_copy[i]
neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend,
neighborhood_search_)
coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
PointNeighbors.initialize!(neighborhood_search, coords, coords)

time = benchmark(neighborhood_search, coordinates; parallelization_backend)
time = benchmark(neighborhood_search, coords; parallelization_backend)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles)
Expand Down Expand Up @@ -170,23 +176,74 @@ include("benchmarks/benchmarks.jl")
run_benchmark_gpu(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...)
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
cell_list = FullGridCellList(;
search_radius = 0.0f0,
min_corner,
max_corner))
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)]

names = ["GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"]
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU
neighborhood_searches = [
grid_nhs
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0,
update_neighborhood_search = grid_nhs,
transpose_backend)#, max_neighbors=128)
]

names = [
"GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"
]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, kwargs...)
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...)

Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a
`FullGridCellList`. This is the neighborhood search implementation that is used
in TrixiParticles.jl when performance is important.
Use this function to benchmark and profile TrixiParticles.jl kernels.

# Arguments
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
[`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref) and [`benchmark_tlsph`](@ref).
- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number
of points. For example, use `(100, 100)` for a 2D benchmark or
`(10, 10, 10)` for a 3D benchmark.
- `iterations`: Number of refinement iterations

# Keywords
See [`run_benchmark`](@ref) for a list of available keywords.

# Examples
```julia
include("benchmarks/benchmarks.jl")

run_benchmark_full_grid(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations;
                                 parallelization_backend = PolyesterBackend(), kwargs...)
    n_dims = length(n_points_per_dimension)

    # Cell-list bounds in single precision: the min corner is the origin and the
    # max corner is normalized so that its largest component equals 1.
    longest_axis = maximum(n_points_per_dimension)
    lower = 0.0f0 .* n_points_per_dimension
    upper = Float32.(n_points_per_dimension ./ longest_axis)

    # Both search radii are set to zero here; `run_benchmark` passes its own
    # search radius to `copy_neighborhood_search` for every refinement iteration.
    full_grid = FullGridCellList(; search_radius = 0.0f0, min_corner = lower,
                                 max_corner = upper)
    searches = [GridNeighborhoodSearch{n_dims}(; search_radius = 0.0f0,
                                               cell_list = full_grid,
                                               update_strategy = ParallelUpdate())]

    # `run_benchmark` builds its default `names` as a row (note the adjoint),
    # so the single label is passed as a 1×1 matrix.
    labels = ["GridNeighborhoodSearch with FullGridCellList";;]

    return run_benchmark(benchmark, n_points_per_dimension, iterations, searches;
                         names = labels, parallelization_backend, kwargs...)
end

"""
Expand Down
94 changes: 44 additions & 50 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PointNeighbors
using PointNeighbors.Adapt
using TrixiParticles
using BenchmarkTools

Expand Down Expand Up @@ -43,47 +44,30 @@ This method is used to simulate an incompressible fluid.
"""
function benchmark_wcsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
density = 1000.0
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing)

sound_speed = 10.0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
density = convert(ELTYPE, 1000.0)
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing)
fluid = InitialCondition(; coordinates = coordinates_cpu, density,
mass = convert(ELTYPE, 0.1) * particle_spacing,
particle_spacing)

sound_speed = 10.0f0
# Make sure that the computed forces are not all zero
for i in eachindex(fluid.density)
fluid.density[i] += rand(eltype(fluid.density))
end

sound_speed = convert(ELTYPE, 10.0)
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)
viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02),
beta = convert(ELTYPE, 0.0))
density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1))

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation,
viscosity, density_diffusion, parallelization_backend)
# Compact support == 2 * smoothing length for these kernels
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
if ndims(neighborhood_search) == 1
Expand All @@ -92,23 +76,21 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

fluid_system = WeaklyCompressibleSPHSystem(initial_condition, ContinuityDensity(),
fluid_system = WeaklyCompressibleSPHSystem(fluid, ContinuityDensity(),
state_equation, smoothing_kernel,
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)

system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
system = Adapt.adapt(parallelization_backend, fluid_system)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(initial_condition.velocity,
initial_condition.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates)
v = Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = Adapt.adapt(parallelization_backend, fluid.coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -128,8 +110,15 @@ This method is used to simulate an elastic structure.
"""
function benchmark_tlsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6),
nu = convert(ELTYPE, 0.4))
solid = InitialCondition(; coordinates = coordinates_cpu,
density = material.density, mass = convert(ELTYPE, 0.1))

# Compact support == 2 * smoothing length for these kernels
smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2
Expand All @@ -142,15 +131,20 @@ function benchmark_tlsph(neighborhood_search, coordinates;

solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length,
material.E, material.nu)
semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true)
system_ = Adapt.adapt(parallelization_backend, solid_system)

v = copy(solid.velocity)
u = copy(solid.coordinates)
# Remove unnecessary data structures that are only used for initialization
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
system = TrixiParticles.@set system_.self_interaction_nhs = nhs

semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = Adapt.adapt(parallelization_backend, copy(solid.velocity))
u = Adapt.adapt(parallelization_backend, copy(solid.coordinates))
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(solid_system, semi)
TrixiParticles.initialize!(system, semi)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u,
$solid_system, $solid_system, $semi)
return @belapsed TrixiParticles.interact_structure_structure2!($dv, $v, $system, $semi)
end
3 changes: 2 additions & 1 deletion src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ include("nhs_grid.jl")
include("nhs_precomputed.jl")
include("gpu.jl")

export foreach_point_neighbor, foreach_neighbor
export foreach_point_neighbor, foreach_point_neighbor_unsafe,
foreach_neighbor, foreach_neighbor_unsafe
export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch
export DictionaryCellList, FullGridCellList, SpatialHashingCellList
export DynamicVectorOfVectors
Expand Down
Loading
Loading