diff --git a/Project.toml b/Project.toml index d4654124..32e1fdf5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PointNeighbors" uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c" authors = ["Erik Faulhaber "] -version = "0.6.6-dev" +version = "0.6.6" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index 702c19e4..58b38687 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -20,7 +20,8 @@ include("nhs_grid.jl") include("nhs_precomputed.jl") include("gpu.jl") -export foreach_point_neighbor, foreach_neighbor +export foreach_point_neighbor, foreach_point_neighbor_unsafe, + foreach_neighbor, foreach_neighbor_unsafe export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch export DictionaryCellList, FullGridCellList, SpatialHashingCellList export DynamicVectorOfVectors diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index 5e99b9cd..528c40de 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -161,7 +161,7 @@ Note that `system_coords` and `neighbor_coords` can be identical. !!! warning The `neighborhood_search` must have been initialized or updated with `system_coords` as first coordinate array and `neighbor_coords` as second coordinate array. - This can be skipped for certain implementations. See [`requires_update`](@ref). + For some implementations, this requirement can be relaxed; see [`requires_update`](@ref). # Arguments - `f`: The function explained above. @@ -178,7 +178,7 @@ Note that `system_coords` and `neighbor_coords` can be identical. automatically based on the type of `system_coords`. See [`@threaded`](@ref) for a list of available backends. -See also [`initialize!`](@ref), [`update!`](@ref). +See also [`foreach_point_neighbor_unsafe`](@ref), [`initialize!`](@ref), [`update!`](@ref). """ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search; parallelization_backend::ParallelizationBackend = default_backend(system_coords), @@ -200,41 +200,143 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho return nothing end -@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_system_coords, +""" + foreach_point_neighbor_unsafe(f, system_coords, neighbor_coords, neighborhood_search; + parallelization_backend = default_backend(system_coords), + points = axes(system_coords, 2)) + +Like [`foreach_point_neighbor`](@ref), but skips **all** bounds checks inside the +threaded loop or GPU kernel. +See [`foreach_neighbor_unsafe`](@ref) for more details on which bounds checks are skipped. + +!!! warning + The `neighborhood_search` must have been initialized or updated with `system_coords` + as first coordinate array and `neighbor_coords` as second coordinate array. + For some implementations, this requirement can be relaxed; see [`requires_update`](@ref). + +!!! warning + Use this only when `neighborhood_search` is known to be initialized correctly for + `system_coords` and `neighbor_coords`. +""" +function foreach_point_neighbor_unsafe(f::T, system_coords, neighbor_coords, + neighborhood_search; + parallelization_backend::ParallelizationBackend = default_backend(system_coords), + points = axes(system_coords, 2)) where {T} + # Explicit bounds check before the hot loop (or GPU kernel) + @boundscheck checkbounds(system_coords, ndims(neighborhood_search), points) + + @threaded parallelization_backend for point in points + foreach_neighbor_unsafe(f, system_coords, neighbor_coords, + neighborhood_search, point) + end + + return nothing +end + +""" + foreach_neighbor(f, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + search_radius = search_radius(neighborhood_search)) + +Loop over all neighbors of `point` and execute `f(i, j, pos_diff, d)` for every +neighbor within `search_radius`, where `i` is `point`, `j` is the neighbor index, +`pos_diff` is the vector from neighbor to point, and `d` is the distance. + +This method performs bounds checks, even when called with `@inbounds`. +`@inbounds` only skips the bounds check for loading the coordinates of `point` +from `system_coords`. +See [`foreach_neighbor_unsafe`](@ref) for a version that skips all bounds checks. +""" +@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point; search_radius = search_radius(neighborhood_search)) # Due to https://github.com/JuliaLang/julia/issues/30411, we cannot just remove # a `@boundscheck` by calling this function with `@inbounds` because it has a kwarg. # We have to use `@propagate_inbounds`, which will also remove boundschecks - # in the neighbor loop, which is not safe (see comment below). + # in the neighbor loop, which is not safe (that's what `foreach_neighbor_unsafe` is for). # To avoid this, we have to use a function barrier to disable the `@inbounds` again. point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point) - foreach_neighbor(f, neighbor_system_coords, neighborhood_search, + foreach_neighbor(f, neighbor_coords, neighborhood_search, point, point_coords, search_radius) end +# This is a function barrier to prevent the `@inbounds` in `foreach_neighbor` +# from propagating into the neighbor loop, which is not safe. +@inline function foreach_neighbor(f, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point, point_coords, search_radius) + foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, + point, point_coords, search_radius) +end + +""" + foreach_neighbor_unsafe(f, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + search_radius = search_radius(neighborhood_search)) + +Like [`foreach_neighbor`](@ref), but skips **all** bounds checks. + +`foreach_neighbor` performs the following bounds checks that are skipped here: +- Check if `point` is in bounds of `system_coords`. This is the only bounds check + that is skipped when calling `foreach_neighbor` with `@inbounds`, and the only one that + is safe to skip when `point` is guaranteed to be in bounds of `system_coords`. +- Check if the neighbors of `point` are in bounds of `neighbor_coords`. + This is not safe to skip when the neighborhood search was not initialized correctly; + for example when initialized with coordinate arrays of different sizes than the ones + passed to this function or when the internal data structures have been tampered with. + In this case, the cell lists (for [`GridNeighborhoodSearch`](@ref)) or neighbor lists + (for [`PrecomputedNeighborhoodSearch`](@ref)) might contain indices that are out of + bounds for `neighbor_coords`. +- With `GridNeighborhoodSearch` and [`FullGridCellList`](@ref), check if the neighboring + cells are in bounds of the grid. Again, this is not safe to skip when the neighborhood + search might not have been initialized correctly. +- With `PrecomputedNeighborhoodSearch`, verify that `point` is in bounds of the neighbor lists. + Skipping this is unsafe if the neighborhood search was not initialized correctly. + +Note that all these bounds checks are safe to skip if +- `point` is guaranteed to be in bounds of `system_coords`, +- the neighborhood search was initialized correctly with `system_coords` + and `neighbor_coords` and has not been tampered with since then. + +!!! warning + Use this only when `point` is known to be in bounds of `system_coords` + and when `neighborhood_search` is known to be initialized correctly for + `system_coords` and `neighbor_coords`. +""" +@inline function foreach_neighbor_unsafe(f, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point; + search_radius = search_radius(neighborhood_search)) + point_coords = @inbounds extract_svector(system_coords, Val(ndims(neighborhood_search)), + point) + + @inbounds foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, + point, point_coords, search_radius) +end + # This is the generic function that is called for `TrivialNeighborhoodSearch`. # For `GridNeighborhoodSearch`, a specialized function is used for slightly better # performance. `PrecomputedNeighborhoodSearch` can skip the distance check altogether. -@inline function foreach_neighbor(f, neighbor_system_coords, - neighborhood_search::AbstractNeighborhoodSearch, - point, point_coords, search_radius) +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point, point_coords, search_radius) (; periodic_box) = neighborhood_search for neighbor in eachneighbor(point_coords, neighborhood_search) - # Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100. - # But we don't know if `neighbor` (extracted from the cell list) is in bounds. - neighbor_coords = extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), neighbor) + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, search_radius, - periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, search_radius, + periodic_box) if distance2 <= search_radius^2 distance = sqrt(distance2) diff --git a/src/nhs_grid.jl b/src/nhs_grid.jl index 21759ab2..8e3c1ff2 100644 --- a/src/nhs_grid.jl +++ b/src/nhs_grid.jl @@ -508,14 +508,19 @@ end # Specialized version of the function in `neighborhood_search.jl`, which is faster # than looping over `eachneighbor`. -@inline function foreach_neighbor(f, neighbor_system_coords, - neighborhood_search::GridNeighborhoodSearch, - point, point_coords, search_radius) +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, + neighborhood_search::GridNeighborhoodSearch, + point, point_coords, search_radius) (; cell_list, periodic_box) = neighborhood_search cell = cell_coords(point_coords, neighborhood_search) for neighbor_cell_ in neighboring_cells(cell, neighborhood_search) neighbor_cell = Tuple(neighbor_cell_) + + # Making the following `@inbounds` is not safe because we don't know if + # `neighbor_cell` is in bounds of the grid. neighbors = points_in_cell(neighbor_cell, neighborhood_search) # Boolean to indicate if this cell has a collision (only with `SpatialHashingCellList`) @@ -525,17 +530,19 @@ end for neighbor_ in eachindex(neighbors) neighbor = @inbounds neighbors[neighbor_] - # Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100. - # But we don't know if `neighbor` (extracted from the cell list) is in bounds. - neighbor_coords = extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), neighbor) + # Making the following `@inbounds` is not safe because we don't know + # if `neighbor` (extracted from the cell list) is in bounds. + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), + neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, - search_radius, periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, + search_radius, periodic_box) if distance2 <= search_radius^2 distance = sqrt(distance2) @@ -543,7 +550,7 @@ end # If this cell has a collision, check if this point belongs to this cell # (only with `SpatialHashingCellList`). if cell_collision && - check_collision(neighbor_cell_, neighbor_coords, cell_list, + check_collision(neighbor_cell_, neighbor_point_coords, cell_list, neighborhood_search) continue end diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 187ff0c5..c7ddceca 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -200,29 +200,32 @@ function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, end end -@inline function foreach_neighbor(f, neighbor_system_coords, - neighborhood_search::PrecomputedNeighborhoodSearch, - point, point_coords, search_radius) +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, + neighborhood_search::PrecomputedNeighborhoodSearch, + point, point_coords, search_radius) (; periodic_box, neighbor_lists) = neighborhood_search - neighbors = @inbounds neighbor_lists[point] + # Making the following `@inbounds` is not safe because the neighbor list + # might not contain `point` if the NHS was not initialized correctly. + neighbors = neighbor_lists[point] for neighbor_ in eachindex(neighbors) neighbor = @inbounds neighbors[neighbor_] - # Making this `@inbounds` is not perfectly safe because + # Making this `@inbounds` is not safe because # `neighbor` (extracted from the neighbor list) is only guaranteed to be in bounds # if the neighbor lists were constructed correctly and have not been corrupted. - # However, adding this `@inbounds` yields a ~20% speedup for TLSPH on GPUs (A4500). - neighbor_coords = @inbounds extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), - neighbor) + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, search_radius, - periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, search_radius, + periodic_box) distance = sqrt(distance2) diff --git a/test/cell_lists/spatial_hashing.jl b/test/cell_lists/spatial_hashing.jl index 28d13b12..6a8e8104 100644 --- a/test/cell_lists/spatial_hashing.jl +++ b/test/cell_lists/spatial_hashing.jl @@ -71,9 +71,11 @@ @testset "Cell Coordinates Hash Function" begin # 1D coordinates - @test coordinates_flattened([1]) == UInt128(reinterpret(UInt32, Int32(1))) - @test coordinates_flattened([-1]) == UInt128(reinterpret(UInt32, Int32(-1))) - @test coordinates_flattened([0]) == Int128(0) + @test PointNeighbors.coordinates_flattened([1]) == + UInt128(reinterpret(UInt32, Int32(1))) + @test PointNeighbors.coordinates_flattened([-1]) == + UInt128(reinterpret(UInt32, Int32(-1))) + @test PointNeighbors.coordinates_flattened([0]) == Int128(0) # 2D coordinates coord2 = [-1, 1] @@ -82,18 +84,20 @@ # The second coordinate gives 1 shifted by 32 bits, so 1 * 2^32. expected2 = UInt128(2^32 - 1 + 2^32) - @test coordinates_flattened(coord2) == expected2 + @test PointNeighbors.coordinates_flattened(coord2) == expected2 # 3D coordinates coord3 = [1, 0, -1] expected3 = UInt128(1 + 0 * 2^32 + (2^32 - 1) * Int128(2)^64) - @test coordinates_flattened(coord3) == expected3 + @test PointNeighbors.coordinates_flattened(coord3) == expected3 # Extreme Int32 bounds max_val = Int32(typemax(Int32)) min_val = Int32(typemin(Int32)) - @test coordinates_flattened((max_val,)) == UInt128(reinterpret(UInt32, max_val)) - @test coordinates_flattened((min_val,)) == UInt128(reinterpret(UInt32, min_val)) + @test PointNeighbors.coordinates_flattened((max_val,)) == + UInt128(reinterpret(UInt32, max_val)) + @test PointNeighbors.coordinates_flattened((min_val,)) == + UInt128(reinterpret(UInt32, min_val)) # 3D extreme Int32 bounds coord4 = [min_val, max_val, Int32(0)] @@ -102,19 +106,19 @@ # `typemax(Int32)` gives the unsigned value 2^31 - 1. expected4 = UInt128(2^31 + (2^31 - 1) * 2^32) - @test coordinates_flattened(coord4) == expected4 + @test PointNeighbors.coordinates_flattened(coord4) == expected4 # Passing non-Int32-coercible should error large_val = typemax(Int32) + 1 - @test_throws InexactError coordinates_flattened([large_val]) + @test_throws InexactError PointNeighbors.coordinates_flattened([large_val]) small_val = typemin(Int32) - 1 - @test_throws InexactError coordinates_flattened([small_val]) + @test_throws InexactError PointNeighbors.coordinates_flattened([small_val]) - @test_throws InexactError coordinates_flattened([Inf]) - @test_throws InexactError coordinates_flattened([NaN]) + @test_throws InexactError PointNeighbors.coordinates_flattened([Inf]) + @test_throws InexactError PointNeighbors.coordinates_flattened([NaN]) # Too many dimensions should throw assertion error - @test_throws AssertionError coordinates_flattened([1, 2, 3, 4]) + @test_throws AssertionError PointNeighbors.coordinates_flattened([1, 2, 3, 4]) end end diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 65d3ed85..3820e11d 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -254,7 +254,7 @@ names_copied = [name * " copied" for name in names] append!(names, names_copied) - @testset "$(names[i])" for i in eachindex(names) + @testset verbose=true "$(names[i])" for i in eachindex(names) nhs = neighborhood_searches[i] # Initialize with `seed = 1` @@ -267,17 +267,60 @@ update!(nhs, coords, coords) end - neighbors = [Int[] for _ in axes(coords, 2)] + # Test the regular `foreach_point_neighbor` + @testset "`foreach_point_neighbor`" begin + neighbors = [Int[] for _ in axes(coords, 2)] + foreach_point_neighbor(coords, coords, nhs, + parallelization_backend = SerialBackend()) do point, + neighbor, + pos_diff, + distance + append!(neighbors[point], neighbor) + end + + @test sort.(neighbors) == neighbors_expected + end - foreach_point_neighbor(coords, coords, nhs, - parallelization_backend = SerialBackend()) do point, - neighbor, - pos_diff, - distance - append!(neighbors[point], neighbor) + # Test manual loop with `foreach_neighbor` + @testset "Manual Loop with `foreach_neighbor`" begin + neighbors_manual = [Int[] for _ in axes(coords, 2)] + for point in axes(coords, 2) + foreach_neighbor(coords, coords, nhs, + point) do point, neighbor, pos_diff, distance + append!(neighbors_manual[point], neighbor) + end + end + + @test sort.(neighbors_manual) == neighbors_expected end - @test sort.(neighbors) == neighbors_expected + # Repeat with foreach_point_neighbor_unsafe + @testset "`foreach_point_neighbor_unsafe`" begin + neighbors_unsafe = [Int[] for _ in axes(coords, 2)] + foreach_point_neighbor_unsafe(coords, coords, nhs, + parallelization_backend = SerialBackend()) do point, + neighbor, + pos_diff, + distance + append!(neighbors_unsafe[point], neighbor) + end + + @test sort.(neighbors_unsafe) == neighbors_expected + end + + # Repeat with manual loop with `foreach_neighbor_unsafe` + @testset "Manual Loop with `foreach_neighbor_unsafe`" begin + neighbors_manual_unsafe = [Int[] for _ in axes(coords, 2)] + for point in axes(coords, 2) + foreach_neighbor_unsafe(coords, coords, nhs, + point) do point, neighbor, + pos_diff, distance + append!(neighbors_manual_unsafe[point], neighbor) + end + end + + @test sort.(neighbors_manual_unsafe) == neighbors_expected + end end end end diff --git a/test/unittest.jl b/test/unittest.jl index f0981614..262b1753 100644 --- a/test/unittest.jl +++ b/test/unittest.jl @@ -7,4 +7,5 @@ include("nhs_precomputed.jl") include("neighborhood_search.jl") include("cell_lists/full_grid.jl") + include("cell_lists/spatial_hashing.jl") end;