diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py b/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py index eb9d74104..ca9467769 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: BSD-2-Clause import numpy as np -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import CUDATestCase from numba import cuda @@ -13,15 +13,10 @@ def boolean_func(A, vertial): A[0] = 321 -class TestCudaBoolean(CUDATestCase): - def test_boolean(self): - func = cuda.jit("void(float64[:], bool_)")(boolean_func) - A = np.array([0], dtype="float64") - func[1, 1](A, True) - self.assertTrue(A[0] == 123) - func[1, 1](A, False) - self.assertTrue(A[0] == 321) - - -if __name__ == "__main__": - unittest.main() +def test_boolean(): + func = cuda.jit("void(float64[:], bool_)")(boolean_func) + A = np.array([0], dtype="float64") + func[1, 1](A, True) + assert A[0] == 123 + func[1, 1](A, False) + assert A[0] == 321 diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py b/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py index 26f2ab830..b7d1ee44f 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py @@ -2,10 +2,11 @@ # SPDX-License-Identifier: BSD-2-Clause import numpy as np +import pytest from numba import cuda from numba.cuda import float64 -from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim +from numba.cuda.testing import skip_on_cudasim def builtin_max(A, B, C): @@ -26,53 +27,98 @@ def builtin_min(A, B, C): C[i] = float64(min(A[i], B[i])) -@skip_on_cudasim("Tests PTX emission") -class TestCudaMinMax(CUDATestCase): - def _run( - self, - kernel, - numpy_equivalent, - ptx_instruction, - dtype_left, - dtype_right, - n=5, - ): - kernel = cuda.jit(kernel) - - c = np.zeros(n, dtype=np.float64) - a = np.arange(n, dtype=dtype_left) + 0.5 - b = np.full(n, fill_value=2, dtype=dtype_right) - - kernel[1, c.shape](a, b, c) - np.testing.assert_allclose(c, numpy_equivalent(a, b)) - - ptx = next(p for p in kernel.inspect_asm().values()) - self.assertIn(ptx_instruction, ptx) - - def test_max_f8f8(self): - self._run(builtin_max, np.maximum, "max.f64", np.float64, np.float64) +def _run( + kernel, + numpy_equivalent, + ptx_instruction, + dtype_left, + dtype_right, + n=5, +): + kernel = cuda.jit(kernel) - def test_max_f4f8(self): - self._run(builtin_max, np.maximum, "max.f64", np.float32, np.float64) + c = np.zeros(n, dtype=np.float64) + a = np.arange(n, dtype=dtype_left) + 0.5 + b = np.full(n, fill_value=2, dtype=dtype_right) - def test_max_f8f4(self): - self._run(builtin_max, np.maximum, "max.f64", np.float64, np.float32) + kernel[1, c.shape](a, b, c) + np.testing.assert_allclose(c, numpy_equivalent(a, b)) - def test_max_f4f4(self): - self._run(builtin_max, np.maximum, "max.f32", np.float32, np.float32) + ptx = next(p for p in kernel.inspect_asm().values()) + assert ptx_instruction in ptx - def test_min_f8f8(self): - self._run(builtin_min, np.minimum, "min.f64", np.float64, np.float64) - def test_min_f4f8(self): - self._run(builtin_min, np.minimum, "min.f64", np.float32, np.float64) - - def test_min_f8f4(self): - self._run(builtin_min, np.minimum, "min.f64", np.float64, np.float32) - - def test_min_f4f4(self): - self._run(builtin_min, np.minimum, "min.f32", np.float32, np.float32) - - -if __name__ == "__main__": - unittest.main() +@skip_on_cudasim("Tests PTX emission") +@pytest.mark.parametrize( + "kernel,numpy_equivalent,ptx_instruction,dtype_left,dtype_right", + [ + pytest.param( + builtin_max, + np.maximum, + "max.f64", + np.float64, + np.float64, + id="max_f8f8", + ), + pytest.param( + builtin_max, + np.maximum, + "max.f64", + np.float32, + np.float64, + id="max_f4f8", + ), + pytest.param( + builtin_max, + np.maximum, + "max.f64", + np.float64, + np.float32, + id="max_f8f4", + ), + pytest.param( + builtin_max, + np.maximum, + "max.f32", + np.float32, + np.float32, + id="max_f4f4", + ), + pytest.param( + builtin_min, + np.minimum, + "min.f64", + np.float64, + np.float64, + id="min_f8f8", + ), + pytest.param( + builtin_min, + np.minimum, + "min.f64", + np.float32, + np.float64, + id="min_f4f8", + ), + pytest.param( + builtin_min, + np.minimum, + "min.f64", + np.float64, + np.float32, + id="min_f8f4", + ), + pytest.param( + builtin_min, + np.minimum, + "min.f32", + np.float32, + np.float32, + id="min_f4f4", + ), + ], +) +def test_minmax( + kernel, numpy_equivalent, ptx_instruction, dtype_left, dtype_right +): + _run(kernel, numpy_equivalent, ptx_instruction, dtype_left, dtype_right) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py index 10df01fc0..ec141fac8 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py @@ -1,145 +1,143 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-2-Clause -from numba import cuda -import numpy as np -from numba.cuda.testing import skip_on_cudasim, CUDATestCase import threading -import unittest +import numpy as np +import pytest -class TestMultiGPUContext(CUDATestCase): - @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") - def test_multigpu_context(self): - @cuda.jit("void(float64[:], float64[:])") - def copy_plus_1(inp, out): - i = cuda.grid(1) - if i < out.size: - out[i] = inp[i] + 1 +from numba import cuda +from numba.cuda.testing import skip_on_cudasim - def check(inp, out): - np.testing.assert_equal(inp + 1, out) - N = 32 - A = np.arange(N, dtype=np.float64) - B = np.arange(N, dtype=np.float64) +@pytest.mark.skipif(len(cuda.gpus) < 2, reason="need more than 1 gpus") +def test_multigpu_context(): + @cuda.jit("void(float64[:], float64[:])") + def copy_plus_1(inp, out): + i = cuda.grid(1) + if i < out.size: + out[i] = inp[i] + 1 - with cuda.gpus[0]: - copy_plus_1[1, N](A, B) + def check(inp, out): + np.testing.assert_equal(inp + 1, out) - check(A, B) + N = 32 + A = np.arange(N, dtype=np.float64) + B = np.arange(N, dtype=np.float64) + with cuda.gpus[0]: copy_plus_1[1, N](A, B) - check(A, B) - with cuda.gpus[0]: - A0 = np.arange(N, dtype=np.float64) - B0 = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A0, B0) + check(A, B) - with cuda.gpus[1]: - A1 = np.arange(N, dtype=np.float64) - B1 = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A1, B1) + copy_plus_1[1, N](A, B) + check(A, B) - check(A0, B0) - check(A1, B1) - - A = np.arange(N, dtype=np.float64) - B = np.arange(N, dtype=np.float64) - copy_plus_1[1, N](A, B) - check(A, B) - - @skip_on_cudasim("Simulator does not support multiple threads") - def test_multithreaded(self): - def work(gpu, dA, results, ridx): - try: - with gpu: - arr = dA.copy_to_host() - - except Exception as e: - results[ridx] = e - - else: - results[ridx] = np.all(arr == np.arange(10)) - - dA = cuda.to_device(np.arange(10)) - - nthreads = 10 - results = [None] * nthreads - threads = [ - threading.Thread( - target=work, args=(cuda.gpus.current, dA, results, i) - ) - for i in range(nthreads) - ] - for th in threads: - th.start() - - for th in threads: - th.join() - - for r in results: - if isinstance(r, BaseException): - raise r - else: - self.assertTrue(r) - - @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") - def test_with_context(self): - @cuda.jit - def vector_add_scalar(arr, val): - i = cuda.grid(1) - if i < arr.size: - arr[i] += val - - hostarr = np.arange(10, dtype=np.float32) - with cuda.gpus[0]: - arr1 = cuda.to_device(hostarr) + with cuda.gpus[0]: + A0 = np.arange(N, dtype=np.float64) + B0 = np.arange(N, dtype=np.float64) + copy_plus_1[1, N](A0, B0) with cuda.gpus[1]: - arr2 = cuda.to_device(hostarr) + A1 = np.arange(N, dtype=np.float64) + B1 = np.arange(N, dtype=np.float64) + copy_plus_1[1, N](A1, B1) - with cuda.gpus[0]: - vector_add_scalar[1, 10](arr1, 1) + check(A0, B0) + check(A1, B1) - with cuda.gpus[1]: - vector_add_scalar[1, 10](arr2, 2) + A = np.arange(N, dtype=np.float64) + B = np.arange(N, dtype=np.float64) + copy_plus_1[1, N](A, B) + check(A, B) - with cuda.gpus[0]: - np.testing.assert_equal(arr1.copy_to_host(), (hostarr + 1)) - with cuda.gpus[1]: - np.testing.assert_equal(arr2.copy_to_host(), (hostarr + 2)) - - @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") - def test_with_context_peer_copy(self): - # Peer access is not always possible - for example, with one GPU in TCC - # mode and one in WDDM - if that is the case, this test would fail so - # we need to skip it. - with cuda.gpus[0]: - ctx = cuda.current_context() - if not ctx.can_access_peer(1): - self.skipTest("Peer access between GPUs disabled") - - # 1. Create a range in an array - hostarr = np.arange(10, dtype=np.float32) - - # 2. Copy range array from host -> GPU 0 - with cuda.gpus[0]: - arr1 = cuda.to_device(hostarr) - - # 3. Initialize a zero-filled array on GPU 1 - with cuda.gpus[1]: - arr2 = cuda.to_device(np.zeros_like(hostarr)) +@skip_on_cudasim("Simulator does not support multiple threads") +def test_multithreaded(): + def work(gpu, dA, results, ridx): + try: + with gpu: + arr = dA.copy_to_host() + + except Exception as e: + results[ridx] = e + + else: + results[ridx] = np.all(arr == np.arange(10)) + + dA = cuda.to_device(np.arange(10)) + + nthreads = 10 + results = [None] * nthreads + threads = [ + threading.Thread(target=work, args=(cuda.gpus.current, dA, results, i)) + for i in range(nthreads) + ] + for th in threads: + th.start() + + for th in threads: + th.join() + + for r in results: + if isinstance(r, BaseException): + raise r + else: + assert r + + +@pytest.mark.skipif(len(cuda.gpus) < 2, reason="need more than 1 gpus") +def test_with_context(): + @cuda.jit + def vector_add_scalar(arr, val): + i = cuda.grid(1) + if i < arr.size: + arr[i] += val + + hostarr = np.arange(10, dtype=np.float32) + with cuda.gpus[0]: + arr1 = cuda.to_device(hostarr) + + with cuda.gpus[1]: + arr2 = cuda.to_device(hostarr) + + with cuda.gpus[0]: + vector_add_scalar[1, 10](arr1, 1) + + with cuda.gpus[1]: + vector_add_scalar[1, 10](arr2, 2) + + with cuda.gpus[0]: + np.testing.assert_equal(arr1.copy_to_host(), (hostarr + 1)) + + with cuda.gpus[1]: + np.testing.assert_equal(arr2.copy_to_host(), (hostarr + 2)) + + +@pytest.mark.skipif(len(cuda.gpus) < 2, reason="need more than 1 gpus") +def test_with_context_peer_copy(): + # Peer access is not always possible - for example, with one GPU in TCC + # mode and one in WDDM - if that is the case, this test would fail so + # we need to skip it. + with cuda.gpus[0]: + ctx = cuda.current_context() + if not ctx.can_access_peer(1): + pytest.skip("Peer access between GPUs disabled") + + # 1. Create a range in an array + hostarr = np.arange(10, dtype=np.float32) - with cuda.gpus[0]: - # 4. Copy range from GPU 0 -> GPU 1 - arr2.copy_to_device(arr1) + # 2. Copy range array from host -> GPU 0 + with cuda.gpus[0]: + arr1 = cuda.to_device(hostarr) - # 5. Copy range from GPU 1 -> host and check contents - np.testing.assert_equal(arr2.copy_to_host(), hostarr) + # 3. Initialize a zero-filled array on GPU 1 + with cuda.gpus[1]: + arr2 = cuda.to_device(np.zeros_like(hostarr)) + with cuda.gpus[0]: + # 4. Copy range from GPU 0 -> GPU 1 + arr2.copy_to_device(arr1) -if __name__ == "__main__": - unittest.main() + # 5. Copy range from GPU 1 -> host and check contents + np.testing.assert_equal(arr2.copy_to_host(), hostarr) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_powi.py b/numba_cuda/numba/cuda/tests/cudapy/test_powi.py index 4bf80bb1a..d99886744 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_powi.py @@ -5,7 +5,6 @@ import numpy as np from numba import cuda from numba.cuda import float64, int8, int32, void -from numba.cuda.testing import unittest, CUDATestCase def cu_mat_power(A, power, power_A): @@ -54,7 +53,7 @@ def random_complex(N): return np.random.random(1) + np.random.random(1) * 1j -class TestCudaPowi(CUDATestCase): +class TestCudaPowi: def test_powi(self): dec = cuda.jit(void(float64[:, :], int8, float64[:, :])) kernel = dec(cu_mat_power) @@ -63,7 +62,7 @@ def test_powi(self): A = np.arange(10, dtype=np.float64).reshape(2, 5) Aout = np.empty_like(A) kernel[1, A.shape](A, power, Aout) - self.assertTrue(np.allclose(Aout, A**power)) + assert np.allclose(Aout, A**power) def test_powi_binop(self): dec = cuda.jit(void(float64[:, :], int8, float64[:, :])) @@ -73,7 +72,7 @@ def test_powi_binop(self): A = np.arange(10, dtype=np.float64).reshape(2, 5) Aout = np.empty_like(A) kernel[1, A.shape](A, power, Aout) - self.assertTrue(np.allclose(Aout, A**power)) + assert np.allclose(Aout, A**power) # Relative tolerance kwarg is provided because 1.0e-7 (the default for # assert_allclose) is a bit tight for single precision. @@ -122,7 +121,3 @@ def test_cpow_complex64_inplace_binop(self): def test_cpow_complex128_inplace_binop(self): self._test_cpow_inplace_binop(np.complex128, rtol=3.0e-7) - - -if __name__ == "__main__": - unittest.main() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_random.py b/numba_cuda/numba/cuda/tests/cudapy/test_random.py index c99e29aa5..7d17627ec 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_random.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_random.py @@ -4,12 +4,11 @@ import math import numpy as np +import pytest from numba import cuda -from numba.cuda.testing import unittest from numba.cuda.testing import ( skip_on_cudasim, - CUDATestCase, skip_on_standalone_numba_cuda, ) @@ -20,12 +19,32 @@ xoroshiro128p_normal_float64, ) - # Distributions UNIFORM = 1 NORMAL = 2 +def check_uniform(kernel_func, dtype): + states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) + out = np.zeros(2 * 32 * 32, dtype=np.float32) + + kernel_func[2, 32](states, out, 32, UNIFORM) + assert out.max() == pytest.approx(1.0, abs=1e-3) + assert out.min() == pytest.approx(0.0, abs=1e-3) + assert out.mean() == pytest.approx(0.5, abs=1.5e-2) + assert out.std() == pytest.approx(1.0 / (2 * math.sqrt(3)), abs=6e-3) + + +def check_normal(kernel_func, dtype): + states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) + out = np.zeros(2 * 32 * 32, dtype=dtype) + + kernel_func[2, 32](states, out, 32, NORMAL) + + assert out.mean() == pytest.approx(0.0, abs=4e-3) + assert out.std() == pytest.approx(1.0, abs=2e-3) + + @cuda.jit def rng_kernel_float32(states, out, count, distribution): thread_id = cuda.grid(1) @@ -53,11 +72,11 @@ def rng_kernel_float64(states, out, count, distribution): @skip_on_standalone_numba_cuda -class TestCudaRandomXoroshiro128p(CUDATestCase): +class TestCudaRandomXoroshiro128p: def test_create(self): states = cuda.random.create_xoroshiro128p_states(10, seed=1) s = states.copy_to_host() - self.assertEqual(len(np.unique(s)), 10) + assert len(np.unique(s)) == 10 def test_create_subsequence_start(self): states = cuda.random.create_xoroshiro128p_states(10, seed=1) @@ -77,41 +96,18 @@ def test_create_stream(self): 10, seed=1, stream=stream ) s = states.copy_to_host() - self.assertEqual(len(np.unique(s)), 10) - - def check_uniform(self, kernel_func, dtype): - states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) - out = np.zeros(2 * 32 * 32, dtype=np.float32) - - kernel_func[2, 32](states, out, 32, UNIFORM) - self.assertAlmostEqual(out.min(), 0.0, delta=1e-3) - self.assertAlmostEqual(out.max(), 1.0, delta=1e-3) - self.assertAlmostEqual(out.mean(), 0.5, delta=1.5e-2) - self.assertAlmostEqual(out.std(), 1.0 / (2 * math.sqrt(3)), delta=6e-3) + assert len(np.unique(s)) == 10 def test_uniform_float32(self): - self.check_uniform(rng_kernel_float32, np.float32) + check_uniform(rng_kernel_float32, np.float32) @skip_on_cudasim("skip test for speed under cudasim") def test_uniform_float64(self): - self.check_uniform(rng_kernel_float64, np.float64) - - def check_normal(self, kernel_func, dtype): - states = cuda.random.create_xoroshiro128p_states(32 * 2, seed=1) - out = np.zeros(2 * 32 * 32, dtype=dtype) - - kernel_func[2, 32](states, out, 32, NORMAL) - - self.assertAlmostEqual(out.mean(), 0.0, delta=4e-3) - self.assertAlmostEqual(out.std(), 1.0, delta=2e-3) + check_uniform(rng_kernel_float64, np.float64) def test_normal_float32(self): - self.check_normal(rng_kernel_float32, np.float32) + check_normal(rng_kernel_float32, np.float32) @skip_on_cudasim("skip test for speed under cudasim") def test_normal_float64(self): - self.check_normal(rng_kernel_float64, np.float64) - - -if __name__ == "__main__": - unittest.main() + check_normal(rng_kernel_float64, np.float64) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py index c3d85e8ac..fbd9c24b3 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py @@ -5,272 +5,362 @@ import itertools import warnings import numpy as np -import unittest +import pytest from numba.cuda import HAS_NUMBA if HAS_NUMBA: from numba import njit -from numba import cuda from numba.cuda import config, types from numba.cuda.testing import skip_on_standalone_numba_cuda from numba.cuda.typing.typeof import typeof from numba.cuda.np import numpy_support -from numba.cuda.tests.support import TestCase - - -class BaseUFuncTest: - def setUp(self): - self.inputs = [ - (np.uint32(0), types.uint32), - (np.uint32(1), types.uint32), - (np.int32(-1), types.int32), - (np.int32(0), types.int32), - (np.int32(1), types.int32), - (np.uint64(0), types.uint64), - (np.uint64(1), types.uint64), - (np.int64(-1), types.int64), - (np.int64(0), types.int64), - (np.int64(1), types.int64), - (np.float32(-0.5), types.float32), - (np.float32(0.0), types.float32), - (np.float32(0.5), types.float32), - (np.float64(-0.5), types.float64), - (np.float64(0.0), types.float64), - (np.float64(0.5), types.float64), - (np.array([0, 1], dtype="u4"), types.Array(types.uint32, 1, "C")), - (np.array([0, 1], dtype="u8"), types.Array(types.uint64, 1, "C")), - ( - np.array([-1, 0, 1], dtype="i4"), - types.Array(types.int32, 1, "C"), - ), - ( - np.array([-1, 0, 1], dtype="i8"), - types.Array(types.int64, 1, "C"), - ), - ( - np.array([-0.5, 0.0, 0.5], dtype="f4"), - types.Array(types.float32, 1, "C"), - ), - ( - np.array([-0.5, 0.0, 0.5], dtype="f8"), - types.Array(types.float64, 1, "C"), - ), - (np.array([0, 1], dtype=np.int8), types.Array(types.int8, 1, "C")), - ( - np.array([0, 1], dtype=np.int16), - types.Array(types.int16, 1, "C"), - ), - ( - np.array([0, 1], dtype=np.uint8), - types.Array(types.uint8, 1, "C"), - ), - ( - np.array([0, 1], dtype=np.uint16), - types.Array(types.uint16, 1, "C"), - ), - ] - - @skip_on_standalone_numba_cuda - @functools.lru_cache(maxsize=None) - def _compile(self, pyfunc, args, nrt=False): - # NOTE: to test the implementation of Numpy ufuncs, we disable - # rewriting of array expressions. - return njit(args, _nrt=nrt, no_rewrites=True)(pyfunc) - - def _determine_output_type( - self, input_type, int_output_type=None, float_output_type=None - ): - ty = input_type - if isinstance(ty, types.Array): - ndim = ty.ndim - ty = ty.dtype +from numba.cuda.tests.support import reset_module_warnings + + +@pytest.fixture +def base_input(): + return [ + (np.uint32(0), types.uint32), + (np.uint32(1), types.uint32), + (np.int32(-1), types.int32), + (np.int32(0), types.int32), + (np.int32(1), types.int32), + (np.uint64(0), types.uint64), + (np.uint64(1), types.uint64), + (np.int64(-1), types.int64), + (np.int64(0), types.int64), + (np.int64(1), types.int64), + (np.float32(-0.5), types.float32), + (np.float32(0.0), types.float32), + (np.float32(0.5), types.float32), + (np.float64(-0.5), types.float64), + (np.float64(0.0), types.float64), + (np.float64(0.5), types.float64), + (np.array([0, 1], dtype="u4"), types.Array(types.uint32, 1, "C")), + (np.array([0, 1], dtype="u8"), types.Array(types.uint64, 1, "C")), + ( + np.array([-1, 0, 1], dtype="i4"), + types.Array(types.int32, 1, "C"), + ), + ( + np.array([-1, 0, 1], dtype="i8"), + types.Array(types.int64, 1, "C"), + ), + ( + np.array([-0.5, 0.0, 0.5], dtype="f4"), + types.Array(types.float32, 1, "C"), + ), + ( + np.array([-0.5, 0.0, 0.5], dtype="f8"), + types.Array(types.float64, 1, "C"), + ), + (np.array([0, 1], dtype=np.int8), types.Array(types.int8, 1, "C")), + ( + np.array([0, 1], dtype=np.int16), + types.Array(types.int16, 1, "C"), + ), + ( + np.array([0, 1], dtype=np.uint8), + types.Array(types.uint8, 1, "C"), + ), + ( + np.array([0, 1], dtype=np.uint16), + types.Array(types.uint16, 1, "C"), + ), + ] + + +@pytest.fixture(name="inputs") +def ufunc_setup(base_input): + # The basic ufunc test does not set up complex inputs, so we'll add + # some here for testing with CUDA. + extra_input = [ + (np.complex64(-0.5 - 0.5j), types.complex64), + (np.complex64(0.0), types.complex64), + (np.complex64(0.5 + 0.5j), types.complex64), + (np.complex128(-0.5 - 0.5j), types.complex128), + (np.complex128(0.0), types.complex128), + (np.complex128(0.5 + 0.5j), types.complex128), + ( + np.array([-0.5 - 0.5j, 0.0, 0.5 + 0.5j], dtype="c8"), + types.Array(types.complex64, 1, "C"), + ), + ( + np.array([-0.5 - 0.5j, 0.0, 0.5 + 0.5j], dtype="c16"), + types.Array(types.complex128, 1, "C"), + ), + ] + + # Test with multiple dimensions + extra_input += [ + # Basic 2D and 3D arrays + ( + np.linspace(0, 1).reshape((5, -1)), + types.Array(types.float64, 2, "C"), + ), + ( + np.linspace(0, 1).reshape((2, 5, -1)), + types.Array(types.float64, 3, "C"), + ), + # Complex data (i.e. interleaved) + ( + np.linspace(0, 1 + 1j).reshape(5, -1), + types.Array(types.complex128, 2, "C"), + ), + # F-ordered + ( + np.asfortranarray(np.linspace(0, 1).reshape((5, -1))), + types.Array(types.float64, 2, "F"), + ), + ] + + # Add tests for other integer types + extra_input += [ + (np.uint8(0), types.uint8), + (np.uint8(1), types.uint8), + (np.int8(-1), types.int8), + (np.int8(0), types.int8), + (np.uint16(0), types.uint16), + (np.uint16(1), types.uint16), + (np.int16(-1), types.int16), + (np.int16(0), types.int16), + (np.ulonglong(0), types.ulonglong), + (np.ulonglong(1), types.ulonglong), + (np.longlong(-1), types.longlong), + (np.longlong(0), types.longlong), + ( + np.array([0, 1], dtype=np.ulonglong), + types.Array(types.ulonglong, 1, "C"), + ), + ( + np.array([0, 1], dtype=np.longlong), + types.Array(types.longlong, 1, "C"), + ), + ] + + inputs = base_input + extra_input + + low_occupancy_warnings = config.CUDA_LOW_OCCUPANCY_WARNINGS + warn_on_implicit_copy = config.CUDA_WARN_ON_IMPLICIT_COPY + + # Disable warnings about low gpu utilization in the test suite + config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 + # Disable warnings about host arrays in the test suite + config.CUDA_WARN_ON_IMPLICIT_COPY = 0 + + yield inputs + + config.CUDA_LOW_OCCUPANCY_WARNINGS = low_occupancy_warnings + config.CUDA_WARN_ON_IMPLICIT_COPY = warn_on_implicit_copy + + +def basic_int_ufunc_test(name, inputs): + skip_inputs = [ + types.float32, + types.float64, + types.Array(types.float32, 1, "C"), + types.Array(types.float32, 2, "C"), + types.Array(types.float64, 1, "C"), + types.Array(types.float64, 2, "C"), + types.Array(types.float64, 3, "C"), + types.Array(types.float64, 2, "F"), + types.complex64, + types.complex128, + types.Array(types.complex64, 1, "C"), + types.Array(types.complex64, 2, "C"), + types.Array(types.complex128, 1, "C"), + types.Array(types.complex128, 2, "C"), + ] + basic_ufunc_test(name, inputs, skip_inputs=skip_inputs) + + +def signed_unsigned_cmp_test(comparison_ufunc, inputs): + basic_ufunc_test(comparison_ufunc, inputs) + + if numpy_support.numpy_version < (1, 25): + return + + # Test additional implementations that specifically handle signed / + # unsigned comparisons added in NumPy 1.25: + # https://github.com/numpy/numpy/pull/23713 + additional_inputs = ( + (np.int64(-1), np.uint64(0)), + (np.int64(-1), np.uint64(1)), + (np.int64(0), np.uint64(0)), + (np.int64(0), np.uint64(1)), + (np.int64(1), np.uint64(0)), + (np.int64(1), np.uint64(1)), + (np.uint64(0), np.int64(-1)), + (np.uint64(0), np.int64(0)), + (np.uint64(0), np.int64(1)), + (np.uint64(1), np.int64(-1)), + (np.uint64(1), np.int64(0)), + (np.uint64(1), np.int64(1)), + ( + np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64), + np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64), + ), + ( + np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64), + np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64), + ), + ) + + pyfunc = _make_ufunc_usecase(comparison_ufunc) + + for a, b in additional_inputs: + input_types = (typeof(a), typeof(b)) + output_type = types.Array(types.bool_, 1, "C") + argtys = input_types + (output_type,) + cfunc = _compile(pyfunc, argtys) + + if isinstance(a, np.ndarray): + result = np.zeros(a.shape, dtype=np.bool_) else: - ndim = 1 - - if ty in types.signed_domain: - if int_output_type: - output_type = types.Array(int_output_type, ndim, "C") - else: - output_type = types.Array(ty, ndim, "C") - elif ty in types.unsigned_domain: - if int_output_type: - output_type = types.Array(int_output_type, ndim, "C") - else: - output_type = types.Array(ty, ndim, "C") + result = np.zeros(1, dtype=np.bool_) + + expected = np.zeros_like(result) + + pyfunc(a, b, expected) + cfunc(a, b, result) + np.testing.assert_equal(expected, result) + + +def basic_ufunc_test( + ufunc, + inputs, + skip_inputs=(), + additional_inputs=(), + int_output_type=None, + float_output_type=None, + kinds="ifc", + positive_only=False, +): + # Necessary to avoid some Numpy warnings being silenced, despite + # the simplefilter() call below. + reset_module_warnings(__name__) + + pyfunc = _make_ufunc_usecase(ufunc) + + for input_operand, input_type in itertools.chain(inputs, additional_inputs): + is_tuple = isinstance(input_operand, tuple) + if is_tuple: + args = input_operand else: - if float_output_type: - output_type = types.Array(float_output_type, ndim, "C") - else: - output_type = types.Array(ty, ndim, "C") - return output_type - - -class BasicUFuncTest(BaseUFuncTest): - def _make_ufunc_usecase(self, ufunc): - return _make_ufunc_usecase(ufunc) - - def basic_ufunc_test( - self, - ufunc, - skip_inputs=(), - additional_inputs=(), - int_output_type=None, - float_output_type=None, - kinds="ifc", - positive_only=False, - ): - # Necessary to avoid some Numpy warnings being silenced, despite - # the simplefilter() call below. - self.reset_module_warnings(__name__) - - pyfunc = self._make_ufunc_usecase(ufunc) - - for input_operand, input_type in itertools.chain( - self.inputs, additional_inputs - ): - is_tuple = isinstance(input_operand, tuple) - if is_tuple: - args = input_operand - else: - args = (input_operand,) * ufunc.nin - - if input_type in skip_inputs: - continue - if positive_only and np.any(args[0] < 0): - continue - - # Some ufuncs don't allow all kinds of arguments - if args[0].dtype.kind not in kinds: - continue - - output_type = self._determine_output_type( - input_type, int_output_type, float_output_type - ) + args = (input_operand,) * ufunc.nin - input_types = (input_type,) * ufunc.nin - output_types = (output_type,) * ufunc.nout - argtys = input_types + output_types - cfunc = self._compile(pyfunc, argtys) + if input_type in skip_inputs: + continue + if positive_only and np.any(args[0] < 0): + continue - if isinstance(args[0], np.ndarray): - results = [ - np.zeros(args[0].shape, dtype=out_ty.dtype.name) - for out_ty in output_types - ] - expected = [ - np.zeros(args[0].shape, dtype=out_ty.dtype.name) - for out_ty in output_types - ] - else: - results = [ - np.zeros(1, dtype=out_ty.dtype.name) - for out_ty in output_types - ] - expected = [ - np.zeros(1, dtype=out_ty.dtype.name) - for out_ty in output_types - ] + # Some ufuncs don't allow all kinds of arguments + if args[0].dtype.kind not in kinds: + continue - invalid_flag = False - with warnings.catch_warnings(record=True) as warnlist: - warnings.simplefilter("always") - pyfunc(*args, *expected) - - warnmsg = "invalid value encountered" - for thiswarn in warnlist: - if issubclass(thiswarn.category, RuntimeWarning) and str( - thiswarn.message - ).startswith(warnmsg): - invalid_flag = True - - cfunc(*args, *results) - - for expected_i, result_i in zip(expected, results): - msg = "\n".join( - [ - "ufunc '{0}' failed", - "inputs ({1}):", - "{2}", - "got({3})", - "{4}", - "expected ({5}):", - "{6}", - ] - ).format( - ufunc.__name__, - input_type, - input_operand, - output_type, - result_i, - expected_i.dtype, - expected_i, - ) - try: - np.testing.assert_array_almost_equal( - expected_i, result_i, decimal=5, err_msg=msg - ) - except AssertionError: - if invalid_flag: - # Allow output to mismatch for invalid input - print( - "Output mismatch for invalid input", - input_tuple, - result_i, - expected_i, - ) - else: - raise - - def signed_unsigned_cmp_test(self, comparison_ufunc): - self.basic_ufunc_test(comparison_ufunc) - - if numpy_support.numpy_version < (1, 25): - return - - # Test additional implementations that specifically handle signed / - # unsigned comparisons added in NumPy 1.25: - # https://github.com/numpy/numpy/pull/23713 - additional_inputs = ( - (np.int64(-1), np.uint64(0)), - (np.int64(-1), np.uint64(1)), - (np.int64(0), np.uint64(0)), - (np.int64(0), np.uint64(1)), - (np.int64(1), np.uint64(0)), - (np.int64(1), np.uint64(1)), - (np.uint64(0), np.int64(-1)), - (np.uint64(0), np.int64(0)), - (np.uint64(0), np.int64(1)), - (np.uint64(1), np.int64(-1)), - (np.uint64(1), np.int64(0)), - (np.uint64(1), np.int64(1)), - ( - np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64), - np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64), - ), - ( - np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64), - np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64), - ), + output_type = _determine_output_type( + input_type, int_output_type, float_output_type ) - pyfunc = self._make_ufunc_usecase(comparison_ufunc) - - for a, b in additional_inputs: - input_types = (typeof(a), typeof(b)) - output_type = types.Array(types.bool_, 1, "C") - argtys = input_types + (output_type,) - cfunc = self._compile(pyfunc, argtys) + input_types = (input_type,) * ufunc.nin + output_types = (output_type,) * ufunc.nout + argtys = input_types + output_types + cfunc = _compile(pyfunc, argtys) - if isinstance(a, np.ndarray): - result = np.zeros(a.shape, dtype=np.bool_) - else: - result = np.zeros(1, dtype=np.bool_) - - expected = np.zeros_like(result) + if isinstance(args[0], np.ndarray): + results = [ + np.zeros(args[0].shape, dtype=out_ty.dtype.name) + for out_ty in output_types + ] + expected = [ + np.zeros(args[0].shape, dtype=out_ty.dtype.name) + for out_ty in output_types + ] + else: + results = [ + np.zeros(1, dtype=out_ty.dtype.name) for out_ty in output_types + ] + expected = [ + np.zeros(1, dtype=out_ty.dtype.name) for out_ty in output_types + ] - pyfunc(a, b, expected) - cfunc(a, b, result) - np.testing.assert_equal(expected, result) + invalid_flag = False + with warnings.catch_warnings(record=True) as warnlist: + warnings.simplefilter("always") + pyfunc(*args, *expected) + + warnmsg = "invalid value encountered" + for thiswarn in warnlist: + if issubclass(thiswarn.category, RuntimeWarning) and str( + thiswarn.message + ).startswith(warnmsg): + invalid_flag = True + + cfunc(*args, *results) + + for expected_i, result_i in zip(expected, results): + msg = "\n".join( + [ + "ufunc '{0}' failed", + "inputs ({1}):", + "{2}", + "got({3})", + "{4}", + "expected ({5}):", + "{6}", + ] + ).format( + ufunc.__name__, + input_type, + input_operand, + output_type, + result_i, + expected_i.dtype, + expected_i, + ) + try: + np.testing.assert_array_almost_equal( + expected_i, result_i, decimal=5, err_msg=msg + ) + except AssertionError: + if invalid_flag: + # Allow output to mismatch for invalid input + print( + "Output mismatch for invalid input", + input_tuple, + result_i, + expected_i, + ) + else: + raise + + +def _determine_output_type( + input_type, int_output_type=None, float_output_type=None +): + ty = input_type + if isinstance(ty, types.Array): + ndim = ty.ndim + ty = ty.dtype + else: + ndim = 1 + + if ty in types.signed_domain: + if int_output_type: + output_type = types.Array(int_output_type, ndim, "C") + else: + output_type = types.Array(ty, ndim, "C") + elif ty in types.unsigned_domain: + if int_output_type: + output_type = types.Array(int_output_type, ndim, "C") + else: + output_type = types.Array(ty, ndim, "C") + else: + if float_output_type: + output_type = types.Array(float_output_type, ndim, "C") + else: + output_type = types.Array(ty, ndim, "C") + return output_type def _make_ufunc_usecase(ufunc): @@ -283,179 +373,57 @@ def _make_ufunc_usecase(ufunc): return fn -# This class provides common functionality for UFunc tests. The UFunc tests -# are quite long-running in comparison to other tests, so we break the tests up -# into multiple test classes for distribution across workers. -# -# This class would also be a CUDATestCase, but to avoid a confusing and -# potentially dangerous inheritance diamond with setUp methods that modify -# global state, we implement the necessary part of CUDATestCase within this -# class instead. This disables CUDA performance warnings for the duration of -# tests. -class CUDAUFuncTestBase(BasicUFuncTest, TestCase): - def setUp(self): - BasicUFuncTest.setUp(self) - - # The basic ufunc test does not set up complex inputs, so we'll add - # some here for testing with CUDA. - self.inputs.extend( - [ - (np.complex64(-0.5 - 0.5j), types.complex64), - (np.complex64(0.0), types.complex64), - (np.complex64(0.5 + 0.5j), types.complex64), - (np.complex128(-0.5 - 0.5j), types.complex128), - (np.complex128(0.0), types.complex128), - (np.complex128(0.5 + 0.5j), types.complex128), - ( - np.array([-0.5 - 0.5j, 0.0, 0.5 + 0.5j], dtype="c8"), - types.Array(types.complex64, 1, "C"), - ), - ( - np.array([-0.5 - 0.5j, 0.0, 0.5 + 0.5j], dtype="c16"), - types.Array(types.complex128, 1, "C"), - ), - ] - ) +@skip_on_standalone_numba_cuda +@functools.lru_cache(maxsize=None) +def _compile(pyfunc, args, nrt=False): + # NOTE: to test the implementation of Numpy ufuncs, we disable + # rewriting of array expressions. + return njit(args, _nrt=nrt, no_rewrites=True)(pyfunc) - # Test with multiple dimensions - self.inputs.extend( - [ - # Basic 2D and 3D arrays - ( - np.linspace(0, 1).reshape((5, -1)), - types.Array(types.float64, 2, "C"), - ), - ( - np.linspace(0, 1).reshape((2, 5, -1)), - types.Array(types.float64, 3, "C"), - ), - # Complex data (i.e. interleaved) - ( - np.linspace(0, 1 + 1j).reshape(5, -1), - types.Array(types.complex128, 2, "C"), - ), - # F-ordered - ( - np.asfortranarray(np.linspace(0, 1).reshape((5, -1))), - types.Array(types.float64, 2, "F"), - ), - ] - ) - # Add tests for other integer types - self.inputs.extend( - [ - (np.uint8(0), types.uint8), - (np.uint8(1), types.uint8), - (np.int8(-1), types.int8), - (np.int8(0), types.int8), - (np.uint16(0), types.uint16), - (np.uint16(1), types.uint16), - (np.int16(-1), types.int16), - (np.int16(0), types.int16), - (np.ulonglong(0), types.ulonglong), - (np.ulonglong(1), types.ulonglong), - (np.longlong(-1), types.longlong), - (np.longlong(0), types.longlong), - ( - np.array([0, 1], dtype=np.ulonglong), - types.Array(types.ulonglong, 1, "C"), - ), - ( - np.array([0, 1], dtype=np.longlong), - types.Array(types.longlong, 1, "C"), - ), - ] - ) +class TestBasicTrigUFuncs: + def test_sin_ufunc(self, inputs): + basic_ufunc_test(np.sin, inputs, kinds="cf") - self._low_occupancy_warnings = config.CUDA_LOW_OCCUPANCY_WARNINGS - self._warn_on_implicit_copy = config.CUDA_WARN_ON_IMPLICIT_COPY + def test_cos_ufunc(self, inputs): + basic_ufunc_test(np.cos, inputs, kinds="cf") - # Disable warnings about low gpu utilization in the test suite - config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 - # Disable warnings about host arrays in the test suite - config.CUDA_WARN_ON_IMPLICIT_COPY = 0 + def test_tan_ufunc(self, inputs): + basic_ufunc_test(np.tan, inputs, kinds="cf") - def tearDown(self): - # Restore original warning settings - config.CUDA_LOW_OCCUPANCY_WARNINGS = self._low_occupancy_warnings - config.CUDA_WARN_ON_IMPLICIT_COPY = self._warn_on_implicit_copy + def test_arcsin_ufunc(self, inputs): + basic_ufunc_test(np.arcsin, inputs, kinds="cf") - def _make_ufunc_usecase(self, ufunc): - return _make_ufunc_usecase(ufunc) + def test_arccos_ufunc(self, inputs): + basic_ufunc_test(np.arccos, inputs, kinds="cf") - @functools.lru_cache(maxsize=None) - def _compile(self, pyfunc, args): - # We return an already-configured kernel so that basic_ufunc_test can - # call it just like it does for a CPU function - return cuda.jit(args)(pyfunc)[1, 1] + def test_arctan_ufunc(self, inputs): + basic_ufunc_test(np.arctan, inputs, kinds="cf") - def basic_int_ufunc_test(self, name=None): - skip_inputs = [ - types.float32, - types.float64, - types.Array(types.float32, 1, "C"), - types.Array(types.float32, 2, "C"), - types.Array(types.float64, 1, "C"), - types.Array(types.float64, 2, "C"), - types.Array(types.float64, 3, "C"), - types.Array(types.float64, 2, "F"), - types.complex64, - types.complex128, - types.Array(types.complex64, 1, "C"), - types.Array(types.complex64, 2, "C"), - types.Array(types.complex128, 1, "C"), - types.Array(types.complex128, 2, "C"), - ] - self.basic_ufunc_test(name, skip_inputs=skip_inputs) - - ############################################################################ - # Trigonometric Functions - - -class TestBasicTrigUFuncs(CUDAUFuncTestBase): - def test_sin_ufunc(self): - self.basic_ufunc_test(np.sin, kinds="cf") - - def test_cos_ufunc(self): - self.basic_ufunc_test(np.cos, kinds="cf") - - def test_tan_ufunc(self): - self.basic_ufunc_test(np.tan, kinds="cf") - - def test_arcsin_ufunc(self): - self.basic_ufunc_test(np.arcsin, kinds="cf") - - def test_arccos_ufunc(self): - self.basic_ufunc_test(np.arccos, kinds="cf") + def test_arctan2_ufunc(self, inputs): + basic_ufunc_test(np.arctan2, inputs, kinds="f") - def test_arctan_ufunc(self): - self.basic_ufunc_test(np.arctan, kinds="cf") - def test_arctan2_ufunc(self): - self.basic_ufunc_test(np.arctan2, kinds="f") +class TestHypTrigUFuncs: + def test_hypot_ufunc(self, inputs): + basic_ufunc_test(np.hypot, inputs, kinds="f") + def test_sinh_ufunc(self, inputs): + basic_ufunc_test(np.sinh, inputs, kinds="cf") -class TestHypTrigUFuncs(CUDAUFuncTestBase): - def test_hypot_ufunc(self): - self.basic_ufunc_test(np.hypot, kinds="f") + def test_cosh_ufunc(self, inputs): + basic_ufunc_test(np.cosh, inputs, kinds="cf") - def test_sinh_ufunc(self): - self.basic_ufunc_test(np.sinh, kinds="cf") + def test_tanh_ufunc(self, inputs): + basic_ufunc_test(np.tanh, inputs, kinds="cf") - def test_cosh_ufunc(self): - self.basic_ufunc_test(np.cosh, kinds="cf") + def test_arcsinh_ufunc(self, inputs): + basic_ufunc_test(np.arcsinh, inputs, kinds="cf") - def test_tanh_ufunc(self): - self.basic_ufunc_test(np.tanh, kinds="cf") + def test_arccosh_ufunc(self, inputs): + basic_ufunc_test(np.arccosh, inputs, kinds="cf") - def test_arcsinh_ufunc(self): - self.basic_ufunc_test(np.arcsinh, kinds="cf") - - def test_arccosh_ufunc(self): - self.basic_ufunc_test(np.arccosh, kinds="cf") - - def test_arctanh_ufunc(self): + def test_arctanh_ufunc(self, inputs): # arctanh is only valid is only finite in the range ]-1, 1[ # This means that for any of the integer types it will produce # conversion from infinity/-infinity to integer. That's undefined @@ -475,109 +443,94 @@ def test_arctanh_ufunc(self): types.int64, ] - self.basic_ufunc_test(np.arctanh, skip_inputs=to_skip, kinds="cf") - - -class TestConversionUFuncs(CUDAUFuncTestBase): - def test_deg2rad_ufunc(self): - self.basic_ufunc_test(np.deg2rad, kinds="f") - - def test_rad2deg_ufunc(self): - self.basic_ufunc_test(np.rad2deg, kinds="f") - - def test_degrees_ufunc(self): - self.basic_ufunc_test(np.degrees, kinds="f") - - def test_radians_ufunc(self): - self.basic_ufunc_test(np.radians, kinds="f") + basic_ufunc_test(np.arctanh, inputs, skip_inputs=to_skip, kinds="cf") - ############################################################################ - # Comparison functions +class TestConversionUFuncs: + def test_deg2rad_ufunc(self, inputs): + basic_ufunc_test(np.deg2rad, inputs, kinds="f") -class TestComparisonUFuncs1(CUDAUFuncTestBase): - def test_greater_ufunc(self): - self.signed_unsigned_cmp_test(np.greater) + def test_rad2deg_ufunc(self, inputs): + basic_ufunc_test(np.rad2deg, inputs, kinds="f") - def test_greater_equal_ufunc(self): - self.signed_unsigned_cmp_test(np.greater_equal) + def test_degrees_ufunc(self, inputs): + basic_ufunc_test(np.degrees, inputs, kinds="f") - def test_less_ufunc(self): - self.signed_unsigned_cmp_test(np.less) + def test_radians_ufunc(self, inputs): + basic_ufunc_test(np.radians, inputs, kinds="f") - def test_less_equal_ufunc(self): - self.signed_unsigned_cmp_test(np.less_equal) - def test_not_equal_ufunc(self): - self.signed_unsigned_cmp_test(np.not_equal) +class TestComparisonUFuncs1: + def test_greater_ufunc(self, inputs): + signed_unsigned_cmp_test(np.greater, inputs) - def test_equal_ufunc(self): - self.signed_unsigned_cmp_test(np.equal) + def test_greater_equal_ufunc(self, inputs): + signed_unsigned_cmp_test(np.greater_equal, inputs) + def test_less_ufunc(self, inputs): + signed_unsigned_cmp_test(np.less, inputs) -class TestLogicalUFuncs(CUDAUFuncTestBase): - def test_logical_and_ufunc(self): - self.basic_ufunc_test(np.logical_and) + def test_less_equal_ufunc(self, inputs): + signed_unsigned_cmp_test(np.less_equal, inputs) - def test_logical_or_ufunc(self): - self.basic_ufunc_test(np.logical_or) + def test_not_equal_ufunc(self, inputs): + signed_unsigned_cmp_test(np.not_equal, inputs) - def test_logical_xor_ufunc(self): - self.basic_ufunc_test(np.logical_xor) + def test_equal_ufunc(self, inputs): + signed_unsigned_cmp_test(np.equal, inputs) - def test_logical_not_ufunc(self): - self.basic_ufunc_test(np.logical_not) +class TestLogicalUFuncs: + def test_logical_and_ufunc(self, inputs): + basic_ufunc_test(np.logical_and, inputs) -class TestMinmaxUFuncs(CUDAUFuncTestBase): - def test_maximum_ufunc(self): - self.basic_ufunc_test(np.maximum) + def test_logical_or_ufunc(self, inputs): + basic_ufunc_test(np.logical_or, inputs) - def test_minimum_ufunc(self): - self.basic_ufunc_test(np.minimum) + def test_logical_xor_ufunc(self, inputs): + basic_ufunc_test(np.logical_xor, inputs) - def test_fmax_ufunc(self): - self.basic_ufunc_test(np.fmax) + def test_logical_not_ufunc(self, inputs): + basic_ufunc_test(np.logical_not, inputs) - def test_fmin_ufunc(self): - self.basic_ufunc_test(np.fmin) +class TestMinmaxUFuncs: + def test_maximum_ufunc(self, inputs): + basic_ufunc_test(np.maximum, inputs) -class TestBitwiseUFuncs(CUDAUFuncTestBase): - def test_bitwise_and_ufunc(self): - self.basic_int_ufunc_test(np.bitwise_and) + def test_minimum_ufunc(self, inputs): + basic_ufunc_test(np.minimum, inputs) - def test_bitwise_or_ufunc(self): - self.basic_int_ufunc_test(np.bitwise_or) + def test_fmax_ufunc(self, inputs): + basic_ufunc_test(np.fmax, inputs) - def test_bitwise_xor_ufunc(self): - self.basic_int_ufunc_test(np.bitwise_xor) + def test_fmin_ufunc(self, inputs): + basic_ufunc_test(np.fmin, inputs) - def test_invert_ufunc(self): - self.basic_int_ufunc_test(np.invert) - def test_bitwise_not_ufunc(self): - self.basic_int_ufunc_test(np.bitwise_not) +class TestBitwiseUFuncs: + def test_bitwise_and_ufunc(self, inputs): + basic_int_ufunc_test(np.bitwise_and, inputs) - # Note: there is no entry for np.left_shift and np.right_shift - # because their implementations in NumPy have undefined behavior - # when the second argument is a negative. See the comment in - # numba/tests/test_ufuncs.py for more details. + def test_bitwise_or_ufunc(self, inputs): + basic_int_ufunc_test(np.bitwise_or, inputs) - ############################################################################ - # Mathematical Functions + def test_bitwise_xor_ufunc(self, inputs): + basic_int_ufunc_test(np.bitwise_xor, inputs) + def test_invert_ufunc(self, inputs): + basic_int_ufunc_test(np.invert, inputs) -class TestLogUFuncs(CUDAUFuncTestBase): - def test_log_ufunc(self): - self.basic_ufunc_test(np.log, kinds="cf") + def test_bitwise_not_ufunc(self, inputs): + basic_int_ufunc_test(np.bitwise_not, inputs) - def test_log2_ufunc(self): - self.basic_ufunc_test(np.log2, kinds="cf") - def test_log10_ufunc(self): - self.basic_ufunc_test(np.log10, kinds="cf") +class TestLogUFuncs: + def test_log_ufunc(self, inputs): + basic_ufunc_test(np.log, inputs, kinds="cf") + def test_log2_ufunc(self, inputs): + basic_ufunc_test(np.log2, inputs, kinds="cf") -if __name__ == "__main__": - unittest.main() + def test_log10_ufunc(self, inputs): + basic_ufunc_test(np.log10, inputs, kinds="cf") diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py index f8c198a2c..2428ce133 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py @@ -4,63 +4,57 @@ # Contents in this file are referenced from the sphinx-generated docs. # "magictoken" is used for markers as beginning and ending of example text. -import unittest -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import skip_on_cudasim @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") -class TestRandom(CUDATestCase): - def test_ex_3d_grid(self): - # magictoken.ex_3d_grid.begin - from numba import cuda - from numba.cuda.random import ( - create_xoroshiro128p_states, - xoroshiro128p_uniform_float32, - ) - import numpy as np - - @cuda.jit - def random_3d(arr, rng_states): - # Per-dimension thread indices and strides - startx, starty, startz = cuda.grid(3) - stridex, stridey, stridez = cuda.gridsize(3) - - # Linearized thread index - tid = (startz * stridey * stridex) + (starty * stridex) + startx - - # Use strided loops over the array to assign a random value to each entry - for i in range(startz, arr.shape[0], stridez): - for j in range(starty, arr.shape[1], stridey): - for k in range(startx, arr.shape[2], stridex): - arr[i, j, k] = xoroshiro128p_uniform_float32( - rng_states, tid - ) - - # Array dimensions - X, Y, Z = 701, 900, 719 - - # Block and grid dimensions - bx, by, bz = 8, 8, 8 - gx, gy, gz = 16, 16, 16 - - # Total number of threads - nthreads = bx * by * bz * gx * gy * gz - - # Initialize a state for each thread - rng_states = create_xoroshiro128p_states(nthreads, seed=1) - - # Generate random numbers - arr = cuda.device_array((X, Y, Z), dtype=np.float32) - random_3d[(gx, gy, gz), (bx, by, bz)](arr, rng_states) - # magictoken.ex_3d_grid.end - - # Some basic tests of the randomly-generated numbers - host_arr = arr.copy_to_host() - self.assertGreater(np.mean(host_arr), 0.49) - self.assertLess(np.mean(host_arr), 0.51) - self.assertTrue(np.all(host_arr <= 1.0)) - self.assertTrue(np.all(host_arr >= 0.0)) - - -if __name__ == "__main__": - unittest.main() +def test_random(): + # magictoken.ex_3d_grid.begin + from numba import cuda + from numba.cuda.random import ( + create_xoroshiro128p_states, + xoroshiro128p_uniform_float32, + ) + import numpy as np + + @cuda.jit + def random_3d(arr, rng_states): + # Per-dimension thread indices and strides + startx, starty, startz = cuda.grid(3) + stridex, stridey, stridez = cuda.gridsize(3) + + # Linearized thread index + tid = (startz * stridey * stridex) + (starty * stridex) + startx + + # Use strided loops over the array to assign a random value to each entry + for i in range(startz, arr.shape[0], stridez): + for j in range(starty, arr.shape[1], stridey): + for k in range(startx, arr.shape[2], stridex): + arr[i, j, k] = xoroshiro128p_uniform_float32( + rng_states, tid + ) + + # Array dimensions + X, Y, Z = 701, 900, 719 + + # Block and grid dimensions + bx, by, bz = 8, 8, 8 + gx, gy, gz = 16, 16, 16 + + # Total number of threads + nthreads = bx * by * bz * gx * gy * gz + + # Initialize a state for each thread + rng_states = create_xoroshiro128p_states(nthreads, seed=1) + + # Generate random numbers + arr = cuda.device_array((X, Y, Z), dtype=np.float32) + random_3d[(gx, gy, gz), (bx, by, bz)](arr, rng_states) + # magictoken.ex_3d_grid.end + + # Some basic tests of the randomly-generated numbers + host_arr = arr.copy_to_host() + assert np.mean(host_arr) > 0.49 + assert np.mean(host_arr) < 0.51 + assert np.all(host_arr <= 1.0) + assert np.all(host_arr >= 0.0) diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py b/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py index adff666ae..cba7a9411 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py @@ -1,31 +1,19 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-2-Clause -import unittest +# Contents in this file are referenced from the sphinx-generated docs. +# "ex_cuda_ufunc" is used for markers as beginning and ending of example text. from numba.cuda.testing import CUDATestCase, skip_on_cudasim from numba.cuda.tests.support import captured_stdout @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") -class TestUFunc(CUDATestCase): +def test_ufunc(): """ Test calling a UFunc """ - - def setUp(self): - # Prevent output from this test showing - # up when running the test suite - self._captured_stdout = captured_stdout() - self._captured_stdout.__enter__() - super().setUp() - - def tearDown(self): - # No exception type, value, or traceback - self._captured_stdout.__exit__(None, None, None) - super().tearDown() - - def test_ex_cuda_ufunc_call(self): + with captured_stdout(): # ex_cuda_ufunc.begin import numpy as np from numba import cuda @@ -47,7 +35,3 @@ def f(r, x): # the sin ufunc inside the kernel, and NumPy's sin ufunc np.testing.assert_allclose(r, np.sin(x)) # ex_cuda_ufunc.end - - -if __name__ == "__main__": - unittest.main() diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py index a895c97ed..725f19cf0 100644 --- a/numba_cuda/numba/cuda/tests/support.py +++ b/numba_cuda/numba/cuda/tests/support.py @@ -164,7 +164,8 @@ def import_dynamic(modname): def ignore_internal_warnings(): - """Use in testing within a ` warnings.catch_warnings` block to filter out + """ + Use in testing within a ` warnings.catch_warnings` block to filter out warnings that are unrelated/internally generated by Numba. """ # Filter out warnings from typeguard @@ -178,6 +179,20 @@ def ignore_internal_warnings(): ) +def reset_module_warnings(module): + """ + Reset the warnings registry of a module. This can be necessary + as the warnings module is buggy in that regard. + See http://bugs.python.org/issue4180 + """ + if isinstance(module, str): + module = sys.modules[module] + try: + del module.__warningregistry__ + except AttributeError: + pass + + @contextlib.contextmanager def override_config(name, value): """ @@ -246,19 +261,6 @@ class TestCase(unittest.TestCase): def random(self): return np.random.RandomState(42) - def reset_module_warnings(self, module): - """ - Reset the warnings registry of a module. This can be necessary - as the warnings module is buggy in that regard. - See http://bugs.python.org/issue4180 - """ - if isinstance(module, str): - module = sys.modules[module] - try: - del module.__warningregistry__ - except AttributeError: - pass - @contextlib.contextmanager def assertTypingError(self): """