diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index d00ac2895..07f35cd90 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -104,6 +104,8 @@ def get_return_type(self, ty): Get the actual type of the return argument for Numba type *ty*. """ restype = self.context.data_model_manager[ty].get_return_type() + if isinstance(restype, ir.VoidType): + return ir.IntType(8).as_pointer().as_pointer() return restype.as_pointer() def init_call_helper(self, builder): @@ -414,6 +416,10 @@ def _make_call_helper(self, builder): def return_value(self, builder, retval): expected_type = builder.function.ftype.return_type + + if isinstance(expected_type, ir.VoidType): + return builder.ret_void() + actual_type = retval.type # If types don't match, we need to cast @@ -473,7 +479,11 @@ def call_function(self, builder, callee, resty, argtys, args): # No status required as we don't support exceptions or a distinct None # value in a C ABI. status = None - out = self.context.get_returned_value(builder, resty, code) + ret_model = self.context.data_model_manager[resty] + if isinstance(ret_model.get_return_type(), ir.VoidType): + out = cgutils.get_null_value(ret_model.get_value_type()) + else: + out = self.context.get_returned_value(builder, resty, code) return status, out def call_internal(self, builder, fndesc, sig, args): diff --git a/numba_cuda/numba/cuda/datamodel/cuda_models.py b/numba_cuda/numba/cuda/datamodel/cuda_models.py index 6108ae8d0..147fbb878 100644 --- a/numba_cuda/numba/cuda/datamodel/cuda_models.py +++ b/numba_cuda/numba/cuda/datamodel/cuda_models.py @@ -308,7 +308,6 @@ def __init__(self, dmm, fe_type): @register_default(types.Opaque) @register_default(types.PyObject) @register_default(types.RawPointer) -@register_default(types.NoneType) @register_default(types.StringLiteral) @register_default(types.EllipsisType) @register_default(types.Function) diff --git a/numba_cuda/numba/cuda/models.py b/numba_cuda/numba/cuda/models.py index 862ff3a8f..4ec54e9a2 100644 --- a/numba_cuda/numba/cuda/models.py +++ b/numba_cuda/numba/cuda/models.py @@ -7,7 +7,7 @@ from numba.cuda.datamodel.registry import DataModelManager, register from numba.cuda.datamodel import PrimitiveModel -from numba.cuda.datamodel.models import StructModel +from numba.cuda.datamodel.models import DataModel, StructModel from numba.cuda.extending import core_models as models from numba.cuda import types from numba.cuda.types.ext_types import Dim3, GridGroup, CUDADispatcher, Bfloat16 @@ -49,6 +49,62 @@ def __init__(self, dmm, fe_type): register_model(CUDADispatcher)(models.OpaqueModel) +@register_model(types.NoneType) +class NoneTypeModel(DataModel): + """Data model for ``types.NoneType`` (``types.void``). + + Shadows the ``OpaqueModel`` registration for ``types.NoneType`` that + exists in ``default_manager`` (from upstream numba). Because + ``cuda_data_manager`` is the first map in the ``ChainMap`` built by + ``CUDATargetContext.__init__``, this model takes priority without + mutating the underlying ``default_manager``. + + This model intentionally returns *different* LLVM types from + ``get_value_type()`` and ``get_return_type()`` because LLVM treats + ``void`` as a function-return-only concept, not a first-class value: + + ``get_value_type()`` → ``i8*`` (opaque pointer) + Used whenever a concrete LLVM value is needed: variable + assignment, alloca, store/load, constants, boxing/unboxing of + ``None``, and the Numba-ABI return-slot pointer. LLVM forbids + creating constants, pointers, or stack slots of ``void``, so an + opaque ``i8*`` null serves as the runtime stand-in for ``None``. + + ``get_return_type()`` → ``ir.VoidType()`` + Used exclusively when building ``ir.FunctionType`` for a + function's return signature. Returning ``void`` here lets the + C-ABI calling convention emit ``void foo(...)`` instead of the + incorrect ``i8* foo(...)``, which fixes the ABI / LTO mismatch + described in GitHub issue #845. + """ + + _ptr_type = ir.IntType(8).as_pointer() + + def get_value_type(self): + return self._ptr_type + + def get_return_type(self): + return ir.VoidType() + + def as_data(self, builder, value): + return value + + def as_argument(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_data(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + @register_model(Bfloat16) class _model___nv_bfloat16(PrimitiveModel): def __init__(self, dmm, fe_type): diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 6698e6337..9fd677adf 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -630,5 +630,112 @@ def kernel(x): np.testing.assert_equal(x, 42) +@skip_on_cudasim("Data model inspection unsupported in the simulator") +class TestNoneTypeModel(CUDATestCase): + """Tests for the NoneTypeModel data model (issue #845). + + Verifies that types.void / types.NoneType uses ir.VoidType() as the + LLVM return type, fixing ABI mismatches when linking with external + C/C++ device code via LTO. + """ + + def test_nonetype_model_return_type_is_void(self): + from llvmlite import ir + from numba.cuda.descriptor import cuda_target + + dm = cuda_target.target_context.data_model_manager + model = dm.lookup(types.void) + + self.assertEqual(type(model).__name__, "NoneTypeModel") + self.assertIsInstance(model.get_return_type(), ir.VoidType) + + def test_nonetype_model_value_type_is_opaque_ptr(self): + from llvmlite import ir + from numba.cuda.descriptor import cuda_target + + dm = cuda_target.target_context.data_model_manager + model = dm.lookup(types.void) + vt = model.get_value_type() + + self.assertIsInstance(vt, ir.PointerType) + + def test_cabi_void_device_function_signature(self): + consume = cuda.declare_device( + "consume", "void(int32)", link=consume_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + consume(x[i]) + r[i] = x[i] * 3 + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + + irs = kernel.inspect_llvm() + + # Pattern to match 'call void @consume(i32 ...)' in the LLVM IR + pat = re.compile(r'call void @"?consume"?\s*\(\s*i32\b') + matched = any(pat.search(ir) for ir in irs.values()) + self.assertTrue( + matched, + "Did not find the expected 'call void @consume(i32 ...)' pattern in LLVM IR", + ) + + def test_void_device_function_numba_abi(self): + @cuda.jit(device=True) + def noop(): + pass + + @cuda.jit + def kernel(r): + i = cuda.grid(1) + if i < len(r): + noop() + r[i] = 42 + + r = np.zeros(10, dtype=np.int32) + kernel[1, 32](r) + + callee_irs = noop.inspect_llvm() + caller_irs = kernel.inspect_llvm() + + callee_pat = re.compile(r'define\b[^@]*\bvoid\s+@"[^"]*noop[^"]*"') + callee_matched = any( + callee_pat.search(ir) for ir in callee_irs.values() + ) + self.assertTrue( + callee_matched, + "Device function 'noop' should be defined with void return type", + ) + + caller_pat = re.compile(r'call\s+void\s+@"[^"]*noop[^"]*"\s*\(') + caller_matched = any( + caller_pat.search(ir) for ir in caller_irs.values() + ) + self.assertTrue( + caller_matched, + "Kernel should call 'noop' with void return type", + ) + + def test_void_device_function_with_side_effect(self): + @cuda.jit(device=True) + def write_value(arr, idx, val): + arr[idx] = val + + @cuda.jit + def kernel(arr): + i = cuda.grid(1) + if i < len(arr): + write_value(arr, i, i * 10) + + arr = np.zeros(10, dtype=np.int32) + kernel[1, 32](arr) + np.testing.assert_equal(arr, np.arange(10, dtype=np.int32) * 10) + + if __name__ == "__main__": unittest.main()