From 81cb6d273f5bc676a2b93dd038407715abb6c07b Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 3 Apr 2026 11:21:13 -0700 Subject: [PATCH 1/5] Map types.NoneType to LLVM void return type instead of i8* Add NoneTypeModel for types.NoneType that returns ir.VoidType() from get_return_type() while keeping i8* for get_value_type() (since LLVM void cannot represent values). This ensures CABI device functions returning void have correct LLVM IR signatures without unnecessary return pointer arguments. Also update the Numba and CABI calling conventions to handle void return types: - Numba ABI: use a dummy i8** return slot for void functions - CABI: emit ret_void and synthesize null value for void returns Add tests verifying the data model, Numba ABI, and CABI void returns. Closes #845 Made-with: Cursor --- numba_cuda/numba/cuda/core/callconv.py | 12 ++- numba_cuda/numba/cuda/models.py | 52 ++++++++++++- .../cuda/tests/cudapy/test_device_func.py | 78 +++++++++++++++++++ 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index d00ac2895..07f35cd90 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -104,6 +104,8 @@ def get_return_type(self, ty): Get the actual type of the return argument for Numba type *ty*. 
""" restype = self.context.data_model_manager[ty].get_return_type() + if isinstance(restype, ir.VoidType): + return ir.IntType(8).as_pointer().as_pointer() return restype.as_pointer() def init_call_helper(self, builder): @@ -414,6 +416,10 @@ def _make_call_helper(self, builder): def return_value(self, builder, retval): expected_type = builder.function.ftype.return_type + + if isinstance(expected_type, ir.VoidType): + return builder.ret_void() + actual_type = retval.type # If types don't match, we need to cast @@ -473,7 +479,11 @@ def call_function(self, builder, callee, resty, argtys, args): # No status required as we don't support exceptions or a distinct None # value in a C ABI. status = None - out = self.context.get_returned_value(builder, resty, code) + ret_model = self.context.data_model_manager[resty] + if isinstance(ret_model.get_return_type(), ir.VoidType): + out = cgutils.get_null_value(ret_model.get_value_type()) + else: + out = self.context.get_returned_value(builder, resty, code) return status, out def call_internal(self, builder, fndesc, sig, args): diff --git a/numba_cuda/numba/cuda/models.py b/numba_cuda/numba/cuda/models.py index 862ff3a8f..5986474e1 100644 --- a/numba_cuda/numba/cuda/models.py +++ b/numba_cuda/numba/cuda/models.py @@ -7,7 +7,7 @@ from numba.cuda.datamodel.registry import DataModelManager, register from numba.cuda.datamodel import PrimitiveModel -from numba.cuda.datamodel.models import StructModel +from numba.cuda.datamodel.models import DataModel, StructModel from numba.cuda.extending import core_models as models from numba.cuda import types from numba.cuda.types.ext_types import Dim3, GridGroup, CUDADispatcher, Bfloat16 @@ -49,6 +49,56 @@ def __init__(self, dmm, fe_type): register_model(CUDADispatcher)(models.OpaqueModel) +@register_model(types.NoneType) +class NoneTypeModel(DataModel): + """Data model for ``types.NoneType`` (``types.void``). 
+ + This model intentionally returns *different* LLVM types from + ``get_value_type()`` and ``get_return_type()`` because LLVM treats + ``void`` as a function-return-only concept, not a first-class value: + + ``get_value_type()`` → ``i8*`` (opaque pointer) + Used whenever a concrete LLVM value is needed: variable + assignment, alloca, store/load, constants, boxing/unboxing of + ``None``, and the Numba-ABI return-slot pointer. LLVM forbids + creating constants, pointers, or stack slots of ``void``, so an + opaque ``i8*`` null serves as the runtime stand-in for ``None``. + + ``get_return_type()`` → ``ir.VoidType()`` + Used exclusively when building ``ir.FunctionType`` for a + function's return signature. Returning ``void`` here lets the + C-ABI calling convention emit ``void foo(...)`` instead of the + incorrect ``i8* foo(...)``, which fixes the ABI / LTO mismatch + described in GitHub issue #845. + """ + + _ptr_type = ir.IntType(8).as_pointer() + + def get_value_type(self): + return self._ptr_type + + def get_return_type(self): + return ir.VoidType() + + def as_data(self, builder, value): + return value + + def as_argument(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_data(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + @register_model(Bfloat16) class _model___nv_bfloat16(PrimitiveModel): def __init__(self, dmm, fe_type): diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 6698e6337..7a021b994 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -630,5 +630,83 @@ def kernel(x): np.testing.assert_equal(x, 42) +@skip_on_cudasim("Data model inspection unsupported in the simulator") +class TestNoneTypeModel(CUDATestCase): + """Tests for the 
NoneTypeModel data model (issue #845). + + Verifies that types.void / types.NoneType uses ir.VoidType() as the + LLVM return type, fixing ABI mismatches when linking with external + C/C++ device code via LTO. + """ + + def test_nonetype_model_return_type_is_void(self): + from llvmlite import ir + from numba.cuda.descriptor import cuda_target + + dm = cuda_target.target_context.data_model_manager + model = dm.lookup(types.void) + + self.assertEqual(type(model).__name__, "NoneTypeModel") + self.assertIsInstance(model.get_return_type(), ir.VoidType) + + def test_nonetype_model_value_type_is_opaque_ptr(self): + from llvmlite import ir + from numba.cuda.descriptor import cuda_target + + dm = cuda_target.target_context.data_model_manager + model = dm.lookup(types.void) + vt = model.get_value_type() + + self.assertIsInstance(vt, ir.PointerType) + + def test_cabi_void_device_function_signature(self): + consume = cuda.declare_device( + "consume", "void(int32)", link=consume_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + consume(x[i]) + r[i] = x[i] * 3 + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x * 3) + + def test_void_device_function_numba_abi(self): + @cuda.jit(device=True) + def noop(): + pass + + @cuda.jit + def kernel(r): + i = cuda.grid(1) + if i < len(r): + noop() + r[i] = 42 + + r = np.zeros(10, dtype=np.int32) + kernel[1, 32](r) + np.testing.assert_equal(r, 42) + + def test_void_device_function_with_side_effect(self): + @cuda.jit(device=True) + def write_value(arr, idx, val): + arr[idx] = val + + @cuda.jit + def kernel(arr): + i = cuda.grid(1) + if i < len(arr): + write_value(arr, i, i * 10) + + arr = np.zeros(10, dtype=np.int32) + kernel[1, 32](arr) + np.testing.assert_equal(arr, np.arange(10, dtype=np.int32) * 10) + + if __name__ == "__main__": unittest.main() From ca5a21006091e8fed41ee2ee44e738a728b2e656 Mon Sep 17 00:00:00 2001 From: Michael Wang 
Date: Fri, 3 Apr 2026 13:56:21 -0700 Subject: [PATCH 2/5] Document NoneTypeModel shadowing of default_manager's OpaqueModel Add a note to the NoneTypeModel docstring explaining that it shadows the OpaqueModel registration for types.NoneType in default_manager via ChainMap priority in cuda_data_manager. Made-with: Cursor --- numba_cuda/numba/cuda/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/numba_cuda/numba/cuda/models.py b/numba_cuda/numba/cuda/models.py index 5986474e1..2e87c925c 100644 --- a/numba_cuda/numba/cuda/models.py +++ b/numba_cuda/numba/cuda/models.py @@ -53,6 +53,13 @@ def __init__(self, dmm, fe_type): class NoneTypeModel(DataModel): """Data model for ``types.NoneType`` (``types.void``). + Shadows the ``OpaqueModel`` registration for ``types.NoneType`` that + exists in ``default_manager`` (from upstream numba or + ``cuda_models.py``). Because ``cuda_data_manager`` is the first map + in the ``ChainMap`` built by ``CUDATargetContext.__init__``, this + model takes priority without mutating the underlying + ``default_manager``. + This model intentionally returns *different* LLVM types from ``get_value_type()`` and ``get_return_type()`` because LLVM treats ``void`` as a function-return-only concept, not a first-class value: From 118bb74c556e1b6688fd7a2e5c10c805f5e468f4 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 3 Apr 2026 13:58:10 -0700 Subject: [PATCH 3/5] Remove stale NoneType registration from OpaqueModel in cuda_models.py NoneTypeModel in models.py (cuda_data_manager) is the single source of truth for the NoneType data model. Remove the OpaqueModel registration for types.NoneType in cuda_models.py so there is no incorrect fallback in default_manager for the standalone path. 
Made-with: Cursor --- numba_cuda/numba/cuda/datamodel/cuda_models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/numba_cuda/numba/cuda/datamodel/cuda_models.py b/numba_cuda/numba/cuda/datamodel/cuda_models.py index 6108ae8d0..147fbb878 100644 --- a/numba_cuda/numba/cuda/datamodel/cuda_models.py +++ b/numba_cuda/numba/cuda/datamodel/cuda_models.py @@ -308,7 +308,6 @@ def __init__(self, dmm, fe_type): @register_default(types.Opaque) @register_default(types.PyObject) @register_default(types.RawPointer) -@register_default(types.NoneType) @register_default(types.StringLiteral) @register_default(types.EllipsisType) @register_default(types.Function) From 2fffff5f77e5ff3da88ec9c4c8fbb0bee10ad849 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 3 Apr 2026 14:03:42 -0700 Subject: [PATCH 4/5] Update NoneTypeModel docstring to reflect cuda_models.py removal Made-with: Cursor --- numba_cuda/numba/cuda/models.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/numba_cuda/numba/cuda/models.py b/numba_cuda/numba/cuda/models.py index 2e87c925c..4ec54e9a2 100644 --- a/numba_cuda/numba/cuda/models.py +++ b/numba_cuda/numba/cuda/models.py @@ -54,11 +54,10 @@ class NoneTypeModel(DataModel): """Data model for ``types.NoneType`` (``types.void``). Shadows the ``OpaqueModel`` registration for ``types.NoneType`` that - exists in ``default_manager`` (from upstream numba or - ``cuda_models.py``). Because ``cuda_data_manager`` is the first map - in the ``ChainMap`` built by ``CUDATargetContext.__init__``, this - model takes priority without mutating the underlying - ``default_manager``. + exists in ``default_manager`` (from upstream numba). Because + ``cuda_data_manager`` is the first map in the ``ChainMap`` built by + ``CUDATargetContext.__init__``, this model takes priority without + mutating the underlying ``default_manager``. 
This model intentionally returns *different* LLVM types from ``get_value_type()`` and ``get_return_type()`` because LLVM treats From 62e12209a9ae9e56235e6913ddcf68c85ab6326b Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 6 Apr 2026 12:43:52 -0700 Subject: [PATCH 5/5] Assert void signatures in LLVM IR in void device function tests --- .../cuda/tests/cudapy/test_device_func.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 7a021b994..9fd677adf 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -674,7 +674,16 @@ def kernel(r, x): x = np.arange(10, dtype=np.int32) r = np.empty_like(x) kernel[1, 32](r, x) - np.testing.assert_equal(r, x * 3) + + irs = kernel.inspect_llvm() + + # Pattern to match 'call void @consume(i32 ...)' in the LLVM IR + pat = re.compile(r'call void @"?consume"?\s*\(\s*i32\b') + matched = any(pat.search(ir) for ir in irs.values()) + self.assertTrue( + matched, + "Did not find the expected 'call void @consume(i32 ...)' pattern in LLVM IR", + ) def test_void_device_function_numba_abi(self): @cuda.jit(device=True) @@ -690,7 +699,27 @@ def kernel(r): r = np.zeros(10, dtype=np.int32) kernel[1, 32](r) - np.testing.assert_equal(r, 42) + + callee_irs = noop.inspect_llvm() + caller_irs = kernel.inspect_llvm() + + callee_pat = re.compile(r'define\b[^@]*\bvoid\s+@"[^"]*noop[^"]*"') + callee_matched = any( + callee_pat.search(ir) for ir in callee_irs.values() + ) + self.assertTrue( + callee_matched, + "Device function 'noop' should be defined with void return type", + ) + + caller_pat = re.compile(r'call\s+void\s+@"[^"]*noop[^"]*"\s*\(') + caller_matched = any( + caller_pat.search(ir) for ir in caller_irs.values() + ) + self.assertTrue( + caller_matched, + "Kernel should call 'noop' with void return type", + ) def test_void_device_function_with_side_effect(self):
@cuda.jit(device=True)