NVIDIA · isVoid · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py
@@ -104,6 +104,8 @@ def get_return_type(self, ty):
         Get the actual type of the return argument for Numba type *ty*.
         """
         restype = self.context.data_model_manager[ty].get_return_type()
+        if isinstance(restype, ir.VoidType):
+            return ir.IntType(8).as_pointer().as_pointer()
         return restype.as_pointer()
 
     def init_call_helper(self, builder):
@@ -414,6 +416,10 @@ def _make_call_helper(self, builder):
 
     def return_value(self, builder, retval):
         expected_type = builder.function.ftype.return_type
+
+        if isinstance(expected_type, ir.VoidType):
+            return builder.ret_void()
+
         actual_type = retval.type
 
         # If types don't match, we need to cast
@@ -473,7 +479,11 @@ def call_function(self, builder, callee, resty, argtys, args):
         # No status required as we don't support exceptions or a distinct None
         # value in a C ABI.
         status = None
-        out = self.context.get_returned_value(builder, resty, code)
+        ret_model = self.context.data_model_manager[resty]
+        if isinstance(ret_model.get_return_type(), ir.VoidType):
+            out = cgutils.get_null_value(ret_model.get_value_type())
+        else:
+            out = self.context.get_returned_value(builder, resty, code)
         return status, out
 
     def call_internal(self, builder, fndesc, sig, args):

diff --git a/numba_cuda/numba/cuda/datamodel/cuda_models.py b/numba_cuda/numba/cuda/datamodel/cuda_models.py
@@ -308,7 +308,6 @@ def __init__(self, dmm, fe_type):
 @register_default(types.Opaque)
 @register_default(types.PyObject)
 @register_default(types.RawPointer)
-@register_default(types.NoneType)
 @register_default(types.StringLiteral)
 @register_default(types.EllipsisType)
 @register_default(types.Function)

diff --git a/numba_cuda/numba/cuda/models.py b/numba_cuda/numba/cuda/models.py
@@ -7,7 +7,7 @@
 
 from numba.cuda.datamodel.registry import DataModelManager, register
 from numba.cuda.datamodel import PrimitiveModel
-from numba.cuda.datamodel.models import StructModel
+from numba.cuda.datamodel.models import DataModel, StructModel
 from numba.cuda.extending import core_models as models
 from numba.cuda import types
 from numba.cuda.types.ext_types import Dim3, GridGroup, CUDADispatcher, Bfloat16
@@ -49,6 +49,62 @@ def __init__(self, dmm, fe_type):
 register_model(CUDADispatcher)(models.OpaqueModel)
 
 
+@register_model(types.NoneType)
+class NoneTypeModel(DataModel):
+    """CUDA-target data model for ``types.NoneType`` (``types.void``).
+
+    Registering it in ``cuda_data_manager`` shadows the ``OpaqueModel``
+    entry that ``default_manager`` (from upstream numba) keeps for
+    ``types.NoneType``.  ``CUDATargetContext.__init__`` builds a
+    ``ChainMap`` with ``cuda_data_manager`` as its first map, so this
+    model wins the lookup while ``default_manager`` stays unmodified.
+
+    The return type and the value type are deliberately *different*,
+    because LLVM only permits ``void`` as a function return type and
+    never as a first-class value:
+
+    ``get_return_type()`` → ``ir.VoidType()``
+        Describes the return type placed in a function's
+        ``ir.FunctionType``.  With it the C-ABI calling convention emits
+        ``void foo(...)`` rather than the incorrect ``i8* foo(...)``,
+        fixing the ABI / LTO mismatch from GitHub issue #845.
+
+    ``get_value_type()`` → ``i8*`` (opaque pointer)
+        Used wherever a concrete LLVM value is required: variable
+        assignment, alloca, store/load, constants, boxing/unboxing of
+        ``None`` and the Numba-ABI return-slot pointer.  LLVM rejects
+        constants, pointers and stack slots of ``void``, so a null
+        ``i8*`` is the runtime stand-in for ``None``.
+    """
+
+    # ``i8*``, created once and handed out by ``get_value_type()``.
+    _ptr_type = ir.IntType(8).as_pointer()
+
+    def get_return_type(self):
+        return ir.VoidType()
+
+    def get_value_type(self):
+        return self._ptr_type
+
+    def as_argument(self, builder, value):
+        return value
+
+    def from_argument(self, builder, value):
+        return value
+
+    def as_data(self, builder, value):
+        return value
+
+    def from_data(self, builder, value):
+        return value
+
+    def as_return(self, builder, value):
+        return value
+
+    def from_return(self, builder, value):
+        return value
+
+
 @register_model(Bfloat16)
 class _model___nv_bfloat16(PrimitiveModel):
     def __init__(self, dmm, fe_type):

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -630,5 +630,112 @@ def kernel(x):
         np.testing.assert_equal(x, 42)
 
 
+@skip_on_cudasim("Data model inspection unsupported in the simulator")
+class TestNoneTypeModel(CUDATestCase):
+    """Exercise the ``NoneTypeModel`` data model (issue #845).
+
+    ``types.void`` / ``types.NoneType`` has to use ``ir.VoidType()`` as its
+    LLVM return type; otherwise the ABI disagrees with external C/C++
+    device code that is linked in through LTO.
+    """
+
+    def test_nonetype_model_return_type_is_void(self):
+        from llvmlite import ir
+        from numba.cuda.descriptor import cuda_target
+
+        manager = cuda_target.target_context.data_model_manager
+        void_model = manager.lookup(types.void)
+
+        self.assertEqual(type(void_model).__name__, "NoneTypeModel")
+        self.assertIsInstance(void_model.get_return_type(), ir.VoidType)
+
+    def test_nonetype_model_value_type_is_opaque_ptr(self):
+        from llvmlite import ir
+        from numba.cuda.descriptor import cuda_target
+
+        manager = cuda_target.target_context.data_model_manager
+        value_type = manager.lookup(types.void).get_value_type()
+
+        # The value representation is expected to be an LLVM pointer type.
+        self.assertIsInstance(value_type, ir.PointerType)
+
+    def test_cabi_void_device_function_signature(self):
+        consume = cuda.declare_device(
+            "consume", "void(int32)", link=consume_cabi_cu, abi="c"
+        )
+
+        @cuda.jit
+        def kernel(r, x):
+            i = cuda.grid(1)
+            if i < len(r):
+                consume(x[i])
+                r[i] = x[i] * 3
+
+        inputs = np.arange(10, dtype=np.int32)
+        outputs = np.empty_like(inputs)
+        kernel[1, 32](outputs, inputs)
+
+        llvm_texts = kernel.inspect_llvm().values()
+
+        # The call site must look like 'call void @consume(i32 ...)'.
+        call_pat = re.compile(r'call void @"?consume"?\s*\(\s*i32\b')
+        found = any(map(call_pat.search, llvm_texts))
+        self.assertTrue(
+            found,
+            "Did not find the expected 'call void @consume(i32 ...)' pattern in LLVM IR",
+        )
+
+    def test_void_device_function_numba_abi(self):
+        @cuda.jit(device=True)
+        def noop():
+            pass
+
+        @cuda.jit
+        def kernel(r):
+            i = cuda.grid(1)
+            if i < len(r):
+                noop()
+                r[i] = 42
+
+        output = np.zeros(10, dtype=np.int32)
+        kernel[1, 32](output)
+
+        callee_texts = noop.inspect_llvm().values()
+        caller_texts = kernel.inspect_llvm().values()
+
+        # Callee side: some module must contain a definition of 'noop'
+        # whose return type is void.
+        define_pat = re.compile(r'define\b[^@]*\bvoid\s+@"[^"]*noop[^"]*"')
+        defined_as_void = any(map(define_pat.search, callee_texts))
+        self.assertTrue(
+            defined_as_void,
+            "Device function 'noop' should be defined with void return type",
+        )
+
+        # Caller side: the kernel must invoke 'noop' through a
+        # 'call void' instruction.
+        call_pat = re.compile(r'call\s+void\s+@"[^"]*noop[^"]*"\s*\(')
+        called_as_void = any(map(call_pat.search, caller_texts))
+        self.assertTrue(
+            called_as_void,
+            "Kernel should call 'noop' with void return type",
+        )
+
+    def test_void_device_function_with_side_effect(self):
+        @cuda.jit(device=True)
+        def write_value(arr, idx, val):
+            arr[idx] = val
+
+        @cuda.jit
+        def kernel(arr):
+            i = cuda.grid(1)
+            if i < len(arr):
+                write_value(arr, i, i * 10)
+
+        result = np.zeros(10, dtype=np.int32)
+        kernel[1, 32](result)
+        np.testing.assert_equal(result, 10 * np.arange(10, dtype=np.int32))
+
+
 if __name__ == "__main__":
     unittest.main()