Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion numba_cuda/numba/cuda/core/callconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def get_return_type(self, ty):
Get the actual type of the return argument for Numba type *ty*.
"""
restype = self.context.data_model_manager[ty].get_return_type()
if isinstance(restype, ir.VoidType):
return ir.IntType(8).as_pointer().as_pointer()
Comment on lines +107 to +108
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to return a placeholder value type because the `void` type cannot be used as an argument type in a function declaration. This also aligns with how the rest of Numba passes arguments around internally.

return restype.as_pointer()

def init_call_helper(self, builder):
Expand Down Expand Up @@ -414,6 +416,10 @@ def _make_call_helper(self, builder):

def return_value(self, builder, retval):
expected_type = builder.function.ftype.return_type

if isinstance(expected_type, ir.VoidType):
return builder.ret_void()

actual_type = retval.type

# If types don't match, we need to cast
Expand Down Expand Up @@ -473,7 +479,11 @@ def call_function(self, builder, callee, resty, argtys, args):
# No status required as we don't support exceptions or a distinct None
# value in a C ABI.
status = None
out = self.context.get_returned_value(builder, resty, code)
ret_model = self.context.data_model_manager[resty]
if isinstance(ret_model.get_return_type(), ir.VoidType):
out = cgutils.get_null_value(ret_model.get_value_type())
else:
out = self.context.get_returned_value(builder, resty, code)
return status, out

def call_internal(self, builder, fndesc, sig, args):
Expand Down
1 change: 0 additions & 1 deletion numba_cuda/numba/cuda/datamodel/cuda_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ def __init__(self, dmm, fe_type):
@register_default(types.Opaque)
@register_default(types.PyObject)
@register_default(types.RawPointer)
@register_default(types.NoneType)
@register_default(types.StringLiteral)
@register_default(types.EllipsisType)
@register_default(types.Function)
Expand Down
58 changes: 57 additions & 1 deletion numba_cuda/numba/cuda/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from numba.cuda.datamodel.registry import DataModelManager, register
from numba.cuda.datamodel import PrimitiveModel
from numba.cuda.datamodel.models import StructModel
from numba.cuda.datamodel.models import DataModel, StructModel
from numba.cuda.extending import core_models as models
from numba.cuda import types
from numba.cuda.types.ext_types import Dim3, GridGroup, CUDADispatcher, Bfloat16
Expand Down Expand Up @@ -49,6 +49,62 @@ def __init__(self, dmm, fe_type):
register_model(CUDADispatcher)(models.OpaqueModel)


@register_model(types.NoneType)
class NoneTypeModel(DataModel):
    """CUDA data model for ``types.NoneType`` (``types.void``).

    Registering this model here shadows the ``OpaqueModel`` registration
    for ``types.NoneType`` that lives in ``default_manager`` (from upstream
    numba). ``cuda_data_manager`` is the first map in the ``ChainMap``
    built by ``CUDATargetContext.__init__``, so this model wins the lookup
    without mutating the underlying ``default_manager``.

    The value type and the return type are deliberately *different* LLVM
    types, because LLVM treats ``void`` as a function-return-only concept
    rather than a first-class value:

    ``get_value_type()`` → ``i8*`` (opaque pointer)
        Used wherever a concrete LLVM value is required: variable
        assignment, alloca, store/load, constants, boxing/unboxing of
        ``None``, and the Numba-ABI return-slot pointer. LLVM forbids
        constants, pointers, and stack slots of type ``void``, so a null
        ``i8*`` stands in for ``None`` at runtime.

    ``get_return_type()`` → ``ir.VoidType()``
        Used only when building the ``ir.FunctionType`` of a function's
        return signature. Reporting ``void`` lets the C-ABI calling
        convention emit ``void foo(...)`` rather than the incorrect
        ``i8* foo(...)``, fixing the ABI / LTO mismatch described in
        GitHub issue #845.
    """

    # Shared LLVM type (``i8*``) that represents a ``None`` value.
    _ptr_type = ir.IntType(8).as_pointer()

    # -- LLVM type queries -------------------------------------------------

    def get_value_type(self):
        """Return the ``i8*`` type used to carry ``None`` as a value."""
        return self._ptr_type

    def get_return_type(self):
        """Return LLVM ``void``, the type used in return signatures."""
        return ir.VoidType()

    # -- Representation conversions ----------------------------------------
    # Every conversion below hands *value* back unchanged.

    def as_data(self, builder, value):
        """Convert to the data representation (pass-through)."""
        return value

    def from_data(self, builder, value):
        """Convert from the data representation (pass-through)."""
        return value

    def as_argument(self, builder, value):
        """Convert to the argument representation (pass-through)."""
        return value

    def from_argument(self, builder, value):
        """Convert from the argument representation (pass-through)."""
        return value

    def as_return(self, builder, value):
        """Convert to the return representation (pass-through)."""
        return value

    def from_return(self, builder, value):
        """Convert from the return representation (pass-through)."""
        return value


@register_model(Bfloat16)
class _model___nv_bfloat16(PrimitiveModel):
def __init__(self, dmm, fe_type):
Expand Down
107 changes: 107 additions & 0 deletions numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,5 +630,112 @@ def kernel(x):
np.testing.assert_equal(x, 42)


@skip_on_cudasim("Data model inspection unsupported in the simulator")
class TestNoneTypeModel(CUDATestCase):
    """Exercise the ``NoneTypeModel`` data model (issue #845).

    These tests check that ``types.void`` / ``types.NoneType`` maps to
    ``ir.VoidType()`` as its LLVM return type, which is what fixes the ABI
    mismatch when linking against external C/C++ device code via LTO.
    """

    @staticmethod
    def _void_model():
        """Look up the data model registered for ``types.void``."""
        from numba.cuda.descriptor import cuda_target

        manager = cuda_target.target_context.data_model_manager
        return manager.lookup(types.void)

    @staticmethod
    def _found_in_any(pattern, llvm_irs):
        """Return True if *pattern* matches any IR text in *llvm_irs*."""
        return any(pattern.search(text) for text in llvm_irs.values())

    def test_nonetype_model_return_type_is_void(self):
        """The model for ``types.void`` reports an LLVM ``void`` return."""
        from llvmlite import ir as llvm_ir

        model = self._void_model()

        self.assertEqual(type(model).__name__, "NoneTypeModel")
        self.assertIsInstance(model.get_return_type(), llvm_ir.VoidType)

    def test_nonetype_model_value_type_is_opaque_ptr(self):
        """The model for ``types.void`` uses a pointer as its value type."""
        from llvmlite import ir as llvm_ir

        value_type = self._void_model().get_value_type()

        self.assertIsInstance(value_type, llvm_ir.PointerType)

    def test_cabi_void_device_function_signature(self):
        """A C-ABI ``void`` device function is called as ``call void``."""
        # NOTE(review): a reviewer pointed out that this check alone does
        # not guarantee the function was inlined, and that this code used
        # to work before the change; they asked for tests that verify the
        # IR signature for both the callee and the caller.
        consume = cuda.declare_device(
            "consume", "void(int32)", link=consume_cabi_cu, abi="c"
        )

        @cuda.jit
        def kernel(r, x):
            i = cuda.grid(1)
            if i < len(r):
                consume(x[i])
                r[i] = x[i] * 3

        x = np.arange(10, dtype=np.int32)
        r = np.empty_like(x)
        kernel[1, 32](r, x)

        # Look for 'call void @consume(i32 ...)' in the kernel's LLVM IR.
        call_pattern = re.compile(r'call void @"?consume"?\s*\(\s*i32\b')
        self.assertTrue(
            self._found_in_any(call_pattern, kernel.inspect_llvm()),
            "Did not find the expected 'call void @consume(i32 ...)' pattern in LLVM IR",
        )

    def test_void_device_function_numba_abi(self):
        """A jitted ``void`` device function is void at definition and call."""

        @cuda.jit(device=True)
        def noop():
            pass

        @cuda.jit
        def kernel(r):
            i = cuda.grid(1)
            if i < len(r):
                noop()
                r[i] = 42

        r = np.zeros(10, dtype=np.int32)
        kernel[1, 32](r)

        # Callee side: the definition of 'noop' must return void.
        definition_pattern = re.compile(
            r'define\b[^@]*\bvoid\s+@"[^"]*noop[^"]*"'
        )
        self.assertTrue(
            self._found_in_any(definition_pattern, noop.inspect_llvm()),
            "Device function 'noop' should be defined with void return type",
        )

        # Caller side: the kernel must call 'noop' as a void function.
        call_pattern = re.compile(r'call\s+void\s+@"[^"]*noop[^"]*"\s*\(')
        self.assertTrue(
            self._found_in_any(call_pattern, kernel.inspect_llvm()),
            "Kernel should call 'noop' with void return type",
        )

    def test_void_device_function_with_side_effect(self):
        """A ``void`` device function still performs its array writes."""

        @cuda.jit(device=True)
        def write_value(arr, idx, val):
            arr[idx] = val

        @cuda.jit
        def kernel(arr):
            i = cuda.grid(1)
            if i < len(arr):
                write_value(arr, i, i * 10)

        arr = np.zeros(10, dtype=np.int32)
        kernel[1, 32](arr)

        expected = np.arange(10, dtype=np.int32) * 10
        np.testing.assert_equal(arr, expected)


if __name__ == "__main__":
unittest.main()