Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=12.9
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=12.9
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=13.1
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=13.1
Expand Down
6 changes: 2 additions & 4 deletions conda/recipes/ucxx/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,7 @@ outputs:
- ${{ pin_subpackage("libucxx", exact=True) }}
- cuda-cudart-dev
run:
- numba >=0.60.0,<0.62.0
- numba-cuda >=0.22.1
- cuda-core >=0.3.2
- numpy >=1.23,<3.0
# 'nvidia-ml-py' provides the 'pynvml' module
- nvidia-ml-py>=12
Expand Down Expand Up @@ -431,8 +430,7 @@ outputs:
- setuptools>=77.0.0
- wheel
run:
- numba >=0.60.0,<0.62.0
- numba-cuda >=0.22.1
- cuda-core >=0.3.2
- python
- pyyaml >=6
- rapids-dask-dependency ${{ rapids_version }}
Expand Down
67 changes: 34 additions & 33 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -297,73 +297,74 @@ dependencies:
- &numpy numpy>=1.23,<3.0
# 'nvidia-ml-py' provides the 'pynvml' module
- nvidia-ml-py>=12
- cuda-core>=0.3.2
run_python_distributed_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- rapids-dask-dependency==26.4.*,>=0.0.0a0
- pyyaml>=6
- cuda-core>=0.3.2
test_cpp:
common:
- output_types: conda
packages:
- *cmake_ver
test_python_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- cloudpickle
- pytest<9.0.0
- pytest-asyncio>=1.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- rapids-dask-dependency==26.4.*,>=0.0.0a0
- output_types: [conda]
packages:
- &numba_cuda numba-cuda>=0.22.1
- &numba_cuda_test numba-cuda>=0.22.1
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
cuda_suffixed: "true"
packages:
- &numba_cuda_cu12 numba-cuda[cu12]>=0.22.1
- &numba_cuda_cu12_test numba-cuda[cu12]>=0.22.1
- matrix:
cuda: "13.*"
cuda_suffixed: "true"
packages:
- &numba_cuda_cu13 numba-cuda[cu13]>=0.22.1
- &numba_cuda_cu13_test numba-cuda[cu13]>=0.22.1
# fallback to numba-cuda with no extra CUDA packages if 'cuda_suffixed' isn't true
- matrix:
packages:
- *numba_cuda
run_python_distributed_ucxx:
- *numba_cuda_test
test_python_distributed_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- rapids-dask-dependency==26.4.*,>=0.0.0a0
- pyyaml>=6
- *numpy
- pytest<9.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- output_types: [conda]
packages:
- *numba_cuda
- *numba_cuda_test
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
cuda_suffixed: "true"
packages:
- *numba_cuda_cu12
- *numba_cuda_cu12_test
- matrix:
cuda: "13.*"
cuda_suffixed: "true"
packages:
- *numba_cuda_cu13
# fallback to numba-cuda with no extra CUDA packages if 'cuda_suffixed' isn't true
- *numba_cuda_cu13_test
- matrix:
packages:
- *numba_cuda
test_cpp:
common:
- output_types: conda
packages:
- *cmake_ver
test_python_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- cloudpickle
- pytest<9.0.0
- pytest-asyncio>=1.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- rapids-dask-dependency==26.4.*,>=0.0.0a0
test_python_distributed_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- *numpy
- pytest<9.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- *numba_cuda_test
depends_on_cupy:
common:
- output_types: conda
Expand Down
37 changes: 12 additions & 25 deletions python/distributed-ucxx/distributed_ucxx/ucxx.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

"""
Expand Down Expand Up @@ -94,13 +94,12 @@ class CudaStream(Enum):


def synchronize_stream(stream: CudaStream = CudaStream.Default):
import numba.cuda
from ucxx._cuda_context import synchronize_default_stream

if stream == CudaStream.Default:
numba_stream = numba.cuda.default_stream()
synchronize_default_stream()
else:
raise ValueError("Unsupported stream")
numba_stream.synchronize()


class gc_disabled:
Expand Down Expand Up @@ -246,11 +245,11 @@ def init_once():
or ("cuda" in ucx_tls and "^cuda" not in ucx_tls)
):
try:
import numba.cuda
except ImportError:
from ucxx._cuda_context import ensure_cuda_context
except ImportError as e:
raise ImportError(
"CUDA support with UCX requires Numba for context management"
)
"CUDA support with UCX requires cuda-core for context management."
) from e

cuda_visible_device = get_device_index_and_uuid(
os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0]
Expand All @@ -261,7 +260,7 @@ def init_once():
pre_existing_cuda_context.device_info, os.getpid()
)

numba.cuda.current_context()
ensure_cuda_context(0)

cuda_context_created = has_cuda_context()
if (
Expand Down Expand Up @@ -291,7 +290,8 @@ def init_once():

pool_size_str = get_rmm_config("pool-size")

# Find the function, `cuda_array()`, to use when allocating new CUDA arrays
# Find the function, `cuda_array()`, to use when allocating new CUDA arrays.
# RMM is required for CUDA array allocation at runtime (numba is only for tests).
try:
import rmm

Expand All @@ -304,22 +304,9 @@ def device_array(n):
pool_allocator=True, managed_memory=False, initial_pool_size=pool_size
)
except ImportError:
try:
import numba.cuda

def numba_device_array(n):
a = numba.cuda.device_array((n,), dtype="u1")
weakref.finalize(a, numba.cuda.current_context)
return a

device_array = numba_device_array

except ImportError:

def device_array(n):
raise RuntimeError(
"In order to send/recv CUDA arrays, Numba or RMM is required"
)
def device_array(n):
raise RuntimeError("In order to send/recv CUDA arrays, RMM is required.")

if pool_size_str is not None:
logger.warning(
Expand Down
3 changes: 2 additions & 1 deletion python/distributed-ucxx/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ license = "BSD-3-Clause"
license-files = ["LICENSE"]
requires-python = ">=3.11"
dependencies = [
"numba-cuda>=0.22.1",
"cuda-core>=0.3.2",
"pyyaml>=6",
"rapids-dask-dependency==26.4.*,>=0.0.0a0",
"ucxx==0.49.*,>=0.0.0a0",
Expand All @@ -46,6 +46,7 @@ docs = [
test = [
"cudf==26.4.*,>=0.0.0a0",
"cupy-cuda13x>=13.6.0",
"numba-cuda>=0.22.1",
"numpy>=1.23,<3.0",
"pytest-rerunfailures!=16.0.0",
"pytest<9.0.0",
Expand Down
6 changes: 3 additions & 3 deletions python/ucxx/examples/basic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import argparse
Expand All @@ -12,9 +12,9 @@


def _create_cuda_context():
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context()
ensure_cuda_context(0)


async def _progress_coroutine(worker):
Expand Down
3 changes: 2 additions & 1 deletion python/ucxx/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ authors = [
license = "BSD-3-Clause"
requires-python = ">=3.11"
dependencies = [
"cuda-core>=0.3.2",
"libucxx==0.49.*,>=0.0.0a0",
"numba-cuda>=0.22.1",
"numpy>=1.23,<3.0",
"nvidia-ml-py>=12",
"rmm==26.4.*,>=0.0.0a0",
Expand All @@ -44,6 +44,7 @@ test = [
"cloudpickle",
"cudf==26.4.*,>=0.0.0a0",
"cupy-cuda13x>=13.6.0",
"numba-cuda>=0.22.1",
"pytest-asyncio>=1.0.0",
"pytest-rerunfailures!=16.0.0",
"pytest<9.0.0",
Expand Down
54 changes: 54 additions & 0 deletions python/ucxx/ucxx/_cuda_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

"""CUDA context management using cuda.core.

Provides helpers to ensure a CUDA context is created and to synchronize
the default stream.
"""


def _get_device_class():
"""Get the Device class from cuda.core."""
try:
from cuda.core import Device

return Device
except ImportError:
try:
from cuda.core.experimental import Device

return Device
except ImportError as e:
raise ImportError(
"CUDA context management requires cuda-core (cuda-core>=0.3.2)."
) from e


def ensure_cuda_context(device_id: int = 0) -> None:
    """Select ``device_id`` as the current CUDA device, creating its context.

    Looks up cuda-core's ``Device`` class, builds the device object for
    ``device_id`` and calls ``set_current()`` on it.

    Parameters
    ----------
    device_id : int, optional
        The CUDA device index (default: 0).

    Raises
    ------
    ImportError
        If cuda-core cannot be imported.
    """
    device_cls = _get_device_class()
    target_device = device_cls(device_id)
    target_device.set_current()


def synchronize_default_stream(device_id: int = 0) -> None:
    """Make ``device_id`` the current device and wait for it to finish work.

    Required when coordinating with UCX CUDA transfers (e.g. before send/recv
    of CUDA buffers), so that work queued on the default stream has completed
    before the transfer starts.

    Parameters
    ----------
    device_id : int, optional
        The CUDA device index (default: 0).

    Raises
    ------
    ImportError
        If cuda-core cannot be imported.

    Notes
    -----
    This does not act on whichever device happens to be current when it is
    called. It selects ``device_id`` as the current device, and that
    selection is not undone afterwards. The wait itself is issued through
    ``Device.sync()``, which cuda-core documents as a device-level
    synchronization rather than a wait on a single stream object.
    """
    device_cls = _get_device_class()
    device = device_cls(device_id)
    # Select the device before syncing; cuda-core documents ``sync()`` as
    # requiring an initialized device.
    device.set_current()
    device.sync()
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/_lib_async/continuous_ucx_progress.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause


Expand Down Expand Up @@ -70,9 +70,9 @@ def __eq__(self, other):


def _create_context():
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context()
ensure_cuda_context(0)


class ThreadMode(ProgressTask):
Expand Down
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/benchmarks/backends/ucxx_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

from argparse import Namespace
Expand All @@ -17,9 +17,9 @@


def _create_cuda_context(device):
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context(0)
ensure_cuda_context(device)


def _transfer_wireup(ep, server):
Expand Down
Loading
Loading