diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 7fe2e3152..263dad037 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -10,7 +10,9 @@ dependencies: - c-compiler - cloudpickle - cmake>=4.0 +- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* - cuda-core>=0.3.2 +- cuda-core>=1.0.0 - cuda-cudart-dev - cuda-nvcc - cuda-version=12.9 @@ -28,7 +30,6 @@ dependencies: - ninja - numba-cuda>=0.22.1,<0.29.0 - numpy>=1.23,<3.0 -- nvidia-ml-py>=12 - pip - pkg-config - pre-commit diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index ccf7a8fe0..7840fc737 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -10,7 +10,9 @@ dependencies: - c-compiler - cloudpickle - cmake>=4.0 +- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* - cuda-core>=0.3.2 +- cuda-core>=1.0.0 - cuda-cudart-dev - cuda-nvcc - cuda-version=12.9 @@ -28,7 +30,6 @@ dependencies: - ninja - numba-cuda>=0.22.1,<0.29.0 - numpy>=1.23,<3.0 -- nvidia-ml-py>=12 - pip - pkg-config - pre-commit diff --git a/conda/environments/all_cuda-132_arch-aarch64.yaml b/conda/environments/all_cuda-132_arch-aarch64.yaml index 22fa68009..52acfa57f 100644 --- a/conda/environments/all_cuda-132_arch-aarch64.yaml +++ b/conda/environments/all_cuda-132_arch-aarch64.yaml @@ -10,7 +10,9 @@ dependencies: - c-compiler - cloudpickle - cmake>=4.0 +- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* - cuda-core>=0.3.2 +- cuda-core>=1.0.0 - cuda-cudart-dev - cuda-nvcc - cuda-version=13.2 @@ -28,7 +30,6 @@ dependencies: - ninja - numba-cuda>=0.22.1,<0.29.0 - numpy>=1.23,<3.0 -- nvidia-ml-py>=12 - pip - pkg-config - pre-commit diff --git a/conda/environments/all_cuda-132_arch-x86_64.yaml b/conda/environments/all_cuda-132_arch-x86_64.yaml index 58e0f71db..bf485bd0b 100644 --- a/conda/environments/all_cuda-132_arch-x86_64.yaml +++ b/conda/environments/all_cuda-132_arch-x86_64.yaml @@ -10,7 +10,9 @@ dependencies: - c-compiler - cloudpickle - cmake>=4.0 +- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* - cuda-core>=0.3.2 +- cuda-core>=1.0.0 - cuda-cudart-dev - cuda-nvcc - cuda-version=13.2 @@ -28,7 +30,6 @@ dependencies: - ninja - numba-cuda>=0.22.1,<0.29.0 - numpy>=1.23,<3.0 -- nvidia-ml-py>=12 - pip - pkg-config - pre-commit diff --git a/conda/recipes/ucxx/recipe.yaml b/conda/recipes/ucxx/recipe.yaml index 02ddcce1b..13b4a0573 100644 --- a/conda/recipes/ucxx/recipe.yaml +++ b/conda/recipes/ucxx/recipe.yaml @@ -87,8 +87,6 @@ outputs: host: - cuda-version =${{ cuda_version }} - cython >=3.2.2 - # 'nvidia-ml-py' provides the 'pynvml' module - - nvidia-ml-py>=12 - pip - python =${{ py_abi_min }} - python-abi3 ${{ py_abi_min }}.* @@ -98,16 +96,17 @@ outputs: - ucx - libucxx =${{ version }} - cuda-cudart-dev + - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* + - cuda-core>=1.0.0 run: - - cuda-core >=0.3.2 - numpy >=1.23,<3.0 - # 'nvidia-ml-py' provides the 'pynvml' module - - nvidia-ml-py>=12 - python - ucx >=1.18.0,<1.21.0 - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - libucxx =${{ version }} + - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* + - cuda-core>=1.0.0 run_constraints: - cupy >=13.6.0 ignore_run_exports: @@ -241,7 +240,7 @@ outputs: - setuptools>=77.0.0 - wheel run: - - cuda-core >=0.3.2 + - cuda-core>=1.0.0 - python - pyyaml >=6 - rapids-dask-dependency ${{ rapids_version }} diff --git a/dependencies.yaml b/dependencies.yaml index 1e5fc556c..4695ea7b0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -355,9 +355,8 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - &numpy numpy>=1.23,<3.0 - # 'nvidia-ml-py' provides the 'pynvml' module - - nvidia-ml-py>=12 - - cuda-core>=0.3.2 + - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.* + - cuda-core>=1.0.0 run_python_distributed_ucxx: common: - output_types: [conda, requirements, pyproject] diff --git a/python/ucxx/pyproject.toml b/python/ucxx/pyproject.toml index 1785196f9..971f65ef5 100644 --- a/python/ucxx/pyproject.toml +++ b/python/ucxx/pyproject.toml @@ -19,10 +19,10 @@ authors = [ license = "BSD-3-Clause" requires-python = ">=3.11" dependencies = [ - "cuda-core>=0.3.2", + "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*", + "cuda-core>=1.0.0", "libucxx==0.51.*,>=0.0.0a0", "numpy>=1.23,<3.0", - "nvidia-ml-py>=12", "rmm==26.8.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/ucxx/ucxx/__init__.py b/python/ucxx/ucxx/__init__.py index a727cbe41..5f7fd3e61 100644 --- a/python/ucxx/ucxx/__init__.py +++ b/python/ucxx/ucxx/__init__.py @@ -33,10 +33,7 @@ from .core import * # noqa: E402, F403 from .utils import get_address, get_ucxpy_logger # noqa: E402 -try: - import pynvml -except ImportError: - pynvml = None +from cuda.core import system # noqa: E402 _ucx_version = get_ucx_version() # noqa: F405 __ucx_min_version__ = "1.18.0" @@ -61,29 +58,15 @@ logger.info("Setting UCX_RNDV_FRAG_MEM_TYPE=cuda") os.environ["UCX_RNDV_FRAG_MEM_TYPE"] = "cuda" -if ( - pynvml is not None - and "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ - and _ucx_version >= (1, 12, 0) -): +if "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ and _ucx_version >= (1, 12, 0): try: - pynvml.nvmlInit() - device_count = pynvml.nvmlDeviceGetCount() + device_count = system.Device.get_device_count() large_bar1 = [False] * device_count - def _is_mig_device(handle): - try: - pynvml.nvmlDeviceGetMigMode(handle)[0] - except pynvml.NVMLError: - return False - return True - - for dev_idx in range(device_count): - handle = pynvml.nvmlDeviceGetHandleByIndex(dev_idx) - + for dev_idx, device in enumerate(system.Device.get_all_devices()): try: - total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total - except pynvml.NVMLError_NotSupported: + total_memory = device.memory_info.total + except system.NotSupportedError: total_memory = None # Ignore MIG devices and devices with no memory resource (i.e., only @@ -91,12 +74,12 @@ def _is_mig_device(handle): # now. Increasing `UCX_CUDA_COPY_MAX_REG_RATIO` should be thoroughly # tested, as it's not yet clear whether it would be safe to set `1.0` # for those instances too. - if _is_mig_device(handle) or total_memory is None: + if device.mig.is_mig_device or total_memory is None: continue try: - bar1_total = pynvml.nvmlDeviceGetBAR1MemoryInfo(handle).bar1Total - except pynvml.NVMLError_NotSupported: + bar1_total = device.bar1_memory_info.total + except system.NotSupportedError: # Bar1 access not supported on this device, set it to # zero (always lower than device memory). bar1_total = 0 @@ -108,9 +91,9 @@ def _is_mig_device(handle): logger.info("Setting UCX_CUDA_COPY_MAX_REG_RATIO=1.0") os.environ["UCX_CUDA_COPY_MAX_REG_RATIO"] = "1.0" except ( - pynvml.NVMLError_LibraryNotFound, - pynvml.NVMLError_DriverNotLoaded, - pynvml.NVMLError_Unknown, + system.NotFoundError, + system.DriverNotLoadedError, + system.UnknownError, ): pass diff --git a/python/ucxx/ucxx/_cuda_context.py b/python/ucxx/ucxx/_cuda_context.py index a4d32f518..abb54aeac 100644 --- a/python/ucxx/ucxx/_cuda_context.py +++ b/python/ucxx/ucxx/_cuda_context.py @@ -21,7 +21,7 @@ def _get_device_class(): return Device except ImportError as e: raise ImportError( - "CUDA context management requires cuda-core (cuda-core>=0.3.2)." + "CUDA context management requires cuda-core (cuda-core>=1.0.0)." ) from e diff --git a/python/ucxx/ucxx/_lib_async/utils_test.py b/python/ucxx/ucxx/_lib_async/utils_test.py index e6b6359e5..da959961d 100644 --- a/python/ucxx/ucxx/_lib_async/utils_test.py +++ b/python/ucxx/ucxx/_lib_async/utils_test.py @@ -14,6 +14,9 @@ from ucxx._lib_async.pytest_stash_keys import ASYNCIO_PLUGIN_TIMEOUT_STASH_KEY +from cuda.core import system + + normal_env = { "UCX_RNDV_SCHEME": "put_zcopy", "UCX_MEMTYPE_CACHE": "n", @@ -27,12 +30,7 @@ def set_env(): def get_num_gpus(): - import pynvml - - pynvml.nvmlInit() - ngpus = pynvml.nvmlDeviceGetCount() - pynvml.nvmlShutdown() - return ngpus + return system.Device.get_device_count() def get_cuda_devices():