diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml
index 7fe2e3152..263dad037 100644
--- a/conda/environments/all_cuda-129_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -10,7 +10,9 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=4.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
 - cuda-core>=0.3.2
+- cuda-core>=1.0.0
 - cuda-cudart-dev
 - cuda-nvcc
 - cuda-version=12.9
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit
diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml
index ccf7a8fe0..7840fc737 100644
--- a/conda/environments/all_cuda-129_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -10,7 +10,9 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=4.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
 - cuda-core>=0.3.2
+- cuda-core>=1.0.0
 - cuda-cudart-dev
 - cuda-nvcc
 - cuda-version=12.9
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit
diff --git a/conda/environments/all_cuda-132_arch-aarch64.yaml b/conda/environments/all_cuda-132_arch-aarch64.yaml
index 22fa68009..52acfa57f 100644
--- a/conda/environments/all_cuda-132_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-132_arch-aarch64.yaml
@@ -10,7 +10,9 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=4.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
 - cuda-core>=0.3.2
+- cuda-core>=1.0.0
 - cuda-cudart-dev
 - cuda-nvcc
 - cuda-version=13.2
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit
diff --git a/conda/environments/all_cuda-132_arch-x86_64.yaml b/conda/environments/all_cuda-132_arch-x86_64.yaml
index 58e0f71db..bf485bd0b 100644
--- a/conda/environments/all_cuda-132_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-132_arch-x86_64.yaml
@@ -10,7 +10,9 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=4.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
 - cuda-core>=0.3.2
+- cuda-core>=1.0.0
 - cuda-cudart-dev
 - cuda-nvcc
 - cuda-version=13.2
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit
diff --git a/conda/recipes/ucxx/recipe.yaml b/conda/recipes/ucxx/recipe.yaml
index 02ddcce1b..13b4a0573 100644
--- a/conda/recipes/ucxx/recipe.yaml
+++ b/conda/recipes/ucxx/recipe.yaml
@@ -87,8 +87,6 @@ outputs:
       host:
         - cuda-version =${{ cuda_version }}
         - cython >=3.2.2
-        # 'nvidia-ml-py' provides the 'pynvml' module
-        - nvidia-ml-py>=12
         - pip
         - python =${{ py_abi_min }}
         - python-abi3 ${{ py_abi_min }}.*
@@ -98,16 +96,17 @@ outputs:
         - ucx
         - libucxx =${{ version }}
         - cuda-cudart-dev
+        - cuda-bindings >=12.9.6,!=13.0.*,!=13.1.*
+        - cuda-core >=1.0.0
       run:
-        - cuda-core >=0.3.2
         - numpy >=1.23,<3.0
-        # 'nvidia-ml-py' provides the 'pynvml' module
-        - nvidia-ml-py>=12
         - python
         - ucx >=1.18.0,<1.21.0
         - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
         - ${{ pin_compatible("rmm", upper_bound="x.x") }}
         - libucxx =${{ version }}
+        - cuda-bindings >=12.9.6,!=13.0.*,!=13.1.*
+        - cuda-core >=1.0.0
       run_constraints:
         - cupy >=13.6.0
       ignore_run_exports:
@@ -241,7 +240,7 @@ outputs:
         - setuptools>=77.0.0
         - wheel
       run:
-        - cuda-core >=0.3.2
+        - cuda-core >=1.0.0
         - python
         - pyyaml >=6
         - rapids-dask-dependency ${{ rapids_version }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 1e5fc556c..4695ea7b0 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -355,9 +355,8 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - &numpy numpy>=1.23,<3.0
-          # 'nvidia-ml-py' provides the 'pynvml' module
-          - nvidia-ml-py>=12
-          - cuda-core>=0.3.2
+          - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+          - cuda-core>=1.0.0
   run_python_distributed_ucxx:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/python/ucxx/pyproject.toml b/python/ucxx/pyproject.toml
index 1785196f9..971f65ef5 100644
--- a/python/ucxx/pyproject.toml
+++ b/python/ucxx/pyproject.toml
@@ -19,10 +19,10 @@ authors = [
 license = "BSD-3-Clause"
 requires-python = ">=3.11"
 dependencies = [
-    "cuda-core>=0.3.2",
+    "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*",
+    "cuda-core>=1.0.0",
     "libucxx==0.51.*,>=0.0.0a0",
     "numpy>=1.23,<3.0",
-    "nvidia-ml-py>=12",
     "rmm==26.8.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/ucxx/ucxx/__init__.py b/python/ucxx/ucxx/__init__.py
index a727cbe41..5f7fd3e61 100644
--- a/python/ucxx/ucxx/__init__.py
+++ b/python/ucxx/ucxx/__init__.py
@@ -33,10 +33,7 @@
 from .core import *  # noqa: E402, F403
 from .utils import get_address, get_ucxpy_logger  # noqa: E402
 
-try:
-    import pynvml
-except ImportError:
-    pynvml = None
+from cuda.core import system  # noqa: E402
 
 _ucx_version = get_ucx_version()  # noqa: F405
 __ucx_min_version__ = "1.18.0"
@@ -61,29 +58,15 @@
     logger.info("Setting UCX_RNDV_FRAG_MEM_TYPE=cuda")
     os.environ["UCX_RNDV_FRAG_MEM_TYPE"] = "cuda"
 
-if (
-    pynvml is not None
-    and "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ
-    and _ucx_version >= (1, 12, 0)
-):
+if "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ and _ucx_version >= (1, 12, 0):
     try:
-        pynvml.nvmlInit()
-        device_count = pynvml.nvmlDeviceGetCount()
+        device_count = system.Device.get_device_count()
         large_bar1 = [False] * device_count
 
-        def _is_mig_device(handle):
-            try:
-                pynvml.nvmlDeviceGetMigMode(handle)[0]
-            except pynvml.NVMLError:
-                return False
-            return True
-
-        for dev_idx in range(device_count):
-            handle = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)
-
+        for dev_idx, device in enumerate(system.Device.get_all_devices()):
             try:
-                total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total
-            except pynvml.NVMLError_NotSupported:
+                total_memory = device.memory_info.total
+            except system.NotSupportedError:
                 total_memory = None
 
             # Ignore MIG devices and devices with no memory resource (i.e., only
@@ -91,12 +74,19 @@ def _is_mig_device(handle):
             # now. Increasing `UCX_CUDA_COPY_MAX_REG_RATIO` should be thoroughly
             # tested, as it's not yet clear whether it would be safe to set `1.0`
             # for those instances too.
-            if _is_mig_device(handle) or total_memory is None:
+            try:
+                is_mig_device = device.mig.is_mig_device
+            except system.NotSupportedError:
+                # MIG query not supported on this device, treat it as a
+                # non-MIG device instead of failing the import.
+                is_mig_device = False
+
+            if is_mig_device or total_memory is None:
                 continue
 
             try:
-                bar1_total = pynvml.nvmlDeviceGetBAR1MemoryInfo(handle).bar1Total
-            except pynvml.NVMLError_NotSupported:
+                bar1_total = device.bar1_memory_info.total
+            except system.NotSupportedError:
                 # Bar1 access not supported on this device, set it to
                 # zero (always lower than device memory).
                 bar1_total = 0
@@ -108,9 +98,10 @@ def _is_mig_device(handle):
             logger.info("Setting UCX_CUDA_COPY_MAX_REG_RATIO=1.0")
             os.environ["UCX_CUDA_COPY_MAX_REG_RATIO"] = "1.0"
     except (
-        pynvml.NVMLError_LibraryNotFound,
-        pynvml.NVMLError_DriverNotLoaded,
-        pynvml.NVMLError_Unknown,
+        system.LibraryNotFoundError,
+        system.NotFoundError,
+        system.DriverNotLoadedError,
+        system.UnknownError,
     ):
         pass
 
diff --git a/python/ucxx/ucxx/_cuda_context.py b/python/ucxx/ucxx/_cuda_context.py
index a4d32f518..abb54aeac 100644
--- a/python/ucxx/ucxx/_cuda_context.py
+++ b/python/ucxx/ucxx/_cuda_context.py
@@ -21,7 +21,7 @@ def _get_device_class():
             return Device
         except ImportError as e:
             raise ImportError(
-                "CUDA context management requires cuda-core (cuda-core>=0.3.2)."
+                "CUDA context management requires cuda-core (cuda-core>=1.0.0)."
             ) from e
 
 
diff --git a/python/ucxx/ucxx/_lib_async/utils_test.py b/python/ucxx/ucxx/_lib_async/utils_test.py
index e6b6359e5..da959961d 100644
--- a/python/ucxx/ucxx/_lib_async/utils_test.py
+++ b/python/ucxx/ucxx/_lib_async/utils_test.py
@@ -14,6 +14,9 @@
 
 from ucxx._lib_async.pytest_stash_keys import ASYNCIO_PLUGIN_TIMEOUT_STASH_KEY
 
+from cuda.core import system
+
+
 normal_env = {
     "UCX_RNDV_SCHEME": "put_zcopy",
     "UCX_MEMTYPE_CACHE": "n",
@@ -27,12 +30,7 @@ def set_env():
 
 
 def get_num_gpus():
-    import pynvml
-
-    pynvml.nvmlInit()
-    ngpus = pynvml.nvmlDeviceGetCount()
-    pynvml.nvmlShutdown()
-    return ngpus
+    return system.Device.get_device_count()
 
 
 def get_cuda_devices():