From 4aa045bc41ae198959a7a60590f74989993b79b0 Mon Sep 17 00:00:00 2001
From: Evan Lezar <elezar@nvidia.com>
Date: Fri, 20 Mar 2026 23:31:29 +0100
Subject: [PATCH 1/2] feat(gpu): add WSL CDI spec watcher and set
 deviceIDStrategy to index

On WSL2 hosts the NVIDIA device plugin generates CDI specs that cannot
be used directly by k3s containerd since they include a single device
named "all" rather than one named after the device's index or UUID.

Add a background watch_cdi_specs function to cluster-entrypoint.sh that:
- detects WSL2 via /dev/dxg presence
- handles specs already present at gateway restart
- uses inotifywait to watch for new/updated specs
- transforms the spec with jq (cdiVersion=0.5.0, devices[0].name="0")

Add inotify-tools and jq to the cluster image apt-get install block to
support the watcher.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
---
 deploy/docker/Dockerfile.images               |  2 +
 deploy/docker/cluster-entrypoint.sh           | 44 +++++++++++++++++++
 .../nvidia-device-plugin-helmchart.yaml       |  6 +++
 3 files changed, 52 insertions(+)

diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images
index 9cc50085..28f250e8 100644
--- a/deploy/docker/Dockerfile.images
+++ b/deploy/docker/Dockerfile.images
@@ -229,6 +229,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     iptables \
     mount \
     dnsutils \
+    inotify-tools \
+    jq \
     && rm -rf /var/lib/apt/lists/*
 
 COPY --from=k3s /bin/ /bin/
diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh
index 84b8cf9a..e18884ce 100644
--- a/deploy/docker/cluster-entrypoint.sh
+++ b/deploy/docker/cluster-entrypoint.sh
@@ -317,6 +317,45 @@ fi
 # the k3s manifests directory so the Helm controller installs it automatically.
 # The nvidia-container-runtime binary is already on PATH (baked into the image)
 # so k3s registers the "nvidia" RuntimeClass at startup.
+CDI_SPEC_DIR="/var/run/cdi"  # directory watched by watch_cdi_specs
+CDI_WSL_INPUT="${CDI_SPEC_DIR}/k8s.device-plugin.nvidia.com-gpu.json"  # spec read by transform_wsl_cdi_spec
+CDI_WSL_OUTPUT="${CDI_SPEC_DIR}/openshell-wsl.json"  # transformed spec written by transform_wsl_cdi_spec
+
+transform_wsl_cdi_spec() {
+    local tmp="${CDI_WSL_OUTPUT}.tmp.$$"
+    if jq '.cdiVersion = "0.5.0" | .devices[0].name = "0"' \
+            "$CDI_WSL_INPUT" > "$tmp" 2>/dev/null; then
+        mv "$tmp" "$CDI_WSL_OUTPUT"
+        echo "CDI: transformed WSL spec -> $CDI_WSL_OUTPUT"
+    else
+        rm -f "$tmp"
+        echo "CDI: failed to transform WSL spec (jq error)"
+    fi
+}
+
+# Transform any existing WSL CDI spec, then block on inotifywait and re-transform it on every update.
+watch_cdi_specs() {
+    if ! command -v inotifywait > /dev/null 2>&1; then
+        echo "CDI: inotifywait not found, skipping spec watcher" >&2
+        return 1
+    fi
+
+    mkdir -p "$CDI_SPEC_DIR"
+    # Process spec already present at startup (e.g. gateway restart)
+    if [ -f "$CDI_WSL_INPUT" ] && grep -q '/dev/dxg' "$CDI_WSL_INPUT" 2>/dev/null; then
+        transform_wsl_cdi_spec
+    fi
+
+    # Watch for the spec to appear or be updated (-q: no startup banner, fatal errors still shown)
+    inotifywait -q -m -e close_write,moved_to --format '%f' "$CDI_SPEC_DIR" \
+    | while IFS= read -r filename; do
+        if [ "$filename" = "${CDI_WSL_INPUT##*/}" ] \
+                && grep -q '/dev/dxg' "$CDI_WSL_INPUT" 2>/dev/null; then
+            transform_wsl_cdi_spec
+        fi
+    done
+}
+
 if [ "${GPU_ENABLED:-}" = "true" ]; then
     echo "GPU support enabled — deploying NVIDIA device plugin"
 
@@ -327,6 +366,11 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then
             cp "$manifest" "$K3S_MANIFESTS/"
         done
     fi
+
+    if [ -c /dev/dxg ]; then
+        echo "WSL2 GPU detected (/dev/dxg present) — starting CDI spec watcher"
+        watch_cdi_specs &
+    fi
 fi
 
 # ---------------------------------------------------------------------------
diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
index 088562ac..497169d5 100644
--- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
+++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml
@@ -12,6 +12,11 @@
 # (which requires nvidia.com/gpu.present=true) is overridden to empty
 # so it schedules on any node without requiring NFD/GFD labels.
 #
+# The device plugin is set to deviceIDStrategy=index so that device names are
+# numeric indices (e.g. "0"). This simplifies the conversion of CDI specs on WSL
+# systems, where we need to rename the *.nvidia.com/gpu=all device that is
+# generated by the device plugin to *.nvidia.com/gpu=0.
+#
 # k3s auto-detects nvidia-container-runtime on PATH and registers the "nvidia"
 # RuntimeClass automatically, so no manual RuntimeClass manifest is needed.
 
@@ -28,6 +33,7 @@ spec:
   createNamespace: true
   valuesContent: |-
     runtimeClassName: nvidia
+    deviceIDStrategy: index
     gfd:
       enabled: false
     nfd:

From 31ea52005c4be64e9978f45b49580fb9c20fe0c0 Mon Sep 17 00:00:00 2001
From: Evan Lezar <elezar@nvidia.com>
Date: Fri, 20 Mar 2026 23:31:58 +0100
Subject: [PATCH 2/2] docs(gateway): document WSL2 CDI spec watcher in
 gateway-single-node architecture

Signed-off-by: Evan Lezar <elezar@nvidia.com>
---
 architecture/gateway-single-node.md | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/architecture/gateway-single-node.md b/architecture/gateway-single-node.md
index 57aebd3a..bb051cd4 100644
--- a/architecture/gateway-single-node.md
+++ b/architecture/gateway-single-node.md
@@ -272,6 +272,28 @@ Writes `/etc/rancher/k3s/registries.yaml` from `REGISTRY_HOST`, `REGISTRY_ENDPOI
 
 Copies bundled manifests from `/opt/openshell/manifests/` to `/var/lib/rancher/k3s/server/manifests/`. This is needed because the volume mount on `/var/lib/rancher/k3s` overwrites any files baked into that path at image build time.
 
+### WSL2 CDI spec watcher
+
+On WSL2 hosts with GPU support, the NVIDIA device plugin generates CDI (Container Device Interface) specs that k3s/containerd cannot consume directly. Two incompatibilities exist: the `cdiVersion` field uses a version that containerd rejects, and the device is named `"all"` instead of the numeric index `"0"` that containerd expects. The entrypoint solves this with a background watcher that transforms the spec in real time.
+
+Three shell variables define the file paths:
+
+| Variable | Value |
+|---|---|
+| `CDI_SPEC_DIR` | `/var/run/cdi` |
+| `CDI_WSL_INPUT` | `/var/run/cdi/k8s.device-plugin.nvidia.com-gpu.json` (device plugin output) |
+| `CDI_WSL_OUTPUT` | `/var/run/cdi/openshell-wsl.json` (transformed spec for containerd) |
+
+`transform_wsl_cdi_spec()` uses `jq` to rewrite the input spec: it sets `cdiVersion` to `"0.5.0"` and renames `devices[0].name` from `"all"` to `"0"`. The write is atomic -- `jq` outputs to a PID-suffixed temp file, then `mv` replaces the output path.
+
+`watch_cdi_specs()` runs as a background process:
+
+1. Creates `CDI_SPEC_DIR` if missing.
+2. Checks for a spec already present at startup (handles gateway container restarts). If found and it references `/dev/dxg`, transforms it immediately.
+3. Enters a persistent `inotifywait` loop watching for `close_write` or `moved_to` events on the CDI spec directory. When the device plugin writes or moves a new spec matching the expected filename, and the spec references `/dev/dxg` (confirming WSL2 context), it triggers `transform_wsl_cdi_spec()`.
+
+The watcher only starts when both `GPU_ENABLED=true` and `/dev/dxg` exists (a character device present only on WSL2 hosts). It runs in the background (`watch_cdi_specs &`) before `exec k3s`.
+
 ### Image configuration overrides
 
 When environment variables are set, the entrypoint modifies the HelmChart manifest at `/var/lib/rancher/k3s/server/manifests/openshell-helmchart.yaml`:
@@ -299,7 +321,7 @@ GPU support is part of the single-node gateway bootstrap path rather than a sepa
 - `openshell gateway start --gpu` threads a boolean deploy option through `crates/openshell-cli`, `crates/openshell-bootstrap`, and `crates/openshell-bootstrap/src/docker.rs`.
 - When enabled, the cluster container is created with Docker `DeviceRequests`, which is the API equivalent of `docker run --gpus all`.
 - `deploy/docker/Dockerfile.images` installs NVIDIA Container Toolkit packages in a dedicated Ubuntu stage and copies the runtime binaries, config, and `libnvidia-container` shared libraries into the final Ubuntu-based cluster image.
-- `deploy/docker/cluster-entrypoint.sh` checks `GPU_ENABLED=true` and copies GPU-only manifests from `/opt/openshell/gpu-manifests/` into k3s's manifests directory.
+- `deploy/docker/cluster-entrypoint.sh` checks `GPU_ENABLED=true` and copies GPU-only manifests from `/opt/openshell/gpu-manifests/` into k3s's manifests directory. On WSL2 hosts (detected by `/dev/dxg`), the entrypoint also starts a background CDI spec watcher that transforms device plugin specs for k3s/containerd compatibility (see [WSL2 CDI spec watcher](#wsl2-cdi-spec-watcher) under Entrypoint Script).
 - `deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml` installs the NVIDIA device plugin chart, currently pinned to `0.18.2`. NFD and GFD are disabled; the device plugin's default `nodeAffinity` (which requires `feature.node.kubernetes.io/pci-10de.present=true` or `nvidia.com/gpu.present=true` from NFD/GFD) is overridden to empty so the DaemonSet schedules on the single-node cluster without requiring those labels.
 - k3s auto-detects `nvidia-container-runtime` on `PATH`, registers the `nvidia` containerd runtime, and creates the `nvidia` `RuntimeClass` automatically.
 - The OpenShell Helm chart grants the gateway service account cluster-scoped read access to `node.k8s.io/runtimeclasses` and core `nodes` so GPU sandbox admission can verify both the `nvidia` `RuntimeClass` and allocatable GPU capacity before creating a sandbox.
@@ -316,6 +338,11 @@ Host GPU drivers & NVIDIA Container Toolkit
 
 The expected smoke test is a plain pod requesting `nvidia.com/gpu: 1` with `runtimeClassName: nvidia` and running `nvidia-smi`.
 
+### WSL2 GPU specifics
+
+On WSL2 hosts, the GPU is exposed through `/dev/dxg` rather than native NVIDIA device nodes. In the case where the NVIDIA device plugin is configured to use a CDI-based device list strategy, the generated CDI spec (`/var/run/cdi/k8s.device-plugin.nvidia.com-gpu.json`) needs to be transformed to list a device with name `"0"` instead of `"all"`. The cluster entrypoint runs a background `inotifywait`-based watcher that detects these specs and writes a corrected version to `/var/run/cdi/openshell-wsl.json`. See [WSL2 CDI spec watcher](#wsl2-cdi-spec-watcher) for implementation details.
+
+
 ## Remote Image Transfer
 
 ```mermaid