diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index 7a4b46d972fe1..53bbc5c0e222d 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -32,6 +32,9 @@
 
 #define C2C_LINK_BAR0_OFFSET 0x1498
 #define HBM_TRAINING_BAR0_OFFSET 0x200BC
+#define HBM_TRAINING_BAR0_OFFSET_BLACKWELL_NEXT 0xAD00BC
+#define BOOT_42_OFFSET 0xA00
+#define BOOT_42_ARCHITECTURE_BLACKWELL 0x0000001A
 #define STATUS_READY 0xFF
 
 #define POLL_QUANTUM_MS 1000
@@ -245,13 +248,33 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
 	vfio_pci_core_close_device(core_vdev);
 }
 
+/*
+ * Select the BAR0 offset that is polled for the HBM training status.
+ *
+ * Reads the 32-bit register at BOOT_42_OFFSET and extracts bits 29:24
+ * (treated here as the architecture value). If that value is greater
+ * than BOOT_42_ARCHITECTURE_BLACKWELL, the function returns
+ * HBM_TRAINING_BAR0_OFFSET_BLACKWELL_NEXT; otherwise it returns
+ * HBM_TRAINING_BAR0_OFFSET.
+ */
+static u32 hbm_training_offset(void __iomem *io)
+{
+	u32 boot42 = ioread32(io + BOOT_42_OFFSET);
+	u32 arch_id = FIELD_GET(GENMASK(29, 24), boot42);
+
+	if (arch_id > BOOT_42_ARCHITECTURE_BLACKWELL)
+		return HBM_TRAINING_BAR0_OFFSET_BLACKWELL_NEXT;
+
+	return HBM_TRAINING_BAR0_OFFSET;
+}
+
 static int nvgrace_gpu_wait_device_ready(void __iomem *io)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);
+	u32 hbm_offset = hbm_training_offset(io);
 
 	do {
 		if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) &&
-		    (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY))
+		    (ioread32(io + hbm_offset) == STATUS_READY))
 			return 0;
 		msleep(POLL_QUANTUM_MS);
 	} while (!time_after(jiffies, timeout));