From 99a84a10bae5279ae6fb2fb02806ce6a8d63c197 Mon Sep 17 00:00:00 2001
From: Leonard Hong
Date: Sat, 18 Apr 2026 10:12:39 +0800
Subject: [PATCH] ggml-cuda: gate native ue4m3 conversion to sm_90+

Signed-off-by: Leonard Hong
---
 ggml/src/ggml-cuda/common.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index ddf50baf49..d2edcdf3c7 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -813,7 +813,7 @@ static __device__ __forceinline__ float ggml_cuda_ue4m3_to_fp32(uint8_t x) {
     const __hip_fp8_e4m3_fnuz xf = *reinterpret_cast<const __hip_fp8_e4m3_fnuz *>(&bits);
     return static_cast<float>(xf) / 2;
 #else
-#if defined(FP8_AVAILABLE) && !defined(GGML_USE_HIP)
+#if defined(FP8_AVAILABLE) && !defined(GGML_USE_HIP) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 900
     const uint32_t bits = x * (x != 0x7F && x != 0xFF); // Convert NaN to 0.0f to match CPU implementation.
     const __nv_fp8_e4m3 xf = *reinterpret_cast<const __nv_fp8_e4m3 *>(&bits);
     return static_cast<float>(xf) / 2;