From 8f5bd4c55584097e8ff1f64c142eb5220472ba26 Mon Sep 17 00:00:00 2001 From: Luca Versari Date: Thu, 1 Jan 2026 13:07:02 +0100 Subject: [PATCH] Clarify documentation around "undefined" bits in intrinsics. --- crates/core_arch/src/x86/avx.rs | 18 +++++++++-- crates/core_arch/src/x86/avx512f.rs | 48 +++++++++++++++++++++++++---- crates/core_arch/src/x86/sse.rs | 2 +- 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 94f075894a..e0e01ae6d0 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -2946,7 +2946,11 @@ pub const fn _mm256_castsi256_si128(a: __m256i) -> __m128i { } /// Casts vector of type __m128 to type __m256; -/// the upper 128 bits of the result are undefined. +/// the upper 128 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256) #[inline] @@ -2960,7 +2964,11 @@ pub const fn _mm256_castps128_ps256(a: __m128) -> __m256 { } /// Casts vector of type __m128d to type __m256d; -/// the upper 128 bits of the result are undefined. +/// the upper 128 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256) #[inline] @@ -2974,7 +2982,11 @@ pub const fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { } /// Casts vector of type __m128i to type __m256i; -/// the upper 128 bits of the result are undefined. +/// the upper 128 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256) #[inline] diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 5da633f50e..76b7539383 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -27728,7 +27728,13 @@ pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m12 } } -/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621) #[inline] @@ -27745,7 +27751,13 @@ pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 { } } -/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623) #[inline] @@ -27840,7 +27852,13 @@ pub const fn _mm512_castps_si512(a: __m512) -> __m512i { unsafe { transmute(a) } } -/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609) #[inline] @@ -27851,7 +27869,13 @@ pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) } } -/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611) #[inline] @@ -27928,7 +27952,13 @@ pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i { unsafe { transmute(a) } } -/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629) #[inline] @@ -27939,7 +27969,13 @@ pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i { unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } } -/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. +/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate. +/// +/// In the Intel documentation, the upper bits are declared to be "undefined". +/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically +/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`]. +/// +/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633) #[inline] diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 55392f9944..751f969e50 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1947,7 +1947,7 @@ pub fn _mm_prefetch(p: *const i8) { } } -/// Returns vector of type __m128 with indeterminate elements.with indetermination elements. +/// Returns vector of type __m128 with indeterminate elements. /// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically /// picks some valid value and is not equivalent to [`mem::MaybeUninit`]. /// In practice, this is typically equivalent to [`mem::zeroed`].