From f0fd6f78d48064972a5b7e73f219e0f99661b76e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 18 Feb 2026 21:28:03 +0100
Subject: [PATCH 01/21] cleanup long shuffle mask literals

---
 .../src/arm_shared/neon/generated.rs          | 240 +++---------------
 .../spec/neon/arm_shared.spec.yml             |  72 +++---
 2 files changed, 72 insertions(+), 240 deletions(-)
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 45c83b880e907..13dee7a6e6e0e 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -10149,7 +10149,7 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"]
@@ -10174,13 +10174,7 @@ pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
@@ -10341,7 +10335,7 @@ pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
 )]
 pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
@@ -10364,7 +10358,7 @@ pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
 )]
 pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
@@ -10387,7 +10381,7 @@ pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
 )]
 pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
@@ -10410,13 +10404,7 @@ pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
 )]
 pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
@@ -10439,13 +10427,7 @@ pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
 )]
 pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
@@ -10468,13 +10450,7 @@ pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
 )]
 pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
@@ -10497,13 +10473,7 @@ pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
 )]
 pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
@@ -10526,13 +10496,7 @@ pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
 )]
 pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
@@ -10555,13 +10519,7 @@ pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
 )]
 pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
@@ -10584,16 +10542,7 @@ pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
 )]
 pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
@@ -10616,16 +10565,7 @@ pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
 )]
 pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
@@ -10648,16 +10588,7 @@ pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
 )]
 pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
@@ -10728,7 +10659,7 @@ pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"]
@@ -10753,13 +10684,7 @@ pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
@@ -10920,7 +10845,7 @@ pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
@@ -10943,7 +10868,7 @@ pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
 )]
 pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
@@ -10966,7 +10891,7 @@ pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
 )]
 pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
@@ -10989,13 +10914,7 @@ pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
 )]
 pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
@@ -11018,13 +10937,7 @@ pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
 )]
 pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
@@ -11047,13 +10960,7 @@ pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
 )]
 pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
@@ -11076,13 +10983,7 @@ pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
 )]
 pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
@@ -11105,13 +11006,7 @@ pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
 )]
 pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
@@ -11134,13 +11029,7 @@ pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
 )]
 pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
@@ -11163,16 +11052,7 @@ pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
 )]
 pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
@@ -11195,16 +11075,7 @@ pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
 )]
 pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
@@ -11227,16 +11098,7 @@ pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
 )]
 pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
@@ -35894,7 +35756,7 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
     unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+        let b: int16x4_t = simd_shuffle!(b, b, [N as u32; 4]);
         vqdmull_s16(a, b)
     }
 }
@@ -35920,7 +35782,7 @@ pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
 pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
     unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+        let b: int32x2_t = simd_shuffle!(b, b, [N as u32; 2]);
         vqdmull_s32(a, b)
     }
 }
@@ -37480,17 +37342,7 @@ pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
         fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
     }
-    unsafe {
-        _vqrshrn_n_u16(
-            a,
-            const {
-                uint16x8_t([
-                    -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                    -N as u16,
-                ])
-            },
-        )
-    }
+    unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
 }
 #[doc = "Unsigned signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
@@ -37506,12 +37358,7 @@ pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
         fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
     }
-    unsafe {
-        _vqrshrn_n_u32(
-            a,
-            const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) },
-        )
-    }
+    unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
 }
 #[doc = "Unsigned signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
@@ -37527,7 +37374,7 @@ pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
         fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
     }
-    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) }
+    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
 }
 #[doc = "Unsigned signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
@@ -38922,17 +38769,7 @@ pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
         fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
     }
-    unsafe {
-        _vqshrn_n_u16(
-            a,
-            const {
-                uint16x8_t([
-                    -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                    -N as u16,
-                ])
-            },
-        )
-    }
+    unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
@@ -38948,12 +38785,7 @@ pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
         fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
     }
-    unsafe {
-        _vqshrn_n_u32(
-            a,
-            const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) },
-        )
-    }
+    unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
@@ -38969,7 +38801,7 @@ pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
         fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
     }
-    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) }
+    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 8e10fff984ac7..90cd0c80a1c18 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1439,12 +1439,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]']
+      - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32; 16]']
+      - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32; 16]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1463,12 +1463,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]']
+      - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32; 8]']
+      - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1487,12 +1487,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32; 8]']
+      - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1511,12 +1511,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32; 4]']
+      - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1538,8 +1538,8 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1578,8 +1578,8 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -7675,7 +7675,7 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
       - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
@@ -7695,7 +7695,7 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, int32x2_t, int64x2_t, '[N as u32, N as u32]']
+      - [int32x2_t, int32x2_t, int64x2_t, '[N as u32; 2]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '1']]
       - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
@@ -8320,9 +8320,9 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
-      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
-      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
+      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16; 8]) }']
+      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32; 4]) }']
+      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64; 2]) }']
     compose:
       - FnCall: [static_assert!, ["{type[2]}"]]
       - LLVMLink:
@@ -10789,9 +10789,9 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
-      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
-      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
+      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16; 8]) }']
+      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32; 4]) }']
+      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64; 2]) }']
     compose:
       - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']]
       - LLVMLink:

From c0c9d025d5a0edcc34bf58645a74e76d4f6b5456 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 18 Feb 2026 21:23:00 +0100
Subject: [PATCH 02/21] use `intrinsics::simd` for interleaving store

---
 .../src/arm_shared/neon/generated.rs          | 192 +++---------------
 .../stdarch/crates/core_arch/src/macros.rs    |  67 ++++++
 .../spec/neon/arm_shared.spec.yml             |  86 +++-----
 3 files changed, 118 insertions(+), 227 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 45c83b880e907..37c7ef8fea887 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -66001,14 +66001,7 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2f32.p0"
-        )]
-        fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8);
-    }
-    _vst2_f32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
@@ -66020,14 +66013,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4f32.p0"
-        )]
-        fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8);
-    }
-    _vst2q_f32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
@@ -66039,14 +66025,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v8i8.p0"
-        )]
-        fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
-    }
-    _vst2_s8(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
@@ -66058,14 +66037,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v16i8.p0"
-        )]
-        fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
-    }
-    _vst2q_s8(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
@@ -66077,14 +66049,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4i16.p0"
-        )]
-        fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8);
-    }
-    _vst2_s16(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
@@ -66096,14 +66061,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v8i16.p0"
-        )]
-        fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8);
-    }
-    _vst2q_s16(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
@@ -66115,14 +66073,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2i32.p0"
-        )]
-        fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8);
-    }
-    _vst2_s32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
@@ -66134,14 +66085,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4i32.p0"
-        )]
-        fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8);
-    }
-    _vst2q_s32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
@@ -67233,11 +67177,7 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")]
-        fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32);
-    }
-    _vst3_f32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
@@ -67249,11 +67189,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")]
-        fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32);
-    }
-    _vst3q_f32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
@@ -67265,11 +67201,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")]
-        fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32);
-    }
-    _vst3_s8(a as _, b.0, b.1, b.2, 1)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
@@ -67281,11 +67213,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")]
-        fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32);
-    }
-    _vst3q_s8(a as _, b.0, b.1, b.2, 1)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
@@ -67297,11 +67225,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")]
-        fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32);
-    }
-    _vst3_s16(a as _, b.0, b.1, b.2, 2)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
@@ -67313,11 +67237,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")]
-        fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32);
-    }
-    _vst3q_s16(a as _, b.0, b.1, b.2, 2)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
@@ -67329,11 +67249,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")]
-        fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32);
-    }
-    _vst3_s32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
@@ -67345,11 +67261,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")]
-        fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32);
-    }
-    _vst3q_s32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
@@ -68712,14 +68624,7 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2f32.p0"
-        )]
-        fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8);
-    }
-    _vst4_f32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
@@ -68731,14 +68636,7 @@ pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4f32.p0"
-        )]
-        fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8);
-    }
-    _vst4q_f32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
@@ -68750,14 +68648,7 @@ pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v8i8.p0"
-        )]
-        fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
-    }
-    _vst4_s8(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
@@ -68769,14 +68660,7 @@ pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v16i8.p0"
-        )]
-        fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
-    }
-    _vst4q_s8(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
@@ -68788,14 +68672,7 @@ pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4i16.p0"
-        )]
-        fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8);
-    }
-    _vst4_s16(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
@@ -68807,14 +68684,7 @@ pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v8i16.p0"
-        )]
-        fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8);
-    }
-    _vst4q_s16(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
@@ -68826,14 +68696,7 @@ pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2i32.p0"
-        )]
-        fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8);
-    }
-    _vst4_s32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
@@ -68845,14 +68708,7 @@ pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4i32.p0"
-        )]
-        fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8);
-    }
-    _vst4q_s32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs
index 9f6922efeeb7d..5a582fe17772b 100644
--- a/library/stdarch/crates/core_arch/src/macros.rs
+++ b/library/stdarch/crates/core_arch/src/macros.rs
@@ -187,6 +187,17 @@ macro_rules! simd_masked_store {
     };
 }
 
+/// The first N indices `[0, 1, 2, ...]`.
+pub(crate) const fn identity<const N: usize>() -> [u32; N] {
+    let mut out = [0u32; N];
+    let mut i = 0usize;
+    while i < N {
+        out[i] = i as u32;
+        i += 1;
+    }
+    out
+}
+
 /// The first N even indices `[0, 2, 4, ...]`.
 pub(crate) const fn even<const N: usize>() -> [u32; N] {
     let mut out = [0u32; N];
@@ -277,3 +288,59 @@ macro_rules! deinterleaving_load {
 
 #[allow(unused)]
 pub(crate) use deinterleaving_load;
+
+pub(crate) const fn interleave_mask<const LANES: usize, const N: usize, const K: usize>()
+-> [u32; LANES] {
+    let mut out = [0u32; LANES];
+    let mut j = 0usize;
+    while j < LANES {
+        out[j] = ((j % K) * N + j / K) as u32;
+        j += 1;
+    }
+    out
+}
+
+#[allow(unused)]
+macro_rules! interleaving_store {
+    ($elem:ty, $lanes:literal, 2, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::interleave_mask;
+        use $crate::core_arch::simd::Simd;
+
+        type W = Simd<$elem, { $lanes * 2 }>;
+        let w: W = simd_shuffle!($v.0, $v.1, interleave_mask::<{ $lanes * 2 }, $lanes, 2>());
+        $crate::ptr::write_unaligned($ptr as *mut W, w);
+    }};
+
+    // N = 3
+    ($elem:ty, $lanes:literal, 3, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::{identity, interleave_mask};
+        use $crate::core_arch::simd::Simd;
+
+        let v0v1: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
+        let v2v2: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
+
+        type W = Simd<$elem, { $lanes * 3 }>;
+        let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
+        $crate::ptr::write_unaligned($ptr as *mut W, w);
+    }};
+
+    // N = 4
+    ($elem:ty, $lanes:literal, 4, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::{identity, interleave_mask};
+        use $crate::core_arch::simd::Simd;
+
+        let v0v1: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
+        let v2v3: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.2, $v.3, identity::<{ $lanes * 2 }>());
+
+        type W = Simd<$elem, { $lanes * 4 }>;
+        let w: W = simd_shuffle!(v0v1, v2v3, interleave_mask::<{ $lanes * 4 }, $lanes, 4>());
+        $crate::ptr::write_unaligned($ptr as *mut W, w);
+    }};
+}
+
+#[allow(unused)]
+pub(crate) use interleaving_store;
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 8e10fff984ac7..f890b39f071d2 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -5113,26 +5113,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x2_t, int8x8_t]
-      - [i16, int16x4x2_t, int16x4_t]
-      - [i32, int32x2x2_t, int32x2_t]
-      - [i8, int8x16x2_t, int8x16_t]
-      - [i16, int16x8x2_t, int16x8_t]
-      - [i32, int32x4x2_t, int32x4_t]
-      - [f32, float32x2x2_t, float32x2_t]
-      - [f32, float32x4x2_t, float32x4_t]
+      - [i8, int8x8x2_t, "8"]
+      - [i16, int16x4x2_t, "4"]
+      - [i32, int32x2x2_t, "2"]
+      - [i8, int8x16x2_t, "16"]
+      - [i16, int16x8x2_t, "8"]
+      - [i32, int32x4x2_t, "4"]
+      - [f32, float32x2x2_t, "2"]
+      - [f32, float32x4x2_t, "4"]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
-
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -5571,27 +5561,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x3_t, int8x8_t, '1']
-      - [i16, int16x4x3_t, int16x4_t, '2']
-      - [i32, int32x2x3_t, int32x2_t, '4']
-      - [i8, int8x16x3_t, int8x16_t, '1']
-      - [i16, int16x8x3_t, int16x8_t, '2']
-      - [i32, int32x4x3_t, int32x4_t, '4']
-      - [f32, float32x2x3_t, float32x2_t, '4']
-      - [f32, float32x4x3_t, float32x4_t, '4']
+      - [i8, int8x8x3_t, '8']
+      - [i16, int16x4x3_t, '4']
+      - [i32, int32x2x3_t, '2']
+      - [i8, int8x16x3_t, '16']
+      - [i16, int16x8x3_t, '8']
+      - [i32, int32x4x3_t, '4']
+      - [f32, float32x2x3_t, '2']
+      - [f32, float32x4x3_t, '4']
     compose:
-      - LLVMLink:
-          name: 'vst3.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
 
 
   - name: "vst3{neon_type[1].nox}"
@@ -6114,27 +6093,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x4_t, int8x8_t]
-      - [i16, int16x4x4_t, int16x4_t]
-      - [i32, int32x2x4_t, int32x2_t]
-      - [i8, int8x16x4_t, int8x16_t]
-      - [i16, int16x8x4_t, int16x8_t]
-      - [i32, int32x4x4_t, int32x4_t]
-      - [f32, float32x2x4_t, float32x2_t]
-      - [f32, float32x4x4_t, float32x4_t]
+      - [i8, int8x8x4_t, "8"]
+      - [i16, int16x4x4_t, "4"]
+      - [i32, int32x2x4_t, "2"]
+      - [i8, int8x16x4_t, "16"]
+      - [i16, int16x8x4_t, "8"]
+      - [i32, int32x4x4_t, "4"]
+      - [f32, float32x2x4_t, "2"]
+      - [f32, float32x4x4_t, "4"]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
 
 
   - name: "vst4{neon_type[1].nox}"

From e219383c8f00a56d61e3101ac4feca383c4ecaca Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 18 Feb 2026 21:40:50 +0100
Subject: [PATCH 03/21] use `intrinsics::simd` for interleaving store of
 `int64x1`

---
 .../src/arm_shared/neon/generated.rs          | 52 ++-----------
 .../spec/neon/arm_shared.spec.yml             | 75 ++-----------------
 2 files changed, 12 insertions(+), 115 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 37c7ef8fea887..62201edfdaf25 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -66809,11 +66809,7 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")]
-        fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32);
-    }
-    _vst2_s64(a as _, b.0, b.1, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
@@ -66825,14 +66821,7 @@ pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v1i64.p0"
-        )]
-        fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8);
-    }
-    _vst2_s64(b.0, b.1, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
@@ -68065,14 +68054,7 @@ pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v1i64.p0"
-        )]
-        fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8);
-    }
-    _vst3_s64(b.0, b.1, b.2, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
@@ -68084,11 +68066,7 @@ pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")]
-        fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32);
-    }
-    _vst3_s64(a as _, b.0, b.1, b.2, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"]
@@ -69432,18 +69410,7 @@ pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")]
-        fn _vst4_s64(
-            ptr: *mut i8,
-            a: int64x1_t,
-            b: int64x1_t,
-            c: int64x1_t,
-            d: int64x1_t,
-            size: i32,
-        );
-    }
-    _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
@@ -69455,14 +69422,7 @@ pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v1i64.p0"
-        )]
-        fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8);
-    }
-    _vst4_s64(b.0, b.1, b.2, b.3, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index f890b39f071d2..23145d6d6692e 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -5049,17 +5049,7 @@ intrinsics:
     types:
       - [i64, int64x1x2_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst2.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: arm
-      - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -5092,16 +5082,7 @@ intrinsics:
     types:
       - [i64, int64x1x2_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -5416,17 +5397,7 @@ intrinsics:
     types:
       - [i64, int64x1x3_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -5461,18 +5432,7 @@ intrinsics:
     types:
       - [i64, int64x1x3_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst3.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -5832,19 +5792,7 @@ intrinsics:
     types:
       - [i64, int64x1x4_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -5858,18 +5806,7 @@ intrinsics:
     types:
       - [i64, int64x1x4_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"

From afb890e048205ba28b090c6607721dee2cb636bc Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 18 Feb 2026 21:51:54 +0100
Subject: [PATCH 04/21] use `intrinsics::simd` for interleaving store of f16

---
 library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 2df4ba7443314..2fbd2255aa0fd 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -1050,6 +1050,14 @@ mod tests {
         test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
         test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
         test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
+
+        test_vld2_f16_x2(f16, 8, float16x4x2_t, vst2_f16, vld2_f16);
+        test_vld2_f16_x3(f16, 12, float16x4x3_t, vst3_f16, vld3_f16);
+        test_vld2_f16_x4(f16, 16, float16x4x4_t, vst4_f16, vld4_f16);
+
+        test_vld2q_f16_x2(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16);
+        test_vld3q_f16_x3(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16);
+        test_vld4q_f16_x4(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16);
     }
 
     macro_rules! wide_store_load_roundtrip_aes {

From 2b656f99a5cd12722831a1a92365a8000b61602a Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 18 Feb 2026 22:27:23 +0100
Subject: [PATCH 05/21] use `intrinsics::simd` for aarch64 interleaving `st`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 83 ++---------------
 .../spec/neon/aarch64.spec.yml                | 92 ++++---------------
 2 files changed, 26 insertions(+), 149 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 88afaae8b80d3..41f01d445fc71 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -25039,16 +25039,9 @@ pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
+#[cfg_attr(test, assert_instr(stp))]
 pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v1f64.p0"
-        )]
-        fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
-    }
-    _vst2_f64(b.0, b.1, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"]
@@ -25125,14 +25118,7 @@ pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2f64.p0"
-        )]
-        fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
-    }
-    _vst2q_f64(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"]
@@ -25143,14 +25129,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2i64.p0"
-        )]
-        fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
-    }
-    _vst2q_s64(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"]
@@ -25295,14 +25274,7 @@ pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v1f64.p0"
-        )]
-        fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
-    }
-    _vst3_f64(b.0, b.1, b.2, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"]
@@ -25379,14 +25351,7 @@ pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2f64.p0"
-        )]
-        fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
-    }
-    _vst3q_f64(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"]
@@ -25397,14 +25362,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2i64.p0"
-        )]
-        fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
-    }
-    _vst3q_s64(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"]
@@ -25549,14 +25507,7 @@ pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v1f64.p0"
-        )]
-        fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
-    }
-    _vst4_f64(b.0, b.1, b.2, b.3, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
@@ -25647,14 +25598,7 @@ pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2f64.p0"
-        )]
-        fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
-    }
-    _vst4q_f64(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"]
@@ -25665,14 +25609,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2i64.p0"
-        )]
-        fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
-    }
-    _vst4q_s64(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 9190c8518a667..0ec8024fdfbb6 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -4567,20 +4567,11 @@ intrinsics:
       unsafe: [neon]
     attr:
       - *neon-stable
-    assert_instr: [st1]
+    assert_instr: [stp]
     types:
-      - ['f64', float64x1x2_t, float64x1_t]
+      - ['f64', float64x1x2_t]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -4591,19 +4582,10 @@ intrinsics:
       - *neon-stable
     assert_instr: [st2]
     types:
-      - [i64, int64x2x2_t, int64x2_t]
-      - [f64, float64x2x2_t, float64x2_t]
+      - [i64, int64x2x2_t, "2"]
+      - [f64, float64x2x2_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
 
   - name: "vst2{neon_type[1].lane_nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -4781,19 +4763,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [f64, float64x1x3_t, float64x1_t]
+      - [f64, float64x1x3_t]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].lane_nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -4860,20 +4832,10 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i64, int64x2x3_t, int64x2_t]
-      - [f64, float64x2x3_t, float64x2_t]
+      - [i64, int64x2x3_t, "2"]
+      - [f64, float64x2x3_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -4995,20 +4957,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [f64, float64x1x4_t, float64x1_t]
+      - [f64, float64x1x4_t]
     compose:
-      - LLVMLink:
-          name: 'st4.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].lane_nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -5075,21 +5026,10 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i64, int64x2x4_t, int64x2_t]
-      - [f64, float64x2x4_t, float64x2_t]
+      - [i64, int64x2x4_t, "2"]
+      - [f64, float64x2x4_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st4.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"

From a5ba7941f93f3a619247e583e0a913e2c3a7613d Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 06:50:17 -0500
Subject: [PATCH 06/21] x86: use `simd::intrinsics` for saturating packs

Use intrinsics for `sse2`, `sse41`, `avx2`, `avx512bw`

The majority of implementations make use of `simd_shuffle` since that
optimized through to the avx512 intrinsics that made use of the lower
target feature intrinsics. Combined with masked stores, instruction
tests would fail presumably due to the casting and clamping that
the compiler couldn't see through. This is a known weakness as seen
in the other masked stores like the truncating conversion stores.
---
 .../stdarch/crates/core_arch/src/x86/sse2.rs  | 67 +++++++++++++++----
 1 file changed, 55 insertions(+), 12 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs
index f339a003df4d1..fbf62c362f51b 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -1484,7 +1484,7 @@ pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
     }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
@@ -1493,10 +1493,27 @@ pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(packsswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
+    unsafe {
+        let max = simd_splat(i16::from(i8::MAX));
+        let min = simd_splat(i16::from(i8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+
+        // Shuffle the low i8 of each i16 from two concatenated vectors into
+        // the low bits of the result register.
+        const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
+        let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
@@ -1505,10 +1522,23 @@ pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(packssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
+    unsafe {
+        let max = simd_splat(i32::from(i16::MAX));
+        let min = simd_splat(i32::from(i16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min);
+        let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min);
+
+        let clamped_a: i16x4 = simd_cast(clamped_a);
+        let clamped_b: i16x4 = simd_cast(clamped_b);
+
+        let a: i64 = transmute(clamped_a);
+        let b: i64 = transmute(clamped_b);
+        i64x2::new(a, b).as_m128i()
+    }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
@@ -1517,7 +1547,26 @@ pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(packuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
+    unsafe {
+        let max = simd_splat(i16::from(u8::MAX));
+        let min = simd_splat(i16::from(u8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+
+        // Shuffle the low bytes of each i16 from two concatenated vectors into
+        // the low bits of the result register.
+        // Without `simd_shuffle`, this intrinsic will cause the AVX-512BW
+        // `_mm_mask_packus_epi16` and `_mm_maskz_packus_epi16` tests to fail.
+        const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
+        let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
 /// Returns the `imm8` element of `a`.
@@ -3217,12 +3266,6 @@ unsafe extern "C" {
     fn cvtps2dq(a: __m128) -> i32x4;
     #[link_name = "llvm.x86.sse2.maskmov.dqu"]
     fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
-    #[link_name = "llvm.x86.sse2.packsswb.128"]
-    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
-    #[link_name = "llvm.x86.sse2.packssdw.128"]
-    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
-    #[link_name = "llvm.x86.sse2.packuswb.128"]
-    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
     #[link_name = "llvm.x86.sse2.max.sd"]
     fn maxsd(a: __m128d, b: __m128d) -> __m128d;
     #[link_name = "llvm.x86.sse2.max.pd"]

From 6fa33bea62865ebbfb58b88f689e4210b0769c1f Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 07:05:07 -0500
Subject: [PATCH 07/21] Use intrinsics for `sse41`

---
 .../stdarch/crates/core_arch/src/x86/sse41.rs | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs
index 7ad4306f36f21..8036f24e24d37 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs
@@ -418,7 +418,7 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
@@ -427,7 +427,24 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(packusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
+    unsafe {
+        let max = simd_splat(i32::from(u16::MAX));
+        let min = simd_splat(i32::from(u16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min)
+            .as_m128i()
+            .as_i16x8();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min)
+            .as_m128i()
+            .as_i16x8();
+
+        // Shuffle the low u16 of each i32 from two concatenated vectors into
+        // the low bits of the result register.
+        const IDXS: [u32; 8] = [0, 2, 4, 6, 8, 10, 12, 14];
+        let result: i16x8 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
 /// Compares packed 64-bit integers in `a` and `b` for equality
@@ -1166,8 +1183,6 @@ pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
 unsafe extern "C" {
     #[link_name = "llvm.x86.sse41.insertps"]
     fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
-    #[link_name = "llvm.x86.sse41.packusdw"]
-    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
     #[link_name = "llvm.x86.sse41.dppd"]
     fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
     #[link_name = "llvm.x86.sse41.dpps"]

From 639b07ec7513d655fd674ce109ec3b3e165ff441 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 08:06:24 -0500
Subject: [PATCH 08/21] Use intrinsics for `avx2`

---
 .../stdarch/crates/core_arch/src/x86/avx2.rs  | 108 +++++++++++++++---
 1 file changed, 92 insertions(+), 16 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index 04a88e461f752..ca4ca9a2de9a4 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -2315,7 +2315,7 @@ pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
@@ -2324,10 +2324,31 @@ pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpacksswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
+    unsafe {
+        let max = simd_splat(i16::from(i8::MAX));
+        let min = simd_splat(i16::from(i8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to i8 conversions
+            32, 34, 36, 38, 40, 42, 44, 46, // b-lo
+            16, 18, 20, 22, 24, 26, 28, 30, // a-hi
+            48, 50, 52, 54, 56, 58, 60, 62, // b-hi
+        ];
+        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
@@ -2336,10 +2357,31 @@ pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpackssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
+    unsafe {
+        let max = simd_splat(i32::from(i16::MAX));
+        let min = simd_splat(i32::from(i16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 16] = [
+            00, 02, 04, 06, // a-lo i32 to i16 conversions
+            16, 18, 20, 22, // b-lo
+            08, 10, 12, 14, // a-hi
+            24, 26, 28, 30, // b-hi
+        ];
+        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
@@ -2348,10 +2390,31 @@ pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpackuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
+    unsafe {
+        let max = simd_splat(i16::from(u8::MAX));
+        let min = simd_splat(i16::from(u8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to u8 conversions
+            32, 34, 36, 38, 40, 42, 44, 46, // b-lo
+            16, 18, 20, 22, 24, 26, 28, 30, // a-hi
+            48, 50, 52, 54, 56, 58, 60, 62, // b-hi
+        ];
+        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
@@ -2360,7 +2423,28 @@ pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpackusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
+    unsafe {
+        let max = simd_splat(i32::from(u16::MAX));
+        let min = simd_splat(i32::from(u16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 16] = [
+            00, 02, 04, 06, // a-lo i32 to u16 conversions
+            16, 18, 20, 22, // b-lo
+            08, 10, 12, 14, // a-hi
+            24, 26, 28, 30, // b-hi
+        ];
+        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
 /// Permutes packed 32-bit integers from `a` according to the content of `b`.
@@ -3827,14 +3911,6 @@ unsafe extern "C" {
     fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
     #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
     fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.packsswb"]
-    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
-    #[link_name = "llvm.x86.avx2.packssdw"]
-    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
-    #[link_name = "llvm.x86.avx2.packuswb"]
-    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
-    #[link_name = "llvm.x86.avx2.packusdw"]
-    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
     #[link_name = "llvm.x86.avx2.psad.bw"]
     fn psadbw(a: u8x32, b: u8x32) -> u64x4;
     #[link_name = "llvm.x86.avx2.psign.b"]

From c7ecbd5e4b412a3ee76463037ac46ab401a62c5b Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 08:42:08 -0500
Subject: [PATCH 09/21] Use intrinsics for `avx512bw`

---
 .../crates/core_arch/src/x86/avx512bw.rs      | 117 ++++++++++++++++--
 1 file changed, 104 insertions(+), 13 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 3ba171c0fa50f..78801e8902107 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -6524,7 +6524,32 @@ pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
 pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
+    unsafe {
+        let max = simd_splat(i32::from(i16::MAX));
+        let min = simd_splat(i32::from(i16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06,
+            32, 34, 36, 38,
+            08, 10, 12, 14,
+            40, 42, 44, 46,
+            16, 18, 20, 22,
+            48, 50, 52, 54,
+            24, 26, 28, 30,
+            56, 58, 60, 62,
+        ];
+        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6619,7 +6644,32 @@ pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
 pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
+    unsafe {
+        let max = simd_splat(i16::from(i8::MAX));
+        let min = simd_splat(i16::from(i8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 64] = [
+            000, 002, 004, 006, 008, 010, 012, 014,
+            064, 066, 068, 070, 072, 074, 076, 078,
+            016, 018, 020, 022, 024, 026, 028, 030,
+            080, 082, 084, 086, 088, 090, 092, 094,
+            032, 034, 036, 038, 040, 042, 044, 046,
+            096, 098, 100, 102, 104, 106, 108, 110,
+            048, 050, 052, 054, 056, 058, 060, 062,
+            112, 114, 116, 118, 120, 122, 124, 126,
+        ];
+        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6714,7 +6764,32 @@ pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
 pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
+    unsafe {
+        let max = simd_splat(i32::from(u16::MAX));
+        let min = simd_splat(i32::from(u16::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06,
+            32, 34, 36, 38,
+            08, 10, 12, 14,
+            40, 42, 44, 46,
+            16, 18, 20, 22,
+            48, 50, 52, 54,
+            24, 26, 28, 30,
+            56, 58, 60, 62,
+        ];
+        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6809,7 +6884,32 @@ pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
 pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
+    unsafe {
+        let max = simd_splat(i16::from(u8::MAX));
+        let min = simd_splat(i16::from(u8::MIN));
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 64] = [
+            000, 002, 004, 006, 008, 010, 012, 014,
+            064, 066, 068, 070, 072, 074, 076, 078,
+            016, 018, 020, 022, 024, 026, 028, 030,
+            080, 082, 084, 086, 088, 090, 092, 094,
+            032, 034, 036, 038, 040, 042, 044, 046,
+            096, 098, 100, 102, 104, 106, 108, 110,
+            048, 050, 052, 054, 056, 058, 060, 062,
+            112, 114, 116, 118, 120, 122, 124, 126,
+        ];
+        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -12606,15 +12706,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
     fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.packssdw.512"]
-    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
-    #[link_name = "llvm.x86.avx512.packsswb.512"]
-    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
-    #[link_name = "llvm.x86.avx512.packusdw.512"]
-    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
-    #[link_name = "llvm.x86.avx512.packuswb.512"]
-    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
-
     #[link_name = "llvm.x86.avx512.psll.w.512"]
     fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
 

From 22d10c516d0b2cb1dd90a489bfd5402a96106f3e Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Fri, 20 Feb 2026 06:53:07 -0500
Subject: [PATCH 10/21] x86: Followup to add const for pack intrinsics

Add const to `sse2`, `sse41`, `avx2`, and `avx512bw` functions and tests
---
 .../crates/core_arch/src/x86/avx512bw.rs      | 30 +++++++++++--------
 .../stdarch/crates/core_arch/src/x86/sse2.rs  | 27 +++++++++--------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 78801e8902107..8481edcdb38d6 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -6615,7 +6615,8 @@ pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
@@ -6629,7 +6630,8 @@ pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
@@ -6735,7 +6737,8 @@ pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
@@ -6749,7 +6752,8 @@ pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
@@ -6975,7 +6979,8 @@ pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
@@ -6989,7 +6994,8 @@ pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
@@ -17854,7 +17860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_packs_epi32() {
+    const fn test_mm_mask_packs_epi32() {
         let a = _mm_set1_epi32(i32::MAX);
         let b = _mm_set1_epi32(1 << 16 | 1);
         let r = _mm_mask_packs_epi32(a, 0, a, b);
@@ -17865,7 +17871,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_packs_epi32() {
+    const fn test_mm_maskz_packs_epi32() {
         let a = _mm_set1_epi32(i32::MAX);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_packs_epi32(0, a, b);
@@ -17954,7 +17960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_packs_epi16() {
+    const fn test_mm_mask_packs_epi16() {
         let a = _mm_set1_epi16(i16::MAX);
         let b = _mm_set1_epi16(1 << 8 | 1);
         let r = _mm_mask_packs_epi16(a, 0, a, b);
@@ -17966,7 +17972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_packs_epi16() {
+    const fn test_mm_maskz_packs_epi16() {
         let a = _mm_set1_epi16(i16::MAX);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_packs_epi16(0, a, b);
@@ -18137,7 +18143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_packus_epi16() {
+    const fn test_mm_mask_packus_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(1 << 8 | 1);
         let r = _mm_mask_packus_epi16(a, 0, a, b);
@@ -18148,7 +18154,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_packus_epi16() {
+    const fn test_mm_maskz_packus_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_packus_epi16(0, a, b);
diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs
index fbf62c362f51b..1f97f3c69d0e3 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -1492,10 +1492,11 @@ pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packsswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
-        let max = simd_splat(i16::from(i8::MAX));
-        let min = simd_splat(i16::from(i8::MIN));
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
             .as_m128i()
@@ -1521,10 +1522,11 @@ pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
-        let max = simd_splat(i32::from(i16::MAX));
-        let min = simd_splat(i32::from(i16::MIN));
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min);
         let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min);
@@ -1546,10 +1548,11 @@ pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
-        let max = simd_splat(i16::from(u8::MAX));
-        let min = simd_splat(i16::from(u8::MIN));
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
             .as_m128i()
@@ -4329,7 +4332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_packs_epi16() {
+    const fn test_mm_packs_epi16() {
         let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
         let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
         let r = _mm_packs_epi16(a, b);
@@ -4343,7 +4346,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_packs_epi32() {
+    const fn test_mm_packs_epi32() {
         let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
         let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
         let r = _mm_packs_epi32(a, b);
@@ -4354,7 +4357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_packus_epi16() {
+    const fn test_mm_packus_epi16() {
         let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
         let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
         let r = _mm_packus_epi16(a, b);

From 17a4c8ea8fa5c17fb698e6a3f3194ef1ac79c7d5 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Fri, 20 Feb 2026 07:02:29 -0500
Subject: [PATCH 11/21] Add const to `sse41` intrinsics

---
 library/stdarch/crates/core_arch/src/x86/avx512bw.rs | 10 ++++++----
 library/stdarch/crates/core_arch/src/x86/sse41.rs    |  9 +++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 8481edcdb38d6..360b755d5818a 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -6859,7 +6859,8 @@ pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
@@ -6873,7 +6874,8 @@ pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
@@ -18043,7 +18045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_packus_epi32() {
+    const fn test_mm_mask_packus_epi32() {
         let a = _mm_set1_epi32(-1);
         let b = _mm_set1_epi32(1 << 16 | 1);
         let r = _mm_mask_packus_epi32(a, 0, a, b);
@@ -18054,7 +18056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_packus_epi32() {
+    const fn test_mm_maskz_packus_epi32() {
         let a = _mm_set1_epi32(-1);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_packus_epi32(0, a, b);
diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs
index 8036f24e24d37..4ebf7d3bd39a8 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs
@@ -426,10 +426,11 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(packusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
-        let max = simd_splat(i32::from(u16::MAX));
-        let min = simd_splat(i32::from(u16::MIN));
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min)
             .as_m128i()
@@ -1470,7 +1471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    fn test_mm_packus_epi32() {
+    const fn test_mm_packus_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let b = _mm_setr_epi32(-1, -2, -3, -4);
         let r = _mm_packus_epi32(a, b);

From 4f881c4b89bc86577eeb63767292bac5d5896270 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Fri, 20 Feb 2026 07:10:51 -0500
Subject: [PATCH 12/21] Add const to `avx2` intrinsics

---
 .../stdarch/crates/core_arch/src/x86/avx2.rs  | 36 ++++++------
 .../crates/core_arch/src/x86/avx512bw.rs      | 55 ++++++++++++++-----
 2 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index ca4ca9a2de9a4..b49ad9522a412 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -2323,10 +2323,11 @@ pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
-        let max = simd_splat(i16::from(i8::MAX));
-        let min = simd_splat(i16::from(i8::MIN));
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
             .as_m256i()
@@ -2356,10 +2357,11 @@ pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
-        let max = simd_splat(i32::from(i16::MAX));
-        let min = simd_splat(i32::from(i16::MIN));
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
             .as_m256i()
@@ -2389,10 +2391,11 @@ pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
-        let max = simd_splat(i16::from(u8::MAX));
-        let min = simd_splat(i16::from(u8::MIN));
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
             .as_m256i()
@@ -2422,10 +2425,11 @@ pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
-        let max = simd_splat(i32::from(u16::MAX));
-        let min = simd_splat(i32::from(u16::MIN));
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
             .as_m256i()
@@ -5064,7 +5068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_packs_epi16() {
+    const fn test_mm256_packs_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_packs_epi16(a, b);
@@ -5080,7 +5084,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_packs_epi32() {
+    const fn test_mm256_packs_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_packs_epi32(a, b);
@@ -5090,7 +5094,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_packus_epi16() {
+    const fn test_mm256_packus_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_packus_epi16(a, b);
@@ -5106,7 +5110,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_packus_epi32() {
+    const fn test_mm256_packus_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_packus_epi32(a, b);
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 360b755d5818a..8c7921fc18019 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -6587,7 +6587,13 @@ pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packs_epi32(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
@@ -6709,7 +6715,13 @@ pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packs_epi16(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
@@ -6723,7 +6735,8 @@ pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
@@ -6831,7 +6844,13 @@ pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packus_epi32(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
@@ -6845,7 +6864,8 @@ pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
@@ -6953,7 +6973,13 @@ pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packus_epi16(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
@@ -6967,7 +6993,8 @@ pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
@@ -17838,7 +17865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_packs_epi32() {
+    const fn test_mm256_mask_packs_epi32() {
         let a = _mm256_set1_epi32(i32::MAX);
         let b = _mm256_set1_epi32(1 << 16 | 1);
         let r = _mm256_mask_packs_epi32(a, 0, a, b);
@@ -17936,7 +17963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_packs_epi16() {
+    const fn test_mm256_mask_packs_epi16() {
         let a = _mm256_set1_epi16(i16::MAX);
         let b = _mm256_set1_epi16(1 << 8 | 1);
         let r = _mm256_mask_packs_epi16(a, 0, a, b);
@@ -17949,7 +17976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_packs_epi16() {
+    const fn test_mm256_maskz_packs_epi16() {
         let a = _mm256_set1_epi16(i16::MAX);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_packs_epi16(0, a, b);
@@ -18023,7 +18050,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_packus_epi32() {
+    const fn test_mm256_mask_packus_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm256_set1_epi32(1 << 16 | 1);
         let r = _mm256_mask_packus_epi32(a, 0, a, b);
@@ -18034,7 +18061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_packus_epi32() {
+    const fn test_mm256_maskz_packus_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_packus_epi32(0, a, b);
@@ -18119,7 +18146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_packus_epi16() {
+    const fn test_mm256_mask_packus_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(1 << 8 | 1);
         let r = _mm256_mask_packus_epi16(a, 0, a, b);
@@ -18132,7 +18159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_packus_epi16() {
+    const fn test_mm256_maskz_packus_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_packus_epi16(0, a, b);

From 37bd99e2e0992495589c330d7478ec00f2dd8b85 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Fri, 20 Feb 2026 07:30:10 -0500
Subject: [PATCH 13/21] Add const to `avx512bw` intrinsics

---
 .../crates/core_arch/src/x86/avx512bw.rs      | 96 ++++++++++++-------
 1 file changed, 64 insertions(+), 32 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 8c7921fc18019..b41f8576cfe54 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -6523,10 +6523,11 @@ pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
-        let max = simd_splat(i32::from(i16::MAX));
-        let min = simd_splat(i32::from(i16::MIN));
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
             .as_m512i()
@@ -6559,7 +6560,13 @@ pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packs_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
@@ -6573,7 +6580,8 @@ pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
@@ -6651,10 +6659,11 @@ pub const fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m12
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
-        let max = simd_splat(i16::from(i8::MAX));
-        let min = simd_splat(i16::from(i8::MIN));
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
             .as_m512i()
@@ -6687,7 +6696,13 @@ pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packs_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
@@ -6701,7 +6716,8 @@ pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
@@ -6780,10 +6796,11 @@ pub const fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
-        let max = simd_splat(i32::from(u16::MAX));
-        let min = simd_splat(i32::from(u16::MIN));
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
 
         let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
             .as_m512i()
@@ -6816,7 +6833,13 @@ pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packus_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
@@ -6830,7 +6853,8 @@ pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m51
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
@@ -6909,10 +6933,11 @@ pub const fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
-        let max = simd_splat(i16::from(u8::MAX));
-        let min = simd_splat(i16::from(u8::MIN));
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
 
         let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
             .as_m512i()
@@ -6945,7 +6970,13 @@ pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packus_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
@@ -6959,7 +6990,8 @@ pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m51
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
@@ -17828,7 +17860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_packs_epi32() {
+    const fn test_mm512_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_packs_epi32(a, b);
@@ -17839,7 +17871,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_packs_epi32() {
+    const fn test_mm512_mask_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1 << 16 | 1);
         let r = _mm512_mask_packs_epi32(a, 0, a, b);
@@ -17852,7 +17884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_packs_epi32() {
+    const fn test_mm512_maskz_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_packs_epi32(0, a, b);
@@ -17911,7 +17943,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_packs_epi16() {
+    const fn test_mm512_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_packs_epi16(a, b);
@@ -17924,7 +17956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_packs_epi16() {
+    const fn test_mm512_mask_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1 << 8 | 1);
         let r = _mm512_mask_packs_epi16(a, 0, a, b);
@@ -17944,7 +17976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_packs_epi16() {
+    const fn test_mm512_maskz_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_packs_epi16(0, a, b);
@@ -18013,7 +18045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_packus_epi32() {
+    const fn test_mm512_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_packus_epi32(a, b);
@@ -18024,7 +18056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_packus_epi32() {
+    const fn test_mm512_mask_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1 << 16 | 1);
         let r = _mm512_mask_packus_epi32(a, 0, a, b);
@@ -18037,7 +18069,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_packus_epi32() {
+    const fn test_mm512_maskz_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_packus_epi32(0, a, b);
@@ -18094,7 +18126,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_packus_epi16() {
+    const fn test_mm512_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_packus_epi16(a, b);
@@ -18107,7 +18139,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_packus_epi16() {
+    const fn test_mm512_mask_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1 << 8 | 1);
         let r = _mm512_mask_packus_epi16(a, 0, a, b);
@@ -18127,7 +18159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_packus_epi16() {
+    const fn test_mm512_maskz_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_packus_epi16(0, a, b);

From 47ef7f73de0547680678ef1184ed1f6889e69614 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 23 Feb 2026 11:58:18 +0100
Subject: [PATCH 14/21] aarch64: cleanup of some long array literals

---
 .../core_arch/src/aarch64/neon/generated.rs   | 539 ++----------------
 .../spec/neon/aarch64.spec.yml                | 108 ++--
 2 files changed, 108 insertions(+), 539 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 41f01d445fc71..de64839661d6e 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -14131,26 +14131,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
@@ -14165,26 +14146,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
@@ -14195,13 +14157,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlal_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
@@ -14216,13 +14172,7 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
@@ -14237,26 +14187,7 @@ pub fn vmlal_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
@@ -14271,26 +14202,7 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
@@ -14305,13 +14217,7 @@ pub fn vmlal_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlal_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
@@ -14326,13 +14232,7 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
@@ -14475,26 +14375,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
@@ -14509,26 +14390,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
@@ -14539,13 +14401,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsl_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
@@ -14560,13 +14416,7 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
@@ -14581,26 +14431,7 @@ pub fn vmlsl_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
@@ -14615,26 +14446,7 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
@@ -14649,13 +14461,7 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsl_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
@@ -14670,13 +14476,7 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
@@ -14975,12 +14775,7 @@ pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"]
@@ -14992,25 +14787,7 @@ pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float1
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
@@ -15104,25 +14881,7 @@ pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_s16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
@@ -15133,25 +14892,7 @@ pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_high_s16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
@@ -15162,12 +14903,7 @@ pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int3
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmull_high_s32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
@@ -15178,12 +14914,7 @@ pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_s32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
@@ -15194,25 +14925,7 @@ pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_u16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
@@ -15223,25 +14936,7 @@ pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uin
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_high_u16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
@@ -15252,12 +14947,7 @@ pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> ui
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmull_high_u32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
@@ -15268,12 +14958,7 @@ pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uin
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_u32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
@@ -15436,7 +15121,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
@@ -15447,7 +15132,7 @@ pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]
@@ -15599,12 +15284,7 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulx_f16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"]
@@ -15616,12 +15296,7 @@ pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float1
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmulx_f16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"]
@@ -15633,25 +15308,7 @@ pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulxq_f16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"]
@@ -15663,25 +15320,7 @@ pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmulxq_f16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"]
@@ -15692,7 +15331,7 @@ pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"]
@@ -15703,7 +15342,7 @@ pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float3
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"]
@@ -15714,12 +15353,7 @@ pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmulxq_f32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"]
@@ -15730,12 +15364,7 @@ pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulxq_f32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"]
@@ -15746,7 +15375,7 @@ pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"]
@@ -15916,7 +15545,7 @@ pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
 }
 #[doc = "Negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
@@ -17916,8 +17545,7 @@ pub fn vqnegd_s64(a: i64) -> i64 {
 pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlah_s16(a, b, c)
     }
 }
@@ -17931,7 +17559,7 @@ pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
         vqrdmlah_s32(a, b, c)
     }
 }
@@ -17945,8 +17573,7 @@ pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlah_s16(a, b, c)
     }
 }
@@ -17960,7 +17587,7 @@ pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
         vqrdmlah_s32(a, b, c)
     }
 }
@@ -17974,20 +17601,7 @@ pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
+        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
         vqrdmlahq_s16(a, b, c)
     }
 }
@@ -18001,8 +17615,7 @@ pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
     unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlahq_s32(a, b, c)
     }
 }
@@ -18016,20 +17629,7 @@ pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
+        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
         vqrdmlahq_s16(a, b, c)
     }
 }
@@ -18043,8 +17643,7 @@ pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 pub fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlahq_s32(a, b, c)
     }
 }
@@ -18190,8 +17789,7 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
 pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlsh_s16(a, b, c)
     }
 }
@@ -18205,7 +17803,7 @@ pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
         vqrdmlsh_s32(a, b, c)
     }
 }
@@ -18219,8 +17817,7 @@ pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlsh_s16(a, b, c)
     }
 }
@@ -18234,7 +17831,7 @@ pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
         vqrdmlsh_s32(a, b, c)
     }
 }
@@ -18248,20 +17845,7 @@ pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
+        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
         vqrdmlshq_s16(a, b, c)
     }
 }
@@ -18275,8 +17859,7 @@ pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
     unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlshq_s32(a, b, c)
     }
 }
@@ -18290,20 +17873,7 @@ pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
+        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
         vqrdmlshq_s16(a, b, c)
     }
 }
@@ -18317,8 +17887,7 @@ pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 pub fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
         vqrdmlshq_s32(a, b, c)
     }
 }
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 0ec8024fdfbb6..8574aacee6671 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5374,7 +5374,7 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32, LANE as u32]']
+      - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32; 2]']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
       - FnCall:
@@ -5443,11 +5443,11 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32, LANE as u32]']
-      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32, LANE as u32]']
-      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32, LANE as u32]']
+      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32; 2]']
+      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32; 2]']
+      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32; 4]']
+      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32; 4]']
+      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32; 2]']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
@@ -5473,10 +5473,10 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32; 4]']
+      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32; 4]']
+      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32; 8]']
+      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
@@ -7755,14 +7755,14 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
       - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
@@ -7839,14 +7839,14 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
       - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
@@ -11138,7 +11138,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32, LANE as u32]']]
+            - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32; 2]']]
 
   - name: "vmuld_lane_f64"
     doc: "Floating-point multiply"
@@ -11195,7 +11195,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']]
+            - FnCall: [simd_shuffle!, [b, b, '[LANE as u32; 2]']]
 
 
   # vmulq_laneq_f16
@@ -11212,8 +11212,8 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"]
-      - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"]
+      - [float16x4_t, float16x8_t, '_lane', "[LANE as u32; 4]"]
+      - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32; 8]"]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '3']]
       - FnCall:
@@ -11335,10 +11335,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
+      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
+      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
+      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
@@ -11358,10 +11358,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
@@ -11660,10 +11660,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]']
+      - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]']
+      - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]']
+      - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
@@ -11682,10 +11682,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]']
+      - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]']
+      - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]']
+      - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
@@ -11973,10 +11973,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
+      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
+      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
+      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
@@ -11992,10 +11992,10 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]

From 820ca4803853f80be0dbd29e31a82f623107e769 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Wed, 25 Feb 2026 04:49:15 +0530
Subject: [PATCH 15/21] Update Intel SDE version to 10.5

---
 library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +-
 library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 2743896375cf3..a357449d51e3d 100644
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   build-essential \
   lld
 
-RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz
+RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-10.5.0-2026-01-13-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
 RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
 ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
index 342f7d83a63e3..acf023ed0dc49 100644
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
@@ -12,7 +12,7 @@
 # CPUID_VERSION = 1.0
 #      Input      =>               Output
 # EAX      ECX    =>   EAX      EBX      ECX      EDX
-00000000 ******** => 00000024 756e6547 6c65746e 49656e69
+00000000 ******** => 00000029 756e6547 6c65746e 49656e69
 00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff
 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
 00000003 ******** => 00000000 00000000 00000000 00000000
@@ -48,6 +48,7 @@
 0000001e 00000001 => 000001ff 00000000 00000000 00000000
 00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10
 00000024 00000001 => 00000000 00000000 00000004 00000000
+00000029 ******** => 00000000 00000001 00000000 00000000
 80000000 ******** => 80000008 00000000 00000000 00000000
 80000001 ******** => 00000000 00000000 00000121 2c100000
 80000002 ******** => 00000000 00000000 00000000 00000000

From f01bcce3e22cf232d25db92335e6fbaa3f6daaf1 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 25 Feb 2026 15:00:04 +0100
Subject: [PATCH 16/21] update to `resolver = 3`

---
 library/stdarch/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml
index 5979096439118..e3963a69879a1 100644
--- a/library/stdarch/Cargo.toml
+++ b/library/stdarch/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-resolver = "1"
+resolver = "3"
 members = [
   "crates/*",
   "examples",

From cb3046e5f2f0736366c0fea4977a8df579d96311 Mon Sep 17 00:00:00 2001
From: ArunTamil21 <aruntamiln@gmail.com>
Date: Wed, 25 Feb 2026 23:56:42 +0000
Subject: [PATCH 17/21] Add missing runtime test for _mm_comige_ss and fix
 _mm_comigt_ss test

---
 .../stdarch/crates/core_arch/src/x86/sse.rs   | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs
index 2c4439a3f3a55..3f7781cc7dc4c 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse.rs
@@ -2816,14 +2816,32 @@ mod tests {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-        let ee = &[1i32, 0, 1, 0];
+        let ee = &[0i32, 0, 1, 0];
 
         for i in 0..4 {
             let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
             let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-            let r = _mm_comige_ss(a, b);
+            let r = _mm_comigt_ss(a, b);
 
+            assert_eq!(
+                ee[i], r,
+                "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
+        }
+    }
+
+    #[simd_test(enable = "sse")]
+    fn test_mm_comige_ss() {
+        let aa = &[3.0f32, 23.0, 12.0, NAN];
+        let bb = &[3.0f32, 1.5, 47.5, NAN];
+        let ee = &[1i32, 1, 0, 0];
+
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+            let r = _mm_comige_ss(a, b);
             assert_eq!(
                 ee[i], r,
                 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",

From f981ffe959bb8a8a99fa02b6864bb82d7b2d170a Mon Sep 17 00:00:00 2001
From: ArunTamil21 <aruntamiln@gmail.com>
Date: Thu, 26 Feb 2026 09:19:29 +0000
Subject: [PATCH 18/21] Remove _mm_comige_ss from skip list in x86-intel.rs

---
 library/stdarch/crates/stdarch-verify/tests/x86-intel.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
index 4136463f197fd..2ac05e28cb4ce 100644
--- a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
+++ b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
@@ -246,7 +246,6 @@ fn verify_all_signatures() {
                 "_xend",
                 "_xabort_code",
                 // Aliases
-                "_mm_comige_ss",
                 "_mm_cvt_ss2si",
                 "_mm_cvtt_ss2si",
                 "_mm_cvt_si2ss",

From 2fa37e7b3498a9cb46004fae5fd9d6d40835ad28 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 26 Feb 2026 13:00:21 +0100
Subject: [PATCH 19/21] aarch64: fix UB in non-power-of-two reads and writes

---
 .../stdarch/crates/core_arch/src/macros.rs    | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs
index 5a582fe17772b..00e92428b3e7e 100644
--- a/library/stdarch/crates/core_arch/src/macros.rs
+++ b/library/stdarch/crates/core_arch/src/macros.rs
@@ -237,12 +237,12 @@ macro_rules! deinterleaving_load {
     ($elem:ty, $lanes:literal, 2, $ptr:expr) => {{
         use $crate::core_arch::macros::deinterleave_mask;
         use $crate::core_arch::simd::Simd;
-        use $crate::{mem::transmute, ptr};
+        use $crate::mem::transmute;
 
         type V = Simd<$elem, $lanes>;
         type W = Simd<$elem, { $lanes * 2 }>;
 
-        let w: W = ptr::read_unaligned($ptr as *const W);
+        let w: W = $crate::ptr::read_unaligned($ptr as *const W);
 
         let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 0>());
         let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 1>());
@@ -253,12 +253,20 @@ macro_rules! deinterleaving_load {
     ($elem:ty, $lanes:literal, 3, $ptr:expr) => {{
         use $crate::core_arch::macros::deinterleave_mask;
         use $crate::core_arch::simd::Simd;
-        use $crate::{mem::transmute, ptr};
+        use $crate::mem::{MaybeUninit, transmute};
 
         type V = Simd<$elem, $lanes>;
         type W = Simd<$elem, { $lanes * 3 }>;
 
-        let w: W = ptr::read_unaligned($ptr as *const W);
+        // NOTE: repr(simd) adds padding to make the total size a power of two.
+        // Hence reading W from ptr might read out of bounds.
+        let mut mem = MaybeUninit::<W>::uninit();
+        $crate::ptr::copy_nonoverlapping(
+            $ptr.cast::<$elem>(),
+            mem.as_mut_ptr().cast::<$elem>(),
+            $lanes * 3,
+        );
+        let w = mem.assume_init();
 
         let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>());
         let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>());
@@ -270,12 +278,12 @@ macro_rules! deinterleaving_load {
     ($elem:ty, $lanes:literal, 4, $ptr:expr) => {{
         use $crate::core_arch::macros::deinterleave_mask;
         use $crate::core_arch::simd::Simd;
-        use $crate::{mem::transmute, ptr};
+        use $crate::mem::transmute;
 
         type V = Simd<$elem, $lanes>;
         type W = Simd<$elem, { $lanes * 4 }>;
 
-        let w: W = ptr::read_unaligned($ptr as *const W);
+        let w: W = $crate::ptr::read_unaligned($ptr as *const W);
 
         let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 0>());
         let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 1>());
@@ -322,8 +330,15 @@ macro_rules! interleaving_store {
             simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
 
         type W = Simd<$elem, { $lanes * 3 }>;
+
+        // NOTE: repr(simd) adds padding to make the total size a power of two.
+        // Hence writing W to ptr might write out of bounds.
         let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
-        $crate::ptr::write_unaligned($ptr as *mut W, w);
+        $crate::ptr::copy_nonoverlapping(
+            (&w as *const W).cast::<$elem>(),
+            $ptr.cast::<$elem>(),
+            $lanes * 3,
+        );
     }};
 
     // N = 4

From 1d678f6b0859ea25c2abc22eb4104a025a5737e4 Mon Sep 17 00:00:00 2001
From: Deepesh Varatharajan <Deepesh.Varatharajan@windriver.com>
Date: Mon, 2 Mar 2026 04:22:23 -0800
Subject: [PATCH 20/21] Update call-llvm-intrinsics test for Rust 1.94.0 IR and
 multi-target CI

Rust 1.94 now passes constants directly to llvm.sqrt.f32 instead of
storing/loading via the stack.

- Updated the FileCheck pattern to match the new IR:
    // CHECK: call float @llvm.sqrt.f32(float 4.000000e+00)
  The test intent is unchanged: it still ensures the intrinsic is
  emitted as a 'call' (not 'invoke').

- Removed unnecessary local variables and Drop usage to work in
  `#![no_core]` mode with minicore.

- Added required crate attributes:
    #![feature(no_core, lang_items)]
    #![no_std]
    #![no_core]

- Replaced `//@ only-riscv64` (host-based execution) with explicit
  revisions for:
      riscv32gc-unknown-linux-gnu
      riscv64gc-unknown-linux-gnu
  This ensures deterministic multi-target coverage in CI without
  relying on the host architecture.

- Added `//@ needs-llvm-components: riscv` and
  `//@ min-llvm-version: 21` for CI compatibility.

Signed-off-by: Deepesh Varatharajan <Deepesh.Varatharajan@windriver.com>
---
 .../riscv-abi/call-llvm-intrinsics.rs         | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs b/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs
index e72a649a530a8..fb520d38df3ca 100644
--- a/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs
+++ b/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs
@@ -1,17 +1,20 @@
+//@ add-minicore
 //@ compile-flags: -C no-prepopulate-passes
-
-//@ only-riscv64
+//@ revisions: riscv32gc riscv64gc
+//@ [riscv32gc] compile-flags: --target riscv32gc-unknown-linux-gnu
+//@ [riscv32gc] needs-llvm-components: riscv
+//@ [riscv64gc] compile-flags: --target riscv64gc-unknown-linux-gnu
+//@ [riscv64gc] needs-llvm-components: riscv
+//@ min-llvm-version: 21
 
 #![feature(link_llvm_intrinsics)]
+#![feature(no_core, lang_items)]
+#![no_std]
+#![no_core]
 #![crate_type = "lib"]
 
-struct A;
-
-impl Drop for A {
-    fn drop(&mut self) {
-        println!("A");
-    }
-}
+extern crate minicore;
+use minicore::*;
 
 extern "C" {
     #[link_name = "llvm.sqrt.f32"]
@@ -19,12 +22,9 @@ extern "C" {
 }
 
 pub fn do_call() {
-    let _a = A;
-
     unsafe {
         // Ensure that we `call` LLVM intrinsics instead of trying to `invoke` them
-        // CHECK: store float 4.000000e+00, ptr %{{.}}, align 4
-        // CHECK: call float @llvm.sqrt.f32(float %{{.}}
+        // CHECK: call float @llvm.sqrt.f32(float 4.000000e+00)
         sqrt(4.0);
     }
 }

From 083b5db4766dc02e939e3aa92182dbad37d05b4b Mon Sep 17 00:00:00 2001
From: Zalathar <Zalathar@users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:06:03 +1100
Subject: [PATCH 21/21] Make `rustc_with_all_queries!` pass query modifiers as
 named values

---
 compiler/rustc_macros/src/query.rs            | 155 ++++++------
 .../rustc_middle/src/dep_graph/dep_node.rs    |   6 +-
 compiler/rustc_middle/src/query/plumbing.rs   | 212 +++++++---------
 .../rustc_query_impl/src/dep_kind_vtables.rs  |  22 +-
 compiler/rustc_query_impl/src/plumbing.rs     | 237 ++++++------------
 5 files changed, 260 insertions(+), 372 deletions(-)

diff --git a/compiler/rustc_macros/src/query.rs b/compiler/rustc_macros/src/query.rs
index 721cc7fe4d9b3..0a741d32ed61d 100644
--- a/compiler/rustc_macros/src/query.rs
+++ b/compiler/rustc_macros/src/query.rs
@@ -137,54 +137,23 @@ struct CacheOnDiskIf {
     block: Block,
 }
 
+/// See `rustc_middle::query::modifiers` for documentation of each query modifier.
 struct QueryModifiers {
-    /// The description of the query.
-    desc: Desc,
-
-    /// Use this type for the in-memory cache.
+    // tidy-alphabetical-start
+    anon: Option<Ident>,
     arena_cache: Option<Ident>,
-
-    /// Cache the query to disk if the `Block` returns true.
     cache_on_disk_if: Option<CacheOnDiskIf>,
-
-    /// A cycle error for this query aborting the compilation with a fatal error.
-    cycle_fatal: Option<Ident>,
-
-    /// A cycle error results in a delay_bug call
     cycle_delay_bug: Option<Ident>,
-
-    /// A cycle error results in a stashed cycle error that can be unstashed and canceled later
+    cycle_fatal: Option<Ident>,
     cycle_stash: Option<Ident>,
-
-    /// Don't hash the result, instead just mark a query red if it runs
-    no_hash: Option<Ident>,
-
-    /// Generate a dep node based on the dependencies of the query
-    anon: Option<Ident>,
-
-    /// Always evaluate the query, ignoring its dependencies
-    eval_always: Option<Ident>,
-
-    /// Whether the query has a call depth limit
     depth_limit: Option<Ident>,
-
-    /// Use a separate query provider for local and extern crates
-    separate_provide_extern: Option<Ident>,
-
-    /// Generate a `feed` method to set the query's value from another query.
+    desc: Desc,
+    eval_always: Option<Ident>,
     feedable: Option<Ident>,
-
-    /// When this query is called via `tcx.ensure_ok()`, it returns
-    /// `Result<(), ErrorGuaranteed>` instead of `()`. If the query needs to
-    /// be executed, and that execution returns an error, the error result is
-    /// returned to the caller.
-    ///
-    /// If execution is skipped, a synthetic `Ok(())` is returned, on the
-    /// assumption that a query with all-green inputs must have succeeded.
-    ///
-    /// Can only be applied to queries with a return value of
-    /// `Result<_, ErrorGuaranteed>`.
+    no_hash: Option<Ident>,
     return_result_from_ensure_ok: Option<Ident>,
+    separate_provide_extern: Option<Ident>,
+    // tidy-alphabetical-end
 }
 
 fn parse_query_modifiers(input: ParseStream<'_>) -> Result<QueryModifiers> {
@@ -272,6 +241,68 @@ fn parse_query_modifiers(input: ParseStream<'_>) -> Result<QueryModifiers> {
     })
 }
 
+fn make_modifiers_stream(query: &Query, modifiers: &QueryModifiers) -> proc_macro2::TokenStream {
+    let QueryModifiers {
+        // tidy-alphabetical-start
+        anon,
+        arena_cache,
+        cache_on_disk_if,
+        cycle_delay_bug,
+        cycle_fatal,
+        cycle_stash,
+        depth_limit,
+        desc: _,
+        eval_always,
+        feedable,
+        no_hash,
+        return_result_from_ensure_ok,
+        separate_provide_extern,
+        // tidy-alphabetical-end
+    } = modifiers;
+
+    let anon = anon.is_some();
+    let arena_cache = arena_cache.is_some();
+    let cache_on_disk = cache_on_disk_if.is_some();
+
+    let cycle_error_handling = if cycle_delay_bug.is_some() {
+        quote! { DelayBug }
+    } else if cycle_fatal.is_some() {
+        quote! { Fatal }
+    } else if cycle_stash.is_some() {
+        quote! { Stash }
+    } else {
+        quote! { Error }
+    };
+
+    let depth_limit = depth_limit.is_some();
+    let eval_always = eval_always.is_some();
+    let feedable = feedable.is_some();
+    let no_hash = no_hash.is_some();
+    let return_result_from_ensure_ok = return_result_from_ensure_ok.is_some();
+    let separate_provide_extern = separate_provide_extern.is_some();
+
+    // Giving an input span to the modifier names in the modifier list seems
+    // to give slightly more helpful errors when one of the callback macros
+    // fails to parse the modifier list.
+    let query_name_span = query.name.span();
+    quote_spanned! {
+        query_name_span =>
+        // Search for (QMODLIST) to find all occurrences of this query modifier list.
+        // tidy-alphabetical-start
+        anon: #anon,
+        arena_cache: #arena_cache,
+        cache_on_disk: #cache_on_disk,
+        cycle_error_handling: #cycle_error_handling,
+        depth_limit: #depth_limit,
+        eval_always: #eval_always,
+        feedable: #feedable,
+        no_hash: #no_hash,
+        return_result_from_ensure_ok: #return_result_from_ensure_ok,
+        separate_provide_extern: #separate_provide_extern,
+        // tidy-alphabetical-end
+    }
+}
+
 fn doc_comment_from_desc(list: &Punctuated<Expr, token::Comma>) -> Result<Attribute> {
     use ::syn::*;
     let mut iter = list.iter();
@@ -458,51 +489,13 @@ pub(super) fn rustc_queries(input: TokenStream) -> TokenStream {
             ReturnType::Type(..) => quote! { #return_ty },
         };
 
-        let mut modifiers_out = vec![];
-
-        macro_rules! passthrough {
-            ( $( $modifier:ident ),+ $(,)? ) => {
-                $( if let Some($modifier) = &modifiers.$modifier {
-                    modifiers_out.push(quote! { (#$modifier) });
-                }; )+
-            }
-        }
-
-        passthrough!(
-            arena_cache,
-            cycle_fatal,
-            cycle_delay_bug,
-            cycle_stash,
-            no_hash,
-            anon,
-            eval_always,
-            feedable,
-            depth_limit,
-            separate_provide_extern,
-            return_result_from_ensure_ok,
-        );
-
-        // If there was a `cache_on_disk_if` modifier in the real input, pass
-        // on a synthetic `(cache_on_disk)` modifier that can be inspected by
-        // macro-rules macros.
-        if modifiers.cache_on_disk_if.is_some() {
-            modifiers_out.push(quote! { (cache_on_disk) });
-        }
-
-        // This uses the span of the query definition for the commas,
-        // which can be important if we later encounter any ambiguity
-        // errors with any of the numerous macro_rules! macros that
-        // we use. Using the call-site span would result in a span pointing
-        // at the entire `rustc_queries!` invocation, which wouldn't
-        // be very useful.
-        let span = name.span();
-        let modifiers_stream = quote_spanned! { span => #(#modifiers_out),* };
+        let modifiers_stream = make_modifiers_stream(&query, modifiers);
 
         // Add the query to the group
         query_stream.extend(quote! {
             #(#doc_comments)*
-            [#modifiers_stream]
-            fn #name(#key_ty) #return_ty,
+            fn #name(#key_ty) #return_ty
+            { #modifiers_stream }
         });
 
         if let Some(feedable) = &modifiers.feedable {
diff --git a/compiler/rustc_middle/src/dep_graph/dep_node.rs b/compiler/rustc_middle/src/dep_graph/dep_node.rs
index 7efa013c3d999..6f85dba23dd49 100644
--- a/compiler/rustc_middle/src/dep_graph/dep_node.rs
+++ b/compiler/rustc_middle/src/dep_graph/dep_node.rs
@@ -268,8 +268,10 @@ macro_rules! define_dep_nodes {
         queries {
             $(
                 $(#[$q_attr:meta])*
-                [$($modifiers:tt)*]
-                fn $q_name:ident($K:ty) -> $V:ty,
+                fn $q_name:ident($K:ty) -> $V:ty
+                // Search for (QMODLIST) to find all occurrences of this query modifier list.
+                // Query modifiers are currently not used here, so skip the whole list.
+                { $($modifiers:tt)* }
             )*
         }
         non_queries {
diff --git a/compiler/rustc_middle/src/query/plumbing.rs b/compiler/rustc_middle/src/query/plumbing.rs
index ff6d87298028f..815c8a1baab61 100644
--- a/compiler/rustc_middle/src/query/plumbing.rs
+++ b/compiler/rustc_middle/src/query/plumbing.rs
@@ -333,44 +333,6 @@ macro_rules! query_helper_param_ty {
     ($K:ty) => { $K };
 }
 
-// Expands to `$yes` if the `arena_cache` modifier is present, `$no` otherwise.
-macro_rules! if_arena_cache {
-    ([] $then:tt $no:tt) => { $no };
-    ([(arena_cache) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes };
-    ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => {
-        if_arena_cache!([$($modifiers)*] $yes $no)
-    };
-}
-
-// Expands to `$yes` if the `separate_provide_extern` modifier is present, `$no` otherwise.
-macro_rules! if_separate_provide_extern {
-    ([] $then:tt $no:tt) => { $no };
-    ([(separate_provide_extern) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes };
-    ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => {
-        if_separate_provide_extern!([$($modifiers)*] $yes $no)
-    };
-}
-
-// Expands to `$yes` if the `return_result_from_ensure_ok` modifier is present, `$no` otherwise.
-macro_rules! if_return_result_from_ensure_ok {
-    ([] $then:tt $no:tt) => { $no };
-    ([(return_result_from_ensure_ok) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes };
-    ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => {
-        if_return_result_from_ensure_ok!([$($modifiers)*] $yes $no)
-    };
-}
-
-// Expands to `$item` if the `feedable` modifier is present.
-macro_rules! item_if_feedable {
-    ([] $($item:tt)*) => {};
-    ([(feedable) $($rest:tt)*] $($item:tt)*) => {
-        $($item)*
-    };
-    ([$other:tt $($modifiers:tt)*] $($item:tt)*) => {
-        item_if_feedable! { [$($modifiers)*] $($item)* }
-    };
-}
-
 macro_rules! define_callbacks {
     (
         // You might expect the key to be `$K:ty`, but it needs to be `$($K:tt)*` so that
@@ -378,8 +340,20 @@ macro_rules! define_callbacks {
         queries {
             $(
                 $(#[$attr:meta])*
-                [$($modifiers:tt)*]
-                fn $name:ident($($K:tt)*) -> $V:ty,
+                fn $name:ident($($K:tt)*) -> $V:ty
+                {
+                    // Search for (QMODLIST) to find all occurrences of this query modifier list.
+                    anon: $anon:literal,
+                    arena_cache: $arena_cache:literal,
+                    cache_on_disk: $cache_on_disk:literal,
+                    cycle_error_handling: $cycle_error_handling:ident,
+                    depth_limit: $depth_limit:literal,
+                    eval_always: $eval_always:literal,
+                    feedable: $feedable:literal,
+                    no_hash: $no_hash:literal,
+                    return_result_from_ensure_ok: $return_result_from_ensure_ok:literal,
+                    separate_provide_extern: $separate_provide_extern:literal,
+                }
             )*
         }
         // Non-queries are unused here.
@@ -394,20 +368,31 @@ macro_rules! define_callbacks {
                 pub type Key<'tcx> = $($K)*;
                 pub type Value<'tcx> = $V;
 
-                pub type LocalKey<'tcx> = if_separate_provide_extern!(
-                    [$($modifiers)*]
-                    (<Key<'tcx> as $crate::query::AsLocalQueryKey>::LocalQueryKey)
-                    (Key<'tcx>)
-                );
-
-                /// This type alias specifies the type returned from query providers and the type
-                /// used for decoding. For regular queries this is the declared returned type `V`,
-                /// but `arena_cache` will use `<V as ArenaCached>::Provided` instead.
-                pub type ProvidedValue<'tcx> = if_arena_cache!(
-                    [$($modifiers)*]
-                    (<Value<'tcx> as $crate::query::arena_cached::ArenaCached<'tcx>>::Provided)
-                    (Value<'tcx>)
-                );
+                /// Key type used by provider functions in `local_providers`.
+                /// This query has the `separate_provide_extern` modifier.
+                #[cfg($separate_provide_extern)]
+                pub type LocalKey<'tcx> =
+                    <Key<'tcx> as $crate::query::AsLocalQueryKey>::LocalQueryKey;
+                /// Key type used by provider functions in `local_providers`.
+                #[cfg(not($separate_provide_extern))]
+                pub type LocalKey<'tcx> = Key<'tcx>;
+
+                /// Return type of the `.ensure_ok()` method for this query,
+                /// which has the `return_result_from_ensure_ok` modifier.
+                #[cfg($return_result_from_ensure_ok)]
+                pub type EnsureOkReturnType = Result<(), rustc_errors::ErrorGuaranteed>;
+                /// Return type of the `.ensure_ok()` method for this query,
+                /// which does _not_ have the `return_result_from_ensure_ok` modifier.
+                #[cfg(not($return_result_from_ensure_ok))]
+                pub type EnsureOkReturnType = ();
+
+                /// Type returned from query providers and loaded from disk-cache.
+                #[cfg($arena_cache)]
+                pub type ProvidedValue<'tcx> =
+                    <Value<'tcx> as $crate::query::arena_cached::ArenaCached<'tcx>>::Provided;
+                /// Type returned from query providers and loaded from disk-cache.
+                #[cfg(not($arena_cache))]
+                pub type ProvidedValue<'tcx> = Value<'tcx>;
 
                 /// This helper function takes a value returned by the query provider
                 /// (or loaded from disk, or supplied by query feeding), allocates
@@ -420,23 +405,23 @@ macro_rules! define_callbacks {
                 ) -> Erased<Value<'tcx>> {
                     // For queries with the `arena_cache` modifier, store the
                     // provided value in an arena and get a reference to it.
-                    let value: Value<'tcx> = if_arena_cache!(
-                        [$($modifiers)*]
-                        {
-                            <Value<'tcx> as $crate::query::arena_cached::ArenaCached>::
-                                alloc_in_arena
-                            (
-                                tcx,
-                                &tcx.query_system.arenas.$name,
-                                provided_value,
-                            )
-                        }
-                        {
-                            // Otherwise, the provided value is the value (and `tcx` is unused).
-                            let _ = tcx;
-                            provided_value
-                        }
-                    );
+                    #[cfg($arena_cache)]
+                    let value: Value<'tcx> = {
+                        use $crate::query::arena_cached::ArenaCached;
+                        <Value<'tcx> as ArenaCached>::alloc_in_arena(
+                            tcx,
+                            &tcx.query_system.arenas.$name,
+                            provided_value,
+                        )
+                    };
+
+                    // Otherwise, the provided value is the value (and `tcx` is unused).
+                    #[cfg(not($arena_cache))]
+                    let value: Value<'tcx> = {
+                        let _ = tcx;
+                        provided_value
+                    };
+
                     erase::erase_val(value)
                 }
 
@@ -480,13 +465,11 @@ macro_rules! define_callbacks {
         #[derive(Default)]
         pub struct QueryArenas<'tcx> {
             $(
-                pub $name: if_arena_cache!(
-                    [$($modifiers)*]
-                    // Use the `ArenaCached` helper trait to determine the arena's value type.
-                    (TypedArena<<$V as $crate::query::arena_cached::ArenaCached<'tcx>>::Allocated>)
-                    // No arena for this query, so the field type is `()`.
-                    ()
-                ),
+                // Use the `ArenaCached` helper trait to determine the arena's value type.
+                #[cfg($arena_cache)]
+                pub $name: TypedArena<
+                    <$V as $crate::query::arena_cached::ArenaCached<'tcx>>::Allocated,
+                >,
             )*
         }
 
@@ -497,16 +480,14 @@ macro_rules! define_callbacks {
                 pub fn $name(
                     self,
                     key: query_helper_param_ty!($($K)*),
-                ) -> if_return_result_from_ensure_ok!(
-                    [$($modifiers)*]
-                    (Result<(), ErrorGuaranteed>)
-                    ()
-                ) {
-                    if_return_result_from_ensure_ok!(
-                        [$($modifiers)*]
-                        (crate::query::inner::query_ensure_error_guaranteed)
-                        (crate::query::inner::query_ensure)
-                    )(
+                ) -> $crate::queries::$name::EnsureOkReturnType {
+
+                    #[cfg($return_result_from_ensure_ok)]
+                    let ensure_fn = crate::query::inner::query_ensure_error_guaranteed;
+                    #[cfg(not($return_result_from_ensure_ok))]
+                    let ensure_fn = crate::query::inner::query_ensure;
+
+                    ensure_fn(
                         self.tcx,
                         &self.tcx.query_system.query_vtables.$name,
                         $crate::query::IntoQueryParam::into_query_param(key),
@@ -560,24 +541,22 @@ macro_rules! define_callbacks {
         }
 
         $(
-            item_if_feedable! {
-                [$($modifiers)*]
-                impl<'tcx, K: $crate::query::IntoQueryParam<$name::Key<'tcx>> + Copy>
-                    TyCtxtFeed<'tcx, K>
-                {
-                    $(#[$attr])*
-                    #[inline(always)]
-                    pub fn $name(self, value: $name::ProvidedValue<'tcx>) {
-                        let key = self.key().into_query_param();
-                        let erased_value = $name::provided_to_erased(self.tcx, value);
-                        $crate::query::inner::query_feed(
-                            self.tcx,
-                            dep_graph::DepKind::$name,
-                            &self.tcx.query_system.query_vtables.$name,
-                            key,
-                            erased_value,
-                        );
-                    }
+            #[cfg($feedable)]
+            impl<'tcx, K: $crate::query::IntoQueryParam<$name::Key<'tcx>> + Copy>
+                TyCtxtFeed<'tcx, K>
+            {
+                $(#[$attr])*
+                #[inline(always)]
+                pub fn $name(self, value: $name::ProvidedValue<'tcx>) {
+                    let key = self.key().into_query_param();
+                    let erased_value = $name::provided_to_erased(self.tcx, value);
+                    $crate::query::inner::query_feed(
+                        self.tcx,
+                        dep_graph::DepKind::$name,
+                        &self.tcx.query_system.query_vtables.$name,
+                        key,
+                        erased_value,
+                    );
                 }
             }
         )*
@@ -602,11 +581,11 @@ macro_rules! define_callbacks {
 
         pub struct ExternProviders {
             $(
-                pub $name: if_separate_provide_extern!(
-                    [$($modifiers)*]
-                    (for<'tcx> fn(TyCtxt<'tcx>, $name::Key<'tcx>) -> $name::ProvidedValue<'tcx>)
-                    ()
-                ),
+                #[cfg($separate_provide_extern)]
+                pub $name: for<'tcx> fn(
+                    TyCtxt<'tcx>,
+                    $name::Key<'tcx>,
+                ) -> $name::ProvidedValue<'tcx>,
             )*
         }
 
@@ -626,13 +605,10 @@ macro_rules! define_callbacks {
             fn default() -> Self {
                 ExternProviders {
                     $(
-                        $name: if_separate_provide_extern!(
-                            [$($modifiers)*]
-                            (|_, key| $crate::query::plumbing::default_extern_query(
-                                stringify!($name),
-                                &key
-                            ))
-                            ()
+                        #[cfg($separate_provide_extern)]
+                        $name: |_, key| $crate::query::plumbing::default_extern_query(
+                            stringify!($name),
+                            &key,
                         ),
                     )*
                 }
diff --git a/compiler/rustc_query_impl/src/dep_kind_vtables.rs b/compiler/rustc_query_impl/src/dep_kind_vtables.rs
index ceddef5385a58..0f3ef34d936a4 100644
--- a/compiler/rustc_query_impl/src/dep_kind_vtables.rs
+++ b/compiler/rustc_query_impl/src/dep_kind_vtables.rs
@@ -130,8 +130,20 @@ macro_rules! define_dep_kind_vtables {
         queries {
             $(
                 $(#[$attr:meta])*
-                [$($modifiers:tt)*]
-                fn $name:ident($K:ty) -> $V:ty,
+                fn $name:ident($K:ty) -> $V:ty
+                {
+                    // Search for (QMODLIST) to find all occurrences of this query modifier list.
+                    anon: $anon:literal,
+                    arena_cache: $arena_cache:literal,
+                    cache_on_disk: $cache_on_disk:literal,
+                    cycle_error_handling: $cycle_error_handling:ident,
+                    depth_limit: $depth_limit:literal,
+                    eval_always: $eval_always:literal,
+                    feedable: $feedable:literal,
+                    no_hash: $no_hash:literal,
+                    return_result_from_ensure_ok: $return_result_from_ensure_ok:literal,
+                    separate_provide_extern: $separate_provide_extern:literal,
+                }
             )*
         }
         non_queries {
@@ -154,9 +166,9 @@ macro_rules! define_dep_kind_vtables {
                 $crate::dep_kind_vtables::make_dep_kind_vtable_for_query::<
                     $crate::query_impl::$name::VTableGetter,
                 >(
-                    is_anon!([$($modifiers)*]),
-                    if_cache_on_disk!([$($modifiers)*] true false),
-                    is_eval_always!([$($modifiers)*]),
+                    $anon,
+                    $cache_on_disk,
+                    $eval_always,
                 )
             ),*
         ];
diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs
index 17e6fba8ac9a5..425ca28910073 100644
--- a/compiler/rustc_query_impl/src/plumbing.rs
+++ b/compiler/rustc_query_impl/src/plumbing.rs
@@ -99,125 +99,6 @@ pub(super) fn try_mark_green<'tcx>(tcx: TyCtxt<'tcx>, dep_node: &DepNode) -> boo
     tcx.dep_graph.try_mark_green(tcx, dep_node).is_some()
 }
 
-macro_rules! cycle_error_handling {
-    ([]) => {{
-        rustc_middle::query::CycleErrorHandling::Error
-    }};
-    ([(cycle_fatal) $($rest:tt)*]) => {{
-        rustc_middle::query::CycleErrorHandling::Fatal
-    }};
-    ([(cycle_stash) $($rest:tt)*]) => {{
-        rustc_middle::query::CycleErrorHandling::Stash
-    }};
-    ([(cycle_delay_bug) $($rest:tt)*]) => {{
-        rustc_middle::query::CycleErrorHandling::DelayBug
-    }};
-    ([$other:tt $($modifiers:tt)*]) => {
-        cycle_error_handling!([$($modifiers)*])
-    };
-}
-
-macro_rules! is_anon {
-    ([]) => {{
-        false
-    }};
-    ([(anon) $($rest:tt)*]) => {{
-        true
-    }};
-    ([$other:tt $($modifiers:tt)*]) => {
-        is_anon!([$($modifiers)*])
-    };
-}
-
-macro_rules! is_eval_always {
-    ([]) => {{
-        false
-    }};
-    ([(eval_always) $($rest:tt)*]) => {{
-        true
-    }};
-    ([$other:tt $($modifiers:tt)*]) => {
-        is_eval_always!([$($modifiers)*])
-    };
-}
-
-macro_rules! is_depth_limit {
-    ([]) => {{
-        false
-    }};
-    ([(depth_limit) $($rest:tt)*]) => {{
-        true
-    }};
-    ([$other:tt $($modifiers:tt)*]) => {
-        is_depth_limit!([$($modifiers)*])
-    };
-}
-
-macro_rules! is_feedable {
-    ([]) => {{
-        false
-    }};
-    ([(feedable) $($rest:tt)*]) => {{
-        true
-    }};
-    ([$other:tt $($modifiers:tt)*]) => {
-        is_feedable!([$($modifiers)*])
-    };
-}
-
-/// Expands to `$yes` if the `no_hash` modifier is present, or `$no` otherwise.
-macro_rules! if_no_hash {
-    ([] $yes:tt $no:tt) => { $no };
-    ([(no_hash) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes };
-    ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => {
-        if_no_hash!([$($modifiers)*] $yes $no)
-    }
-}
-
-macro_rules! call_provider {
-    ([][$tcx:expr, $name:ident, $key:expr]) => {{
-        ($tcx.query_system.local_providers.$name)($tcx, $key)
-    }};
-    ([(separate_provide_extern) $($rest:tt)*][$tcx:expr, $name:ident, $key:expr]) => {{
-        if let Some(key) = $key.as_local_key() {
-            ($tcx.query_system.local_providers.$name)($tcx, key)
-        } else {
-            ($tcx.query_system.extern_providers.$name)($tcx, $key)
-        }
-    }};
-    ([$other:tt $($modifiers:tt)*][$($args:tt)*]) => {
-        call_provider!([$($modifiers)*][$($args)*])
-    };
-}
-
-/// Expands to one of two token trees, depending on whether the current query
-/// has the `cache_on_disk_if` modifier.
-macro_rules! if_cache_on_disk {
-    ([] $yes:tt $no:tt) => {
-        $no
-    };
-    // The `cache_on_disk_if` modifier generates a synthetic `(cache_on_disk)`,
-    // modifier, for use by this macro and similar macros.
-    ([(cache_on_disk) $($rest:tt)*] $yes:tt $no:tt) => {
-        $yes
-    };
-    ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => {
-        if_cache_on_disk!([$($modifiers)*] $yes $no)
-    };
-}
-
-/// Conditionally expands to some token trees, if the current query has the
-/// `cache_on_disk_if` modifier.
-macro_rules! item_if_cache_on_disk {
-    ([] $($item:tt)*) => {};
-    ([(cache_on_disk) $($rest:tt)*] $($item:tt)*) => {
-        $($item)*
-    };
-    ([$other:tt $($modifiers:tt)*] $($item:tt)*) => {
-        item_if_cache_on_disk! { [$($modifiers)*] $($item)* }
-    };
-}
-
 /// The deferred part of a deferred query stack frame.
 fn mk_query_stack_frame_extra<'tcx, Cache>(
     (tcx, vtable, key): (TyCtxt<'tcx>, &'tcx QueryVTable<'tcx, Cache>, Cache::Key),
@@ -421,8 +302,20 @@ macro_rules! define_queries {
         queries {
             $(
                 $(#[$attr:meta])*
-                [$($modifiers:tt)*]
-                fn $name:ident($K:ty) -> $V:ty,
+                fn $name:ident($K:ty) -> $V:ty
+                {
+                    // Search for (QMODLIST) to find all occurrences of this query modifier list.
+                    anon: $anon:literal,
+                    arena_cache: $arena_cache:literal,
+                    cache_on_disk: $cache_on_disk:literal,
+                    cycle_error_handling: $cycle_error_handling:ident,
+                    depth_limit: $depth_limit:literal,
+                    eval_always: $eval_always:literal,
+                    feedable: $feedable:literal,
+                    no_hash: $no_hash:literal,
+                    return_result_from_ensure_ok: $return_result_from_ensure_ok:literal,
+                    separate_provide_extern: $separate_provide_extern:literal,
+                }
             )*
         }
         // Non-queries are unused here.
@@ -498,7 +391,16 @@ macro_rules! define_queries {
                     let _guard = tracing::span!(tracing::Level::TRACE, stringify!($name), ?key).entered();
 
                     // Call the actual provider function for this query.
-                    let provided_value = call_provider!([$($modifiers)*][tcx, $name, key]);
+
+                    #[cfg($separate_provide_extern)]
+                    let provided_value = if let Some(local_key) = key.as_local_key() {
+                        (tcx.query_system.local_providers.$name)(tcx, local_key)
+                    } else {
+                        (tcx.query_system.extern_providers.$name)(tcx, key)
+                    };
+
+                    #[cfg(not($separate_provide_extern))]
+                    let provided_value = (tcx.query_system.local_providers.$name)(tcx, key);
 
                     rustc_middle::ty::print::with_reduced_queries!({
                         tracing::trace!(?provided_value);
@@ -515,64 +417,67 @@ macro_rules! define_queries {
             {
                 QueryVTable {
                     name: stringify!($name),
-                    anon: is_anon!([$($modifiers)*]),
-                    eval_always: is_eval_always!([$($modifiers)*]),
-                    depth_limit: is_depth_limit!([$($modifiers)*]),
-                    feedable: is_feedable!([$($modifiers)*]),
+                    anon: $anon,
+                    eval_always: $eval_always,
+                    depth_limit: $depth_limit,
+                    feedable: $feedable,
                     dep_kind: dep_graph::DepKind::$name,
-                    cycle_error_handling: cycle_error_handling!([$($modifiers)*]),
+                    cycle_error_handling:
+                        rustc_middle::query::CycleErrorHandling::$cycle_error_handling,
                     state: Default::default(),
                     cache: Default::default(),
-                    will_cache_on_disk_for_key_fn: if_cache_on_disk!([$($modifiers)*] {
-                        Some(::rustc_middle::queries::_cache_on_disk_if_fns::$name)
-                    } {
-                        None
-                    }),
+
+                    #[cfg($cache_on_disk)]
+                    will_cache_on_disk_for_key_fn:
+                        Some(rustc_middle::queries::_cache_on_disk_if_fns::$name),
+                    #[cfg(not($cache_on_disk))]
+                    will_cache_on_disk_for_key_fn: None,
+
                     call_query_method_fn: |tcx, key| {
                         // Call the query method for its side-effect of loading a value
                         // from disk-cache; the caller doesn't need the value.
                         let _ = tcx.$name(key);
                     },
                     invoke_provider_fn: self::invoke_provider_fn::__rust_begin_short_backtrace,
-                    try_load_from_disk_fn: if_cache_on_disk!([$($modifiers)*] {
-                        Some(|tcx, key, prev_index, index| {
-                            // Check the `cache_on_disk_if` condition for this key.
-                            if !::rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) {
-                                return None;
-                            }
-
-                            let value: queries::$name::ProvidedValue<'tcx> =
-                                $crate::plumbing::try_load_from_disk(tcx, prev_index, index)?;
-
-                            // Arena-alloc the value if appropriate, and erase it.
-                            Some(queries::$name::provided_to_erased(tcx, value))
-                        })
-                    } {
-                        None
+
+                    #[cfg($cache_on_disk)]
+                    try_load_from_disk_fn: Some(|tcx, key, prev_index, index| {
+                        // Check the `cache_on_disk_if` condition for this key.
+                        if !rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) {
+                            return None;
+                        }
+
+                        let value: queries::$name::ProvidedValue<'tcx> =
+                            $crate::plumbing::try_load_from_disk(tcx, prev_index, index)?;
+
+                        // Arena-alloc the value if appropriate, and erase it.
+                        Some(queries::$name::provided_to_erased(tcx, value))
                     }),
-                    is_loadable_from_disk_fn: if_cache_on_disk!([$($modifiers)*] {
-                        Some(|tcx, key, index| -> bool {
-                            ::rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) &&
-                                $crate::plumbing::loadable_from_disk(tcx, index)
-                        })
-                    } {
-                        None
+                    #[cfg(not($cache_on_disk))]
+                    try_load_from_disk_fn: None,
+
+                    #[cfg($cache_on_disk)]
+                    is_loadable_from_disk_fn: Some(|tcx, key, index| -> bool {
+                        rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) &&
+                            $crate::plumbing::loadable_from_disk(tcx, index)
                     }),
+                    #[cfg(not($cache_on_disk))]
+                    is_loadable_from_disk_fn: None,
+
                     value_from_cycle_error: |tcx, cycle, guar| {
                         let result: queries::$name::Value<'tcx> =
                             FromCycleError::from_cycle_error(tcx, cycle, guar);
                         erase::erase_val(result)
                     },
-                    hash_value_fn: if_no_hash!(
-                        [$($modifiers)*]
-                        None
-                        {
-                            Some(|hcx, erased_value: &erase::Erased<queries::$name::Value<'tcx>>| {
-                                let value = erase::restore_val(*erased_value);
-                                rustc_middle::dep_graph::hash_result(hcx, &value)
-                            })
-                        }
-                    ),
+
+                    #[cfg($no_hash)]
+                    hash_value_fn: None,
+                    #[cfg(not($no_hash))]
+                    hash_value_fn: Some(|hcx, erased_value: &erase::Erased<queries::$name::Value<'tcx>>| {
+                        let value = erase::restore_val(*erased_value);
+                        rustc_middle::dep_graph::hash_result(hcx, &value)
+                    }),
+
                     format_value: |value| format!("{:?}", erase::restore_val::<queries::$name::Value<'tcx>>(*value)),
                     description_fn: $crate::queries::_description_fns::$name,
                     execute_query_fn: if incremental {
@@ -670,8 +575,8 @@ macro_rules! define_queries {
             query_result_index: &mut EncodedDepNodeIndex,
         ) {
             $(
-                item_if_cache_on_disk! {
-                    [$($modifiers)*]
+                #[cfg($cache_on_disk)]
+                {
                     $crate::plumbing::encode_query_results(
                         tcx,
                         &tcx.query_system.query_vtables.$name,